/*
 * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 *	Kernel stack management routines.
 */

#include <mach/mach_host.h>
#include <mach/mach_types.h>
#include <mach/processor_set.h>

#include <kern/kern_types.h>
#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

#include <mach_debug.h>

/*
 *	We allocate stacks from generic kernel VM.
 *
 *	The stack_free_list can only be accessed at splsched,
 *	because stack_alloc_try/thread_invoke operate at splsched.
 */

decl_simple_lock_data(static,stack_lock_data)
#define stack_lock()		simple_lock(&stack_lock_data)
#define stack_unlock()		simple_unlock(&stack_lock_data)

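/*
 *	In addition to the global free list, each processor keeps a
 *	small private cache of up to STACK_CACHE_SIZE free stacks
 *	(see stack_free_stack and stack_alloc_try below), so the common
 *	paths can usually avoid taking stack_lock.
 */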
#define STACK_CACHE_SIZE	2

static vm_offset_t		stack_free_list;

static unsigned int		stack_free_count, stack_free_hiwat;	/* free list count and its high watermark */
static unsigned int		stack_hiwat;				/* highest stack_total observed */
unsigned int			stack_total;				/* current total count */
unsigned long long		stack_allocs;				/* total count of allocations */

static int			stack_fake_zone_index = -1;	/* index in zone_info array */

static unsigned int		stack_free_target;
static int				stack_free_delta;

static unsigned int		stack_new_count;						/* total new stack allocations */

static vm_offset_t		stack_addr_mask;

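/*
 *	Default stack geometry.  stack_init() may recompute these from
 *	the kernel_stack_pages boot-arg.
 */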
unsigned int			kernel_stack_pages = KERNEL_STACK_SIZE / PAGE_SIZE;
vm_offset_t			kernel_stack_size = KERNEL_STACK_SIZE;
vm_offset_t			kernel_stack_mask = -KERNEL_STACK_SIZE;
vm_offset_t			kernel_stack_depth_max = 0;

static inline void
STACK_ZINFO_PALLOC(thread_t thread)
{
	task_t task;
	zinfo_usage_t zinfo;

	ledger_credit(thread->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1 &&
	    (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		OSAddAtomic64(kernel_stack_size,
			      (int64_t *)&zinfo[stack_fake_zone_index].alloc);
}

static inline void
STACK_ZINFO_PFREE(thread_t thread)
{
	task_t task;
	zinfo_usage_t zinfo;

	ledger_debit(thread->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1 &&
	    (task = thread->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
		OSAddAtomic64(kernel_stack_size,
			      (int64_t *)&zinfo[stack_fake_zone_index].free);
}

static inline void
STACK_ZINFO_HANDOFF(thread_t from, thread_t to)
{
	ledger_debit(from->t_ledger, task_ledgers.tkm_private, kernel_stack_size);
	ledger_credit(to->t_ledger, task_ledgers.tkm_private, kernel_stack_size);

	if (stack_fake_zone_index != -1) {
		task_t task;
		zinfo_usage_t zinfo;

		if ((task = from->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
			OSAddAtomic64(kernel_stack_size,
				      (int64_t *)&zinfo[stack_fake_zone_index].free);

		if ((task = to->task) != NULL && (zinfo = task->tkm_zinfo) != NULL)
			OSAddAtomic64(kernel_stack_size,
				      (int64_t *)&zinfo[stack_fake_zone_index].alloc);
	}
}

/*
 *	The next field is at the base of the stack,
 *	so the low end is left unsullied.
 */
#define stack_next(stack)	\
	(*((vm_offset_t *)((stack) + kernel_stack_size) - 1))
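/*
 *	Stacks grow down, so the "base" is the highest address: the free
 *	list link occupies the last vm_offset_t of the region, and the
 *	deep, low-address end of a cached stack is never touched:
 *
 *	stack                                 stack + kernel_stack_size
 *	  |  ...low end left unsullied...  | link to next free stack |
 */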

static inline int
log2(vm_offset_t size)
{
	int	result;
	for (result = 0; size > 0; result++)
		size >>= 1;
	return result;
}

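/*
 *	Round size up to a power of two.  Note that log2() above returns
 *	the bit width of its argument rather than the integer log, so the
 *	result here is one power of two larger than a conventional
 *	roundup (e.g. 16K rounds to 32K).
 */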
static inline vm_offset_t
roundup_pow2(vm_offset_t size)
{
	return 1UL << (log2(size - 1) + 1);
}

static vm_offset_t stack_alloc_internal(void);
static void stack_free_stack(vm_offset_t);

void
stack_init(void)
{
	simple_lock_init(&stack_lock_data, 0);

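	/*
	 *	The stack size can be overridden at boot with the
	 *	kernel_stack_pages boot-arg (e.g. kernel_stack_pages=6);
	 *	kernel_stack_size is re-derived from it and then sanity
	 *	checked against the page size below.
	 */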
	if (PE_parse_boot_argn("kernel_stack_pages",
			       &kernel_stack_pages,
			       sizeof (kernel_stack_pages))) {
		kernel_stack_size = kernel_stack_pages * PAGE_SIZE;
		printf("stack_init: kernel_stack_pages=%d kernel_stack_size=%p\n",
			kernel_stack_pages, (void *) kernel_stack_size);
	}

	if (kernel_stack_size < round_page(kernel_stack_size))
		panic("stack_init: stack size %p not a multiple of page size %d\n",
			(void *) kernel_stack_size, PAGE_SIZE);

	stack_addr_mask = roundup_pow2(kernel_stack_size) - 1;
	kernel_stack_mask = ~stack_addr_mask;
}

/*
 *	stack_alloc:
 *
 *	Allocate a stack for a thread, may
 *	block.
 */

static vm_offset_t
stack_alloc_internal(void)
{
	vm_offset_t		stack;
	spl_t			s;
	int			guard_flags;

	s = splsched();
	stack_lock();
	stack_allocs++;
	stack = stack_free_list;
	if (stack != 0) {
		stack_free_list = stack_next(stack);
		stack_free_count--;
	}
	else {
		if (++stack_total > stack_hiwat)
			stack_hiwat = stack_total;
		stack_new_count++;
	}
	stack_free_delta--;
	stack_unlock();
	splx(s);

	if (stack == 0) {

		/*
		 * Request guard pages on either side of the stack.  Ask
		 * kernel_memory_allocate() for two extra pages to account
		 * for these.
		 */

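		/*
		 *	Resulting layout (addresses increase to the right):
		 *
		 *	[ guard page | kernel_stack_size bytes | guard page ]
		 *	^ address returned by kernel_memory_allocate()
		 */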
		guard_flags = KMA_GUARD_FIRST | KMA_GUARD_LAST;
		if (kernel_memory_allocate(kernel_map, &stack,
					   kernel_stack_size + (2*PAGE_SIZE),
					   stack_addr_mask,
					   KMA_KSTACK | KMA_KOBJECT | guard_flags)
		    != KERN_SUCCESS)
			panic("stack_alloc: kernel_memory_allocate");

		/*
		 * The stack address that comes back is the address of the lower
		 * guard page.  Skip past it to get the actual stack base address.
		 */

		stack += PAGE_SIZE;
	}
	return stack;
}

void
stack_alloc(
	thread_t	thread)
{

	assert(thread->kernel_stack == 0);
	machine_stack_attach(thread, stack_alloc_internal());
	STACK_ZINFO_PALLOC(thread);
}

void
stack_handoff(thread_t from, thread_t to)
{
	assert(from == current_thread());
	machine_stack_handoff(from, to);
	STACK_ZINFO_HANDOFF(from, to);
}

/*
 *	stack_free:
 *
 *	Detach and free the stack for a thread.
 */
void
stack_free(
	thread_t	thread)
{
	vm_offset_t		stack = machine_stack_detach(thread);

	assert(stack);
	if (stack != thread->reserved_stack) {
		STACK_ZINFO_PFREE(thread);
		stack_free_stack(stack);
	}
}

void
stack_free_reserved(
	thread_t	thread)
{
	if (thread->reserved_stack != thread->kernel_stack) {
		stack_free_stack(thread->reserved_stack);
		STACK_ZINFO_PFREE(thread);
	}
}

static void
stack_free_stack(
	vm_offset_t		stack)
{
	struct stack_cache	*cache;
	spl_t				s;

	s = splsched();
	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
	if (cache->count < STACK_CACHE_SIZE) {
		stack_next(stack) = cache->free;
		cache->free = stack;
		cache->count++;
	}
	else {
		stack_lock();
		stack_next(stack) = stack_free_list;
		stack_free_list = stack;
		if (++stack_free_count > stack_free_hiwat)
			stack_free_hiwat = stack_free_count;
		stack_free_delta++;
		stack_unlock();
	}
	splx(s);
}

/*
 *	stack_alloc_try:
 *
 *	Non-blocking attempt to allocate a
 *	stack for a thread.
 *
 *	Returns TRUE on success.
 *
 *	Called at splsched.
 */
boolean_t
stack_alloc_try(
	thread_t		thread)
{
	struct stack_cache	*cache;
	vm_offset_t			stack;

	cache = &PROCESSOR_DATA(current_processor(), stack_cache);
	stack = cache->free;
	if (stack != 0) {
		STACK_ZINFO_PALLOC(thread);
		cache->free = stack_next(stack);
		cache->count--;
	}
	else {
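		/*
		 *	Unlocked peek at the global free list; the result is
		 *	re-checked under stack_lock() before it is trusted.
		 */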
		if (stack_free_list != 0) {
			stack_lock();
			stack = stack_free_list;
			if (stack != 0) {
				STACK_ZINFO_PALLOC(thread);
				stack_free_list = stack_next(stack);
				stack_free_count--;
				stack_free_delta--;
			}
			stack_unlock();
		}
	}

	if (stack != 0 || (stack = thread->reserved_stack) != 0) {
		machine_stack_attach(thread, stack);
		return (TRUE);
	}

	return (FALSE);
}

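/*
 *	stack_collect runs at most once per compute_stack_target()
 *	period: compute_stack_target() advances stack_collect_tick,
 *	and last_stack_tick records the tick already collected.
 */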
static unsigned int		stack_collect_tick, last_stack_tick;

/*
 *	stack_collect:
 *
 *	Free excess kernel stacks, may
 *	block.
 */
void
stack_collect(void)
{
	if (stack_collect_tick != last_stack_tick) {
		unsigned int	target;
		vm_offset_t		stack;
		spl_t			s;

		s = splsched();
		stack_lock();

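		/*
		 *	The collection target is the computed free target,
		 *	plus room to refill every per-processor cache, padded
		 *	by the allocation/free churn seen since the last
		 *	target computation.
		 */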
		target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
		target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

		while (stack_free_count > target) {
			stack = stack_free_list;
			stack_free_list = stack_next(stack);
			stack_free_count--; stack_total--;
			stack_unlock();
			splx(s);

			/*
			 * Get the stack base address, then decrement by one page
			 * to account for the lower guard page.  Add two extra pages
			 * to the size to account for the guard pages on both ends
			 * that were originally requested when the stack was allocated
			 * back in stack_alloc().
			 */

			stack = (vm_offset_t)vm_map_trunc_page(stack);
			stack -= PAGE_SIZE;
			if (vm_map_remove(
				    kernel_map,
				    stack,
				    stack + kernel_stack_size+(2*PAGE_SIZE),
				    VM_MAP_REMOVE_KUNWIRE)
			    != KERN_SUCCESS)
				panic("stack_collect: vm_map_remove");
			stack = 0;

			s = splsched();
			stack_lock();

			target = stack_free_target + (STACK_CACHE_SIZE * processor_count);
			target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;
		}

		last_stack_tick = stack_collect_tick;

		stack_unlock();
		splx(s);
	}
}

/*
 *	compute_stack_target:
 *
 *	Computes a new target free list count
 *	based on recent alloc / free activity.
 *
 *	Limits stack collection to once per
 *	computation period.
 */
void
compute_stack_target(
__unused void		*arg)
{
	spl_t		s;

	s = splsched();
	stack_lock();

	if (stack_free_target > 5)
		stack_free_target = (4 * stack_free_target) / 5;
	else
	if (stack_free_target > 0)
		stack_free_target--;

	stack_free_target += (stack_free_delta >= 0)? stack_free_delta: -stack_free_delta;

	stack_free_delta = 0;
	stack_collect_tick++;

	stack_unlock();
	splx(s);
}

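/*
 *	Kernel stacks do not come from a zalloc zone, so the routines
 *	below export equivalent usage figures through a "fake" zone slot
 *	(registered by stack_fake_zone_init) for per-zone statistics
 *	reporting.
 */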
void
stack_fake_zone_init(int zone_index)
{
	stack_fake_zone_index = zone_index;
}

void
stack_fake_zone_info(int *count,
		     vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
		     uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
	unsigned int	total, hiwat, free;
	unsigned long long all;
	spl_t			s;

	s = splsched();
	stack_lock();
	all = stack_allocs;
	total = stack_total;
	hiwat = stack_hiwat;
	free = stack_free_count;
	stack_unlock();
	splx(s);

	*count      = total - free;
	*cur_size   = kernel_stack_size * total;
	*max_size   = kernel_stack_size * hiwat;
	*elem_size  = kernel_stack_size;
	*alloc_size = kernel_stack_size;
	*sum_size = all * kernel_stack_size;

	*collectable = 1;
	*exhaustable = 0;
	*caller_acct = 1;
}

/* OBSOLETE */
void	stack_privilege(
			thread_t	thread);

void
stack_privilege(
	__unused thread_t	thread)
{
	/* OBSOLETE */
}

/*
 * Return info on stack usage for threads in a specific processor set
 */
kern_return_t
processor_set_stack_usage(
	processor_set_t	pset,
	unsigned int	*totalp,
	vm_size_t	*spacep,
	vm_size_t	*residentp,
	vm_size_t	*maxusagep,
	vm_offset_t	*maxstackp)
{
#if !MACH_DEBUG
	return KERN_NOT_SUPPORTED;
#else
	unsigned int total;
	vm_size_t maxusage;
	vm_offset_t maxstack;

	register thread_t *thread_list;
	register thread_t thread;

	unsigned int actual;	/* this many threads */
	unsigned int i;

	vm_size_t size, size_needed;
	void *addr;

	if (pset == PROCESSOR_SET_NULL || pset != &pset0)
		return KERN_INVALID_ARGUMENT;

	size = 0;
	addr = NULL;

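	/*
	 *	Size a snapshot buffer for the thread list without holding
	 *	tasks_threads_lock across the allocation: take the lock to
	 *	read threads_count, drop it to kalloc(), and retry if the
	 *	count grew in the meantime.
	 */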
	for (;;) {
		lck_mtx_lock(&tasks_threads_lock);

		actual = threads_count;

		/* do we have the memory we need? */

		size_needed = actual * sizeof(thread_t);
		if (size_needed <= size)
			break;

		lck_mtx_unlock(&tasks_threads_lock);

		if (size != 0)
			kfree(addr, size);

		assert(size_needed > 0);
		size = size_needed;

		addr = kalloc(size);
		if (addr == 0)
			return KERN_RESOURCE_SHORTAGE;
	}

	/* OK, have memory and list is locked */
	thread_list = (thread_t *) addr;
	for (i = 0, thread = (thread_t) queue_first(&threads);
					!queue_end(&threads, (queue_entry_t) thread);
					thread = (thread_t) queue_next(&thread->threads)) {
		thread_reference_internal(thread);
		thread_list[i++] = thread;
	}
	assert(i <= actual);

	lck_mtx_unlock(&tasks_threads_lock);

	/* calculate maxusage and free thread references */

	total = 0;
	maxusage = 0;
	maxstack = 0;
	while (i > 0) {
		thread_t threadref = thread_list[--i];

		if (threadref->kernel_stack != 0)
			total++;

		thread_deallocate(threadref);
	}

	if (size != 0)
		kfree(addr, size);

	*totalp = total;
	*residentp = *spacep = total * round_page(kernel_stack_size);
	*maxusagep = maxusage;
	*maxstackp = maxstack;
	return KERN_SUCCESS;

#endif	/* MACH_DEBUG */
}

vm_offset_t min_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_min(kernel_map);
}

vm_offset_t max_valid_stack_address(void)
{
	return (vm_offset_t)vm_map_max(kernel_map);
}