vm_machdep.c revision 266277
1/*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
40 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
41 */
42
43#include <sys/cdefs.h>
44__FBSDID("$FreeBSD: stable/10/sys/arm/arm/vm_machdep.c 266277 2014-05-17 00:53:12Z ian $");
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/kernel.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/proc.h>
52#include <sys/socketvar.h>
53#include <sys/sf_buf.h>
54#include <sys/syscall.h>
55#include <sys/sysctl.h>
56#include <sys/sysent.h>
57#include <sys/unistd.h>
58#include <machine/cpu.h>
59#include <machine/frame.h>
60#include <machine/pcb.h>
61#include <machine/sysarch.h>
62#include <sys/lock.h>
63#include <sys/mutex.h>
64
65#include <vm/vm.h>
66#include <vm/pmap.h>
67#include <vm/vm_extern.h>
68#include <vm/vm_kern.h>
69#include <vm/vm_page.h>
70#include <vm/vm_map.h>
71#include <vm/vm_param.h>
72#include <vm/vm_pageout.h>
73#include <vm/uma.h>
74#include <vm/uma_int.h>
75
76#include <machine/md_var.h>
77
78/*
79 * struct switchframe and trapframe must both be a multiple of 8
80 * for correct stack alignment.
81 */
82CTASSERT(sizeof(struct switchframe) == 24);
83CTASSERT(sizeof(struct trapframe) == 80);
84
85#ifndef NSFBUFS
86#define NSFBUFS		(512 + maxusers * 16)
87#endif
88
89static int nsfbufs;
90static int nsfbufspeak;
91static int nsfbufsused;
92
93SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
94    "Maximum number of sendfile(2) sf_bufs available");
95SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
96    "Number of sendfile(2) sf_bufs at peak usage");
97SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
98    "Number of sendfile(2) sf_bufs in use");
99
100static void     sf_buf_init(void *arg);
101SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
102
103LIST_HEAD(sf_head, sf_buf);
104
105/*
106 * A hash table of active sendfile(2) buffers
107 */
108static struct sf_head *sf_buf_active;
109static u_long sf_buf_hashmask;
110
111#define SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)
112
113static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
114static u_int    sf_buf_alloc_want;
115
116/*
117 * A lock used to synchronize access to the hash table and free list
118 */
119static struct mtx sf_buf_lock;
120
121/*
122 * Finish a fork operation, with process p2 nearly set up.
123 * Copy and update the pcb, set up the stack so that the child
124 * ready to run and return to user mode.
125 */
126void
127cpu_fork(register struct thread *td1, register struct proc *p2,
128    struct thread *td2, int flags)
129{
130	struct pcb *pcb2;
131	struct trapframe *tf;
132	struct switchframe *sf;
133	struct mdproc *mdp2;
134
135	if ((flags & RFPROC) == 0)
136		return;
137	pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1;
138#ifdef __XSCALE__
139#ifndef CPU_XSCALE_CORE3
140	pmap_use_minicache(td2->td_kstack, td2->td_kstack_pages * PAGE_SIZE);
141#endif
142#endif
143	td2->td_pcb = pcb2;
144	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
145	mdp2 = &p2->p_md;
146	bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2));
147	pcb2->un_32.pcb32_sp = td2->td_kstack +
148	    USPACE_SVC_STACK_TOP - sizeof(*pcb2);
149	pcb2->pcb_vfpcpu = -1;
150	pmap_activate(td2);
151	td2->td_frame = tf = (struct trapframe *)STACKALIGN(
152	    pcb2->un_32.pcb32_sp - sizeof(struct trapframe));
153	*tf = *td1->td_frame;
154	sf = (struct switchframe *)tf - 1;
155	sf->sf_r4 = (u_int)fork_return;
156	sf->sf_r5 = (u_int)td2;
157	sf->sf_pc = (u_int)fork_trampoline;
158	tf->tf_spsr &= ~PSR_C_bit;
159	tf->tf_r0 = 0;
160	tf->tf_r1 = 0;
161	pcb2->un_32.pcb32_sp = (u_int)sf;
162	KASSERT((pcb2->un_32.pcb32_sp & 7) == 0,
163	    ("cpu_fork: Incorrect stack alignment"));
164
165	/* Setup to release spin count in fork_exit(). */
166	td2->td_md.md_spinlock_count = 1;
167	td2->td_md.md_saved_cspr = 0;
168#ifdef ARM_TP_ADDRESS
169	td2->td_md.md_tp = *(register_t *)ARM_TP_ADDRESS;
170#else
171	td2->td_md.md_tp = (register_t) get_tls();
172#endif
173}
174
/*
 * Machine-dependent thread swap-in hook (going by its name).  This
 * implementation performs no work; the body is intentionally empty.
 */
void
cpu_thread_swapin(struct thread *td)
{
}
179
/*
 * Machine-dependent thread swap-out hook (going by its name).  This
 * implementation performs no work; the body is intentionally empty.
 */
void
cpu_thread_swapout(struct thread *td)
{
}
184
185/*
186 * Detatch mapped page and release resources back to the system.
187 */
188void
189sf_buf_free(struct sf_buf *sf)
190{
191
192	 mtx_lock(&sf_buf_lock);
193	 sf->ref_count--;
194	 if (sf->ref_count == 0) {
195		 TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
196		 nsfbufsused--;
197		 pmap_kremove(sf->kva);
198		 sf->m = NULL;
199		 LIST_REMOVE(sf, list_entry);
200		 if (sf_buf_alloc_want > 0)
201			 wakeup(&sf_buf_freelist);
202	 }
203	 mtx_unlock(&sf_buf_lock);
204}
205
206/*
207 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
208 */
209static void
210sf_buf_init(void *arg)
211{
212	struct sf_buf *sf_bufs;
213	vm_offset_t sf_base;
214	int i;
215
216	nsfbufs = NSFBUFS;
217	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
218
219	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
220	TAILQ_INIT(&sf_buf_freelist);
221	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
222	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
223	    M_NOWAIT | M_ZERO);
224	for (i = 0; i < nsfbufs; i++) {
225		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
226		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
227	}
228	sf_buf_alloc_want = 0;
229	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
230}
231
232/*
233 * Get an sf_buf from the freelist. Will block if none are available.
234 */
235struct sf_buf *
236sf_buf_alloc(struct vm_page *m, int flags)
237{
238	struct sf_head *hash_list;
239	struct sf_buf *sf;
240	int error;
241
242	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
243	mtx_lock(&sf_buf_lock);
244	LIST_FOREACH(sf, hash_list, list_entry) {
245		if (sf->m == m) {
246			sf->ref_count++;
247			if (sf->ref_count == 1) {
248				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
249				nsfbufsused++;
250				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
251			}
252			goto done;
253		}
254	}
255	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
256		if (flags & SFB_NOWAIT)
257			goto done;
258		sf_buf_alloc_want++;
259		SFSTAT_INC(sf_allocwait);
260		error = msleep(&sf_buf_freelist, &sf_buf_lock,
261		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
262		sf_buf_alloc_want--;
263
264
265		/*
266		 * If we got a signal, don't risk going back to sleep.
267		 */
268		if (error)
269			goto done;
270	}
271	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
272	if (sf->m != NULL)
273		LIST_REMOVE(sf, list_entry);
274	LIST_INSERT_HEAD(hash_list, sf, list_entry);
275	sf->ref_count = 1;
276	sf->m = m;
277	nsfbufsused++;
278	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
279	pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
280done:
281	mtx_unlock(&sf_buf_lock);
282	return (sf);
283}
284
285void
286cpu_set_syscall_retval(struct thread *td, int error)
287{
288	struct trapframe *frame;
289	int fixup;
290#ifdef __ARMEB__
291	u_int call;
292#endif
293
294	frame = td->td_frame;
295	fixup = 0;
296
297#ifdef __ARMEB__
298	/*
299	 * __syscall returns an off_t while most other syscalls return an
300	 * int. As an off_t is 64-bits and an int is 32-bits we need to
301	 * place the returned data into r1. As the lseek and frerebsd6_lseek
302	 * syscalls also return an off_t they do not need this fixup.
303	 */
304#ifdef __ARM_EABI__
305	call = frame->tf_r7;
306#else
307	call = *(u_int32_t *)(frame->tf_pc - INSN_SIZE) & 0x000fffff;
308#endif
309	if (call == SYS___syscall) {
310		register_t *ap = &frame->tf_r0;
311		register_t code = ap[_QUAD_LOWWORD];
312		if (td->td_proc->p_sysent->sv_mask)
313			code &= td->td_proc->p_sysent->sv_mask;
314		fixup = (code != SYS_freebsd6_lseek && code != SYS_lseek)
315		    ? 1 : 0;
316	}
317#endif
318
319	switch (error) {
320	case 0:
321		if (fixup) {
322			frame->tf_r0 = 0;
323			frame->tf_r1 = td->td_retval[0];
324		} else {
325			frame->tf_r0 = td->td_retval[0];
326			frame->tf_r1 = td->td_retval[1];
327		}
328		frame->tf_spsr &= ~PSR_C_bit;   /* carry bit */
329		break;
330	case ERESTART:
331		/*
332		 * Reconstruct the pc to point at the swi.
333		 */
334		frame->tf_pc -= INSN_SIZE;
335		break;
336	case EJUSTRETURN:
337		/* nothing to do */
338		break;
339	default:
340		frame->tf_r0 = error;
341		frame->tf_spsr |= PSR_C_bit;    /* carry bit */
342		break;
343	}
344}
345
346/*
347 * Initialize machine state (pcb and trap frame) for a new thread about to
348 * upcall. Put enough state in the new thread's PCB to get it to go back
349 * userret(), where we can intercept it again to set the return (upcall)
350 * Address and stack, along with those from upcals that are from other sources
351 * such as those generated in thread_userret() itself.
352 */
353void
354cpu_set_upcall(struct thread *td, struct thread *td0)
355{
356	struct trapframe *tf;
357	struct switchframe *sf;
358
359	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
360	bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb));
361	tf = td->td_frame;
362	sf = (struct switchframe *)tf - 1;
363	sf->sf_r4 = (u_int)fork_return;
364	sf->sf_r5 = (u_int)td;
365	sf->sf_pc = (u_int)fork_trampoline;
366	tf->tf_spsr &= ~PSR_C_bit;
367	tf->tf_r0 = 0;
368	td->td_pcb->un_32.pcb32_sp = (u_int)sf;
369	KASSERT((td->td_pcb->un_32.pcb32_sp & 7) == 0,
370	    ("cpu_set_upcall: Incorrect stack alignment"));
371
372	/* Setup to release spin count in fork_exit(). */
373	td->td_md.md_spinlock_count = 1;
374	td->td_md.md_saved_cspr = 0;
375}
376
377/*
378 * Set that machine state for performing an upcall that has to
379 * be done in thread_userret() so that those upcalls generated
380 * in thread_userret() itself can be done as well.
381 */
382void
383cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
384	stack_t *stack)
385{
386	struct trapframe *tf = td->td_frame;
387
388	tf->tf_usr_sp = STACKALIGN((int)stack->ss_sp + stack->ss_size
389	    - sizeof(struct trapframe));
390	tf->tf_pc = (int)entry;
391	tf->tf_r0 = (int)arg;
392	tf->tf_spsr = PSR_USR32_MODE;
393}
394
395int
396cpu_set_user_tls(struct thread *td, void *tls_base)
397{
398
399	td->td_md.md_tp = (register_t)tls_base;
400	if (td == curthread) {
401		critical_enter();
402#ifdef ARM_TP_ADDRESS
403		*(register_t *)ARM_TP_ADDRESS = (register_t)tls_base;
404#else
405		set_tls((void *)tls_base);
406#endif
407		critical_exit();
408	}
409	return (0);
410}
411
/*
 * Machine-dependent thread exit hook (going by its name).  This
 * implementation performs no work; the body is intentionally empty.
 */
void
cpu_thread_exit(struct thread *td)
{
}
416
417void
418cpu_thread_alloc(struct thread *td)
419{
420	td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages *
421	    PAGE_SIZE) - 1;
422	/*
423	 * Ensure td_frame is aligned to an 8 byte boundary as it will be
424	 * placed into the stack pointer which must be 8 byte aligned in
425	 * the ARM EABI.
426	 */
427	td->td_frame = (struct trapframe *)STACKALIGN((u_int)td->td_kstack +
428	    USPACE_SVC_STACK_TOP - sizeof(struct pcb) -
429	    sizeof(struct trapframe));
430#ifdef __XSCALE__
431#ifndef CPU_XSCALE_CORE3
432	pmap_use_minicache(td->td_kstack, td->td_kstack_pages * PAGE_SIZE);
433#endif
434#endif
435}
436
/*
 * Machine-dependent thread free hook (going by its name).  This
 * implementation performs no work; the body is intentionally empty.
 */
void
cpu_thread_free(struct thread *td)
{
}
441
/*
 * Machine-dependent thread clean-up hook (going by its name).  This
 * implementation performs no work; the body is intentionally empty.
 */
void
cpu_thread_clean(struct thread *td)
{
}
446
447/*
448 * Intercept the return address from a freshly forked process that has NOT
449 * been scheduled yet.
450 *
451 * This is needed to make kernel threads stay in kernel mode.
452 */
453void
454cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
455{
456	struct switchframe *sf;
457	struct trapframe *tf;
458
459	tf = td->td_frame;
460	sf = (struct switchframe *)tf - 1;
461	sf->sf_r4 = (u_int)func;
462	sf->sf_r5 = (u_int)arg;
463	td->td_pcb->un_32.pcb32_sp = (u_int)sf;
464	KASSERT((td->td_pcb->un_32.pcb32_sp & 7) == 0,
465	    ("cpu_set_fork_handler: Incorrect stack alignment"));
466}
467
468/*
469 * Software interrupt handler for queued VM system processing.
470 */
471void
472swi_vm(void *dummy)
473{
474
475	if (busdma_swi_pending)
476		busdma_swi();
477}
478
/*
 * Machine-dependent exit hook (going by its name).  This implementation
 * performs no work; the body is intentionally empty.
 */
void
cpu_exit(struct thread *td)
{
}
483
484