vm_machdep.c revision 278614
1/*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
40 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
41 */
42
43#include <sys/cdefs.h>
44__FBSDID("$FreeBSD: stable/10/sys/arm/arm/vm_machdep.c 278614 2015-02-12 04:15:55Z ian $");
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/kernel.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/proc.h>
52#include <sys/socketvar.h>
53#include <sys/sf_buf.h>
54#include <sys/syscall.h>
55#include <sys/sysctl.h>
56#include <sys/sysent.h>
57#include <sys/unistd.h>
58#include <machine/cpu.h>
59#include <machine/frame.h>
60#include <machine/pcb.h>
61#include <machine/sysarch.h>
62#include <sys/lock.h>
63#include <sys/mutex.h>
64
65#include <vm/vm.h>
66#include <vm/pmap.h>
67#include <vm/vm_extern.h>
68#include <vm/vm_kern.h>
69#include <vm/vm_page.h>
70#include <vm/vm_map.h>
71#include <vm/vm_param.h>
72#include <vm/vm_pageout.h>
73#include <vm/uma.h>
74#include <vm/uma_int.h>
75
76#include <machine/md_var.h>
77#include <machine/vfp.h>
78
79/*
80 * struct switchframe and trapframe must both be a multiple of 8
81 * for correct stack alignment.
82 */
83CTASSERT(sizeof(struct switchframe) == 48);
84CTASSERT(sizeof(struct trapframe) == 80);
85
86#ifndef NSFBUFS
87#define NSFBUFS		(512 + maxusers * 16)
88#endif
89
90static int nsfbufs;
91static int nsfbufspeak;
92static int nsfbufsused;
93
94SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
95    "Maximum number of sendfile(2) sf_bufs available");
96SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
97    "Number of sendfile(2) sf_bufs at peak usage");
98SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
99    "Number of sendfile(2) sf_bufs in use");
100
101static void     sf_buf_init(void *arg);
102SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
103
104LIST_HEAD(sf_head, sf_buf);
105
106/*
107 * A hash table of active sendfile(2) buffers
108 */
109static struct sf_head *sf_buf_active;
110static u_long sf_buf_hashmask;
111
112#define SF_BUF_HASH(m)  (((m) - vm_page_array) & sf_buf_hashmask)
113
114static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
115static u_int    sf_buf_alloc_want;
116
117/*
118 * A lock used to synchronize access to the hash table and free list
119 */
120static struct mtx sf_buf_lock;
121
122/*
123 * Finish a fork operation, with process p2 nearly set up.
124 * Copy and update the pcb, set up the stack so that the child
125 * ready to run and return to user mode.
126 */
127void
128cpu_fork(register struct thread *td1, register struct proc *p2,
129    struct thread *td2, int flags)
130{
131	struct pcb *pcb2;
132	struct trapframe *tf;
133	struct mdproc *mdp2;
134
135	if ((flags & RFPROC) == 0)
136		return;
137
138	/* Point the pcb to the top of the stack */
139	pcb2 = (struct pcb *)
140	    (td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1;
141#ifdef __XSCALE__
142#ifndef CPU_XSCALE_CORE3
143	pmap_use_minicache(td2->td_kstack, td2->td_kstack_pages * PAGE_SIZE);
144#endif
145#endif
146	td2->td_pcb = pcb2;
147
148	/* Clone td1's pcb */
149	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
150
151	/* Point to mdproc and then copy over td1's contents */
152	mdp2 = &p2->p_md;
153	bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2));
154
155	/* Point the frame to the stack in front of pcb and copy td1's frame */
156	td2->td_frame = (struct trapframe *)pcb2 - 1;
157	*td2->td_frame = *td1->td_frame;
158
159	/*
160	 * Create a new fresh stack for the new process.
161	 * Copy the trap frame for the return to user mode as if from a
162	 * syscall.  This copies most of the user mode register values.
163	 */
164	pmap_set_pcb_pagedir(vmspace_pmap(p2->p_vmspace), pcb2);
165	pcb2->pcb_regs.sf_r4 = (register_t)fork_return;
166	pcb2->pcb_regs.sf_r5 = (register_t)td2;
167	pcb2->pcb_regs.sf_lr = (register_t)fork_trampoline;
168	pcb2->pcb_regs.sf_sp = STACKALIGN(td2->td_frame);
169
170	pcb2->pcb_vfpcpu = -1;
171	pcb2->pcb_vfpstate.fpscr = VFPSCR_DN | VFPSCR_FZ;
172
173	tf = td2->td_frame;
174	tf->tf_spsr &= ~PSR_C;
175	tf->tf_r0 = 0;
176	tf->tf_r1 = 0;
177
178
179	/* Setup to release spin count in fork_exit(). */
180	td2->td_md.md_spinlock_count = 1;
181	td2->td_md.md_saved_cspr = PSR_SVC32_MODE;;
182#ifdef ARM_TP_ADDRESS
183	td2->td_md.md_tp = *(register_t *)ARM_TP_ADDRESS;
184#else
185	td2->td_md.md_tp = (register_t) get_tls();
186#endif
187}
188
/*
 * Machine-dependent thread swap-in hook.  This implementation has no
 * work to do; td is unused.
 */
void
cpu_thread_swapin(struct thread *td)
{

	/* Nothing to do. */
}
193
/*
 * Machine-dependent thread swap-out hook.  This implementation has no
 * work to do; td is unused.
 */
void
cpu_thread_swapout(struct thread *td)
{

	/* Nothing to do. */
}
198
199/*
200 * Detatch mapped page and release resources back to the system.
201 */
202void
203sf_buf_free(struct sf_buf *sf)
204{
205
206	 mtx_lock(&sf_buf_lock);
207	 sf->ref_count--;
208	 if (sf->ref_count == 0) {
209		 TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
210		 nsfbufsused--;
211		 pmap_kremove(sf->kva);
212		 sf->m = NULL;
213		 LIST_REMOVE(sf, list_entry);
214		 if (sf_buf_alloc_want > 0)
215			 wakeup(&sf_buf_freelist);
216	 }
217	 mtx_unlock(&sf_buf_lock);
218}
219
220/*
221 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
222 */
223static void
224sf_buf_init(void *arg)
225{
226	struct sf_buf *sf_bufs;
227	vm_offset_t sf_base;
228	int i;
229
230	nsfbufs = NSFBUFS;
231	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
232
233	sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
234	TAILQ_INIT(&sf_buf_freelist);
235	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
236	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
237	    M_NOWAIT | M_ZERO);
238	for (i = 0; i < nsfbufs; i++) {
239		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
240		TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
241	}
242	sf_buf_alloc_want = 0;
243	mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
244}
245
246/*
247 * Get an sf_buf from the freelist. Will block if none are available.
248 */
249struct sf_buf *
250sf_buf_alloc(struct vm_page *m, int flags)
251{
252	struct sf_head *hash_list;
253	struct sf_buf *sf;
254	int error;
255
256	hash_list = &sf_buf_active[SF_BUF_HASH(m)];
257	mtx_lock(&sf_buf_lock);
258	LIST_FOREACH(sf, hash_list, list_entry) {
259		if (sf->m == m) {
260			sf->ref_count++;
261			if (sf->ref_count == 1) {
262				TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
263				nsfbufsused++;
264				nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
265			}
266			goto done;
267		}
268	}
269	while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
270		if (flags & SFB_NOWAIT)
271			goto done;
272		sf_buf_alloc_want++;
273		SFSTAT_INC(sf_allocwait);
274		error = msleep(&sf_buf_freelist, &sf_buf_lock,
275		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
276		sf_buf_alloc_want--;
277
278
279		/*
280		 * If we got a signal, don't risk going back to sleep.
281		 */
282		if (error)
283			goto done;
284	}
285	TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
286	if (sf->m != NULL)
287		LIST_REMOVE(sf, list_entry);
288	LIST_INSERT_HEAD(hash_list, sf, list_entry);
289	sf->ref_count = 1;
290	sf->m = m;
291	nsfbufsused++;
292	nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
293	pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
294done:
295	mtx_unlock(&sf_buf_lock);
296	return (sf);
297}
298
299void
300cpu_set_syscall_retval(struct thread *td, int error)
301{
302	struct trapframe *frame;
303	int fixup;
304#ifdef __ARMEB__
305	u_int call;
306#endif
307
308	frame = td->td_frame;
309	fixup = 0;
310
311#ifdef __ARMEB__
312	/*
313	 * __syscall returns an off_t while most other syscalls return an
314	 * int. As an off_t is 64-bits and an int is 32-bits we need to
315	 * place the returned data into r1. As the lseek and frerebsd6_lseek
316	 * syscalls also return an off_t they do not need this fixup.
317	 */
318#ifdef __ARM_EABI__
319	call = frame->tf_r7;
320#else
321	call = *(u_int32_t *)(frame->tf_pc - INSN_SIZE) & 0x000fffff;
322#endif
323	if (call == SYS___syscall) {
324		register_t *ap = &frame->tf_r0;
325		register_t code = ap[_QUAD_LOWWORD];
326		if (td->td_proc->p_sysent->sv_mask)
327			code &= td->td_proc->p_sysent->sv_mask;
328		fixup = (code != SYS_freebsd6_lseek && code != SYS_lseek)
329		    ? 1 : 0;
330	}
331#endif
332
333	switch (error) {
334	case 0:
335		if (fixup) {
336			frame->tf_r0 = 0;
337			frame->tf_r1 = td->td_retval[0];
338		} else {
339			frame->tf_r0 = td->td_retval[0];
340			frame->tf_r1 = td->td_retval[1];
341		}
342		frame->tf_spsr &= ~PSR_C;   /* carry bit */
343		break;
344	case ERESTART:
345		/*
346		 * Reconstruct the pc to point at the swi.
347		 */
348		frame->tf_pc -= INSN_SIZE;
349		break;
350	case EJUSTRETURN:
351		/* nothing to do */
352		break;
353	default:
354		frame->tf_r0 = error;
355		frame->tf_spsr |= PSR_C;    /* carry bit */
356		break;
357	}
358}
359
360/*
361 * Initialize machine state (pcb and trap frame) for a new thread about to
362 * upcall. Put enough state in the new thread's PCB to get it to go back
363 * userret(), where we can intercept it again to set the return (upcall)
364 * Address and stack, along with those from upcals that are from other sources
365 * such as those generated in thread_userret() itself.
366 */
367void
368cpu_set_upcall(struct thread *td, struct thread *td0)
369{
370
371	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
372	bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb));
373
374	td->td_pcb->pcb_regs.sf_r4 = (register_t)fork_return;
375	td->td_pcb->pcb_regs.sf_r5 = (register_t)td;
376	td->td_pcb->pcb_regs.sf_lr = (register_t)fork_trampoline;
377	td->td_pcb->pcb_regs.sf_sp = STACKALIGN(td->td_frame);
378
379	td->td_frame->tf_spsr &= ~PSR_C;
380	td->td_frame->tf_r0 = 0;
381
382	/* Setup to release spin count in fork_exit(). */
383	td->td_md.md_spinlock_count = 1;
384	td->td_md.md_saved_cspr = PSR_SVC32_MODE;
385}
386
387/*
388 * Set that machine state for performing an upcall that has to
389 * be done in thread_userret() so that those upcalls generated
390 * in thread_userret() itself can be done as well.
391 */
392void
393cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
394	stack_t *stack)
395{
396	struct trapframe *tf = td->td_frame;
397
398	tf->tf_usr_sp = STACKALIGN((int)stack->ss_sp + stack->ss_size);
399	tf->tf_pc = (int)entry;
400	tf->tf_r0 = (int)arg;
401	tf->tf_spsr = PSR_USR32_MODE;
402}
403
404int
405cpu_set_user_tls(struct thread *td, void *tls_base)
406{
407
408	td->td_md.md_tp = (register_t)tls_base;
409	if (td == curthread) {
410		critical_enter();
411#ifdef ARM_TP_ADDRESS
412		*(register_t *)ARM_TP_ADDRESS = (register_t)tls_base;
413#else
414		set_tls((void *)tls_base);
415#endif
416		critical_exit();
417	}
418	return (0);
419}
420
/*
 * Machine-dependent thread exit hook.  This implementation has no work
 * to do; td is unused.
 */
void
cpu_thread_exit(struct thread *td)
{

	/* Nothing to do. */
}
425
426void
427cpu_thread_alloc(struct thread *td)
428{
429	td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages *
430	    PAGE_SIZE) - 1;
431	/*
432	 * Ensure td_frame is aligned to an 8 byte boundary as it will be
433	 * placed into the stack pointer which must be 8 byte aligned in
434	 * the ARM EABI.
435	 */
436	td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb) - 1;
437
438#ifdef __XSCALE__
439#ifndef CPU_XSCALE_CORE3
440	pmap_use_minicache(td->td_kstack, td->td_kstack_pages * PAGE_SIZE);
441#endif
442#endif
443}
444
/*
 * Machine-dependent thread free hook.  This implementation has no work
 * to do; td is unused.
 */
void
cpu_thread_free(struct thread *td)
{

	/* Nothing to do. */
}
449
/*
 * Machine-dependent thread clean hook.  This implementation has no work
 * to do; td is unused.
 */
void
cpu_thread_clean(struct thread *td)
{

	/* Nothing to do. */
}
454
455/*
456 * Intercept the return address from a freshly forked process that has NOT
457 * been scheduled yet.
458 *
459 * This is needed to make kernel threads stay in kernel mode.
460 */
461void
462cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
463{
464	td->td_pcb->pcb_regs.sf_r4 = (register_t)func;	/* function */
465	td->td_pcb->pcb_regs.sf_r5 = (register_t)arg;	/* first arg */
466}
467
468/*
469 * Software interrupt handler for queued VM system processing.
470 */
471void
472swi_vm(void *dummy)
473{
474
475	if (busdma_swi_pending)
476		busdma_swi();
477}
478
/*
 * Machine-dependent exit hook.  This implementation has no work to do;
 * td is unused.
 */
void
cpu_exit(struct thread *td)
{

	/* Nothing to do. */
}
483
484