vm_machdep.c revision 287945
1/*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * Copyright (c) 2001 Jake Burkholder.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * the Systems Programming Group of the University of Utah Computer
10 * Science Department, and William Jolitz.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
37 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
38 *	from: FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.167 2001/07/12
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: stable/10/sys/sparc64/sparc64/vm_machdep.c 287945 2015-09-17 23:31:44Z rstone $");
43
44#include "opt_pmap.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/bio.h>
49#include <sys/buf.h>
50#include <sys/kernel.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/sysent.h>
56#include <sys/sf_buf.h>
57#include <sys/sched.h>
58#include <sys/sysctl.h>
59#include <sys/unistd.h>
60#include <sys/vmmeter.h>
61
62#include <dev/ofw/openfirm.h>
63
64#include <vm/vm.h>
65#include <vm/vm_extern.h>
66#include <vm/pmap.h>
67#include <vm/vm_kern.h>
68#include <vm/vm_map.h>
69#include <vm/vm_page.h>
70#include <vm/vm_pageout.h>
71#include <vm/vm_param.h>
72#include <vm/uma.h>
73#include <vm/uma_int.h>
74
75#include <machine/cache.h>
76#include <machine/cpu.h>
77#include <machine/fp.h>
78#include <machine/frame.h>
79#include <machine/fsr.h>
80#include <machine/md_var.h>
81#include <machine/ofw_machdep.h>
82#include <machine/ofw_mem.h>
83#include <machine/pcb.h>
84#include <machine/tlb.h>
85#include <machine/tstate.h>
86
87#ifndef NSFBUFS
88#define	NSFBUFS		(512 + maxusers * 16)
89#endif
90
91static int nsfbufs;
92static int nsfbufspeak;
93static int nsfbufsused;
94
95SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
96    "Maximum number of sendfile(2) sf_bufs available");
97SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
98    "Number of sendfile(2) sf_bufs at peak usage");
99SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
100    "Number of sendfile(2) sf_bufs in use");
101
102static void	sf_buf_init(void *arg);
103SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
104
/*
 * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, with the
 * list head kept together with the sf_lock mutex.
 */
109static struct {
110	SLIST_HEAD(, sf_buf) sf_head;
111	struct mtx sf_lock;
112} sf_freelist;
113
114static u_int	sf_buf_alloc_want;
115
116PMAP_STATS_VAR(uma_nsmall_alloc);
117PMAP_STATS_VAR(uma_nsmall_alloc_oc);
118PMAP_STATS_VAR(uma_nsmall_free);
119
120void
121cpu_exit(struct thread *td)
122{
123	struct proc *p;
124
125	p = td->td_proc;
126	p->p_md.md_sigtramp = NULL;
127	if (p->p_md.md_utrap != NULL) {
128		utrap_free(p->p_md.md_utrap);
129		p->p_md.md_utrap = NULL;
130	}
131}
132
/*
 * Machine-dependent thread exit hook.  The body is empty; td is unused.
 */
void
cpu_thread_exit(struct thread *td)
{
}
138
/*
 * Machine-dependent thread cleanup hook.  The body is empty; td is unused.
 */
void
cpu_thread_clean(struct thread *td)
{
}
144
145void
146cpu_thread_alloc(struct thread *td)
147{
148	struct pcb *pcb;
149
150	pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
151	    sizeof(struct pcb)) & ~0x3fUL);
152	pcb->pcb_nsaved = 0;
153	td->td_frame = (struct trapframe *)pcb - 1;
154	td->td_pcb = pcb;
155}
156
/*
 * Machine-dependent thread free hook.  The body is empty; td is unused.
 */
void
cpu_thread_free(struct thread *td)
{
}
162
/*
 * Machine-dependent thread swap-in hook.  The body is empty; td is unused.
 */
void
cpu_thread_swapin(struct thread *td)
{
}
168
/*
 * Machine-dependent thread swap-out hook.  The body is empty; td is unused.
 */
void
cpu_thread_swapout(struct thread *td)
{
}
174
175void
176cpu_set_syscall_retval(struct thread *td, int error)
177{
178
179	switch (error) {
180	case 0:
181		td->td_frame->tf_out[0] = td->td_retval[0];
182		td->td_frame->tf_out[1] = td->td_retval[1];
183		td->td_frame->tf_tstate &= ~TSTATE_XCC_C;
184		break;
185
186	case ERESTART:
187		/*
188		 * Undo the tpc advancement we have done on syscall
189		 * enter, we want to reexecute the system call.
190		 */
191		td->td_frame->tf_tpc = td->td_pcb->pcb_tpc;
192		td->td_frame->tf_tnpc -= 4;
193		break;
194
195	case EJUSTRETURN:
196		break;
197
198	default:
199		if (td->td_proc->p_sysent->sv_errsize) {
200			if (error >= td->td_proc->p_sysent->sv_errsize)
201				error = -1;	/* XXX */
202			else
203				error = td->td_proc->p_sysent->sv_errtbl[error];
204		}
205		td->td_frame->tf_out[0] = error;
206		td->td_frame->tf_tstate |= TSTATE_XCC_C;
207		break;
208	}
209}
210
211void
212cpu_set_upcall(struct thread *td, struct thread *td0)
213{
214	struct trapframe *tf;
215	struct frame *fr;
216	struct pcb *pcb;
217
218	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
219
220	pcb = td->td_pcb;
221	tf = td->td_frame;
222	fr = (struct frame *)tf - 1;
223	fr->fr_local[0] = (u_long)fork_return;
224	fr->fr_local[1] = (u_long)td;
225	fr->fr_local[2] = (u_long)tf;
226	pcb->pcb_pc = (u_long)fork_trampoline - 8;
227	pcb->pcb_sp = (u_long)fr - SPOFF;
228
229	/* Setup to release the spin count in fork_exit(). */
230	td->td_md.md_spinlock_count = 1;
231	td->td_md.md_saved_pil = 0;
232}
233
234void
235cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
236    stack_t *stack)
237{
238	struct trapframe *tf;
239	uint64_t sp;
240
241	if (td == curthread)
242		flushw();
243	tf = td->td_frame;
244	sp = (uint64_t)stack->ss_sp + stack->ss_size;
245	tf->tf_out[0] = (uint64_t)arg;
246	tf->tf_out[6] = sp - SPOFF - sizeof(struct frame);
247	tf->tf_tpc = (uint64_t)entry;
248	tf->tf_tnpc = tf->tf_tpc + 4;
249
250	td->td_retval[0] = tf->tf_out[0];
251	td->td_retval[1] = tf->tf_out[1];
252}
253
254int
255cpu_set_user_tls(struct thread *td, void *tls_base)
256{
257
258	if (td == curthread)
259		flushw();
260	td->td_frame->tf_global[7] = (uint64_t)tls_base;
261	return (0);
262}
263
264/*
265 * Finish a fork operation, with process p2 nearly set up.
266 * Copy and update the pcb, set up the stack so that the child
267 * ready to run and return to user mode.
268 */
269void
270cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
271{
272	struct trapframe *tf;
273	struct frame *fp;
274	struct pcb *pcb1;
275	struct pcb *pcb2;
276	vm_offset_t sp;
277	int error;
278	int i;
279
280	KASSERT(td1 == curthread || td1 == &thread0,
281	    ("cpu_fork: p1 not curproc and not proc0"));
282
283	if ((flags & RFPROC) == 0)
284		return;
285
286	p2->p_md.md_sigtramp = td1->td_proc->p_md.md_sigtramp;
287	p2->p_md.md_utrap = utrap_hold(td1->td_proc->p_md.md_utrap);
288
289	/* The pcb must be aligned on a 64-byte boundary. */
290	pcb1 = td1->td_pcb;
291	pcb2 = (struct pcb *)((td2->td_kstack + td2->td_kstack_pages *
292	    PAGE_SIZE - sizeof(struct pcb)) & ~0x3fUL);
293	td2->td_pcb = pcb2;
294
295	/*
296	 * Ensure that p1's pcb is up to date.
297	 */
298	critical_enter();
299	if ((td1->td_frame->tf_fprs & FPRS_FEF) != 0)
300		savefpctx(pcb1->pcb_ufp);
301	critical_exit();
302	/* Make sure the copied windows are spilled. */
303	flushw();
304	/* Copy the pcb (this will copy the windows saved in the pcb, too). */
305	bcopy(pcb1, pcb2, sizeof(*pcb1));
306
307	/*
308	 * If we're creating a new user process and we're sharing the address
309	 * space, the parent's top most frame must be saved in the pcb.  The
310	 * child will pop the frame when it returns to user mode, and may
311	 * overwrite it with its own data causing much suffering for the
312	 * parent.  We check if its already in the pcb, and if not copy it
313	 * in.  Its unlikely that the copyin will fail, but if so there's not
314	 * much we can do.  The parent will likely crash soon anyway in that
315	 * case.
316	 */
317	if ((flags & RFMEM) != 0 && td1 != &thread0) {
318		sp = td1->td_frame->tf_sp;
319		for (i = 0; i < pcb1->pcb_nsaved; i++) {
320			if (pcb1->pcb_rwsp[i] == sp)
321				break;
322		}
323		if (i == pcb1->pcb_nsaved) {
324			error = copyin((caddr_t)sp + SPOFF, &pcb1->pcb_rw[i],
325			    sizeof(struct rwindow));
326			if (error == 0) {
327				pcb1->pcb_rwsp[i] = sp;
328				pcb1->pcb_nsaved++;
329			}
330		}
331	}
332
333	/*
334	 * Create a new fresh stack for the new process.
335	 * Copy the trap frame for the return to user mode as if from a
336	 * syscall.  This copies most of the user mode register values.
337	 */
338	tf = (struct trapframe *)pcb2 - 1;
339	bcopy(td1->td_frame, tf, sizeof(*tf));
340
341	tf->tf_out[0] = 0;			/* Child returns zero */
342	tf->tf_out[1] = 0;
343	tf->tf_tstate &= ~TSTATE_XCC_C;		/* success */
344	tf->tf_fprs = 0;
345
346	td2->td_frame = tf;
347	fp = (struct frame *)tf - 1;
348	fp->fr_local[0] = (u_long)fork_return;
349	fp->fr_local[1] = (u_long)td2;
350	fp->fr_local[2] = (u_long)tf;
351	/* Terminate stack traces at this frame. */
352	fp->fr_pc = fp->fr_fp = 0;
353	pcb2->pcb_sp = (u_long)fp - SPOFF;
354	pcb2->pcb_pc = (u_long)fork_trampoline - 8;
355
356	/* Setup to release the spin count in fork_exit(). */
357	td2->td_md.md_spinlock_count = 1;
358	td2->td_md.md_saved_pil = 0;
359
360	/*
361	 * Now, cpu_switch() can schedule the new process.
362	 */
363}
364
365void
366cpu_reset(void)
367{
368	static char bspec[64] = "";
369	phandle_t chosen;
370	static struct {
371		cell_t	name;
372		cell_t	nargs;
373		cell_t	nreturns;
374		cell_t	bootspec;
375	} args = {
376		(cell_t)"boot",
377		1,
378		0,
379		(cell_t)bspec
380	};
381
382	if ((chosen = OF_finddevice("/chosen")) != -1) {
383		if (OF_getprop(chosen, "bootpath", bspec, sizeof(bspec)) == -1)
384			bspec[0] = '\0';
385		bspec[sizeof(bspec) - 1] = '\0';
386	}
387
388	cpu_shutdown(&args);
389}
390
391/*
392 * Intercept the return address from a freshly forked process that has NOT
393 * been scheduled yet.
394 *
395 * This is needed to make kernel threads stay in kernel mode.
396 */
397void
398cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
399{
400	struct frame *fp;
401	struct pcb *pcb;
402
403	pcb = td->td_pcb;
404	fp = (struct frame *)(pcb->pcb_sp + SPOFF);
405	fp->fr_local[0] = (u_long)func;
406	fp->fr_local[1] = (u_long)arg;
407}
408
409int
410is_physical_memory(vm_paddr_t addr)
411{
412	struct ofw_mem_region *mr;
413
414	for (mr = sparc64_memreg; mr < sparc64_memreg + sparc64_nmemreg; mr++)
415		if (addr >= mr->mr_start && addr < mr->mr_start + mr->mr_size)
416			return (1);
417	return (0);
418}
419
420/*
421 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
422 */
423static void
424sf_buf_init(void *arg)
425{
426	struct sf_buf *sf_bufs;
427	vm_offset_t sf_base;
428	int i;
429
430	nsfbufs = NSFBUFS;
431	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
432
433	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
434	SLIST_INIT(&sf_freelist.sf_head);
435	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
436	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
437	    M_NOWAIT | M_ZERO);
438	for (i = 0; i < nsfbufs; i++) {
439		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
440		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
441	}
442	sf_buf_alloc_want = 0;
443}
444
445/*
446 * Get an sf_buf from the freelist.  Will block if none are available.
447 */
448struct sf_buf *
449sf_buf_alloc(struct vm_page *m, int flags)
450{
451	struct sf_buf *sf;
452	int error;
453
454	mtx_lock(&sf_freelist.sf_lock);
455	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
456		if (flags & SFB_NOWAIT)
457			break;
458		sf_buf_alloc_want++;
459		SFSTAT_INC(sf_allocwait);
460		error = msleep(&sf_freelist, &sf_freelist.sf_lock,
461		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
462		sf_buf_alloc_want--;
463
464		/*
465		 * If we got a signal, don't risk going back to sleep.
466		 */
467		if (error)
468			break;
469	}
470	if (sf != NULL) {
471		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
472		sf->m = m;
473		nsfbufsused++;
474		nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
475		pmap_qenter(sf->kva, &sf->m, 1);
476	}
477	mtx_unlock(&sf_freelist.sf_lock);
478	return (sf);
479}
480
481/*
482 * Release resources back to the system.
483 */
484void
485sf_buf_free(struct sf_buf *sf)
486{
487
488	pmap_qremove(sf->kva, 1);
489	mtx_lock(&sf_freelist.sf_lock);
490	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
491	nsfbufsused--;
492	if (sf_buf_alloc_want > 0)
493		wakeup(&sf_freelist);
494	mtx_unlock(&sf_freelist.sf_lock);
495}
496
/*
 * VM software interrupt handler.  There is nothing to do here because
 * busdma bounce buffers are not implemented; v is unused.
 */
void
swi_vm(void *v)
{
}
503
504void *
505uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
506{
507	vm_paddr_t pa;
508	vm_page_t m;
509	int pflags;
510	void *va;
511
512	PMAP_STATS_INC(uma_nsmall_alloc);
513
514	*flags = UMA_SLAB_PRIV;
515	pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;
516
517	for (;;) {
518		m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
519		if (m == NULL) {
520			if (wait & M_NOWAIT)
521				return (NULL);
522			else
523				VM_WAIT;
524		} else
525			break;
526	}
527
528	pa = VM_PAGE_TO_PHYS(m);
529	if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) {
530		KASSERT(m->md.colors[0] == 0 && m->md.colors[1] == 0,
531		    ("uma_small_alloc: free page %p still has mappings!", m));
532		PMAP_STATS_INC(uma_nsmall_alloc_oc);
533		m->md.color = DCACHE_COLOR(pa);
534		dcache_page_inval(pa);
535	}
536	va = (void *)TLB_PHYS_TO_DIRECT(pa);
537	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
538		cpu_block_zero(va, PAGE_SIZE);
539	return (va);
540}
541
542void
543uma_small_free(void *mem, vm_size_t size, u_int8_t flags)
544{
545	vm_page_t m;
546
547	PMAP_STATS_INC(uma_nsmall_free);
548	m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)mem));
549	m->wire_count--;
550	vm_page_free(m);
551	atomic_subtract_int(&cnt.v_wire_count, 1);
552}
553