/* vm_machdep.c revision 287945 */
1/*- 2 * Copyright (c) 1982, 1986 The Regents of the University of California. 3 * Copyright (c) 1989, 1990 William Jolitz 4 * Copyright (c) 1994 John Dyson 5 * Copyright (c) 2001 Jake Burkholder. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * the Systems Programming Group of the University of Utah Computer 10 * Science Department, and William Jolitz. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 *
 * from:	@(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 *		Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 * from: FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.167 2001/07/12
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/sparc64/sparc64/vm_machdep.c 287945 2015-09-17 23:31:44Z rstone $");

#include "opt_pmap.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/sf_buf.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/vmmeter.h>

#include <dev/ofw/openfirm.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#include <machine/cache.h>
#include <machine/cpu.h>
#include <machine/fp.h>
#include <machine/frame.h>
#include <machine/fsr.h>
#include <machine/md_var.h>
#include <machine/ofw_machdep.h>
#include <machine/ofw_mem.h>
#include <machine/pcb.h>
#include <machine/tlb.h>
#include <machine/tstate.h>

/* Default size of the sendfile(2) sf_buf pool; scales with maxusers. */
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif

static int nsfbufs;		/* Total number of sf_bufs in the pool. */
static int nsfbufspeak;		/* High-water mark of sf_bufs in use. */
static int nsfbufsused;		/* Current number of sf_bufs in use. */

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");

/* The pool is populated once mbufs are up (SI_SUB_MBUF). */
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);

/*
 * Expanded
sf_freelist head.  Really an SLIST_HEAD() in disguise, with the
 * sf_lock mutex protecting it.
 */
static struct {
	SLIST_HEAD(, sf_buf) sf_head;	/* Free sf_bufs. */
	struct mtx sf_lock;		/* Protects sf_head and counters. */
} sf_freelist;

/* Number of threads sleeping in sf_buf_alloc() waiting for a free sf_buf. */
static u_int sf_buf_alloc_want;

PMAP_STATS_VAR(uma_nsmall_alloc);
PMAP_STATS_VAR(uma_nsmall_alloc_oc);
PMAP_STATS_VAR(uma_nsmall_free);

/*
 * Release machine-dependent per-process state at process exit: forget the
 * signal trampoline and drop the user trap table, if one was installed.
 */
void
cpu_exit(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;
	p->p_md.md_sigtramp = NULL;
	if (p->p_md.md_utrap != NULL) {
		utrap_free(p->p_md.md_utrap);
		p->p_md.md_utrap = NULL;
	}
}

/* No machine-dependent work on thread exit. */
void
cpu_thread_exit(struct thread *td)
{

}

/* No machine-dependent state to clean when a thread is recycled. */
void
cpu_thread_clean(struct thread *td)
{

}

/*
 * Carve the pcb and the initial trap frame out of the top of the thread's
 * kernel stack.  The pcb is aligned down to a 64-byte boundary (the mask
 * ~0x3fUL) as required for the saved-window area.
 */
void
cpu_thread_alloc(struct thread *td)
{
	struct pcb *pcb;

	pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
	    sizeof(struct pcb)) & ~0x3fUL);
	pcb->pcb_nsaved = 0;
	td->td_frame = (struct trapframe *)pcb - 1;
	td->td_pcb = pcb;
}

/* Nothing to free; the pcb lives inside the kernel stack. */
void
cpu_thread_free(struct thread *td)
{

}

void
cpu_thread_swapin(struct thread *td)
{

}

void
cpu_thread_swapout(struct thread *td)
{

}

/*
 * Store the syscall result in the trap frame for the return to user mode:
 * %o0/%o1 carry the return values and the %xcc carry bit flags an error.
 */
void
cpu_set_syscall_retval(struct thread *td, int error)
{

	switch (error) {
	case 0:
		td->td_frame->tf_out[0] = td->td_retval[0];
		td->td_frame->tf_out[1] = td->td_retval[1];
		td->td_frame->tf_tstate &= ~TSTATE_XCC_C;
		break;

	case ERESTART:
		/*
		 * Undo the tpc advancement we have done on syscall
		 * enter, we want to reexecute the system call.
		 */
		td->td_frame->tf_tpc = td->td_pcb->pcb_tpc;
		td->td_frame->tf_tnpc -= 4;
		break;

	case EJUSTRETURN:
		break;

	default:
		/* Translate through the ABI's errno table, if it has one. */
		if (td->td_proc->p_sysent->sv_errsize) {
			if (error >= td->td_proc->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = td->td_proc->p_sysent->sv_errtbl[error];
		}
		td->td_frame->tf_out[0] = error;
		td->td_frame->tf_tstate |= TSTATE_XCC_C;
		break;
	}
}

/*
 * Initialize the pcb and kernel stack of a new thread so that it starts in
 * fork_trampoline(), returning to user mode with a copy of td0's trap frame.
 */
void
cpu_set_upcall(struct thread *td, struct thread *td0)
{
	struct trapframe *tf;
	struct frame *fr;
	struct pcb *pcb;

	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));

	pcb = td->td_pcb;
	tf = td->td_frame;
	fr = (struct frame *)tf - 1;
	/* fork_trampoline() fetches its arguments from these locals. */
	fr->fr_local[0] = (u_long)fork_return;
	fr->fr_local[1] = (u_long)td;
	fr->fr_local[2] = (u_long)tf;
	pcb->pcb_pc = (u_long)fork_trampoline - 8;
	pcb->pcb_sp = (u_long)fr - SPOFF;

	/* Setup to release the spin count in fork_exit(). */
	td->td_md.md_spinlock_count = 1;
	td->td_md.md_saved_pil = 0;
}

/*
 * Arrange for td to enter user mode at entry(arg) on the given user stack.
 */
void
cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
    stack_t *stack)
{
	struct trapframe *tf;
	uint64_t sp;

	/* Spill the register windows before rewriting the frame. */
	if (td == curthread)
		flushw();
	tf = td->td_frame;
	sp = (uint64_t)stack->ss_sp + stack->ss_size;
	tf->tf_out[0] = (uint64_t)arg;
	tf->tf_out[6] = sp - SPOFF - sizeof(struct frame);
	tf->tf_tpc = (uint64_t)entry;
	tf->tf_tnpc = tf->tf_tpc + 4;

	td->td_retval[0] = tf->tf_out[0];
	td->td_retval[1] = tf->tf_out[1];
}

/*
 * Install the thread-local storage base in %g7 (tf_global[7]).
 */
int
cpu_set_user_tls(struct thread *td, void *tls_base)
{

	if (td == curthread)
		flushw();
	td->td_frame->tf_global[7] = (uint64_t)tls_base;
	return (0);
}

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
{
	struct trapframe *tf;
	struct frame *fp;
	struct pcb *pcb1;
	struct pcb *pcb2;
	vm_offset_t sp;
	int error;
	int i;

	KASSERT(td1 == curthread || td1 == &thread0,
	    ("cpu_fork: p1 not curproc and not proc0"));

	if ((flags & RFPROC) == 0)
		return;

	/* The child inherits the signal trampoline and user trap table. */
	p2->p_md.md_sigtramp = td1->td_proc->p_md.md_sigtramp;
	p2->p_md.md_utrap = utrap_hold(td1->td_proc->p_md.md_utrap);

	/* The pcb must be aligned on a 64-byte boundary. */
	pcb1 = td1->td_pcb;
	pcb2 = (struct pcb *)((td2->td_kstack + td2->td_kstack_pages *
	    PAGE_SIZE - sizeof(struct pcb)) & ~0x3fUL);
	td2->td_pcb = pcb2;

	/*
	 * Ensure that p1's pcb is up to date: save the FPU context if it is
	 * live, then spill the register windows so they land in the pcb.
	 */
	critical_enter();
	if ((td1->td_frame->tf_fprs & FPRS_FEF) != 0)
		savefpctx(pcb1->pcb_ufp);
	critical_exit();
	/* Make sure the copied windows are spilled. */
	flushw();
	/* Copy the pcb (this will copy the windows saved in the pcb, too). */
	bcopy(pcb1, pcb2, sizeof(*pcb1));

	/*
	 * If we're creating a new user process and we're sharing the address
	 * space, the parent's top most frame must be saved in the pcb.  The
	 * child will pop the frame when it returns to user mode, and may
	 * overwrite it with its own data causing much suffering for the
	 * parent.  We check if its already in the pcb, and if not copy it
	 * in.  Its unlikely that the copyin will fail, but if so there's not
	 * much we can do.  The parent will likely crash soon anyway in that
	 * case.
	 */
	if ((flags & RFMEM) != 0 && td1 != &thread0) {
		sp = td1->td_frame->tf_sp;
		for (i = 0; i < pcb1->pcb_nsaved; i++) {
			if (pcb1->pcb_rwsp[i] == sp)
				break;
		}
		if (i == pcb1->pcb_nsaved) {
			error = copyin((caddr_t)sp + SPOFF, &pcb1->pcb_rw[i],
			    sizeof(struct rwindow));
			if (error == 0) {
				pcb1->pcb_rwsp[i] = sp;
				pcb1->pcb_nsaved++;
			}
		}
	}

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies most of the user mode register values.
	 */
	tf = (struct trapframe *)pcb2 - 1;
	bcopy(td1->td_frame, tf, sizeof(*tf));

	tf->tf_out[0] = 0;		/* Child returns zero */
	tf->tf_out[1] = 0;
	tf->tf_tstate &= ~TSTATE_XCC_C;	/* success */
	tf->tf_fprs = 0;		/* Child starts with a clean FPU. */

	td2->td_frame = tf;
	fp = (struct frame *)tf - 1;
	/* fork_trampoline() fetches its arguments from these locals. */
	fp->fr_local[0] = (u_long)fork_return;
	fp->fr_local[1] = (u_long)td2;
	fp->fr_local[2] = (u_long)tf;
	/* Terminate stack traces at this frame. */
	fp->fr_pc = fp->fr_fp = 0;
	pcb2->pcb_sp = (u_long)fp - SPOFF;
	pcb2->pcb_pc = (u_long)fork_trampoline - 8;

	/* Setup to release the spin count in fork_exit(). */
	td2->td_md.md_spinlock_count = 1;
	td2->td_md.md_saved_pil = 0;

	/*
	 * Now, cpu_switch() can schedule the new process.
	 */
}

/*
 * Reboot the machine through the firmware, handing the recorded boot path
 * (from /chosen:bootpath, if available) back to cpu_shutdown().
 */
void
cpu_reset(void)
{
	static char bspec[64] = "";
	phandle_t chosen;
	/* Openfirmware client-interface descriptor for the "boot" service. */
	static struct {
		cell_t name;
		cell_t nargs;
		cell_t nreturns;
		cell_t bootspec;
	} args = {
		(cell_t)"boot",
		1,
		0,
		(cell_t)bspec
	};

	if ((chosen = OF_finddevice("/chosen")) != -1) {
		if (OF_getprop(chosen, "bootpath", bspec, sizeof(bspec)) == -1)
			bspec[0] = '\0';
		/* OF_getprop() may not NUL-terminate; force it. */
		bspec[sizeof(bspec) - 1] = '\0';
	}

	cpu_shutdown(&args);
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
{
	struct frame *fp;
	struct pcb *pcb;

	pcb = td->td_pcb;
	/* Rewrite the locals fork_trampoline() passes to fork_exit(). */
	fp = (struct frame *)(pcb->pcb_sp + SPOFF);
	fp->fr_local[0] = (u_long)func;
	fp->fr_local[1] = (u_long)arg;
}

/*
 * Return 1 if addr lies within any physical memory region reported by the
 * firmware, 0 otherwise.
 */
int
is_physical_memory(vm_paddr_t addr)
{
	struct ofw_mem_region *mr;

	for (mr = sparc64_memreg; mr < sparc64_memreg + sparc64_nmemreg; mr++)
		if (addr >= mr->mr_start && addr < mr->mr_start + mr->mr_size)
			return (1);
	return (0);
}

/*
 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
 */
static void
sf_buf_init(void *arg)
{
	struct sf_buf *sf_bufs;
	vm_offset_t sf_base;
	int i;

	nsfbufs = NSFBUFS;
	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);

	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
	SLIST_INIT(&sf_freelist.sf_head);
	/* Reserve one page of KVA per sf_buf; mapped later in sf_buf_alloc(). */
	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
	/*
	 * NOTE(review): neither kva_alloc() nor this M_NOWAIT malloc() is
	 * checked for failure; a NULL return would fault in the loop below.
	 * Presumably boot-time allocation cannot fail here -- verify.
	 */
	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
	    M_NOWAIT | M_ZERO);
	for (i = 0; i < nsfbufs; i++) {
		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
	}
	sf_buf_alloc_want = 0;
}

/*
 * Get an sf_buf from the freelist.  Will block if none are available.
 */
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
{
	struct sf_buf *sf;
	int error;

	mtx_lock(&sf_freelist.sf_lock);
	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
		if (flags & SFB_NOWAIT)
			break;
		sf_buf_alloc_want++;
		SFSTAT_INC(sf_allocwait);
		error = msleep(&sf_freelist, &sf_freelist.sf_lock,
		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
		sf_buf_alloc_want--;

		/*
		 * If we got a signal, don't risk going back to sleep.
		 */
		if (error)
			break;
	}
	if (sf != NULL) {
		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
		sf->m = m;
		nsfbufsused++;
		nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
		/* Map the caller's page at this sf_buf's reserved KVA. */
		pmap_qenter(sf->kva, &sf->m, 1);
	}
	mtx_unlock(&sf_freelist.sf_lock);
	return (sf);
}

/*
 * Release resources back to the system.
 */
void
sf_buf_free(struct sf_buf *sf)
{

	pmap_qremove(sf->kva, 1);
	mtx_lock(&sf_freelist.sf_lock);
	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
	nsfbufsused--;
	/* Wake a sleeper in sf_buf_alloc(), if any. */
	if (sf_buf_alloc_want > 0)
		wakeup(&sf_freelist);
	mtx_unlock(&sf_freelist.sf_lock);
}

void
swi_vm(void *v)
{

	/* Nothing to do here - busdma bounce buffers are not implemented. */
}

/*
 * Small-allocation backend for UMA: hand out single wired pages addressed
 * through the direct map, fixing up the dcache color when it differs from
 * the page's physical color.
 */
void *
uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
{
	vm_paddr_t pa;
	vm_page_t m;
	int pflags;
	void *va;

	PMAP_STATS_INC(uma_nsmall_alloc);

	*flags = UMA_SLAB_PRIV;
	pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;

	/* Retry (waiting for free pages) unless the caller said M_NOWAIT. */
	for (;;) {
		m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			else
				VM_WAIT;
		} else
			break;
	}

	pa = VM_PAGE_TO_PHYS(m);
	if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) {
		KASSERT(m->md.colors[0] == 0 && m->md.colors[1] == 0,
		    ("uma_small_alloc: free page %p still has mappings!", m));
		PMAP_STATS_INC(uma_nsmall_alloc_oc);
		m->md.color = DCACHE_COLOR(pa);
		/* Flush stale lines of the previous color. */
		dcache_page_inval(pa);
	}
	va = (void *)TLB_PHYS_TO_DIRECT(pa);
	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
		cpu_block_zero(va, PAGE_SIZE);
	return (va);
}

/*
 * Free a page handed out by uma_small_alloc(): unwire it, return it to the
 * VM system and drop the global wired-page count.
 */
void
uma_small_free(void *mem, vm_size_t size, u_int8_t flags)
{
	vm_page_t m;

	PMAP_STATS_INC(uma_nsmall_free);
	m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)mem));
	m->wire_count--;
	vm_page_free(m);
	atomic_subtract_int(&cnt.v_wire_count, 1);
}