vm_machdep.c revision 301428
/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * Copyright (c) 1989, 1990 William Jolitz
 * Copyright (c) 1994 John Dyson
 * Copyright (c) 2001 Jake Burkholder.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 * from: FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.167 2001/07/12
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/sparc64/sparc64/vm_machdep.c 301428 2016-06-05 07:34:10Z dchagin $");

#include "opt_pmap.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/sf_buf.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/vmmeter.h>

#include <dev/ofw/openfirm.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#include <machine/cache.h>
#include <machine/cpu.h>
#include <machine/fp.h>
#include <machine/frame.h>
#include <machine/fsr.h>
#include <machine/md_var.h>
#include <machine/ofw_machdep.h>
#include <machine/ofw_mem.h>
#include <machine/pcb.h>
#include <machine/tlb.h>
#include <machine/tstate.h>

#ifndef NSFBUFS
#define	NSFBUFS		(512 + maxusers * 16)
#endif

static int nsfbufs;
static int nsfbufspeak;
static int nsfbufsused;

SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");

static void	sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);

/*
 * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise, bundling
 * the free list head with the sf_lock mutex that protects it.
 */
static struct {
	SLIST_HEAD(, sf_buf) sf_head;
	struct mtx sf_lock;
} sf_freelist;

static u_int	sf_buf_alloc_want;

PMAP_STATS_VAR(uma_nsmall_alloc);
PMAP_STATS_VAR(uma_nsmall_alloc_oc);
PMAP_STATS_VAR(uma_nsmall_free);
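/*
 * Release machine-dependent per-process state at exit: clear the signal
 * trampoline pointer and drop the reference on the user trap table, if
 * one was installed.
 */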
void
cpu_exit(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;
	p->p_md.md_sigtramp = NULL;
	if (p->p_md.md_utrap != NULL) {
		utrap_free(p->p_md.md_utrap);
		p->p_md.md_utrap = NULL;
	}
}

void
cpu_thread_exit(struct thread *td)
{

}

void
cpu_thread_clean(struct thread *td)
{

}

void
cpu_thread_alloc(struct thread *td)
{
	struct pcb *pcb;

	pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
	    sizeof(struct pcb)) & ~0x3fUL);
	pcb->pcb_nsaved = 0;
	td->td_frame = (struct trapframe *)pcb - 1;
	td->td_pcb = pcb;
}

void
cpu_thread_free(struct thread *td)
{

}

void
cpu_thread_swapin(struct thread *td)
{

}

void
cpu_thread_swapout(struct thread *td)
{

}
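/*
 * Propagate syscall results into the trap frame using the SPARC V9
 * convention: return values go in %o0/%o1 and the %xcc carry bit is
 * cleared on success or set on error.  For ERESTART, %tpc/%tnpc are
 * rewound so the trap instruction is executed again.
 */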
void
cpu_set_syscall_retval(struct thread *td, int error)
{

	switch (error) {
	case 0:
		td->td_frame->tf_out[0] = td->td_retval[0];
		td->td_frame->tf_out[1] = td->td_retval[1];
		td->td_frame->tf_tstate &= ~TSTATE_XCC_C;
		break;

	case ERESTART:
		/*
		 * Undo the tpc advancement done at syscall entry so that
		 * the system call is reexecuted.
		 */
		td->td_frame->tf_tpc = td->td_pcb->pcb_tpc;
		td->td_frame->tf_tnpc -= 4;
		break;

	case EJUSTRETURN:
		break;

	default:
		td->td_frame->tf_out[0] = SV_ABI_ERRNO(td->td_proc, error);
		td->td_frame->tf_tstate |= TSTATE_XCC_C;
		break;
	}
}

void
cpu_set_upcall(struct thread *td, struct thread *td0)
{
	struct trapframe *tf;
	struct frame *fr;
	struct pcb *pcb;

	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));

	pcb = td->td_pcb;
	tf = td->td_frame;
	fr = (struct frame *)tf - 1;
	fr->fr_local[0] = (u_long)fork_return;
	fr->fr_local[1] = (u_long)td;
	fr->fr_local[2] = (u_long)tf;
	pcb->pcb_pc = (u_long)fork_trampoline - 8;
	pcb->pcb_sp = (u_long)fr - SPOFF;

	/* Set up to release the spin count in fork_exit(). */
	td->td_md.md_spinlock_count = 1;
	td->td_md.md_saved_pil = 0;
}

void
cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
    stack_t *stack)
{
	struct trapframe *tf;
	uint64_t sp;

	if (td == curthread)
		flushw();
	tf = td->td_frame;
	sp = (uint64_t)stack->ss_sp + stack->ss_size;
	tf->tf_out[0] = (uint64_t)arg;
	tf->tf_out[6] = sp - SPOFF - sizeof(struct frame);
	tf->tf_tpc = (uint64_t)entry;
	tf->tf_tnpc = tf->tf_tpc + 4;

	td->td_retval[0] = tf->tf_out[0];
	td->td_retval[1] = tf->tf_out[1];
}
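/*
 * Install the thread's TLS base.  The sparc64 ABI reserves %g7
 * (tf_global[7] in the trap frame) as the thread pointer.
 */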
int
cpu_set_user_tls(struct thread *td, void *tls_base)
{

	if (td == curthread)
		flushw();
	td->td_frame->tf_global[7] = (uint64_t)tls_base;
	return (0);
}

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
{
	struct trapframe *tf;
	struct frame *fp;
	struct pcb *pcb1;
	struct pcb *pcb2;
	vm_offset_t sp;
	int error;
	int i;

	KASSERT(td1 == curthread || td1 == &thread0,
	    ("cpu_fork: p1 not curproc and not proc0"));

	if ((flags & RFPROC) == 0)
		return;

	p2->p_md.md_sigtramp = td1->td_proc->p_md.md_sigtramp;
	p2->p_md.md_utrap = utrap_hold(td1->td_proc->p_md.md_utrap);

	/* The pcb must be aligned on a 64-byte boundary. */
	pcb1 = td1->td_pcb;
	pcb2 = (struct pcb *)((td2->td_kstack + td2->td_kstack_pages *
	    PAGE_SIZE - sizeof(struct pcb)) & ~0x3fUL);
	td2->td_pcb = pcb2;

	/*
	 * Ensure that p1's pcb is up to date.
	 */
	critical_enter();
	if ((td1->td_frame->tf_fprs & FPRS_FEF) != 0)
		savefpctx(pcb1->pcb_ufp);
	critical_exit();
	/* Make sure the copied windows are spilled. */
	flushw();
	/* Copy the pcb (this will copy the windows saved in the pcb, too). */
	bcopy(pcb1, pcb2, sizeof(*pcb1));

	/*
	 * If we're creating a new user process and we're sharing the address
	 * space, the parent's topmost frame must be saved in the pcb.  The
	 * child will pop the frame when it returns to user mode, and may
	 * overwrite it with its own data, causing much suffering for the
	 * parent.  We check whether it's already in the pcb, and if not,
	 * copy it in.  It's unlikely that the copyin will fail, but if it
	 * does there's not much we can do; the parent will likely crash
	 * soon anyway in that case.
	 */
	if ((flags & RFMEM) != 0 && td1 != &thread0) {
		sp = td1->td_frame->tf_sp;
		for (i = 0; i < pcb1->pcb_nsaved; i++) {
			if (pcb1->pcb_rwsp[i] == sp)
				break;
		}
		if (i == pcb1->pcb_nsaved) {
			error = copyin((caddr_t)sp + SPOFF, &pcb1->pcb_rw[i],
			    sizeof(struct rwindow));
			if (error == 0) {
				pcb1->pcb_rwsp[i] = sp;
				pcb1->pcb_nsaved++;
			}
		}
	}

	/*
	 * Create a fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies most of the user mode register values.
	 */
	tf = (struct trapframe *)pcb2 - 1;
	bcopy(td1->td_frame, tf, sizeof(*tf));

	tf->tf_out[0] = 0;			/* Child returns zero */
	tf->tf_out[1] = 0;
	tf->tf_tstate &= ~TSTATE_XCC_C;		/* success */
	tf->tf_fprs = 0;

	td2->td_frame = tf;
	fp = (struct frame *)tf - 1;
	fp->fr_local[0] = (u_long)fork_return;
	fp->fr_local[1] = (u_long)td2;
	fp->fr_local[2] = (u_long)tf;
	/* Terminate stack traces at this frame. */
	fp->fr_pc = fp->fr_fp = 0;
	pcb2->pcb_sp = (u_long)fp - SPOFF;
	pcb2->pcb_pc = (u_long)fork_trampoline - 8;

	/* Set up to release the spin count in fork_exit(). */
	td2->td_md.md_spinlock_count = 1;
	td2->td_md.md_saved_pil = 0;

	/*
	 * Now cpu_switch() can schedule the new process.
	 */
}
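/*
 * Reboot by calling into the firmware: hand the OpenFirmware "boot"
 * service the bootpath recorded under /chosen as its boot specifier.
 */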
void
cpu_reset(void)
{
	static char bspec[64] = "";
	phandle_t chosen;
	static struct {
		cell_t	name;
		cell_t	nargs;
		cell_t	nreturns;
		cell_t	bootspec;
	} args = {
		(cell_t)"boot",
		1,
		0,
		(cell_t)bspec
	};

	if ((chosen = OF_finddevice("/chosen")) != -1) {
		if (OF_getprop(chosen, "bootpath", bspec, sizeof(bspec)) == -1)
			bspec[0] = '\0';
		bspec[sizeof(bspec) - 1] = '\0';
	}

	cpu_shutdown(&args);
}

/*
 * Intercept the return address from a freshly forked process that has NOT
 * been scheduled yet.
 *
 * This is needed to make kernel threads stay in kernel mode.
 */
void
cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
{
	struct frame *fp;
	struct pcb *pcb;

	pcb = td->td_pcb;
	fp = (struct frame *)(pcb->pcb_sp + SPOFF);
	fp->fr_local[0] = (u_long)func;
	fp->fr_local[1] = (u_long)arg;
}

int
is_physical_memory(vm_paddr_t addr)
{
	struct ofw_mem_region *mr;

	for (mr = sparc64_memreg; mr < sparc64_memreg + sparc64_nmemreg; mr++)
		if (addr >= mr->mr_start && addr < mr->mr_start + mr->mr_size)
			return (1);
	return (0);
}

/*
 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
 */
static void
sf_buf_init(void *arg)
{
	struct sf_buf *sf_bufs;
	vm_offset_t sf_base;
	int i;

	nsfbufs = NSFBUFS;
	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);

	mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
	SLIST_INIT(&sf_freelist.sf_head);
	sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
	sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
	    M_NOWAIT | M_ZERO);
	for (i = 0; i < nsfbufs; i++) {
		sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
		SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i],
		    free_list);
	}
	sf_buf_alloc_want = 0;
}

/*
 * Get an sf_buf from the freelist.  Will block if none are available.
 */
struct sf_buf *
sf_buf_alloc(struct vm_page *m, int flags)
{
	struct sf_buf *sf;
	int error;

	mtx_lock(&sf_freelist.sf_lock);
	while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
		if (flags & SFB_NOWAIT)
			break;
		sf_buf_alloc_want++;
		SFSTAT_INC(sf_allocwait);
		error = msleep(&sf_freelist, &sf_freelist.sf_lock,
		    (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
		sf_buf_alloc_want--;

		/*
		 * If we got a signal, don't risk going back to sleep.
		 */
		if (error)
			break;
	}
	if (sf != NULL) {
		SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
		sf->m = m;
		nsfbufsused++;
		nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
		pmap_qenter(sf->kva, &sf->m, 1);
	}
	mtx_unlock(&sf_freelist.sf_lock);
	return (sf);
}

/*
 * Release resources back to the system.
 */
void
sf_buf_free(struct sf_buf *sf)
{

	pmap_qremove(sf->kva, 1);
	mtx_lock(&sf_freelist.sf_lock);
	SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
	nsfbufsused--;
	if (sf_buf_alloc_want > 0)
		wakeup(&sf_freelist);
	mtx_unlock(&sf_freelist.sf_lock);
}

void
swi_vm(void *v)
{

	/* Nothing to do here - busdma bounce buffers are not implemented. */
}
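/*
 * Back small UMA allocations with wired pages returned through the
 * kernel's direct mapping (TLB_PHYS_TO_DIRECT).  If the page's cached
 * virtual color disagrees with the color of its physical address, the
 * data cache is invalidated for that page to avoid illegal aliases.
 */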
void *
uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
{
	vm_paddr_t pa;
	vm_page_t m;
	int pflags;
	void *va;

	PMAP_STATS_INC(uma_nsmall_alloc);

	*flags = UMA_SLAB_PRIV;
	pflags = malloc2vm_flags(wait) | VM_ALLOC_WIRED;

	for (;;) {
		m = vm_page_alloc(NULL, 0, pflags | VM_ALLOC_NOOBJ);
		if (m == NULL) {
			if (wait & M_NOWAIT)
				return (NULL);
			else
				VM_WAIT;
		} else
			break;
	}

	pa = VM_PAGE_TO_PHYS(m);
	if (dcache_color_ignore == 0 && m->md.color != DCACHE_COLOR(pa)) {
		KASSERT(m->md.colors[0] == 0 && m->md.colors[1] == 0,
		    ("uma_small_alloc: free page %p still has mappings!", m));
		PMAP_STATS_INC(uma_nsmall_alloc_oc);
		m->md.color = DCACHE_COLOR(pa);
		dcache_page_inval(pa);
	}
	va = (void *)TLB_PHYS_TO_DIRECT(pa);
	if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
		cpu_block_zero(va, PAGE_SIZE);
	return (va);
}

void
uma_small_free(void *mem, vm_size_t size, u_int8_t flags)
{
	vm_page_t m;

	PMAP_STATS_INC(uma_nsmall_free);
	m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS((vm_offset_t)mem));
	m->wire_count--;
	vm_page_free(m);
	atomic_subtract_int(&cnt.v_wire_count, 1);
}