/* sys_machdep.c — FreeBSD stable/10, i386, SVN revision 280258 */
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/i386/i386/sys_machdep.c 280258 2015-03-19 13:37:36Z rwatson $");

#include "opt_capsicum.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysproto.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/proc.h>
#include <machine/sysarch.h>

#include <security/audit/audit.h>

#ifdef XEN
#include <machine/xen/xenfunc.h>

void i386_reset_ldt(struct proc_ldt *pldt);

/*
 * Point the (para)virtualized CPU at this process' LDT via the
 * hypervisor; under Xen the LDT cannot be loaded with lldt directly.
 */
void
i386_reset_ldt(struct proc_ldt *pldt)
{
	xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len);
}
#else
/* Native kernels reload the LDT inline (see set_user_ldt()); no-op here. */
#define i386_reset_ldt(x)
#endif

#include <vm/vm_kern.h>		/* for kernel_map */

/* Hard cap on LDT entries per process (16-bit selector limit / 8). */
#define MAX_LD 8192
/* LDT entries per 4K page (PAGE_SIZE / sizeof(union descriptor)). */
#define LD_PER_PAGE 512
/* Round a requested entry count up to a whole-page multiple of entries. */
#define NEW_MAX_LD(num)  ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
/* Byte size of an LDT holding NEW_MAX_LD(num) 8-byte descriptors. */
#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
#define	NULL_LDT_BASE	((caddr_t)NULL)

#ifdef SMP
static void set_user_ldt_rv(struct vmspace *vmsp);
#endif
static int i386_set_ldt_data(struct thread *, int start, int num,
    union descriptor *descs);
static int i386_ldt_grow(struct thread *td, int len);

#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
	int op;
	char *parms;
};
#endif

/*
 * sysarch(2): machine-dependent syscall multiplexer for i386.
 *
 * uap->op selects the operation; uap->parms points at an op-specific
 * user-space argument structure which is copied in/out as needed below.
 * Returns 0 or an errno value.
 */
int
sysarch(td, uap)
	struct thread *td;
	register struct sysarch_args *uap;
{
	int error;
	union descriptor *lp;
	union {
		struct i386_ldt_args largs;
		struct i386_ioperm_args iargs;
		struct i386_get_xfpustate xfpu;
	} kargs;
	uint32_t base;
	struct segment_descriptor sd, *sdp;

	AUDIT_ARG_CMD(uap->op);

#ifdef CAPABILITY_MODE
	/*
	 * When adding new operations, add a new case statement here to
	 * explicitly indicate whether or not the operation is safe to
	 * perform in capability mode.
	 */
	if (IN_CAPABILITY_MODE(td)) {
		switch (uap->op) {
		case I386_GET_LDT:
		case I386_SET_LDT:
		case I386_GET_IOPERM:
		case I386_GET_FSBASE:
		case I386_SET_FSBASE:
		case I386_GET_GSBASE:
		case I386_SET_GSBASE:
		case I386_GET_XFPUSTATE:
			break;

		case I386_SET_IOPERM:
		default:
#ifdef KTRACE
			if (KTRPOINT(td, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
#endif
			return (ECAPMODE);
		}
	}
#endif

	/* First pass: copy in and validate the op-specific argument block. */
	switch (uap->op) {
	case I386_GET_IOPERM:
	case I386_SET_IOPERM:
		if ((error = copyin(uap->parms, &kargs.iargs,
		    sizeof(struct i386_ioperm_args))) != 0)
			return (error);
		break;
	case I386_GET_LDT:
	case I386_SET_LDT:
		if ((error = copyin(uap->parms, &kargs.largs,
		    sizeof(struct i386_ldt_args))) != 0)
			return (error);
		/* Bound num here so the malloc below cannot be oversized. */
		if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0)
			return (EINVAL);
		break;
	case I386_GET_XFPUSTATE:
		if ((error = copyin(uap->parms, &kargs.xfpu,
		    sizeof(struct i386_get_xfpustate))) != 0)
			return (error);
		break;
	default:
		break;
	}

	/* Second pass: perform the operation. */
	switch(uap->op) {
	case I386_GET_LDT:
		error = i386_get_ldt(td, &kargs.largs);
		break;
	case I386_SET_LDT:
		if (kargs.largs.descs != NULL) {
			/* Stage the user's descriptors in a kernel buffer. */
			lp = (union descriptor *)malloc(
			    kargs.largs.num * sizeof(union descriptor),
			    M_TEMP, M_WAITOK);
			error = copyin(kargs.largs.descs, lp,
			    kargs.largs.num * sizeof(union descriptor));
			if (error == 0)
				error = i386_set_ldt(td, &kargs.largs, lp);
			free(lp, M_TEMP);
		} else {
			/* NULL descs requests freeing of LDT slots. */
			error = i386_set_ldt(td, &kargs.largs, NULL);
		}
		break;
	case I386_GET_IOPERM:
		error = i386_get_ioperm(td, &kargs.iargs);
		if (error == 0)
			error = copyout(&kargs.iargs, uap->parms,
			    sizeof(struct i386_ioperm_args));
		break;
	case I386_SET_IOPERM:
		error = i386_set_ioperm(td, &kargs.iargs);
		break;
	case I386_VM86:
		error = vm86_sysarch(td, uap->parms);
		break;
	case I386_GET_FSBASE:
		/* Reassemble the 32-bit base from the split descriptor. */
		sdp = &td->td_pcb->pcb_fsd;
		base = sdp->sd_hibase << 24 | sdp->sd_lobase;
		error = copyout(&base, uap->parms, sizeof(base));
		break;
	case I386_SET_FSBASE:
		error = copyin(uap->parms, &base, sizeof(base));
		if (!error) {
			/*
			 * Construct a descriptor and store it in the pcb for
			 * the next context switch.  Also store it in the gdt
			 * so that the load of tf_fs into %fs will activate it
			 * at return to userland.
			 */
			sd.sd_lobase = base & 0xffffff;
			sd.sd_hibase = (base >> 24) & 0xff;
#ifdef XEN
			/* need to do nosegneg like Linux */
			sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
#else
			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
#endif
			sd.sd_hilimit = 0xf;
			sd.sd_type  = SDT_MEMRWA;
			sd.sd_dpl   = SEL_UPL;
			sd.sd_p     = 1;
			sd.sd_xx    = 0;
			sd.sd_def32 = 1;
			sd.sd_gran  = 1;
			/* No preemption between pcb and GDT updates. */
			critical_enter();
			td->td_pcb->pcb_fsd = sd;
#ifdef XEN
			HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]),
			    *(uint64_t *)&sd);
#else
			PCPU_GET(fsgs_gdt)[0] = sd;
#endif
			critical_exit();
			td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
		}
		break;
	case I386_GET_GSBASE:
		sdp = &td->td_pcb->pcb_gsd;
		base = sdp->sd_hibase << 24 | sdp->sd_lobase;
		error = copyout(&base, uap->parms, sizeof(base));
		break;
	case I386_SET_GSBASE:
		error = copyin(uap->parms, &base, sizeof(base));
		if (!error) {
			/*
			 * Construct a descriptor and store it in the pcb for
			 * the next context switch.  Also store it in the gdt
			 * because we have to do a load_gs() right now.
			 */
			sd.sd_lobase = base & 0xffffff;
			sd.sd_hibase = (base >> 24) & 0xff;

#ifdef XEN
			/* need to do nosegneg like Linux */
			sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
#else
			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
#endif
			sd.sd_hilimit = 0xf;
			sd.sd_type  = SDT_MEMRWA;
			sd.sd_dpl   = SEL_UPL;
			sd.sd_p     = 1;
			sd.sd_xx    = 0;
			sd.sd_def32 = 1;
			sd.sd_gran  = 1;
			critical_enter();
			td->td_pcb->pcb_gsd = sd;
#ifdef XEN
			HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]),
			    *(uint64_t *)&sd);
#else
			PCPU_GET(fsgs_gdt)[1] = sd;
#endif
			critical_exit();
			load_gs(GSEL(GUGS_SEL, SEL_UPL));
		}
		break;
	case I386_GET_XFPUSTATE:
		if (kargs.xfpu.len > cpu_max_ext_state_size -
		    sizeof(union savefpu))
			return (EINVAL);
		npxgetregs(td);
		/* Extended state lives immediately after the legacy area. */
		error = copyout((char *)(get_pcb_user_save_td(td) + 1),
		    kargs.xfpu.addr, kargs.xfpu.len);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * Allocate and install an extended pcb (private TSS plus i/o permission
 * bitmap) for the current thread.  Called the first time a thread needs
 * per-thread i/o permissions (see i386_set_ioperm()).  Returns 0.
 */
int
i386_extend_pcb(struct thread *td)
{
	int i, offset;
	u_long *addr;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		ctob(IOPAGES + 1) - 1,	/* length */
		SDT_SYS386TSS,		/* segment type */
		0,			/* priority level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 32 size */
		0			/* granularity */
	};

	ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1),
	    M_WAITOK | M_ZERO);
	/* -16 is so we can convert a trapframe into vm86trapframe inplace */
	ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) -
	    sizeof(struct pcb) - 16;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	/*
	 * The last byte of the i/o map must be followed by an 0xff byte.
	 * We arbitrarily allocate 16 bytes here, to keep the starting
	 * address on a doubleword boundary.
	 */
	offset = PAGE_SIZE - 16;
	ext->ext_tss.tss_ioopt =
	    (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
	ext->ext_iomap = (caddr_t)ext + offset;
	ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;

	/* All bits set: every interrupt and i/o port starts out denied. */
	addr = (u_long *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
		*addr++ = ~0;

	ssd.ssd_base = (unsigned)&ext->ext_tss;
	ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
	ssdtosd(&ssd, &ext->ext_tssd);

	KASSERT(td == curthread, ("giving TSS to !curthread"));
	KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!"));

	/* Switch to the new TSS. */
	critical_enter();
	td->td_pcb->pcb_ext = ext;
	PCPU_SET(private_tss, 1);
	*PCPU_GET(tss_gdt) = ext->ext_tssd;
	ltr(GSEL(GPROC0_SEL, SEL_KPL));
	critical_exit();

	return 0;
}

/*
 * Grant (uap->enable != 0) or revoke user access to the i/o port range
 * [uap->start, uap->start + uap->length) by clearing/setting bits in the
 * thread's TSS i/o permission bitmap.  Requires PRIV_IO and a securelevel
 * of 0 or lower.  Returns 0 or an errno value.
 */
int
i386_set_ioperm(td, uap)
	struct thread *td;
	struct i386_ioperm_args *uap;
{
	int i, error;
	char *iomap;

	if ((error = priv_check(td, PRIV_IO)) != 0)
		return (error);
	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
		return (error);
	/*
	 * XXX
	 * While this is restricted to root, we should probably figure out
	 * whether any other driver is using this i/o address, as so not to
	 * cause confusion.  This probably requires a global 'usage registry'.
	 */

	if (td->td_pcb->pcb_ext == 0)
		if ((error = i386_extend_pcb(td)) != 0)
			return (error);
	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

	if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	for (i = uap->start; i < uap->start + uap->length; i++) {
		/* A cleared bit means access is allowed. */
		if (uap->enable)
			iomap[i >> 3] &= ~(1 << (i & 7));
		else
			iomap[i >> 3] |= (1 << (i & 7));
	}
	return (error);
}

/*
 * Report the access state of the i/o bitmap starting at uap->start:
 * sets uap->enable to the state of the first port and uap->length to
 * the length of the run of ports sharing that state.  Returns 0, or
 * EINVAL for an out-of-range start.
 */
int
i386_get_ioperm(td, uap)
	struct thread *td;
	struct i386_ioperm_args *uap;
{
	int i, state;
	char *iomap;

	if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	/* No extended pcb yet: no bitmap exists, report an empty range. */
	if (td->td_pcb->pcb_ext == 0) {
		uap->length = 0;
		goto done;
	}

	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

	i = uap->start;
	state = (iomap[i >> 3] >> (i & 7)) & 1;
	uap->enable = !state;
	uap->length = 1;

	/* Extend the run while consecutive ports share the same state. */
	for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
			break;
		uap->length++;
	}

done:
	return (0);
}

/*
 * Update the GDT entry pointing to the LDT to point to the LDT of the
 * current process. Manage dt_lock holding/unholding autonomously.
 */
void
set_user_ldt(struct mdproc *mdp)
{
	struct proc_ldt *pldt;
	int dtlocked;

	/* Take dt_lock only if the caller does not already hold it. */
	dtlocked = 0;
	if (!mtx_owned(&dt_lock)) {
		mtx_lock_spin(&dt_lock);
		dtlocked = 1;
	}

	pldt = mdp->md_ldt;
#ifdef XEN
	i386_reset_ldt(pldt);
	PCPU_SET(currentldt, (int)pldt);
#else
#ifdef SMP
	gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd;
#else
	gdt[GUSERLDT_SEL].sd = pldt->ldt_sd;
#endif
	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
	PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
#endif /* XEN */
	if (dtlocked)
		mtx_unlock_spin(&dt_lock);
}

#ifdef SMP
/*
 * smp_rendezvous() callback: reload the LDT on this CPU, but only if the
 * thread currently running here belongs to the target vmspace.
 */
static void
set_user_ldt_rv(struct vmspace *vmsp)
{
	struct thread *td;

	td = curthread;
	if (vmsp != td->td_proc->p_vmspace)
		return;

	set_user_ldt(&td->td_proc->p_md);
}
#endif

#ifdef XEN

/*
 * dt_lock must be held. Returns with dt_lock held.
 *
 * Allocate a new proc_ldt of at least 'len' entries (rounded up by
 * NEW_MAX_LD()), seeded from the process' current LDT if it has one,
 * otherwise from the system default 'ldt'.  dt_lock is dropped around
 * the sleeping allocations and reacquired before return.  Under Xen the
 * LDT pages must additionally be remapped read-only for the hypervisor.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
	struct proc_ldt *pldt, *new_ldt;

	mtx_assert(&dt_lock, MA_OWNED);
	mtx_unlock_spin(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt),
		M_SUBPROC, M_WAITOK);

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
	    round_page(len * sizeof(union descriptor)), M_WAITOK);
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	mtx_lock_spin(&dt_lock);
	if ((pldt = mdp->md_ldt)) {
		if (len > pldt->ldt_len)
			len = pldt->ldt_len;
		bcopy(pldt->ldt_base, new_ldt->ldt_base,
		    len * sizeof(union descriptor));
	} else {
		bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
	}
	mtx_unlock_spin(&dt_lock);  /* XXX kill once pmap locking fixed. */
	pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
	    new_ldt->ldt_len*sizeof(union descriptor));
	mtx_lock_spin(&dt_lock);  /* XXX kill once pmap locking fixed. */
	return (new_ldt);
}
#else
/*
 * dt_lock must be held. Returns with dt_lock held.
 *
 * Native variant of user_ldt_alloc(): same contract as the Xen version
 * above, but also builds the GDT segment descriptor (ldt_sd) describing
 * the new LDT so set_user_ldt() can install it.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
	struct proc_ldt *pldt, *new_ldt;

	mtx_assert(&dt_lock, MA_OWNED);
	mtx_unlock_spin(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt),
		M_SUBPROC, M_WAITOK);

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
	    len * sizeof(union descriptor), M_WAITOK);
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	mtx_lock_spin(&dt_lock);
	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
	gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
	ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);

	if ((pldt = mdp->md_ldt) != NULL) {
		if (len > pldt->ldt_len)
			len = pldt->ldt_len;
		bcopy(pldt->ldt_base, new_ldt->ldt_base,
		    len * sizeof(union descriptor));
	} else
		bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));

	return (new_ldt);
}
#endif /* !XEN */

/*
 * Must be called with dt_lock held.  Returns with dt_lock unheld.
 *
 * Detach the process' LDT; if td is the current thread, first fall back
 * to the default LDT, then drop the reference via user_ldt_deref().
 */
void
user_ldt_free(struct thread *td)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt;

	mtx_assert(&dt_lock, MA_OWNED);
	if ((pldt = mdp->md_ldt) == NULL) {
		mtx_unlock_spin(&dt_lock);
		return;
	}

	if (td == curthread) {
#ifdef XEN
		i386_reset_ldt(&default_proc_ldt);
		PCPU_SET(currentldt, (int)&default_proc_ldt);
#else
		lldt(_default_ldt);
		PCPU_SET(currentldt, _default_ldt);
#endif
	}

	mdp->md_ldt = NULL;
	user_ldt_deref(pldt);
}

/*
 * Drop one reference on pldt; free its memory when the count hits zero.
 * Must be called with dt_lock held; returns with dt_lock unheld.
 */
void
user_ldt_deref(struct proc_ldt *pldt)
{

	mtx_assert(&dt_lock, MA_OWNED);
	if (--pldt->ldt_refcnt == 0) {
		/* Drop the spin lock before the sleepable frees. */
		mtx_unlock_spin(&dt_lock);
		kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base,
		    pldt->ldt_len * sizeof(union descriptor));
		free(pldt, M_SUBPROC);
	} else
		mtx_unlock_spin(&dt_lock);
}

/*
 * Note for the authors of compat layers (linux, etc): copyout() in
 * the function below is not a problem since it presents data in
 * arch-specific format (i.e. i386-specific in this case), not in
 * the OS-specific one.
 */
/*
 * Copy up to uap->num LDT descriptors starting at slot uap->start out to
 * uap->descs.  Reads the process' private LDT if it has one, otherwise
 * the system default 'ldt'.  On success td_retval[0] holds the number of
 * descriptors copied.  Returns 0 or an errno value.
 */
int
i386_get_ldt(td, uap)
	struct thread *td;
	struct i386_ldt_args *uap;
{
	int error = 0;
	struct proc_ldt *pldt;
	int nldt, num;
	union descriptor *lp;

#ifdef	DEBUG
	printf("i386_get_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	mtx_lock_spin(&dt_lock);
	if ((pldt = td->td_proc->p_md.md_ldt) != NULL) {
		nldt = pldt->ldt_len;
		lp = &((union descriptor *)(pldt->ldt_base))[uap->start];
		mtx_unlock_spin(&dt_lock);
		num = min(uap->num, nldt);
	} else {
		mtx_unlock_spin(&dt_lock);
		nldt = sizeof(ldt)/sizeof(ldt[0]);
		num = min(uap->num, nldt);
		lp = &ldt[uap->start];
	}

	/* Range checks before lp is dereferenced by copyout(). */
	if ((uap->start > (unsigned int)nldt) ||
	    ((unsigned int)num > (unsigned int)nldt) ||
	    ((unsigned int)(uap->start + num) > (unsigned int)nldt))
		return(EINVAL);

	error = copyout(lp, uap->descs, num * sizeof(union descriptor));
	if (!error)
		td->td_retval[0] = num;

	return(error);
}

/*
 * Install (or free) LDT descriptors for the calling process.
 *
 * descs == NULL frees slots [uap->start, uap->start + uap->num); the
 * special pair start == 0, num == 0 frees all slots above NLDT.  With
 * descs non-NULL, the pair start == LDT_AUTO_ALLOC, num == 1 allocates
 * a free slot automatically; otherwise the given range is overwritten.
 * Each descriptor is vetted so user code can only create ring-3 memory
 * segments.  On success td_retval[0] holds the first slot written.
 * Returns 0 or an errno value.
 */
int
i386_set_ldt(td, uap, descs)
	struct thread *td;
	struct i386_ldt_args *uap;
	union descriptor *descs;
{
	int error = 0, i;
	int largest_ld;
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt;
	union descriptor *dp;

#ifdef	DEBUG
	printf("i386_set_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	if (descs == NULL) {
		/* Free descriptors */
		if (uap->start == 0 && uap->num == 0) {
			/*
			 * Treat this as a special case, so userland needn't
			 * know magic number NLDT.
			 */
			uap->start = NLDT;
			uap->num = MAX_LD - NLDT;
		}
		if (uap->num == 0)
			return (EINVAL);
		mtx_lock_spin(&dt_lock);
		if ((pldt = mdp->md_ldt) == NULL ||
		    uap->start >= pldt->ldt_len) {
			mtx_unlock_spin(&dt_lock);
			return (0);
		}
		largest_ld = uap->start + uap->num;
		if (largest_ld > pldt->ldt_len)
			largest_ld = pldt->ldt_len;
		i = largest_ld - uap->start;
		/* A zeroed descriptor is SDT_SYSNULL, i.e. a free slot. */
		bzero(&((union descriptor *)(pldt->ldt_base))[uap->start],
		    sizeof(union descriptor) * i);
		mtx_unlock_spin(&dt_lock);
		return (0);
	}

	if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
		/* verify range of descriptors to modify */
		largest_ld = uap->start + uap->num;
		if (uap->start >= MAX_LD || largest_ld > MAX_LD) {
			return (EINVAL);
		}
	}

	/* Check descriptors for access violations */
	for (i = 0; i < uap->num; i++) {
		dp = &descs[i];

		switch (dp->sd.sd_type) {
		case SDT_SYSNULL:	/* system null */
			dp->sd.sd_p = 0;
			break;
		case SDT_SYS286TSS: /* system 286 TSS available */
		case SDT_SYSLDT:    /* system local descriptor table */
		case SDT_SYS286BSY: /* system 286 TSS busy */
		case SDT_SYSTASKGT: /* system task gate */
		case SDT_SYS286IGT: /* system 286 interrupt gate */
		case SDT_SYS286TGT: /* system 286 trap gate */
		case SDT_SYSNULL2:  /* undefined by Intel */
		case SDT_SYS386TSS: /* system 386 TSS available */
		case SDT_SYSNULL3:  /* undefined by Intel */
		case SDT_SYS386BSY: /* system 386 TSS busy */
		case SDT_SYSNULL4:  /* undefined by Intel */
		case SDT_SYS386IGT: /* system 386 interrupt gate */
		case SDT_SYS386TGT: /* system 386 trap gate */
		case SDT_SYS286CGT: /* system 286 call gate */
		case SDT_SYS386CGT: /* system 386 call gate */
			/* I can't think of any reason to allow a user proc
			 * to create a segment of these types.  They are
			 * for OS use only.
			 */
			return (EACCES);
			/*NOTREACHED*/

			/* memory segment types */
		case SDT_MEMEC:   /* memory execute only conforming */
		case SDT_MEMEAC:  /* memory execute only accessed conforming */
		case SDT_MEMERC:  /* memory execute read conforming */
		case SDT_MEMERAC: /* memory execute read accessed conforming */
			/* Must be "present" if executable and conforming. */
			if (dp->sd.sd_p == 0)
				return (EACCES);
			break;
		case SDT_MEMRO:   /* memory read only */
		case SDT_MEMROA:  /* memory read only accessed */
		case SDT_MEMRW:   /* memory read write */
		case SDT_MEMRWA:  /* memory read write accessed */
		case SDT_MEMROD:  /* memory read only expand dwn limit */
		case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
		case SDT_MEMRWD:  /* memory read write expand dwn limit */
		case SDT_MEMRWDA: /* memory read write expand dwn lim accessed */
		case SDT_MEME:    /* memory execute only */
		case SDT_MEMEA:   /* memory execute only accessed */
		case SDT_MEMER:   /* memory execute read */
		case SDT_MEMERA:  /* memory execute read accessed */
			break;
		default:
			return(EINVAL);
			/*NOTREACHED*/
		}

		/* Only user (ring-3) descriptors may be present. */
		if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL))
			return (EACCES);
	}

	if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
		/* Allocate a free slot */
		mtx_lock_spin(&dt_lock);
		if ((pldt = mdp->md_ldt) == NULL) {
			if ((error = i386_ldt_grow(td, NLDT + 1))) {
				mtx_unlock_spin(&dt_lock);
				return (error);
			}
			pldt = mdp->md_ldt;
		}
again:
		/*
		 * start scanning a bit up to leave room for NVidia and
		 * Wine, which still use the "Blat" method of allocation.
		 */
		dp = &((union descriptor *)(pldt->ldt_base))[NLDT];
		for (i = NLDT; i < pldt->ldt_len; ++i) {
			if (dp->sd.sd_type == SDT_SYSNULL)
				break;
			dp++;
		}
		if (i >= pldt->ldt_len) {
			/* No free slot: grow the LDT and rescan. */
			if ((error = i386_ldt_grow(td, pldt->ldt_len+1))) {
				mtx_unlock_spin(&dt_lock);
				return (error);
			}
			goto again;
		}
		uap->start = i;
		error = i386_set_ldt_data(td, i, 1, descs);
		mtx_unlock_spin(&dt_lock);
	} else {
		largest_ld = uap->start + uap->num;
		mtx_lock_spin(&dt_lock);
		if (!(error = i386_ldt_grow(td, largest_ld))) {
			error = i386_set_ldt_data(td, uap->start, uap->num,
			    descs);
		}
		mtx_unlock_spin(&dt_lock);
	}
	if (error == 0)
		td->td_retval[0] = uap->start;
	return (error);
}
#ifdef XEN
/*
 * Write 'num' descriptors into the process LDT starting at 'start'.
 * Xen variant: each slot must go through the hypervisor since the LDT
 * pages are mapped read-only.  Caller holds dt_lock.  Returns 0.
 */
static int
i386_set_ldt_data(struct thread *td, int start, int num,
    union descriptor *descs)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt = mdp->md_ldt;

	mtx_assert(&dt_lock, MA_OWNED);

	while (num) {
		xen_update_descriptor(
		    &((union descriptor *)(pldt->ldt_base))[start],
		    descs);
		num--;
		start++;
		descs++;
	}
	return (0);
}
#else
/*
 * Write 'num' descriptors into the process LDT starting at 'start'.
 * Native variant: plain memory copy.  Caller holds dt_lock.  Returns 0.
 */
static int
i386_set_ldt_data(struct thread *td, int start, int num,
    union descriptor *descs)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt = mdp->md_ldt;

	mtx_assert(&dt_lock, MA_OWNED);

	/* Fill in range */
	bcopy(descs,
	    &((union descriptor *)(pldt->ldt_base))[start],
	    num * sizeof(union descriptor));
	return (0);
}
#endif /* !XEN */

/*
 * Ensure the process LDT has at least 'len' entries, allocating or
 * growing it as needed and making every CPU reload the LDT afterwards.
 * Called with dt_lock held; the lock is dropped and retaken internally
 * (so callers must not rely on state across the call) and is held again
 * on return.  Returns 0 or ENOMEM.
 */
static int
i386_ldt_grow(struct thread *td, int len)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *new_ldt, *pldt;
	caddr_t old_ldt_base = NULL_LDT_BASE;
	int old_ldt_len = 0;

	mtx_assert(&dt_lock, MA_OWNED);

	if (len > MAX_LD)
		return (ENOMEM);
	if (len < NLDT + 1)
		len = NLDT + 1;

	/* Allocate a user ldt. */
	if ((pldt = mdp->md_ldt) == NULL || len > pldt->ldt_len) {
		new_ldt = user_ldt_alloc(mdp, len);
		if (new_ldt == NULL)
			return (ENOMEM);
		pldt = mdp->md_ldt;

		if (pldt != NULL) {
			if (new_ldt->ldt_len <= pldt->ldt_len) {
				/*
				 * We just lost the race for allocation, so
				 * free the new object and return.
				 */
				mtx_unlock_spin(&dt_lock);
				kmem_free(kernel_arena,
				   (vm_offset_t)new_ldt->ldt_base,
				   new_ldt->ldt_len * sizeof(union descriptor));
				free(new_ldt, M_SUBPROC);
				mtx_lock_spin(&dt_lock);
				return (0);
			}

			/*
			 * We have to substitute the current LDT entry for
			 * curproc with the new one since its size grew.
			 */
			old_ldt_base = pldt->ldt_base;
			old_ldt_len = pldt->ldt_len;
			pldt->ldt_sd = new_ldt->ldt_sd;
			pldt->ldt_base = new_ldt->ldt_base;
			pldt->ldt_len = new_ldt->ldt_len;
		} else
			mdp->md_ldt = pldt = new_ldt;
#ifdef SMP
		/*
		 * Signal other cpus to reload ldt.  We need to unlock dt_lock
		 * here because other CPU will contest on it since their
		 * curthreads won't hold the lock and will block when trying
		 * to acquire it.
		 */
		mtx_unlock_spin(&dt_lock);
		smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv,
		    NULL, td->td_proc->p_vmspace);
#else
		set_user_ldt(&td->td_proc->p_md);
		mtx_unlock_spin(&dt_lock);
#endif
		/*
		 * Free the old LDT storage; new_ldt's fields were copied
		 * into pldt above, so only its container is freed here.
		 */
		if (old_ldt_base != NULL_LDT_BASE) {
			kmem_free(kernel_arena, (vm_offset_t)old_ldt_base,
			    old_ldt_len * sizeof(union descriptor));
			free(new_ldt, M_SUBPROC);
		}
		mtx_lock_spin(&dt_lock);
	}
	return (0);
}