sys_machdep.c revision 286311
1/*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
 *
 * from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/i386/i386/sys_machdep.c 286311 2015-08-05 08:17:10Z kib $");

#include "opt_capsicum.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysproto.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/proc.h>
#include <machine/sysarch.h>

#include <security/audit/audit.h>

#ifdef XEN
#include <machine/xen/xenfunc.h>

void i386_reset_ldt(struct proc_ldt *pldt);

/*
 * Point the (para)virtualized CPU at the given process LDT.  Under Xen
 * the LDT cannot be loaded with lldt directly; the update goes through
 * the xen_set_ldt() hypercall wrapper instead.
 */
void
i386_reset_ldt(struct proc_ldt *pldt)
{
	xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len);
}
#else
/* Native kernels reload the LDT inline (see set_user_ldt()); no-op here. */
#define i386_reset_ldt(x)
#endif

#include <vm/vm_kern.h>		/* for kernel_map */

#define MAX_LD			8192		/* hard cap on LDT slots per process */
#define LD_PER_PAGE		512		/* descriptors (8 bytes each) per 4K page */
/* Round a descriptor count up to the next whole page worth of slots. */
#define NEW_MAX_LD(num)		((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
#define	NULL_LDT_BASE		((caddr_t)NULL)

#ifdef SMP
static void set_user_ldt_rv(struct vmspace *vmsp);
#endif
static int i386_set_ldt_data(struct thread *, int start, int num,
	union descriptor *descs);
static int i386_ldt_grow(struct thread *td, int len);

/*
 * Build a user-mode (ring 3) read/write data segment descriptor with the
 * given linear base and a 4GB limit (the limit wraps, which is what the
 * %fs/%gs TLS bases rely on).  Used by I386_SET_FSBASE/I386_SET_GSBASE.
 */
void
fill_based_sd(struct segment_descriptor *sdp, uint32_t base)
{

	sdp->sd_lobase = base & 0xffffff;
	sdp->sd_hibase = (base >> 24) & 0xff;
#ifdef XEN
	/* need to do nosegneg like Linux */
	sdp->sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
#else
	sdp->sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
#endif
	sdp->sd_hilimit = 0xf;
	sdp->sd_type = SDT_MEMRWA;
	sdp->sd_dpl = SEL_UPL;
	sdp->sd_p = 1;
	sdp->sd_xx = 0;
	sdp->sd_def32 = 1;
	sdp->sd_gran = 1;
}

#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
	int op;
	char *parms;
};
#endif

/*
 * sysarch(2): dispatcher for i386 machine-dependent operations -- LDT
 * get/set, I/O permission bitmap get/set, vm86 mode, %fs/%gs segment
 * base get/set, and extended FPU state retrieval.  Arguments for each
 * op are copied in from the user pointer uap->parms up front, then the
 * op is dispatched; results that go back to userland are copied out
 * per-op.  Returns 0 or an errno.
 */
int
sysarch(td, uap)
	struct thread *td;
	register struct sysarch_args *uap;
{
	int error;
	union descriptor *lp;
	union {
		struct i386_ldt_args largs;
		struct i386_ioperm_args iargs;
		struct i386_get_xfpustate xfpu;
	} kargs;
	uint32_t base;
	struct segment_descriptor sd, *sdp;

	AUDIT_ARG_CMD(uap->op);

#ifdef CAPABILITY_MODE
	/*
	 * When adding new operations, add a new case statement here to
	 * explicitly indicate whether or not the operation is safe to
	 * perform in capability mode.
	 */
	if (IN_CAPABILITY_MODE(td)) {
		switch (uap->op) {
		case I386_GET_LDT:
		case I386_SET_LDT:
		case I386_GET_IOPERM:
		case I386_GET_FSBASE:
		case I386_SET_FSBASE:
		case I386_GET_GSBASE:
		case I386_SET_GSBASE:
		case I386_GET_XFPUSTATE:
			break;

		case I386_SET_IOPERM:
		default:
#ifdef KTRACE
			if (KTRPOINT(td, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
#endif
			return (ECAPMODE);
		}
	}
#endif

	/* First pass: copy in the per-op argument structure. */
	switch (uap->op) {
	case I386_GET_IOPERM:
	case I386_SET_IOPERM:
		if ((error = copyin(uap->parms, &kargs.iargs,
		    sizeof(struct i386_ioperm_args))) != 0)
			return (error);
		break;
	case I386_GET_LDT:
	case I386_SET_LDT:
		if ((error = copyin(uap->parms, &kargs.largs,
		    sizeof(struct i386_ldt_args))) != 0)
			return (error);
		/* Bound num here so the malloc below cannot be oversized. */
		if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0)
			return (EINVAL);
		break;
	case I386_GET_XFPUSTATE:
		if ((error = copyin(uap->parms, &kargs.xfpu,
		    sizeof(struct i386_get_xfpustate))) != 0)
			return (error);
		break;
	default:
		break;
	}

	/* Second pass: perform the operation. */
	switch(uap->op) {
	case I386_GET_LDT:
		error = i386_get_ldt(td, &kargs.largs);
		break;
	case I386_SET_LDT:
		if (kargs.largs.descs != NULL) {
			/* num is already bounded by the MAX_LD check above. */
			lp = (union descriptor *)malloc(
			    kargs.largs.num * sizeof(union descriptor),
			    M_TEMP, M_WAITOK);
			error = copyin(kargs.largs.descs, lp,
			    kargs.largs.num * sizeof(union descriptor));
			if (error == 0)
				error = i386_set_ldt(td, &kargs.largs, lp);
			free(lp, M_TEMP);
		} else {
			/* descs == NULL means "free the given LDT range". */
			error = i386_set_ldt(td, &kargs.largs, NULL);
		}
		break;
	case I386_GET_IOPERM:
		error = i386_get_ioperm(td, &kargs.iargs);
		if (error == 0)
			error = copyout(&kargs.iargs, uap->parms,
			    sizeof(struct i386_ioperm_args));
		break;
	case I386_SET_IOPERM:
		error = i386_set_ioperm(td, &kargs.iargs);
		break;
	case I386_VM86:
		error = vm86_sysarch(td, uap->parms);
		break;
	case I386_GET_FSBASE:
		/* Reassemble the linear base out of the split descriptor. */
		sdp = &td->td_pcb->pcb_fsd;
		base = sdp->sd_hibase << 24 | sdp->sd_lobase;
		error = copyout(&base, uap->parms, sizeof(base));
		break;
	case I386_SET_FSBASE:
		error = copyin(uap->parms, &base, sizeof(base));
		if (error == 0) {
			/*
			 * Construct a descriptor and store it in the pcb for
			 * the next context switch.  Also store it in the gdt
			 * so that the load of tf_fs into %fs will activate it
			 * at return to userland.
			 */
			fill_based_sd(&sd, base);
			critical_enter();
			td->td_pcb->pcb_fsd = sd;
#ifdef XEN
			HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]),
			    *(uint64_t *)&sd);
#else
			PCPU_GET(fsgs_gdt)[0] = sd;
#endif
			critical_exit();
			td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
		}
		break;
	case I386_GET_GSBASE:
		sdp = &td->td_pcb->pcb_gsd;
		base = sdp->sd_hibase << 24 | sdp->sd_lobase;
		error = copyout(&base, uap->parms, sizeof(base));
		break;
	case I386_SET_GSBASE:
		error = copyin(uap->parms, &base, sizeof(base));
		if (error == 0) {
			/*
			 * Construct a descriptor and store it in the pcb for
			 * the next context switch.  Also store it in the gdt
			 * because we have to do a load_gs() right now.
			 */
			fill_based_sd(&sd, base);
			critical_enter();
			td->td_pcb->pcb_gsd = sd;
#ifdef XEN
			HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]),
			    *(uint64_t *)&sd);
#else
			PCPU_GET(fsgs_gdt)[1] = sd;
#endif
			critical_exit();
			load_gs(GSEL(GUGS_SEL, SEL_UPL));
		}
		break;
	case I386_GET_XFPUSTATE:
		if (kargs.xfpu.len > cpu_max_ext_state_size -
		    sizeof(union savefpu))
			return (EINVAL);
		npxgetregs(td);
		/* Extended state lives immediately after the user save area. */
		error = copyout((char *)(get_pcb_user_save_td(td) + 1),
		    kargs.xfpu.addr, kargs.xfpu.len);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * Allocate and install a pcb extension for td: a private TSS plus an I/O
 * permission bitmap (initially all ones = all ports denied) and the vm86
 * interrupt redirection map.  The current CPU is switched to the new TSS
 * before returning.  td must be curthread and must not already have an
 * extension.  Always returns 0.
 */
int
i386_extend_pcb(struct thread *td)
{
	int i, offset;
	u_long *addr;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		ctob(IOPAGES + 1) - 1,	/* length */
		SDT_SYS386TSS,		/* segment type */
		0,			/* priority level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 32 size */
		0			/* granularity */
	};

	ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1),
	    M_WAITOK | M_ZERO);
	/* -16 is so we can convert a trapframe into vm86trapframe inplace */
	ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) -
	    sizeof(struct pcb) - 16;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	/*
	 * The last byte of the i/o map must be followed by an 0xff byte.
	 * We arbitrarily allocate 16 bytes here, to keep the starting
	 * address on a doubleword boundary.
	 */
	offset = PAGE_SIZE - 16;
	ext->ext_tss.tss_ioopt =
	    (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
	ext->ext_iomap = (caddr_t)ext + offset;
	ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;

	/* Fill intmap + iomap + the trailing pad with all-ones bits. */
	addr = (u_long *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
		*addr++ = ~0;

	ssd.ssd_base = (unsigned)&ext->ext_tss;
	ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
	ssdtosd(&ssd, &ext->ext_tssd);

	KASSERT(td == curthread, ("giving TSS to !curthread"));
	KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!"));

	/* Switch to the new TSS. */
	critical_enter();
	td->td_pcb->pcb_ext = ext;
	PCPU_SET(private_tss, 1);
	*PCPU_GET(tss_gdt) = ext->ext_tssd;
	ltr(GSEL(GPROC0_SEL, SEL_KPL));
	critical_exit();

	return 0;
}

/*
 * I386_SET_IOPERM: grant (enable != 0) or revoke access to the I/O port
 * range [start, start+length) by clearing/setting bits in the thread's
 * TSS I/O permission bitmap (a clear bit permits access).  Requires
 * PRIV_IO and securelevel <= 0.  Allocates the pcb extension on first
 * use.
 */
int
i386_set_ioperm(td, uap)
	struct thread *td;
	struct i386_ioperm_args *uap;
{
	int i, error;
	char *iomap;

	if ((error = priv_check(td, PRIV_IO)) != 0)
		return (error);
	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
		return (error);
	/*
	 * XXX
	 * While this is restricted to root, we should probably figure out
	 * whether any other driver is using this i/o address, as so not to
	 * cause confusion.  This probably requires a global 'usage registry'.
	 */

	if (td->td_pcb->pcb_ext == 0)
		if ((error = i386_extend_pcb(td)) != 0)
			return (error);
	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

	/*
	 * NOTE(review): this range check runs after the pcb was already
	 * extended above, and assumes uap->start + uap->length does not
	 * wrap around -- the field types are declared elsewhere; confirm
	 * they cannot overflow here.
	 */
	if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	for (i = uap->start; i < uap->start + uap->length; i++) {
		if (uap->enable)
			iomap[i >> 3] &= ~(1 << (i & 7));
		else
			iomap[i >> 3] |= (1 << (i & 7));
	}
	return (error);
}

/*
 * I386_GET_IOPERM: starting at uap->start, report in uap->enable whether
 * that port is accessible and in uap->length how many consecutive ports
 * share the same state.  A thread with no pcb extension has no private
 * bitmap; report length 0.  Always returns 0 unless start is out of
 * range.
 */
int
i386_get_ioperm(td, uap)
	struct thread *td;
	struct i386_ioperm_args *uap;
{
	int i, state;
	char *iomap;

	if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	if (td->td_pcb->pcb_ext == 0) {
		uap->length = 0;
		goto done;
	}

	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

	i = uap->start;
	state = (iomap[i >> 3] >> (i & 7)) & 1;
	uap->enable = !state;	/* bit clear (0) means access enabled */
	uap->length = 1;

	/* Extend the run while successive ports have the same bit state. */
	for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
			break;
		uap->length++;
	}

done:
	return (0);
}

/*
 * Update the GDT entry pointing to the LDT to point to the LDT of the
 * current process. Manage dt_lock holding/unholding autonomously.
 */
void
set_user_ldt(struct mdproc *mdp)
{
	struct proc_ldt *pldt;
	int dtlocked;

	/* Take dt_lock only if the caller does not already hold it. */
	dtlocked = 0;
	if (!mtx_owned(&dt_lock)) {
		mtx_lock_spin(&dt_lock);
		dtlocked = 1;
	}

	pldt = mdp->md_ldt;
#ifdef XEN
	i386_reset_ldt(pldt);
	PCPU_SET(currentldt, (int)pldt);
#else
#ifdef SMP
	/* Each CPU has its own GDT slice; patch this CPU's LDT slot. */
	gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd;
#else
	gdt[GUSERLDT_SEL].sd = pldt->ldt_sd;
#endif
	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
	PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
#endif /* XEN */
	if (dtlocked)
		mtx_unlock_spin(&dt_lock);
}

#ifdef SMP
/*
 * smp_rendezvous() callback: reload the LDT on this CPU, but only if the
 * CPU is currently running a thread of the process whose vmspace was
 * passed in (other CPUs' curthreads are unaffected by the change).
 */
static void
set_user_ldt_rv(struct vmspace *vmsp)
{
	struct thread *td;

	td = curthread;
	if (vmsp != td->td_proc->p_vmspace)
		return;

	set_user_ldt(&td->td_proc->p_md);
}
#endif

#ifdef XEN

/*
 * dt_lock must be held. Returns with dt_lock held.
 *
 * Allocate a new proc_ldt sized to at least len descriptors (rounded up
 * by NEW_MAX_LD), seeded from the process's current LDT if it has one,
 * otherwise from the system default ldt.  dt_lock is dropped around the
 * sleeping allocations and reacquired before return, so the caller must
 * re-check md_ldt afterwards (a race may have installed another LDT).
 * Xen requires the LDT pages be mapped read-only.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
	struct proc_ldt *pldt, *new_ldt;

	mtx_assert(&dt_lock, MA_OWNED);
	mtx_unlock_spin(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt),
		M_SUBPROC, M_WAITOK);

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
	    round_page(len * sizeof(union descriptor)), M_WAITOK);
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	mtx_lock_spin(&dt_lock);
	if ((pldt = mdp->md_ldt)) {
		if (len > pldt->ldt_len)
			len = pldt->ldt_len;
		bcopy(pldt->ldt_base, new_ldt->ldt_base,
		    len * sizeof(union descriptor));
	} else {
		bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
	}
	mtx_unlock_spin(&dt_lock);  /* XXX kill once pmap locking fixed. */
	pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
	    new_ldt->ldt_len*sizeof(union descriptor));
	mtx_lock_spin(&dt_lock);  /* XXX kill once pmap locking fixed. */
	return (new_ldt);
}
#else
/*
 * dt_lock must be held. Returns with dt_lock held.
 *
 * Native variant of the above: additionally precomputes the GDT segment
 * descriptor (ldt_sd) for the new table via the GUSERLDT_SEL template.
 * dt_lock is dropped around the sleeping allocations, so callers must
 * re-check md_ldt on return.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
	struct proc_ldt *pldt, *new_ldt;

	mtx_assert(&dt_lock, MA_OWNED);
	mtx_unlock_spin(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt),
		M_SUBPROC, M_WAITOK);

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
	    len * sizeof(union descriptor), M_WAITOK);
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	mtx_lock_spin(&dt_lock);
	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
	gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
	ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);

	if ((pldt = mdp->md_ldt) != NULL) {
		if (len > pldt->ldt_len)
			len = pldt->ldt_len;
		bcopy(pldt->ldt_base, new_ldt->ldt_base,
		    len * sizeof(union descriptor));
	} else
		bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));

	return (new_ldt);
}
#endif /* !XEN */

/*
 * Must be called with dt_lock held.  Returns with dt_lock unheld.
 */
void
user_ldt_free(struct thread *td)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt;

	mtx_assert(&dt_lock, MA_OWNED);
	if ((pldt = mdp->md_ldt) == NULL) {
		mtx_unlock_spin(&dt_lock);
		return;
	}

	/*
	 * If we are tearing down the LDT of the running thread, switch the
	 * CPU back to the default LDT before dropping the reference.
	 */
	if (td == curthread) {
#ifdef XEN
		i386_reset_ldt(&default_proc_ldt);
		PCPU_SET(currentldt, (int)&default_proc_ldt);
#else
		lldt(_default_ldt);
		PCPU_SET(currentldt, _default_ldt);
#endif
	}

	mdp->md_ldt = NULL;
	/* user_ldt_deref() drops dt_lock for us. */
	user_ldt_deref(pldt);
}

/*
 * Drop one reference on pldt; free its backing storage when the count
 * reaches zero.  Must be called with dt_lock held; returns with dt_lock
 * unheld (the lock is dropped before the sleeping free).
 */
void
user_ldt_deref(struct proc_ldt *pldt)
{

	mtx_assert(&dt_lock, MA_OWNED);
	if (--pldt->ldt_refcnt == 0) {
		mtx_unlock_spin(&dt_lock);
		kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base,
			pldt->ldt_len * sizeof(union descriptor));
		free(pldt, M_SUBPROC);
	} else
		mtx_unlock_spin(&dt_lock);
}

/*
 * Note for the authors of compat layers (linux, etc): copyout() in
 * the function below is not a problem since it presents data in
 * arch-specific format (i.e. i386-specific in this case), not in
 * the OS-specific one.
 */
/*
 * I386_GET_LDT: copy up to uap->num descriptors starting at slot
 * uap->start out to uap->descs.  Reads the process LDT if one exists,
 * otherwise the system default ldt[].  On success td_retval[0] holds the
 * number of descriptors copied.
 */
int
i386_get_ldt(td, uap)
	struct thread *td;
	struct i386_ldt_args *uap;
{
	int error = 0;
	struct proc_ldt *pldt;
	int nldt, num;
	union descriptor *lp;

#ifdef DEBUG
	printf("i386_get_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	mtx_lock_spin(&dt_lock);
	if ((pldt = td->td_proc->p_md.md_ldt) != NULL) {
		nldt = pldt->ldt_len;
		/*
		 * NOTE(review): lp is computed from uap->start before the
		 * range check below; the out-of-range pointer is never
		 * dereferenced (EINVAL is returned first), and the copyout
		 * runs after dt_lock is dropped -- looks like it relies on
		 * the LDT base staying valid for the process lifetime;
		 * confirm.
		 */
		lp = &((union descriptor *)(pldt->ldt_base))[uap->start];
		mtx_unlock_spin(&dt_lock);
		num = min(uap->num, nldt);
	} else {
		mtx_unlock_spin(&dt_lock);
		nldt = sizeof(ldt)/sizeof(ldt[0]);
		num = min(uap->num, nldt);
		lp = &ldt[uap->start];
	}

	if ((uap->start > (unsigned int)nldt) ||
	    ((unsigned int)num > (unsigned int)nldt) ||
	    ((unsigned int)(uap->start + num) > (unsigned int)nldt))
		return(EINVAL);

	error = copyout(lp, uap->descs, num * sizeof(union descriptor));
	if (!error)
		td->td_retval[0] = num;

	return(error);
}

/*
 * I386_SET_LDT: install uap->num descriptors (already copied into the
 * kernel as descs) at slot uap->start.  descs == NULL means free (zero)
 * the given range instead; start == LDT_AUTO_ALLOC with num == 1 means
 * pick the first free slot at or above NLDT.  Each descriptor is vetted
 * so userland can only create non-present or ring-3 memory segments.
 * On success td_retval[0] is the slot actually used.
 */
int
i386_set_ldt(td, uap, descs)
	struct thread *td;
	struct i386_ldt_args *uap;
	union descriptor *descs;
{
	int error = 0, i;
	int largest_ld;
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt;
	union descriptor *dp;

#ifdef DEBUG
	printf("i386_set_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	if (descs == NULL) {
		/* Free descriptors */
		if (uap->start == 0 && uap->num == 0) {
			/*
			 * Treat this as a special case, so userland needn't
			 * know magic number NLDT.
			 */
			uap->start = NLDT;
			uap->num = MAX_LD - NLDT;
		}
		if (uap->num == 0)
			return (EINVAL);
		mtx_lock_spin(&dt_lock);
		if ((pldt = mdp->md_ldt) == NULL ||
		    uap->start >= pldt->ldt_len) {
			/* Nothing allocated in that range; nothing to free. */
			mtx_unlock_spin(&dt_lock);
			return (0);
		}
		largest_ld = uap->start + uap->num;
		if (largest_ld > pldt->ldt_len)
			largest_ld = pldt->ldt_len;
		i = largest_ld - uap->start;
		/* Zeroed descriptors read back as SDT_SYSNULL (free). */
		bzero(&((union descriptor *)(pldt->ldt_base))[uap->start],
		    sizeof(union descriptor) * i);
		mtx_unlock_spin(&dt_lock);
		return (0);
	}

	if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
		/* verify range of descriptors to modify */
		largest_ld = uap->start + uap->num;
		if (uap->start >= MAX_LD || largest_ld > MAX_LD) {
			return (EINVAL);
		}
	}

	/* Check descriptors for access violations */
	for (i = 0; i < uap->num; i++) {
		dp = &descs[i];

		switch (dp->sd.sd_type) {
		case SDT_SYSNULL:	/* system null */
			dp->sd.sd_p = 0;
			break;
		case SDT_SYS286TSS: /* system 286 TSS available */
		case SDT_SYSLDT:    /* system local descriptor table */
		case SDT_SYS286BSY: /* system 286 TSS busy */
		case SDT_SYSTASKGT: /* system task gate */
		case SDT_SYS286IGT: /* system 286 interrupt gate */
		case SDT_SYS286TGT: /* system 286 trap gate */
		case SDT_SYSNULL2:  /* undefined by Intel */
		case SDT_SYS386TSS: /* system 386 TSS available */
		case SDT_SYSNULL3:  /* undefined by Intel */
		case SDT_SYS386BSY: /* system 386 TSS busy */
		case SDT_SYSNULL4:  /* undefined by Intel */
		case SDT_SYS386IGT: /* system 386 interrupt gate */
		case SDT_SYS386TGT: /* system 386 trap gate */
		case SDT_SYS286CGT: /* system 286 call gate */
		case SDT_SYS386CGT: /* system 386 call gate */
			/* I can't think of any reason to allow a user proc
			 * to create a segment of these types.  They are
			 * for OS use only.
			 */
			return (EACCES);
			/*NOTREACHED*/

		/* memory segment types */
		case SDT_MEMEC:   /* memory execute only conforming */
		case SDT_MEMEAC:  /* memory execute only accessed conforming */
		case SDT_MEMERC:  /* memory execute read conforming */
		case SDT_MEMERAC: /* memory execute read accessed conforming */
			/* Must be "present" if executable and conforming. */
			if (dp->sd.sd_p == 0)
				return (EACCES);
			break;
		case SDT_MEMRO:   /* memory read only */
		case SDT_MEMROA:  /* memory read only accessed */
		case SDT_MEMRW:   /* memory read write */
		case SDT_MEMRWA:  /* memory read write accessed */
		case SDT_MEMROD:  /* memory read only expand dwn limit */
		case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
		case SDT_MEMRWD:  /* memory read write expand dwn limit */
		case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */
		case SDT_MEME:    /* memory execute only */
		case SDT_MEMEA:   /* memory execute only accessed */
		case SDT_MEMER:   /* memory execute read */
		case SDT_MEMERA:  /* memory execute read accessed */
			break;
		default:
			return(EINVAL);
			/*NOTREACHED*/
		}

		/* Only user (ring-3) descriptors may be present. */
		if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL))
			return (EACCES);
	}

	if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
		/* Allocate a free slot */
		mtx_lock_spin(&dt_lock);
		if ((pldt = mdp->md_ldt) == NULL) {
			if ((error = i386_ldt_grow(td, NLDT + 1))) {
				mtx_unlock_spin(&dt_lock);
				return (error);
			}
			pldt = mdp->md_ldt;
		}
again:
		/*
		 * start scanning a bit up to leave room for NVidia and
		 * Wine, which still user the "Blat" method of allocation.
		 */
		dp = &((union descriptor *)(pldt->ldt_base))[NLDT];
		for (i = NLDT; i < pldt->ldt_len; ++i) {
			if (dp->sd.sd_type == SDT_SYSNULL)
				break;
			dp++;
		}
		if (i >= pldt->ldt_len) {
			/* Table full: grow by one slot and rescan. */
			if ((error = i386_ldt_grow(td, pldt->ldt_len+1))) {
				mtx_unlock_spin(&dt_lock);
				return (error);
			}
			goto again;
		}
		uap->start = i;
		error = i386_set_ldt_data(td, i, 1, descs);
		mtx_unlock_spin(&dt_lock);
	} else {
		largest_ld = uap->start + uap->num;
		mtx_lock_spin(&dt_lock);
		if (!(error = i386_ldt_grow(td, largest_ld))) {
			error = i386_set_ldt_data(td, uap->start, uap->num,
			    descs);
		}
		mtx_unlock_spin(&dt_lock);
	}
	if (error == 0)
		td->td_retval[0] = uap->start;
	return (error);
}
#ifdef XEN
/*
 * Write num descriptors into the process LDT starting at slot start.
 * Xen owns the (read-only) LDT pages, so each slot is updated through
 * a xen_update_descriptor() hypercall.  Caller holds dt_lock.
 */
static int
i386_set_ldt_data(struct thread *td, int start, int num,
	union descriptor *descs)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt = mdp->md_ldt;

	mtx_assert(&dt_lock, MA_OWNED);

	while (num) {
		xen_update_descriptor(
		    &((union descriptor *)(pldt->ldt_base))[start],
		    descs);
		num--;
		start++;
		descs++;
	}
	return (0);
}
#else
/*
 * Write num descriptors into the process LDT starting at slot start.
 * Native kernels can write the table memory directly.  Caller holds
 * dt_lock.
 */
static int
i386_set_ldt_data(struct thread *td, int start, int num,
	union descriptor *descs)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt = mdp->md_ldt;

	mtx_assert(&dt_lock, MA_OWNED);

	/* Fill in range */
	bcopy(descs,
	    &((union descriptor *)(pldt->ldt_base))[start],
	    num * sizeof(union descriptor));
	return (0);
}
#endif /* !XEN */

/*
 * Ensure the process LDT holds at least len descriptors (capped at
 * MAX_LD, floored at NLDT + 1), allocating or growing it as needed and
 * copying the old contents across.  Called and returns with dt_lock
 * held, but the lock is dropped internally around allocation, the SMP
 * rendezvous, and the free of the old table -- callers must not rely
 * on state examined before the call.
 */
static int
i386_ldt_grow(struct thread *td, int len)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *new_ldt, *pldt;
	caddr_t old_ldt_base = NULL_LDT_BASE;
	int old_ldt_len = 0;

	mtx_assert(&dt_lock, MA_OWNED);

	if (len > MAX_LD)
		return (ENOMEM);
	if (len < NLDT + 1)
		len = NLDT + 1;

	/* Allocate a user ldt. */
	if ((pldt = mdp->md_ldt) == NULL || len > pldt->ldt_len) {
		/* user_ldt_alloc() drops and retakes dt_lock internally. */
		new_ldt = user_ldt_alloc(mdp, len);
		if (new_ldt == NULL)
			return (ENOMEM);
		pldt = mdp->md_ldt;

		if (pldt != NULL) {
			if (new_ldt->ldt_len <= pldt->ldt_len) {
				/*
				 * We just lost the race for allocation, so
				 * free the new object and return.
				 */
				mtx_unlock_spin(&dt_lock);
				kmem_free(kernel_arena,
				   (vm_offset_t)new_ldt->ldt_base,
				   new_ldt->ldt_len * sizeof(union descriptor));
				free(new_ldt, M_SUBPROC);
				mtx_lock_spin(&dt_lock);
				return (0);
			}

			/*
			 * We have to substitute the current LDT entry for
			 * curproc with the new one since its size grew.
			 */
			old_ldt_base = pldt->ldt_base;
			old_ldt_len = pldt->ldt_len;
			pldt->ldt_sd = new_ldt->ldt_sd;
			pldt->ldt_base = new_ldt->ldt_base;
			pldt->ldt_len = new_ldt->ldt_len;
		} else
			mdp->md_ldt = pldt = new_ldt;
#ifdef SMP
		/*
		 * Signal other cpus to reload ldt.  We need to unlock dt_lock
		 * here because other CPU will contest on it since their
		 * curthreads won't hold the lock and will block when trying
		 * to acquire it.
		 */
		mtx_unlock_spin(&dt_lock);
		smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv,
		    NULL, td->td_proc->p_vmspace);
#else
		set_user_ldt(&td->td_proc->p_md);
		mtx_unlock_spin(&dt_lock);
#endif
		if (old_ldt_base != NULL_LDT_BASE) {
			/*
			 * The old table's contents were moved into the
			 * existing proc_ldt above; only the old backing
			 * store and the now-unused new_ldt wrapper are
			 * released here.
			 */
			kmem_free(kernel_arena, (vm_offset_t)old_ldt_base,
			    old_ldt_len * sizeof(union descriptor));
			free(new_ldt, M_SUBPROC);
		}
		mtx_lock_spin(&dt_lock);
	}
	return (0);
}