/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/i386/i386/sys_machdep.c 280258 2015-03-19 13:37:36Z rwatson $");

#include "opt_capsicum.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysproto.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>

#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/pcb_ext.h>
#include <machine/proc.h>
#include <machine/sysarch.h>

#include <security/audit/audit.h>

#ifdef XEN
#include <machine/xen/xenfunc.h>

void i386_reset_ldt(struct proc_ldt *pldt);

void
i386_reset_ldt(struct proc_ldt *pldt)
{
	xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len);
}
#else
#define i386_reset_ldt(x)
#endif

#include <vm/vm_kern.h>		/* for kernel_arena */

#define MAX_LD 8192
#define LD_PER_PAGE 512
#define NEW_MAX_LD(num)  (((num) + LD_PER_PAGE) & ~(LD_PER_PAGE - 1))
#define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
#define	NULL_LDT_BASE	((caddr_t)NULL)
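/*
 * NEW_MAX_LD() rounds a descriptor count up to a multiple of LD_PER_PAGE;
 * e.g. NEW_MAX_LD(1) == 512, while an exact multiple is bumped to the
 * next one: NEW_MAX_LD(512) == 1024.
 */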

#ifdef SMP
static void set_user_ldt_rv(struct vmspace *vmsp);
#endif
static int i386_set_ldt_data(struct thread *, int start, int num,
	union descriptor *descs);
static int i386_ldt_grow(struct thread *td, int len);

#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
	int op;
	char *parms;
};
#endif

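/*
 * Example userland usage (an illustrative sketch, not part of this file;
 * it assumes only the sysarch(2) entry point and the I386_GET_FSBASE
 * operation handled below, which copies the 32-bit %fs base out through
 * the parms pointer):
 *
 *	uint32_t base;
 *
 *	if (sysarch(I386_GET_FSBASE, &base) == -1)
 *		err(1, "sysarch(I386_GET_FSBASE)");
 *	printf("%%fs base: 0x%08x\n", base);
 */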
int
sysarch(struct thread *td, struct sysarch_args *uap)
{
	int error;
	union descriptor *lp;
	union {
		struct i386_ldt_args largs;
		struct i386_ioperm_args iargs;
		struct i386_get_xfpustate xfpu;
	} kargs;
	uint32_t base;
	struct segment_descriptor sd, *sdp;

	AUDIT_ARG_CMD(uap->op);

#ifdef CAPABILITY_MODE
	/*
	 * When adding new operations, add a new case statement here to
	 * explicitly indicate whether or not the operation is safe to
	 * perform in capability mode.
	 */
	if (IN_CAPABILITY_MODE(td)) {
		switch (uap->op) {
		case I386_GET_LDT:
		case I386_SET_LDT:
		case I386_GET_IOPERM:
		case I386_GET_FSBASE:
		case I386_SET_FSBASE:
		case I386_GET_GSBASE:
		case I386_SET_GSBASE:
		case I386_GET_XFPUSTATE:
			break;

		case I386_SET_IOPERM:
		default:
#ifdef KTRACE
			if (KTRPOINT(td, KTR_CAPFAIL))
				ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL);
#endif
			return (ECAPMODE);
		}
	}
#endif

	switch (uap->op) {
	case I386_GET_IOPERM:
	case I386_SET_IOPERM:
		if ((error = copyin(uap->parms, &kargs.iargs,
		    sizeof(struct i386_ioperm_args))) != 0)
			return (error);
		break;
	case I386_GET_LDT:
	case I386_SET_LDT:
		if ((error = copyin(uap->parms, &kargs.largs,
		    sizeof(struct i386_ldt_args))) != 0)
			return (error);
		if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0)
			return (EINVAL);
		break;
	case I386_GET_XFPUSTATE:
		if ((error = copyin(uap->parms, &kargs.xfpu,
		    sizeof(struct i386_get_xfpustate))) != 0)
			return (error);
		break;
	default:
		break;
	}

	switch (uap->op) {
	case I386_GET_LDT:
		error = i386_get_ldt(td, &kargs.largs);
		break;
	case I386_SET_LDT:
		if (kargs.largs.descs != NULL) {
			lp = (union descriptor *)malloc(
			    kargs.largs.num * sizeof(union descriptor),
			    M_TEMP, M_WAITOK);
			error = copyin(kargs.largs.descs, lp,
			    kargs.largs.num * sizeof(union descriptor));
			if (error == 0)
				error = i386_set_ldt(td, &kargs.largs, lp);
			free(lp, M_TEMP);
		} else {
			error = i386_set_ldt(td, &kargs.largs, NULL);
		}
		break;
	case I386_GET_IOPERM:
		error = i386_get_ioperm(td, &kargs.iargs);
		if (error == 0)
			error = copyout(&kargs.iargs, uap->parms,
			    sizeof(struct i386_ioperm_args));
		break;
	case I386_SET_IOPERM:
		error = i386_set_ioperm(td, &kargs.iargs);
		break;
	case I386_VM86:
		error = vm86_sysarch(td, uap->parms);
		break;
	case I386_GET_FSBASE:
		sdp = &td->td_pcb->pcb_fsd;
		base = sdp->sd_hibase << 24 | sdp->sd_lobase;
		error = copyout(&base, uap->parms, sizeof(base));
		break;
	case I386_SET_FSBASE:
		error = copyin(uap->parms, &base, sizeof(base));
		if (!error) {
			/*
			 * Construct a descriptor and store it in the pcb for
			 * the next context switch.  Also store it in the gdt
			 * so that the load of tf_fs into %fs will activate it
			 * at return to userland.
			 */
			sd.sd_lobase = base & 0xffffff;
			sd.sd_hibase = (base >> 24) & 0xff;
#ifdef XEN
			/* need to do nosegneg like Linux */
			sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
#else
			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
#endif
			sd.sd_hilimit = 0xf;
			sd.sd_type  = SDT_MEMRWA;
			sd.sd_dpl   = SEL_UPL;
			sd.sd_p     = 1;
			sd.sd_xx    = 0;
			sd.sd_def32 = 1;
			sd.sd_gran  = 1;
			critical_enter();
			td->td_pcb->pcb_fsd = sd;
#ifdef XEN
			HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]),
			    *(uint64_t *)&sd);
#else
			PCPU_GET(fsgs_gdt)[0] = sd;
#endif
			critical_exit();
			td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
		}
		break;
	case I386_GET_GSBASE:
		sdp = &td->td_pcb->pcb_gsd;
		base = sdp->sd_hibase << 24 | sdp->sd_lobase;
		error = copyout(&base, uap->parms, sizeof(base));
		break;
	case I386_SET_GSBASE:
		error = copyin(uap->parms, &base, sizeof(base));
		if (!error) {
			/*
			 * Construct a descriptor and store it in the pcb for
			 * the next context switch.  Also store it in the gdt
			 * because we have to do a load_gs() right now.
			 */
			sd.sd_lobase = base & 0xffffff;
			sd.sd_hibase = (base >> 24) & 0xff;

#ifdef XEN
			/* need to do nosegneg like Linux */
			sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff;
#else
			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
#endif
			sd.sd_hilimit = 0xf;
			sd.sd_type  = SDT_MEMRWA;
			sd.sd_dpl   = SEL_UPL;
			sd.sd_p     = 1;
			sd.sd_xx    = 0;
			sd.sd_def32 = 1;
			sd.sd_gran  = 1;
			critical_enter();
			td->td_pcb->pcb_gsd = sd;
#ifdef XEN
			HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]),
			    *(uint64_t *)&sd);
#else
			PCPU_GET(fsgs_gdt)[1] = sd;
#endif
			critical_exit();
			load_gs(GSEL(GUGS_SEL, SEL_UPL));
		}
		break;
	case I386_GET_XFPUSTATE:
		if (kargs.xfpu.len > cpu_max_ext_state_size -
		    sizeof(union savefpu))
			return (EINVAL);
		npxgetregs(td);
		error = copyout((char *)(get_pcb_user_save_td(td) + 1),
		    kargs.xfpu.addr, kargs.xfpu.len);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

int
i386_extend_pcb(struct thread *td)
{
	int i, offset;
	u_long *addr;
	struct pcb_ext *ext;
	struct soft_segment_descriptor ssd = {
		0,			/* segment base address (overwritten) */
		ctob(IOPAGES + 1) - 1,	/* length */
		SDT_SYS386TSS,		/* segment type */
		0,			/* privilege level */
		1,			/* descriptor present */
		0, 0,
		0,			/* default 32 size */
		0			/* granularity */
	};

	ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1),
	    M_WAITOK | M_ZERO);
	/* -16 is so we can convert a trapframe into vm86trapframe in place */
	ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) -
	    sizeof(struct pcb) - 16;
	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	/*
	 * The last byte of the i/o map must be followed by a 0xff byte.
	 * We arbitrarily allocate 16 bytes here, to keep the starting
	 * address on a doubleword boundary.
	 */
	offset = PAGE_SIZE - 16;
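	/*
	 * The I/O map base (the bitmap's offset from the start of the TSS)
	 * lives in the upper 16 bits of tss_ioopt, hence the shift below.
	 */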
	ext->ext_tss.tss_ioopt =
	    (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
	ext->ext_iomap = (caddr_t)ext + offset;
	ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;

	addr = (u_long *)ext->ext_vm86.vm86_intmap;
	for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
		*addr++ = ~0;

	ssd.ssd_base = (unsigned)&ext->ext_tss;
	ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
	ssdtosd(&ssd, &ext->ext_tssd);

	KASSERT(td == curthread, ("giving TSS to !curthread"));
	KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!"));

	/* Switch to the new TSS. */
	critical_enter();
	td->td_pcb->pcb_ext = ext;
	PCPU_SET(private_tss, 1);
	*PCPU_GET(tss_gdt) = ext->ext_tssd;
	ltr(GSEL(GPROC0_SEL, SEL_KPL));
	critical_exit();

	return (0);
}

int
i386_set_ioperm(struct thread *td, struct i386_ioperm_args *uap)
{
	int i, error;
	char *iomap;

	if ((error = priv_check(td, PRIV_IO)) != 0)
		return (error);
	if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
		return (error);
	/*
	 * XXX
	 * While this is restricted to root, we should probably figure out
	 * whether any other driver is using this i/o address, so as not to
	 * cause confusion.  This probably requires a global 'usage registry'.
	 */

	if (td->td_pcb->pcb_ext == 0)
		if ((error = i386_extend_pcb(td)) != 0)
			return (error);
	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

	if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

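	/* In the TSS I/O permission bitmap, a clear bit grants port access. */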
	for (i = uap->start; i < uap->start + uap->length; i++) {
		if (uap->enable)
			iomap[i >> 3] &= ~(1 << (i & 7));
		else
			iomap[i >> 3] |= (1 << (i & 7));
	}
	return (error);
}
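/*
 * Userland sketch (illustrative; FreeBSD also ships an i386_set_ioperm(3)
 * libc wrapper around this operation).  Granting access to one port:
 *
 *	struct i386_ioperm_args args;
 *
 *	args.start = 0x80;	(first port number)
 *	args.length = 1;	(number of ports)
 *	args.enable = 1;	(nonzero grants access, zero revokes it)
 *	if (sysarch(I386_SET_IOPERM, &args) == -1)
 *		err(1, "I386_SET_IOPERM");
 */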

int
i386_get_ioperm(struct thread *td, struct i386_ioperm_args *uap)
{
	int i, state;
	char *iomap;

	if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
		return (EINVAL);

	if (td->td_pcb->pcb_ext == 0) {
		uap->length = 0;
		goto done;
	}

	iomap = (char *)td->td_pcb->pcb_ext->ext_iomap;

	i = uap->start;
	state = (iomap[i >> 3] >> (i & 7)) & 1;
	uap->enable = !state;
	uap->length = 1;

	for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
			break;
		uap->length++;
	}

done:
	return (0);
}

/*
 * Update the GDT entry pointing to the LDT to point to the LDT of the
 * current process.  The dt_lock is acquired here if this thread does not
 * already hold it, and released again before returning in that case.
 */
void
set_user_ldt(struct mdproc *mdp)
{
	struct proc_ldt *pldt;
	int dtlocked;

	dtlocked = 0;
	if (!mtx_owned(&dt_lock)) {
		mtx_lock_spin(&dt_lock);
		dtlocked = 1;
	}

	pldt = mdp->md_ldt;
#ifdef XEN
	i386_reset_ldt(pldt);
	PCPU_SET(currentldt, (int)pldt);
#else
#ifdef SMP
	gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd;
#else
	gdt[GUSERLDT_SEL].sd = pldt->ldt_sd;
#endif
	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
	PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
#endif /* XEN */
	if (dtlocked)
		mtx_unlock_spin(&dt_lock);
}

#ifdef SMP
static void
set_user_ldt_rv(struct vmspace *vmsp)
{
	struct thread *td;

	td = curthread;
	if (vmsp != td->td_proc->p_vmspace)
		return;

	set_user_ldt(&td->td_proc->p_md);
}
#endif

#ifdef XEN

/*
 * dt_lock must be held. Returns with dt_lock held.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
	struct proc_ldt *pldt, *new_ldt;

	mtx_assert(&dt_lock, MA_OWNED);
	mtx_unlock_spin(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt),
		M_SUBPROC, M_WAITOK);

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
	    round_page(len * sizeof(union descriptor)), M_WAITOK);
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	mtx_lock_spin(&dt_lock);
	if ((pldt = mdp->md_ldt) != NULL) {
		if (len > pldt->ldt_len)
			len = pldt->ldt_len;
		bcopy(pldt->ldt_base, new_ldt->ldt_base,
		    len * sizeof(union descriptor));
	} else {
		bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE);
	}
	mtx_unlock_spin(&dt_lock);  /* XXX kill once pmap locking fixed. */
	pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base,
	    new_ldt->ldt_len * sizeof(union descriptor));
	mtx_lock_spin(&dt_lock);  /* XXX kill once pmap locking fixed. */
	return (new_ldt);
}
#else
/*
 * dt_lock must be held. Returns with dt_lock held.
 */
struct proc_ldt *
user_ldt_alloc(struct mdproc *mdp, int len)
{
	struct proc_ldt *pldt, *new_ldt;

	mtx_assert(&dt_lock, MA_OWNED);
	mtx_unlock_spin(&dt_lock);
	new_ldt = malloc(sizeof(struct proc_ldt),
		M_SUBPROC, M_WAITOK);

	new_ldt->ldt_len = len = NEW_MAX_LD(len);
	new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
	    len * sizeof(union descriptor), M_WAITOK);
	new_ldt->ldt_refcnt = 1;
	new_ldt->ldt_active = 0;

	mtx_lock_spin(&dt_lock);
	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
	gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
	ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);

	if ((pldt = mdp->md_ldt) != NULL) {
		if (len > pldt->ldt_len)
			len = pldt->ldt_len;
		bcopy(pldt->ldt_base, new_ldt->ldt_base,
		    len * sizeof(union descriptor));
	} else
		bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));

	return (new_ldt);
}
#endif /* !XEN */

/*
 * Must be called with dt_lock held.  Returns with dt_lock released.
 */
void
user_ldt_free(struct thread *td)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt;

	mtx_assert(&dt_lock, MA_OWNED);
	if ((pldt = mdp->md_ldt) == NULL) {
		mtx_unlock_spin(&dt_lock);
		return;
	}

	if (td == curthread) {
#ifdef XEN
		i386_reset_ldt(&default_proc_ldt);
		PCPU_SET(currentldt, (int)&default_proc_ldt);
#else
		lldt(_default_ldt);
		PCPU_SET(currentldt, _default_ldt);
#endif
	}

	mdp->md_ldt = NULL;
	user_ldt_deref(pldt);
}

void
user_ldt_deref(struct proc_ldt *pldt)
{

	mtx_assert(&dt_lock, MA_OWNED);
	if (--pldt->ldt_refcnt == 0) {
		mtx_unlock_spin(&dt_lock);
		kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base,
			pldt->ldt_len * sizeof(union descriptor));
		free(pldt, M_SUBPROC);
	} else
		mtx_unlock_spin(&dt_lock);
}

/*
 * Note for the authors of compat layers (linux, etc): copyout() in
 * the function below is not a problem since it presents data in
 * arch-specific format (i.e. i386-specific in this case), not in
 * the OS-specific one.
 */
int
i386_get_ldt(struct thread *td, struct i386_ldt_args *uap)
{
	int error = 0;
	struct proc_ldt *pldt;
	int nldt, num;
	union descriptor *lp;

#ifdef	DEBUG
	printf("i386_get_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	mtx_lock_spin(&dt_lock);
	if ((pldt = td->td_proc->p_md.md_ldt) != NULL) {
		nldt = pldt->ldt_len;
		lp = &((union descriptor *)(pldt->ldt_base))[uap->start];
		mtx_unlock_spin(&dt_lock);
		num = min(uap->num, nldt);
	} else {
		mtx_unlock_spin(&dt_lock);
		nldt = sizeof(ldt) / sizeof(ldt[0]);
		num = min(uap->num, nldt);
		lp = &ldt[uap->start];
	}

	if ((uap->start > (unsigned int)nldt) ||
	    ((unsigned int)num > (unsigned int)nldt) ||
	    ((unsigned int)(uap->start + num) > (unsigned int)nldt))
		return (EINVAL);

	error = copyout(lp, uap->descs, num * sizeof(union descriptor));
	if (!error)
		td->td_retval[0] = num;

	return (error);
}
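/*
 * Userland sketch (illustrative; an i386_get_ldt(3) libc wrapper also
 * exists).  Reading the first few descriptors:
 *
 *	union descriptor dtab[4];
 *	int n;
 *
 *	n = i386_get_ldt(0, dtab, 4);	(n = number of descriptors read)
 *	if (n == -1)
 *		err(1, "i386_get_ldt");
 */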

int
i386_set_ldt(struct thread *td, struct i386_ldt_args *uap,
    union descriptor *descs)
{
	int error = 0, i;
	int largest_ld;
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt;
	union descriptor *dp;

#ifdef	DEBUG
	printf("i386_set_ldt: start=%d num=%d descs=%p\n",
	    uap->start, uap->num, (void *)uap->descs);
#endif

	if (descs == NULL) {
		/* Free descriptors */
		if (uap->start == 0 && uap->num == 0) {
			/*
			 * Treat this as a special case, so userland needn't
			 * know the magic number NLDT.
			 */
			uap->start = NLDT;
			uap->num = MAX_LD - NLDT;
		}
		if (uap->num == 0)
			return (EINVAL);
		mtx_lock_spin(&dt_lock);
		if ((pldt = mdp->md_ldt) == NULL ||
		    uap->start >= pldt->ldt_len) {
			mtx_unlock_spin(&dt_lock);
			return (0);
		}
		largest_ld = uap->start + uap->num;
		if (largest_ld > pldt->ldt_len)
			largest_ld = pldt->ldt_len;
		i = largest_ld - uap->start;
		bzero(&((union descriptor *)(pldt->ldt_base))[uap->start],
		    sizeof(union descriptor) * i);
		mtx_unlock_spin(&dt_lock);
		return (0);
	}

	if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
		/* verify range of descriptors to modify */
		largest_ld = uap->start + uap->num;
		if (uap->start >= MAX_LD || largest_ld > MAX_LD) {
			return (EINVAL);
		}
	}

	/* Check descriptors for access violations */
	for (i = 0; i < uap->num; i++) {
		dp = &descs[i];

		switch (dp->sd.sd_type) {
		case SDT_SYSNULL:	/* system null */
			dp->sd.sd_p = 0;
			break;
		case SDT_SYS286TSS: /* system 286 TSS available */
		case SDT_SYSLDT:    /* system local descriptor table */
		case SDT_SYS286BSY: /* system 286 TSS busy */
		case SDT_SYSTASKGT: /* system task gate */
		case SDT_SYS286IGT: /* system 286 interrupt gate */
		case SDT_SYS286TGT: /* system 286 trap gate */
		case SDT_SYSNULL2:  /* undefined by Intel */
		case SDT_SYS386TSS: /* system 386 TSS available */
		case SDT_SYSNULL3:  /* undefined by Intel */
		case SDT_SYS386BSY: /* system 386 TSS busy */
		case SDT_SYSNULL4:  /* undefined by Intel */
		case SDT_SYS386IGT: /* system 386 interrupt gate */
		case SDT_SYS386TGT: /* system 386 trap gate */
		case SDT_SYS286CGT: /* system 286 call gate */
		case SDT_SYS386CGT: /* system 386 call gate */
			/*
			 * There is no reason to allow a user process to
			 * create a segment of these types.  They are for
			 * OS use only.
			 */
			return (EACCES);
			/*NOTREACHED*/

		/* memory segment types */
		case SDT_MEMEC:   /* memory execute only conforming */
		case SDT_MEMEAC:  /* memory execute only accessed conforming */
		case SDT_MEMERC:  /* memory execute read conforming */
		case SDT_MEMERAC: /* memory execute read accessed conforming */
			 /* Must be "present" if executable and conforming. */
			if (dp->sd.sd_p == 0)
				return (EACCES);
			break;
		case SDT_MEMRO:   /* memory read only */
		case SDT_MEMROA:  /* memory read only accessed */
		case SDT_MEMRW:   /* memory read write */
		case SDT_MEMRWA:  /* memory read write accessed */
		case SDT_MEMROD:  /* memory read only expand dwn limit */
		case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
		case SDT_MEMRWD:  /* memory read write expand dwn limit */
		case SDT_MEMRWDA: /* memory read write expand dwn lim accessed */
		case SDT_MEME:    /* memory execute only */
		case SDT_MEMEA:   /* memory execute only accessed */
		case SDT_MEMER:   /* memory execute read */
		case SDT_MEMERA:  /* memory execute read accessed */
			break;
		default:
			return (EINVAL);
			/*NOTREACHED*/
		}

		/* Only user (ring-3) descriptors may be present. */
		if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL))
			return (EACCES);
	}

	if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
		/* Allocate a free slot */
		mtx_lock_spin(&dt_lock);
		if ((pldt = mdp->md_ldt) == NULL) {
			if ((error = i386_ldt_grow(td, NLDT + 1))) {
				mtx_unlock_spin(&dt_lock);
				return (error);
			}
			pldt = mdp->md_ldt;
		}
again:
		/*
		 * Start scanning a bit up to leave room for NVidia and
		 * Wine, which still use the "Blat" method of allocation.
		 */
		dp = &((union descriptor *)(pldt->ldt_base))[NLDT];
		for (i = NLDT; i < pldt->ldt_len; ++i) {
			if (dp->sd.sd_type == SDT_SYSNULL)
				break;
			dp++;
		}
		if (i >= pldt->ldt_len) {
			if ((error = i386_ldt_grow(td, pldt->ldt_len+1))) {
				mtx_unlock_spin(&dt_lock);
				return (error);
			}
			goto again;
		}
		uap->start = i;
		error = i386_set_ldt_data(td, i, 1, descs);
		mtx_unlock_spin(&dt_lock);
	} else {
		largest_ld = uap->start + uap->num;
		mtx_lock_spin(&dt_lock);
		if (!(error = i386_ldt_grow(td, largest_ld))) {
			error = i386_set_ldt_data(td, uap->start, uap->num,
			    descs);
		}
		mtx_unlock_spin(&dt_lock);
	}
	if (error == 0)
		td->td_retval[0] = uap->start;
	return (error);
}
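/*
 * Userland sketch (illustrative; FreeBSD also ships an i386_set_ldt(3)
 * libc wrapper).  Passing LDT_AUTO_ALLOC asks the slot-scanning code
 * above to pick a free descriptor and return its index:
 *
 *	union descriptor desc;
 *	int slot;
 *
 *	bzero(&desc, sizeof(desc));
 *	... fill in a ring-3 (SEL_UPL) memory descriptor ...
 *	slot = i386_set_ldt(LDT_AUTO_ALLOC, &desc, 1);
 *	if (slot == -1)
 *		err(1, "i386_set_ldt");
 */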
#ifdef XEN
static int
i386_set_ldt_data(struct thread *td, int start, int num,
	union descriptor *descs)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt = mdp->md_ldt;

	mtx_assert(&dt_lock, MA_OWNED);

	while (num) {
		xen_update_descriptor(
		    &((union descriptor *)(pldt->ldt_base))[start],
		    descs);
		num--;
		start++;
		descs++;
	}
	return (0);
}
#else
static int
i386_set_ldt_data(struct thread *td, int start, int num,
	union descriptor *descs)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *pldt = mdp->md_ldt;

	mtx_assert(&dt_lock, MA_OWNED);

	/* Fill in range */
	bcopy(descs,
	    &((union descriptor *)(pldt->ldt_base))[start],
	    num * sizeof(union descriptor));
	return (0);
}
#endif /* !XEN */

static int
i386_ldt_grow(struct thread *td, int len)
{
	struct mdproc *mdp = &td->td_proc->p_md;
	struct proc_ldt *new_ldt, *pldt;
	caddr_t old_ldt_base = NULL_LDT_BASE;
	int old_ldt_len = 0;

	mtx_assert(&dt_lock, MA_OWNED);

	if (len > MAX_LD)
		return (ENOMEM);
	if (len < NLDT + 1)
		len = NLDT + 1;

	/* Allocate a user ldt. */
	if ((pldt = mdp->md_ldt) == NULL || len > pldt->ldt_len) {
		new_ldt = user_ldt_alloc(mdp, len);
		if (new_ldt == NULL)
			return (ENOMEM);
		pldt = mdp->md_ldt;

		if (pldt != NULL) {
			if (new_ldt->ldt_len <= pldt->ldt_len) {
				/*
				 * We just lost the race for allocation, so
				 * free the new object and return.
				 */
				mtx_unlock_spin(&dt_lock);
				kmem_free(kernel_arena,
				    (vm_offset_t)new_ldt->ldt_base,
				    new_ldt->ldt_len * sizeof(union descriptor));
				free(new_ldt, M_SUBPROC);
				mtx_lock_spin(&dt_lock);
				return (0);
			}

			/*
			 * We have to substitute the current LDT entry for
			 * curproc with the new one since its size grew.
			 */
			old_ldt_base = pldt->ldt_base;
			old_ldt_len = pldt->ldt_len;
			pldt->ldt_sd = new_ldt->ldt_sd;
			pldt->ldt_base = new_ldt->ldt_base;
			pldt->ldt_len = new_ldt->ldt_len;
		} else
			mdp->md_ldt = pldt = new_ldt;
#ifdef SMP
		/*
		 * Signal other cpus to reload ldt.  We need to unlock
		 * dt_lock here because the other CPUs will contend for it:
		 * their curthreads do not hold the lock, so they would
		 * block trying to acquire it.
		 */
		mtx_unlock_spin(&dt_lock);
		smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv,
		    NULL, td->td_proc->p_vmspace);
#else
		set_user_ldt(&td->td_proc->p_md);
		mtx_unlock_spin(&dt_lock);
#endif
		if (old_ldt_base != NULL_LDT_BASE) {
			kmem_free(kernel_arena, (vm_offset_t)old_ldt_base,
			    old_ldt_len * sizeof(union descriptor));
			free(new_ldt, M_SUBPROC);
		}
		mtx_lock_spin(&dt_lock);
	}
	return (0);
}