intel_utils.c revision 277315
1/*-
2 * Copyright (c) 2013 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
6 * under sponsorship from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/10/sys/x86/iommu/intel_utils.c 277315 2015-01-18 09:49:32Z kib $");
32
33#include <sys/param.h>
34#include <sys/bus.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/memdesc.h>
39#include <sys/mutex.h>
40#include <sys/proc.h>
41#include <sys/queue.h>
42#include <sys/rman.h>
43#include <sys/rwlock.h>
44#include <sys/sched.h>
45#include <sys/sf_buf.h>
46#include <sys/sysctl.h>
47#include <sys/systm.h>
48#include <sys/taskqueue.h>
49#include <sys/tree.h>
50#include <vm/vm.h>
51#include <vm/vm_extern.h>
52#include <vm/vm_kern.h>
53#include <vm/vm_object.h>
54#include <vm/vm_page.h>
55#include <vm/vm_map.h>
56#include <vm/vm_pageout.h>
57#include <machine/bus.h>
58#include <machine/cpu.h>
59#include <x86/include/busdma_impl.h>
60#include <x86/iommu/intel_reg.h>
61#include <x86/iommu/busdma_dmar.h>
62#include <x86/iommu/intel_dmar.h>
63
64u_int
65dmar_nd2mask(u_int nd)
66{
67	static const u_int masks[] = {
68		0x000f,	/* nd == 0 */
69		0x002f,	/* nd == 1 */
70		0x00ff,	/* nd == 2 */
71		0x02ff,	/* nd == 3 */
72		0x0fff,	/* nd == 4 */
73		0x2fff,	/* nd == 5 */
74		0xffff,	/* nd == 6 */
75		0x0000,	/* nd == 7 reserved */
76	};
77
78	KASSERT(nd <= 6, ("number of domains %d", nd));
79	return (masks[nd]);
80}
81
82static const struct sagaw_bits_tag {
83	int agaw;
84	int cap;
85	int awlvl;
86	int pglvl;
87} sagaw_bits[] = {
88	{.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL,
89	    .pglvl = 2},
90	{.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL,
91	    .pglvl = 3},
92	{.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL,
93	    .pglvl = 4},
94	{.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL,
95	    .pglvl = 5},
96	{.agaw = 64, .cap = DMAR_CAP_SAGAW_6LVL, .awlvl = DMAR_CTX2_AW_6LVL,
97	    .pglvl = 6}
98};
99#define SIZEOF_SAGAW_BITS (sizeof(sagaw_bits) / sizeof(sagaw_bits[0]))
100
101bool
102dmar_pglvl_supported(struct dmar_unit *unit, int pglvl)
103{
104	int i;
105
106	for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
107		if (sagaw_bits[i].pglvl != pglvl)
108			continue;
109		if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
110			return (true);
111	}
112	return (false);
113}
114
115int
116ctx_set_agaw(struct dmar_ctx *ctx, int mgaw)
117{
118	int sagaw, i;
119
120	ctx->mgaw = mgaw;
121	sagaw = DMAR_CAP_SAGAW(ctx->dmar->hw_cap);
122	for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
123		if (sagaw_bits[i].agaw >= mgaw) {
124			ctx->agaw = sagaw_bits[i].agaw;
125			ctx->pglvl = sagaw_bits[i].pglvl;
126			ctx->awlvl = sagaw_bits[i].awlvl;
127			return (0);
128		}
129	}
130	device_printf(ctx->dmar->dev,
131	    "context request mgaw %d for pci%d:%d:%d:%d, "
132	    "no agaw found, sagaw %x\n", mgaw, ctx->dmar->segment, ctx->bus,
133	     ctx->slot, ctx->func, sagaw);
134	return (EINVAL);
135}
136
137/*
138 * Find a best fit mgaw for the given maxaddr:
139 *   - if allow_less is false, must find sagaw which maps all requested
140 *     addresses (used by identity mappings);
141 *   - if allow_less is true, and no supported sagaw can map all requested
142 *     address space, accept the biggest sagaw, whatever is it.
143 */
144int
145dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less)
146{
147	int i;
148
149	for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
150		if ((1ULL << sagaw_bits[i].agaw) >= maxaddr &&
151		    (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
152			break;
153	}
154	if (allow_less && i == SIZEOF_SAGAW_BITS) {
155		do {
156			i--;
157		} while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap)
158		    == 0);
159	}
160	if (i < SIZEOF_SAGAW_BITS)
161		return (sagaw_bits[i].agaw);
162	KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d",
163	    (uintmax_t) maxaddr, allow_less));
164	return (-1);
165}
166
167/*
168 * Calculate the total amount of page table pages needed to map the
169 * whole bus address space on the context with the selected agaw.
170 */
171vm_pindex_t
172pglvl_max_pages(int pglvl)
173{
174	vm_pindex_t res;
175	int i;
176
177	for (res = 0, i = pglvl; i > 0; i--) {
178		res *= DMAR_NPTEPG;
179		res++;
180	}
181	return (res);
182}
183
184/*
185 * Return true if the page table level lvl supports the superpage for
186 * the context ctx.
187 */
188int
189ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl)
190{
191	int alvl, cap_sps;
192	static const int sagaw_sp[] = {
193		DMAR_CAP_SPS_2M,
194		DMAR_CAP_SPS_1G,
195		DMAR_CAP_SPS_512G,
196		DMAR_CAP_SPS_1T
197	};
198
199	alvl = ctx->pglvl - lvl - 1;
200	cap_sps = DMAR_CAP_SPS(ctx->dmar->hw_cap);
201	return (alvl < sizeof(sagaw_sp) / sizeof(sagaw_sp[0]) &&
202	    (sagaw_sp[alvl] & cap_sps) != 0);
203}
204
205dmar_gaddr_t
206pglvl_page_size(int total_pglvl, int lvl)
207{
208	int rlvl;
209	static const dmar_gaddr_t pg_sz[] = {
210		(dmar_gaddr_t)DMAR_PAGE_SIZE,
211		(dmar_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT,
212		(dmar_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT),
213		(dmar_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT),
214		(dmar_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT),
215		(dmar_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT)
216	};
217
218	KASSERT(lvl >= 0 && lvl < total_pglvl,
219	    ("total %d lvl %d", total_pglvl, lvl));
220	rlvl = total_pglvl - lvl - 1;
221	KASSERT(rlvl < sizeof(pg_sz) / sizeof(pg_sz[0]),
222	    ("sizeof pg_sz lvl %d", lvl));
223	return (pg_sz[rlvl]);
224}
225
226dmar_gaddr_t
227ctx_page_size(struct dmar_ctx *ctx, int lvl)
228{
229
230	return (pglvl_page_size(ctx->pglvl, lvl));
231}
232
233int
234calc_am(struct dmar_unit *unit, dmar_gaddr_t base, dmar_gaddr_t size,
235    dmar_gaddr_t *isizep)
236{
237	dmar_gaddr_t isize;
238	int am;
239
240	for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) {
241		isize = 1ULL << (am + DMAR_PAGE_SHIFT);
242		if ((base & (isize - 1)) == 0 && size >= isize)
243			break;
244		if (am == 0)
245			break;
246	}
247	*isizep = isize;
248	return (am);
249}
250
251dmar_haddr_t dmar_high;
252int haw;
253int dmar_tbl_pagecnt;
254
255vm_page_t
256dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
257{
258	vm_page_t m;
259	int zeroed;
260
261	zeroed = (flags & DMAR_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
262	for (;;) {
263		if ((flags & DMAR_PGF_OBJL) == 0)
264			VM_OBJECT_WLOCK(obj);
265		m = vm_page_lookup(obj, idx);
266		if ((flags & DMAR_PGF_NOALLOC) != 0 || m != NULL) {
267			if ((flags & DMAR_PGF_OBJL) == 0)
268				VM_OBJECT_WUNLOCK(obj);
269			break;
270		}
271		m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY |
272		    VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed, 1, 0,
273		    dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
274		if ((flags & DMAR_PGF_OBJL) == 0)
275			VM_OBJECT_WUNLOCK(obj);
276		if (m != NULL) {
277			if (zeroed && (m->flags & PG_ZERO) == 0)
278				pmap_zero_page(m);
279			atomic_add_int(&dmar_tbl_pagecnt, 1);
280			break;
281		}
282		if ((flags & DMAR_PGF_WAITOK) == 0)
283			break;
284		if ((flags & DMAR_PGF_OBJL) != 0)
285			VM_OBJECT_WUNLOCK(obj);
286		VM_WAIT;
287		if ((flags & DMAR_PGF_OBJL) != 0)
288			VM_OBJECT_WLOCK(obj);
289	}
290	return (m);
291}
292
293void
294dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
295{
296	vm_page_t m;
297
298	if ((flags & DMAR_PGF_OBJL) == 0)
299		VM_OBJECT_WLOCK(obj);
300	m = vm_page_lookup(obj, idx);
301	if (m != NULL) {
302		vm_page_free(m);
303		atomic_subtract_int(&dmar_tbl_pagecnt, 1);
304	}
305	if ((flags & DMAR_PGF_OBJL) == 0)
306		VM_OBJECT_WUNLOCK(obj);
307}
308
309void *
310dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
311    struct sf_buf **sf)
312{
313	vm_page_t m;
314	bool allocated;
315
316	if ((flags & DMAR_PGF_OBJL) == 0)
317		VM_OBJECT_WLOCK(obj);
318	m = vm_page_lookup(obj, idx);
319	if (m == NULL && (flags & DMAR_PGF_ALLOC) != 0) {
320		m = dmar_pgalloc(obj, idx, flags | DMAR_PGF_OBJL);
321		allocated = true;
322	} else
323		allocated = false;
324	if (m == NULL) {
325		if ((flags & DMAR_PGF_OBJL) == 0)
326			VM_OBJECT_WUNLOCK(obj);
327		return (NULL);
328	}
329	/* Sleepable allocations cannot fail. */
330	if ((flags & DMAR_PGF_WAITOK) != 0)
331		VM_OBJECT_WUNLOCK(obj);
332	sched_pin();
333	*sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & DMAR_PGF_WAITOK)
334	    == 0 ? SFB_NOWAIT : 0));
335	if (*sf == NULL) {
336		sched_unpin();
337		if (allocated) {
338			VM_OBJECT_ASSERT_WLOCKED(obj);
339			dmar_pgfree(obj, m->pindex, flags | DMAR_PGF_OBJL);
340		}
341		if ((flags & DMAR_PGF_OBJL) == 0)
342			VM_OBJECT_WUNLOCK(obj);
343		return (NULL);
344	}
345	if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) ==
346	    (DMAR_PGF_WAITOK | DMAR_PGF_OBJL))
347		VM_OBJECT_WLOCK(obj);
348	else if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 0)
349		VM_OBJECT_WUNLOCK(obj);
350	return ((void *)sf_buf_kva(*sf));
351}
352
/*
 * Undo dmar_map_pgtbl(): release the sf_buf and then drop the CPU
 * pinning that was taken for the mapping.
 */
void
dmar_unmap_pgtbl(struct sf_buf *sf)
{
	sf_buf_free(sf);
	sched_unpin();
}
360
361static void
362dmar_flush_transl_to_ram(struct dmar_unit *unit, void *dst, size_t sz)
363{
364
365	if (DMAR_IS_COHERENT(unit))
366		return;
367	/*
368	 * If DMAR does not snoop paging structures accesses, flush
369	 * CPU cache to memory.
370	 */
371	pmap_invalidate_cache_range((uintptr_t)dst, (uintptr_t)dst + sz,
372	    TRUE);
373}
374
375void
376dmar_flush_pte_to_ram(struct dmar_unit *unit, dmar_pte_t *dst)
377{
378
379	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
380}
381
382void
383dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst)
384{
385
386	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
387}
388
389void
390dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst)
391{
392
393	dmar_flush_transl_to_ram(unit, dst, sizeof(*dst));
394}
395
396/*
397 * Load the root entry pointer into the hardware, busily waiting for
398 * the completion.
399 */
400int
401dmar_load_root_entry_ptr(struct dmar_unit *unit)
402{
403	vm_page_t root_entry;
404
405	/*
406	 * Access to the GCMD register must be serialized while the
407	 * command is submitted.
408	 */
409	DMAR_ASSERT_LOCKED(unit);
410
411	/* VM_OBJECT_RLOCK(unit->ctx_obj); */
412	VM_OBJECT_WLOCK(unit->ctx_obj);
413	root_entry = vm_page_lookup(unit->ctx_obj, 0);
414	/* VM_OBJECT_RUNLOCK(unit->ctx_obj); */
415	VM_OBJECT_WUNLOCK(unit->ctx_obj);
416	dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry));
417	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP);
418	/* XXXKIB should have a timeout */
419	while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS) == 0)
420		cpu_spinwait();
421	return (0);
422}
423
424/*
425 * Globally invalidate the context entries cache, busily waiting for
426 * the completion.
427 */
428int
429dmar_inv_ctx_glob(struct dmar_unit *unit)
430{
431
432	/*
433	 * Access to the CCMD register must be serialized while the
434	 * command is submitted.
435	 */
436	DMAR_ASSERT_LOCKED(unit);
437	KASSERT(!unit->qi_enabled, ("QI enabled"));
438
439	/*
440	 * The DMAR_CCMD_ICC bit in the upper dword should be written
441	 * after the low dword write is completed.  Amd64
442	 * dmar_write8() does not have this issue, i386 dmar_write8()
443	 * writes the upper dword last.
444	 */
445	dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB);
446	/* XXXKIB should have a timeout */
447	while ((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32) != 0)
448		cpu_spinwait();
449	return (0);
450}
451
452/*
453 * Globally invalidate the IOTLB, busily waiting for the completion.
454 */
455int
456dmar_inv_iotlb_glob(struct dmar_unit *unit)
457{
458	int reg;
459
460	DMAR_ASSERT_LOCKED(unit);
461	KASSERT(!unit->qi_enabled, ("QI enabled"));
462
463	reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap);
464	/* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */
465	dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
466	    DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW);
467	/* XXXKIB should have a timeout */
468	while ((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) &
469	    DMAR_IOTLB_IVT32) != 0)
470		cpu_spinwait();
471	return (0);
472}
473
474/*
475 * Flush the chipset write buffers.  See 11.1 "Write Buffer Flushing"
476 * in the architecture specification.
477 */
478int
479dmar_flush_write_bufs(struct dmar_unit *unit)
480{
481
482	DMAR_ASSERT_LOCKED(unit);
483
484	/*
485	 * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported.
486	 */
487	KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0,
488	    ("dmar%d: no RWBF", unit->unit));
489
490	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF);
491	/* XXXKIB should have a timeout */
492	while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS) == 0)
493		cpu_spinwait();
494	return (0);
495}
496
497int
498dmar_enable_translation(struct dmar_unit *unit)
499{
500
501	DMAR_ASSERT_LOCKED(unit);
502	unit->hw_gcmd |= DMAR_GCMD_TE;
503	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
504	/* XXXKIB should have a timeout */
505	while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) == 0)
506		cpu_spinwait();
507	return (0);
508}
509
510int
511dmar_disable_translation(struct dmar_unit *unit)
512{
513
514	DMAR_ASSERT_LOCKED(unit);
515	unit->hw_gcmd &= ~DMAR_GCMD_TE;
516	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
517	/* XXXKIB should have a timeout */
518	while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) != 0)
519		cpu_spinwait();
520	return (0);
521}
522
/*
 * Declare and compute the three flag bits that the barrier barrier_id
 * owns in the barrier_flags word: bit 3 * id is "done", the next one
 * is "in progress" and the one after that is "wakeup requested".
 * Expects a variable named barrier_id to be in scope.
 */
#define BARRIER_F				\
	u_int f_done, f_inproc, f_wakeup;	\
						\
	f_done = 1 << (barrier_id * 3);		\
	f_inproc = f_done << 1;			\
	f_wakeup = f_done << 2
529
530bool
531dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id)
532{
533	BARRIER_F;
534
535	DMAR_LOCK(dmar);
536	if ((dmar->barrier_flags & f_done) != 0) {
537		DMAR_UNLOCK(dmar);
538		return (false);
539	}
540
541	if ((dmar->barrier_flags & f_inproc) != 0) {
542		while ((dmar->barrier_flags & f_inproc) != 0) {
543			dmar->barrier_flags |= f_wakeup;
544			msleep(&dmar->barrier_flags, &dmar->lock, 0,
545			    "dmarb", 0);
546		}
547		KASSERT((dmar->barrier_flags & f_done) != 0,
548		    ("dmar%d barrier %d missing done", dmar->unit, barrier_id));
549		DMAR_UNLOCK(dmar);
550		return (false);
551	}
552
553	dmar->barrier_flags |= f_inproc;
554	DMAR_UNLOCK(dmar);
555	return (true);
556}
557
558void
559dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
560{
561	BARRIER_F;
562
563	DMAR_ASSERT_LOCKED(dmar);
564	KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc,
565	    ("dmar%d barrier %d missed entry", dmar->unit, barrier_id));
566	dmar->barrier_flags |= f_done;
567	if ((dmar->barrier_flags & f_wakeup) != 0)
568		wakeup(&dmar->barrier_flags);
569	dmar->barrier_flags &= ~(f_inproc | f_wakeup);
570	DMAR_UNLOCK(dmar);
571}
572
573int dmar_match_verbose;
574
575static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL,
576    "");
577SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD | CTLFLAG_TUN,
578    &dmar_tbl_pagecnt, 0,
579    "Count of pages used for DMAR pagetables");
580SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RW | CTLFLAG_TUN,
581    &dmar_match_verbose, 0,
582    "Verbose matching of the PCI devices to DMAR paths");
583#ifdef INVARIANTS
584int dmar_check_free;
585SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RW | CTLFLAG_TUN,
586    &dmar_check_free, 0,
587    "Check the GPA RBtree for free_down and free_after validity");
588#endif
589
590