intel_qi.c revision 284019
/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/x86/iommu/intel_qi.c 284019 2015-06-05 08:23:33Z kib $");

#include "opt_acpi.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <machine/bus.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/acpica/acpivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <machine/cpu.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/busdma_dmar.h>
#include <x86/iommu/intel_dmar.h>

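/*
 * Returns true if the invalidation request with the given
 * generation/sequence pair was completed by the hardware, as reported
 * through the wait descriptor status writeback into inv_waitd_seq_hw.
 * The generation counter keeps the comparison monotonic across 32-bit
 * wraparound of the sequence numbers.
 */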
static bool
dmar_qi_seq_processed(const struct dmar_unit *unit,
    const struct dmar_qi_genseq *pseq)
{

	return (pseq->gen < unit->inv_waitd_gen ||
	    (pseq->gen == unit->inv_waitd_gen &&
	     pseq->seq <= unit->inv_waitd_seq_hw));
}

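/*
 * Enable/disable the queued invalidation engine by toggling
 * DMAR_GCMD_QIE in the global command register and spinning until the
 * status register acknowledges the change.
 */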
static int
dmar_enable_qi(struct dmar_unit *unit)
{

	DMAR_ASSERT_LOCKED(unit);
	unit->hw_gcmd |= DMAR_GCMD_QIE;
	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
	/* XXXKIB should have a timeout */
	while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) == 0)
		cpu_spinwait();
	return (0);
}

static int
dmar_disable_qi(struct dmar_unit *unit)
{

	DMAR_ASSERT_LOCKED(unit);
	unit->hw_gcmd &= ~DMAR_GCMD_QIE;
	dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
	/* XXXKIB should have a timeout */
	while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) != 0)
		cpu_spinwait();
	return (0);
}

static void
dmar_qi_advance_tail(struct dmar_unit *unit)
{

	DMAR_ASSERT_LOCKED(unit);
	dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail);
}

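/*
 * Reserve queue space for descr_count descriptors, busy-waiting for
 * the hardware to drain entries when the ring is full.  Free space is
 * recomputed from the hardware head pointer; one descriptor slot is
 * always kept unused so that a full ring remains distinguishable from
 * an empty one (head == tail means empty).
 */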
static void
dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
{
	uint32_t head;
	int bytes;

	DMAR_ASSERT_LOCKED(unit);
	bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT;
	for (;;) {
		if (bytes <= unit->inv_queue_avail)
			break;
		/* refill */
		head = dmar_read4(unit, DMAR_IQH_REG);
		head &= DMAR_IQH_MASK;
		unit->inv_queue_avail = head - unit->inv_queue_tail -
		    DMAR_IQ_DESCR_SZ;
		if (head <= unit->inv_queue_tail)
			unit->inv_queue_avail += unit->inv_queue_size;
		if (bytes <= unit->inv_queue_avail)
			break;

		/*
		 * No space in the queue, so busy-wait.  The hardware
		 * must make progress.  But first advance the tail to
		 * inform the descriptor streamer about the entries we
		 * might have already filled, otherwise they could
		 * clog the whole queue.
		 */
		dmar_qi_advance_tail(unit);
		unit->inv_queue_full++;
		cpu_spinwait();
	}
	unit->inv_queue_avail -= bytes;
}

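/*
 * Write one 128-bit descriptor into the ring as two 64-bit words,
 * wrapping the tail around the power-of-2 sized queue.  The new tail
 * becomes visible to the hardware only when dmar_qi_advance_tail() is
 * called.
 */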
static void
dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2)
{

	DMAR_ASSERT_LOCKED(unit);
	*(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1;
	unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
	KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
	    ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
	    (uintmax_t)unit->inv_queue_size));
	unit->inv_queue_tail &= unit->inv_queue_size - 1;
	*(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2;
	unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
	KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
	    ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
	    (uintmax_t)unit->inv_queue_size));
	unit->inv_queue_tail &= unit->inv_queue_size - 1;
}

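/*
 * Emit an invalidation wait descriptor.  The flags request, from the
 * hardware, an interrupt on completion (intr), a store of the 32-bit
 * seq value into inv_waitd_seq_hw (memw), and a fence, i.e. the
 * descriptors that follow are processed only after the wait completes
 * (fence).
 */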
static void
dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr,
    bool memw, bool fence)
{

	DMAR_ASSERT_LOCKED(unit);
	dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID |
	    (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) |
	    (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) |
	    (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) |
	    (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0),
	    memw ? unit->inv_waitd_seq_hw_phys : 0);
}

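/*
 * Allocate the next wait sequence number for the caller and emit the
 * wait descriptor for it.  When the 32-bit sequence counter is about
 * to wrap, synchronously drain the queue and advance the generation
 * count, which keeps the dmar_qi_seq_processed() comparison correct.
 */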
static void
dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct dmar_qi_genseq *pseq)
{
	struct dmar_qi_genseq gsec;
	uint32_t seq;

	KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
	DMAR_ASSERT_LOCKED(unit);
	if (unit->inv_waitd_seq == 0xffffffff) {
		gsec.gen = unit->inv_waitd_gen;
		gsec.seq = unit->inv_waitd_seq;
		dmar_qi_ensure(unit, 1);
		dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false);
		dmar_qi_advance_tail(unit);
		while (!dmar_qi_seq_processed(unit, &gsec))
			cpu_spinwait();
		unit->inv_waitd_gen++;
		unit->inv_waitd_seq = 1;
	}
	seq = unit->inv_waitd_seq++;
	pseq->gen = unit->inv_waitd_gen;
	pseq->seq = seq;
	dmar_qi_emit_wait_descr(unit, seq, true, true, false);
}

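/*
 * Wait until the hardware processes the wait descriptor identified by
 * gseq.  During early boot (cold), when sleeping is not allowed, spin;
 * otherwise sleep and rely on dmar_qi_task() to issue the wakeup.
 */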
static void
dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct dmar_qi_genseq *gseq)
{

	DMAR_ASSERT_LOCKED(unit);
	unit->inv_seq_waiters++;
	while (!dmar_qi_seq_processed(unit, gseq)) {
		if (cold) {
			cpu_spinwait();
		} else {
			msleep(&unit->inv_seq_waiters, &unit->lock, 0,
			    "dmarse", hz);
		}
	}
	unit->inv_seq_waiters--;
}

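/*
 * Queue IOTLB page invalidations for the guest addresses in
 * [base, base + size).  calc_am() splits the range into naturally
 * aligned power-of-2 chunks expressible by the address-mask encoding
 * of the descriptor.  When pseq is not NULL, a wait descriptor is
 * emitted as well and its generation/sequence pair is returned, so the
 * caller can detect or wait for completion later.
 *
 * Illustrative caller sketch (cf. the ctx unload path; not part of
 * this file):
 *
 *	DMAR_LOCK(unit);
 *	dmar_qi_invalidate_locked(entry->ctx, entry->start,
 *	    entry->end - entry->start, &entry->gseq);
 *	TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
 *	DMAR_UNLOCK(unit);
 *
 * dmar_qi_task() then frees the entry once its gseq is processed.
 */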
void
dmar_qi_invalidate_locked(struct dmar_ctx *ctx, dmar_gaddr_t base,
    dmar_gaddr_t size, struct dmar_qi_genseq *pseq)
{
	struct dmar_unit *unit;
	dmar_gaddr_t isize;
	int am;

	unit = ctx->dmar;
	DMAR_ASSERT_LOCKED(unit);
	for (; size > 0; base += isize, size -= isize) {
		am = calc_am(unit, base, size, &isize);
		dmar_qi_ensure(unit, 1);
		dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV |
		    DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW |
		    DMAR_IQ_DESCR_IOTLB_DR |
		    DMAR_IQ_DESCR_IOTLB_DID(ctx->domain),
		    base | am);
	}
	if (pseq != NULL) {
		dmar_qi_ensure(unit, 1);
		dmar_qi_emit_wait_seq(unit, pseq);
	}
	dmar_qi_advance_tail(unit);
}

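/*
 * The two helpers below emit a global invalidation of the context
 * cache and of the IOTLB, respectively, and synchronously wait for the
 * invalidation to complete.
 */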
void
dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit)
{
	struct dmar_qi_genseq gseq;

	DMAR_ASSERT_LOCKED(unit);
	dmar_qi_ensure(unit, 2);
	dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0);
	dmar_qi_emit_wait_seq(unit, &gseq);
	dmar_qi_advance_tail(unit);
	dmar_qi_wait_for_seq(unit, &gseq);
}

void
dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit)
{
	struct dmar_qi_genseq gseq;

	DMAR_ASSERT_LOCKED(unit);
	dmar_qi_ensure(unit, 2);
	dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB |
	    DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0);
	dmar_qi_emit_wait_seq(unit, &gseq);
	dmar_qi_advance_tail(unit);
	dmar_qi_wait_for_seq(unit, &gseq);
}

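/*
 * Interrupt filter for the invalidation completion interrupt.  The
 * real work is deferred to dmar_qi_task() on the unit taskqueue.
 */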
int
dmar_qi_intr(void *arg)
{
	struct dmar_unit *unit;

	unit = arg;
	KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled", unit->unit));
	taskqueue_enqueue_fast(unit->qi_taskqueue, &unit->qi_task);
	return (FILTER_HANDLED);
}

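/*
 * Taskqueue handler: free the map entries whose invalidation has
 * completed (in queue order, so the scan stops at the first unfinished
 * entry), acknowledge the wait-descriptor completion by clearing
 * DMAR_ICS_IWC, and wake up the threads sleeping in
 * dmar_qi_wait_for_seq().
 */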
static void
dmar_qi_task(void *arg, int pending __unused)
{
	struct dmar_unit *unit;
	struct dmar_map_entry *entry;
	uint32_t ics;

	unit = arg;

	DMAR_LOCK(unit);
	for (;;) {
		entry = TAILQ_FIRST(&unit->tlb_flush_entries);
		if (entry == NULL)
			break;
		if ((entry->gseq.gen == 0 && entry->gseq.seq == 0) ||
		    !dmar_qi_seq_processed(unit, &entry->gseq))
			break;
		TAILQ_REMOVE(&unit->tlb_flush_entries, entry, dmamap_link);
		DMAR_UNLOCK(unit);
		dmar_ctx_free_entry(entry, (entry->flags &
		    DMAR_MAP_ENTRY_QI_NF) == 0);
		DMAR_LOCK(unit);
	}
	ics = dmar_read4(unit, DMAR_ICS_REG);
	if ((ics & DMAR_ICS_IWC) != 0) {
		ics = DMAR_ICS_IWC;
		dmar_write4(unit, DMAR_ICS_REG, ics);
	}
	if (unit->inv_seq_waiters > 0)
		wakeup(&unit->inv_seq_waiters);
	DMAR_UNLOCK(unit);
}

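/*
 * Set up queued invalidation for the unit, unless the hardware does
 * not support QI or advertises the caching mode (DMAR_CAP_CM).  The
 * queue occupies 1 << qi_sz contiguous pages; both the use of QI and
 * the queue size may be overridden with the hw.dmar.qi and
 * hw.dmar.qi_size loader tunables.
 */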
int
dmar_init_qi(struct dmar_unit *unit)
{
	uint64_t iqa;
	uint32_t ics;
	int qi_sz;

	if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0)
		return (0);
	unit->qi_enabled = 1;
	TUNABLE_INT_FETCH("hw.dmar.qi", &unit->qi_enabled);
	if (!unit->qi_enabled)
		return (0);

	TAILQ_INIT(&unit->tlb_flush_entries);
	TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit);
	unit->qi_taskqueue = taskqueue_create_fast("dmar", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->qi_taskqueue);
	taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV,
	    "dmar%d qi taskq", unit->unit);

	unit->inv_waitd_gen = 0;
	unit->inv_waitd_seq = 1;

	qi_sz = DMAR_IQA_QS_DEF;
	TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz);
	if (qi_sz > DMAR_IQA_QS_MAX)
		qi_sz = DMAR_IQA_QS_MAX;
	unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
	/* Reserve one descriptor to prevent wraparound. */
	unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ;

	/* Reads of the invalidation queue by the DMAR are always coherent. */
	unit->inv_queue = kmem_alloc_contig(kernel_arena, unit->inv_queue_size,
	    M_WAITOK | M_ZERO, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
	unit->inv_waitd_seq_hw_phys = pmap_kextract(
	    (vm_offset_t)&unit->inv_waitd_seq_hw);

	DMAR_LOCK(unit);
	dmar_write8(unit, DMAR_IQT_REG, 0);
	iqa = pmap_kextract(unit->inv_queue);
	iqa |= qi_sz;
	dmar_write8(unit, DMAR_IQA_REG, iqa);
	dmar_enable_qi(unit);
	ics = dmar_read4(unit, DMAR_ICS_REG);
	if ((ics & DMAR_ICS_IWC) != 0) {
		ics = DMAR_ICS_IWC;
		dmar_write4(unit, DMAR_ICS_REG, ics);
	}
	dmar_enable_qi_intr(unit);
	DMAR_UNLOCK(unit);

	return (0);
}

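/*
 * Tear down queued invalidation: drain the taskqueue, wait for the
 * already queued descriptors to complete, and only then disable the
 * interrupt and the queue itself and free the queue memory.
 */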
void
dmar_fini_qi(struct dmar_unit *unit)
{
	struct dmar_qi_genseq gseq;

	if (!unit->qi_enabled)
		return;
	taskqueue_drain(unit->qi_taskqueue, &unit->qi_task);
	taskqueue_free(unit->qi_taskqueue);
	unit->qi_taskqueue = NULL;

	DMAR_LOCK(unit);
	/* Quiesce the queue. */
	dmar_qi_ensure(unit, 1);
	dmar_qi_emit_wait_seq(unit, &gseq);
	dmar_qi_advance_tail(unit);
	dmar_qi_wait_for_seq(unit, &gseq);
	/* Only after the quiesce, disable the queue. */
	dmar_disable_qi_intr(unit);
	dmar_disable_qi(unit);
	KASSERT(unit->inv_seq_waiters == 0,
	    ("dmar%d: waiters on disabled queue", unit->unit));
	DMAR_UNLOCK(unit);

	kmem_free(kernel_arena, unit->inv_queue, unit->inv_queue_size);
	unit->inv_queue = 0;
	unit->inv_queue_size = 0;
	unit->qi_enabled = 0;
}

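/*
 * Unmask (enable) or mask (disable) the invalidation event interrupt
 * via the IM bit of the invalidation event control register.
 */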
void
dmar_enable_qi_intr(struct dmar_unit *unit)
{
	uint32_t iectl;

	DMAR_ASSERT_LOCKED(unit);
	KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit));
	iectl = dmar_read4(unit, DMAR_IECTL_REG);
	iectl &= ~DMAR_IECTL_IM;
	dmar_write4(unit, DMAR_IECTL_REG, iectl);
}

void
dmar_disable_qi_intr(struct dmar_unit *unit)
{
	uint32_t iectl;

	DMAR_ASSERT_LOCKED(unit);
	KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit));
	iectl = dmar_read4(unit, DMAR_IECTL_REG);
	dmar_write4(unit, DMAR_IECTL_REG, iectl | DMAR_IECTL_IM);
}