intel_fault.c revision 279485
1/*-
2 * Copyright (c) 2013 The FreeBSD Foundation
3 * All rights reserved.
4 *
5 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
6 * under sponsorship from the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/10/sys/x86/iommu/intel_fault.c 279485 2015-03-01 10:35:54Z kib $");
32
33#include "opt_acpi.h"
34
35#include <sys/param.h>
36#include <sys/bus.h>
37#include <sys/kernel.h>
38#include <sys/malloc.h>
39#include <sys/memdesc.h>
40#include <sys/module.h>
41#include <sys/rman.h>
42#include <sys/taskqueue.h>
43#include <sys/tree.h>
44#include <machine/bus.h>
45#include <contrib/dev/acpica/include/acpi.h>
46#include <contrib/dev/acpica/include/accommon.h>
47#include <dev/acpica/acpivar.h>
48#include <dev/pci/pcireg.h>
49#include <dev/pci/pcivar.h>
50#include <vm/vm.h>
51#include <vm/vm_extern.h>
52#include <vm/vm_kern.h>
53#include <vm/vm_page.h>
54#include <vm/vm_map.h>
55#include <x86/include/busdma_impl.h>
56#include <x86/iommu/intel_reg.h>
57#include <x86/iommu/busdma_dmar.h>
58#include <x86/iommu/intel_dmar.h>
59
/*
 * Fault interrupt handling for DMARs.  If advanced fault logging is
 * not implemented by the hardware, the code emulates it.  The fast
 * interrupt handler flushes the fault registers into the circular
 * buffer at unit->fault_log and schedules a task.
 *
 * The fast handler is used because faults usually come in bursts and
 * the number of fault log registers is limited, e.g. down to one for
 * the 5400 MCH.  We are trying to reduce the latency of clearing the
 * fault register file.  The task is usually long-running, since
 * printf() is slow, but this is not problematic because bursts are
 * rare.
 *
 * For the same reason, each translation unit's task is executed in
 * its own thread.
 *
 * XXXKIB It seems there is no hardware available which implements
 * advanced fault logging, so the code to handle AFL is not written.
 */
78
79static int
80dmar_fault_next(struct dmar_unit *unit, int faultp)
81{
82
83	faultp += 2;
84	if (faultp == unit->fault_log_size)
85		faultp = 0;
86	return (faultp);
87}
88
89static void
90dmar_fault_intr_clear(struct dmar_unit *unit, uint32_t fsts)
91{
92	uint32_t clear;
93
94	clear = 0;
95	if ((fsts & DMAR_FSTS_ITE) != 0) {
96		printf("DMAR%d: Invalidation timed out\n", unit->unit);
97		clear |= DMAR_FSTS_ITE;
98	}
99	if ((fsts & DMAR_FSTS_ICE) != 0) {
100		printf("DMAR%d: Invalidation completion error\n",
101		    unit->unit);
102		clear |= DMAR_FSTS_ICE;
103	}
104	if ((fsts & DMAR_FSTS_IQE) != 0) {
105		printf("DMAR%d: Invalidation queue error\n",
106		    unit->unit);
107		clear |= DMAR_FSTS_IQE;
108	}
109	if ((fsts & DMAR_FSTS_APF) != 0) {
110		printf("DMAR%d: Advanced pending fault\n", unit->unit);
111		clear |= DMAR_FSTS_APF;
112	}
113	if ((fsts & DMAR_FSTS_AFO) != 0) {
114		printf("DMAR%d: Advanced fault overflow\n", unit->unit);
115		clear |= DMAR_FSTS_AFO;
116	}
117	if (clear != 0)
118		dmar_write4(unit, DMAR_FSTS_REG, clear);
119}
120
121int
122dmar_fault_intr(void *arg)
123{
124	struct dmar_unit *unit;
125	uint64_t fault_rec[2];
126	uint32_t fsts;
127	int fri, frir, faultp;
128	bool enqueue;
129
130	unit = arg;
131	enqueue = false;
132	fsts = dmar_read4(unit, DMAR_FSTS_REG);
133	dmar_fault_intr_clear(unit, fsts);
134
135	if ((fsts & DMAR_FSTS_PPF) == 0)
136		goto done;
137
138	fri = DMAR_FSTS_FRI(fsts);
139	for (;;) {
140		frir = (DMAR_CAP_FRO(unit->hw_cap) + fri) * 16;
141		fault_rec[1] = dmar_read8(unit, frir + 8);
142		if ((fault_rec[1] & DMAR_FRCD2_F) == 0)
143			break;
144		fault_rec[0] = dmar_read8(unit, frir);
145		dmar_write4(unit, frir + 12, DMAR_FRCD2_F32);
146		DMAR_FAULT_LOCK(unit);
147		faultp = unit->fault_log_head;
148		if (dmar_fault_next(unit, faultp) == unit->fault_log_tail) {
149			/* XXXKIB log overflow */
150		} else {
151			unit->fault_log[faultp] = fault_rec[0];
152			unit->fault_log[faultp + 1] = fault_rec[1];
153			unit->fault_log_head = dmar_fault_next(unit, faultp);
154			enqueue = true;
155		}
156		DMAR_FAULT_UNLOCK(unit);
157		fri += 1;
158		if (fri >= DMAR_CAP_NFR(unit->hw_cap))
159			fri = 0;
160	}
161
162done:
163	/*
164	 * On SandyBridge, due to errata BJ124, IvyBridge errata
165	 * BV100, and Haswell errata HSD40, "Spurious Intel VT-d
166	 * Interrupts May Occur When the PFO Bit is Set".  Handle the
167	 * cases by clearing overflow bit even if no fault is
168	 * reported.
169	 *
170	 * On IvyBridge, errata BV30 states that clearing clear
171	 * DMAR_FRCD2_F bit in the fault register causes spurious
172	 * interrupt.  Do nothing.
173	 *
174	 */
175	if ((fsts & DMAR_FSTS_PFO) != 0) {
176		printf("DMAR%d: Fault Overflow\n", unit->unit);
177		dmar_write4(unit, DMAR_FSTS_REG, DMAR_FSTS_PFO);
178	}
179
180	if (enqueue) {
181		taskqueue_enqueue_fast(unit->fault_taskqueue,
182		    &unit->fault_task);
183	}
184	return (FILTER_HANDLED);
185}
186
187static void
188dmar_fault_task(void *arg, int pending __unused)
189{
190	struct dmar_unit *unit;
191	struct dmar_ctx *ctx;
192	uint64_t fault_rec[2];
193	int sid, bus, slot, func, faultp;
194
195	unit = arg;
196	DMAR_FAULT_LOCK(unit);
197	for (;;) {
198		faultp = unit->fault_log_tail;
199		if (faultp == unit->fault_log_head)
200			break;
201
202		fault_rec[0] = unit->fault_log[faultp];
203		fault_rec[1] = unit->fault_log[faultp + 1];
204		unit->fault_log_tail = dmar_fault_next(unit, faultp);
205		DMAR_FAULT_UNLOCK(unit);
206
207		sid = DMAR_FRCD2_SID(fault_rec[1]);
208		printf("DMAR%d: ", unit->unit);
209		DMAR_LOCK(unit);
210		ctx = dmar_find_ctx_locked(unit, sid);
211		if (ctx == NULL) {
212			printf("<unknown dev>:");
213
214			/*
215			 * Note that the slot and function will not be correct
216			 * if ARI is in use, but without a ctx entry we have
217			 * no way of knowing whether ARI is in use or not.
218			 */
219			bus = PCI_RID2BUS(sid);
220			slot = PCI_RID2SLOT(sid);
221			func = PCI_RID2FUNC(sid);
222		} else {
223			ctx->flags |= DMAR_CTX_FAULTED;
224			ctx->last_fault_rec[0] = fault_rec[0];
225			ctx->last_fault_rec[1] = fault_rec[1];
226			device_print_prettyname(ctx->ctx_tag.owner);
227			bus = pci_get_bus(ctx->ctx_tag.owner);
228			slot = pci_get_slot(ctx->ctx_tag.owner);
229			func = pci_get_function(ctx->ctx_tag.owner);
230		}
231		DMAR_UNLOCK(unit);
232		printf(
233		    "pci%d:%d:%d sid %x fault acc %x adt 0x%x reason 0x%x "
234		    "addr %jx\n",
235		    bus, slot, func, sid, DMAR_FRCD2_T(fault_rec[1]),
236		    DMAR_FRCD2_AT(fault_rec[1]), DMAR_FRCD2_FR(fault_rec[1]),
237		    (uintmax_t)fault_rec[0]);
238		DMAR_FAULT_LOCK(unit);
239	}
240	DMAR_FAULT_UNLOCK(unit);
241}
242
243static void
244dmar_clear_faults(struct dmar_unit *unit)
245{
246	uint32_t frec, frir, fsts;
247	int i;
248
249	for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) {
250		frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16;
251		frec = dmar_read4(unit, frir + 12);
252		if ((frec & DMAR_FRCD2_F32) == 0)
253			continue;
254		dmar_write4(unit, frir + 12, DMAR_FRCD2_F32);
255	}
256	fsts = dmar_read4(unit, DMAR_FSTS_REG);
257	dmar_write4(unit, DMAR_FSTS_REG, fsts);
258}
259
260int
261dmar_init_fault_log(struct dmar_unit *unit)
262{
263
264	mtx_init(&unit->fault_lock, "dmarflt", NULL, MTX_SPIN);
265	unit->fault_log_size = 256; /* 128 fault log entries */
266	TUNABLE_INT_FETCH("hw.dmar.fault_log_size", &unit->fault_log_size);
267	if (unit->fault_log_size % 2 != 0)
268		panic("hw.dmar_fault_log_size must be even");
269	unit->fault_log = malloc(sizeof(uint64_t) * unit->fault_log_size,
270	    M_DEVBUF, M_WAITOK | M_ZERO);
271
272	TASK_INIT(&unit->fault_task, 0, dmar_fault_task, unit);
273	unit->fault_taskqueue = taskqueue_create_fast("dmar", M_WAITOK,
274	    taskqueue_thread_enqueue, &unit->fault_taskqueue);
275	taskqueue_start_threads(&unit->fault_taskqueue, 1, PI_AV,
276	    "dmar%d fault taskq", unit->unit);
277
278	DMAR_LOCK(unit);
279	dmar_disable_fault_intr(unit);
280	dmar_clear_faults(unit);
281	dmar_enable_fault_intr(unit);
282	DMAR_UNLOCK(unit);
283
284	return (0);
285}
286
287void
288dmar_fini_fault_log(struct dmar_unit *unit)
289{
290
291	DMAR_LOCK(unit);
292	dmar_disable_fault_intr(unit);
293	DMAR_UNLOCK(unit);
294
295	if (unit->fault_taskqueue == NULL)
296		return;
297
298	taskqueue_drain(unit->fault_taskqueue, &unit->fault_task);
299	taskqueue_free(unit->fault_taskqueue);
300	unit->fault_taskqueue = NULL;
301	mtx_destroy(&unit->fault_lock);
302
303	free(unit->fault_log, M_DEVBUF);
304	unit->fault_log = NULL;
305	unit->fault_log_head = unit->fault_log_tail = 0;
306}
307
308void
309dmar_enable_fault_intr(struct dmar_unit *unit)
310{
311	uint32_t fectl;
312
313	DMAR_ASSERT_LOCKED(unit);
314	fectl = dmar_read4(unit, DMAR_FECTL_REG);
315	fectl &= ~DMAR_FECTL_IM;
316	dmar_write4(unit, DMAR_FECTL_REG, fectl);
317}
318
319void
320dmar_disable_fault_intr(struct dmar_unit *unit)
321{
322	uint32_t fectl;
323
324	DMAR_ASSERT_LOCKED(unit);
325	fectl = dmar_read4(unit, DMAR_FECTL_REG);
326	dmar_write4(unit, DMAR_FECTL_REG, fectl | DMAR_FECTL_IM);
327}
328