/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/x86/iommu/busdma_dmar.c 284021 2015-06-05 08:36:25Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/busdma_dmar.h>
#include <x86/iommu/intel_dmar.h>

/*
 * busdma_dmar.c, the implementation of the busdma(9) interface using
 * DMAR units from Intel VT-d.
 */

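/*
 * Check whether the administrator requested, via the kernel
 * environment, that busdma bounce be used for the given PCI device
 * instead of DMAR translation.  The tunable has the form
 * hw.busdma.pci<domain>.<bus>.<slot>.<func>.bounce; only its presence
 * matters, the value is ignored.  For example (hypothetical device at
 * domain 0, bus 0, slot 31, function 2), in loader.conf(5):
 *
 *	hw.busdma.pci0.0.31.2.bounce=1
 */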
static bool
dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d.bounce",
	    domain, bus, slot, func);
	env = getenv(str);
	if (env == NULL)
		return (false);
	freeenv(env);
	return (true);
}

/*
 * Given the original device, find the requester ID that will be seen
 * by the DMAR unit and used for page table lookup.  PCI bridges may
 * take ownership of transactions from downstream devices, so it may
 * not be the same as the BSF of the target device.  In those cases,
 * all devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either DMAR or
 * bounce mapping.
 */
static device_t
dmar_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t l, pci, pcib, pcip, pcibp, requester;
	int cap_offset;
	uint16_t pcie_flags;
	bool bridge_is_pcie;

	pci_class = devclass_find("pci");
	l = requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the DMAR
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(l);
		KASSERT(pci != NULL, ("dmar_get_requester(%s): NULL parent "
		    "for %s", device_get_name(dev), device_get_name(l)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("dmar_get_requester(%s): non-pci parent %s for %s",
		    device_get_name(dev), device_get_name(pci),
		    device_get_name(l)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("dmar_get_requester(%s): NULL bridge "
		    "for %s", device_get_name(dev), device_get_name(pci)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		pcip = device_get_parent(pcib);
		if (device_get_devclass(pcip) != pci_class)
			break;
		pcibp = device_get_parent(pcip);

		if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
			/*
			 * Do not stop the loop even if the target
			 * device is PCIe, because it is possible (but
			 * unlikely) to have a PCI->PCIe bridge
			 * somewhere in the hierarchy.
			 */
			l = pcib;
		} else {
			/*
			 * The device is not PCIe, so it cannot be
			 * seen as a requester by the DMAR unit.
			 * Check whether the bridge is PCIe.
			 */
			bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0;
			requester = pcib;

			/*
			 * Check for a buggy PCIe/PCI bridge that
			 * doesn't report the express capability.  If
			 * the bridge above it is express but isn't a
			 * PCI bridge, then we know pcib is actually a
			 * PCIe/PCI bridge.
			 */
			if (!bridge_is_pcie && pci_find_cap(pcibp,
			    PCIY_EXPRESS, &cap_offset) == 0) {
				pcie_flags = pci_read_config(pcibp,
				    cap_offset + PCIER_FLAGS, 2);
				if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
				    PCIEM_TYPE_PCI_BRIDGE)
					bridge_is_pcie = true;
			}

			if (bridge_is_pcie) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(l), 0, 0);
				l = pcibp;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it is PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
				l = pcib;
			}
		}
	}
	return (requester);
}

struct dmar_ctx *
dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
{
	device_t requester;
	struct dmar_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = dmar_get_requester(dev, &rid);

	/*
	 * If the user requested that the IOMMU be disabled for the
	 * device, we cannot disable the whole DMAR, because other
	 * devices on the same DMAR may still require translation.
	 * Instead, provide an identity mapping for the device
	 * context.
	 */
	disabled = dmar_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = dmar_get_ctx(dmar, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on context, release the
		 * later refs.
		 */
		DMAR_LOCK(dmar);
		if ((ctx->flags & DMAR_CTX_DISABLED) == 0) {
			ctx->flags |= DMAR_CTX_DISABLED;
			DMAR_UNLOCK(dmar);
		} else {
			dmar_free_ctx_locked(dmar, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

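/*
 * Return a DMA tag that routes all maps created with it through the
 * child's DMAR context, or NULL when the child is outside the scope
 * of any DMAR unit or when translation was administratively disabled
 * for it.
 */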
bus_dma_tag_t
dmar_get_dma_tag(device_t dev, device_t child)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;
	bus_dma_tag_t res;

	dmar = dmar_find(child);
	/* Not in scope of any DMAR? */
	if (dmar == NULL)
		return (NULL);
	dmar_quirks_pre_use(dmar);
	dmar_instantiate_rmrr_ctxs(dmar);

	ctx = dmar_instantiate_ctx(dmar, child, false);
	res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag;
	return (res);
}

static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");

static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
    struct bus_dmamap_dmar *map);

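/*
 * busdma tag and map implementation on top of the DMAR context.  Tags
 * created here inherit the context and owner device from the parent
 * tag and dispatch through bus_dma_dmar_impl below.
 */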
static int
dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_dmar *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_dmar *)parent;
	newtag->common.impl = &bus_dma_dmar_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == &dmat->ctx->ctx_tag)
					dmar_free_ctx(dmat->ctx);
				free(dmat->segments, M_DMAR_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

static int
dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = malloc(sizeof(*map), M_DMAR_DMAMAP, M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_DMAR_DMAMAP, M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_DMAR_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	if (map != NULL) {
		DMAR_CTX_LOCK(tag->ctx);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			DMAR_CTX_UNLOCK(tag->ctx);
			return (EBUSY);
		}
		DMAR_CTX_UNLOCK(tag->ctx);
		free(map, M_DMAR_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

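/*
 * Allocate backing memory for a static DMA buffer.  Requests smaller
 * than a page, with the default memory attribute and an alignment not
 * exceeding the buffer size, are served by malloc(9); everything else
 * comes from kmem_alloc_attr(), bounded by BUS_SPACE_MAXADDR and
 * carrying the requested memory attribute.
 */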
static int
dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	int error, mflags;
	vm_memattr_t attr;

	error = dmar_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc(tag->common.maxsize, M_DEVBUF, mflags);
		map->flags |= BUS_DMAMAP_DMAR_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr(kernel_arena,
		    tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR,
		    attr);
		map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		dmar_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
		    ("dmar_bus_dmamem_free for non alloced map %p", map));
		kmem_free(kernel_arena, (vm_offset_t)vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}

	dmar_bus_dmamap_destroy(dmat, map1);
}

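/*
 * Map the pages in ma, starting at offset and spanning buflen bytes,
 * into the context address space, recording the new map entries both
 * on the map and on unroll_list and filling the busdma segment array.
 * Returns EFBIG if the buffer does not fit into the remaining
 * segments.
 */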
static int
dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct dmar_map_entries_tailq *unroll_list)
{
	struct dmar_ctx *ctx;
	struct dmar_map_entry *entry;
	dmar_gaddr_t size;
	bus_size_t buflen1;
	int error, idx, gas_flags, seg;

	KASSERT(offset < DMAR_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;
		size = round_page(offset + buflen1);

		/*
		 * (Too) optimistically allow a split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= DMAR_GM_CANSPLIT;

		error = dmar_gas_map(ctx, &tag->common, size, offset,
		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
		    gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
			KASSERT(size >= entry->end - entry->start,
			    ("split increased entry size %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
			size = entry->end - entry->start;
			if (buflen1 > size)
				buflen1 = size;
		} else {
			KASSERT(entry->end - entry->start == size,
			    ("no split allowed %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
		}
		if (offset + buflen1 > size)
			buflen1 = size - offset;
		if (buflen1 > tag->common.maxsegsz)
			buflen1 = tag->common.maxsegsz;

		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
		    == 0,
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(dmar_test_boundary(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		DMAR_CTX_LOCK(ctx);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= DMAR_MAP_ENTRY_MAP;
		DMAR_CTX_UNLOCK(ctx);
		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
		offset += buflen1;
		offset &= DMAR_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

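/*
 * Wrapper around dmar_bus_dmamap_load_something1() which unrolls all
 * map entries created by a failed load and, when the caller allows
 * waiting but the current context cannot sleep, turns ENOMEM into
 * EINPROGRESS and schedules the load to be retried from the delayed
 * taskqueue.
 */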
static int
dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct dmar_ctx *ctx;
	struct dmar_map_entry *entry, *entry1;
	struct dmar_map_entries_tailq unroll_list;
	int error;

	ctx = tag->ctx;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&unroll_list);
	error = dmar_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &unroll_list);
	if (error != 0) {
		/*
		 * The busdma interface does not allow us to report a
		 * partial buffer load, so unfortunately we have to
		 * revert all the work done.
		 */
		DMAR_CTX_LOCK(ctx);
		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
		    entry1) {
			/*
			 * Only the entries created during the failed
			 * run can be present here; nothing else could
			 * have been inserted in between, since we own
			 * the ctx pglock.
			 */
			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
			TAILQ_INSERT_TAIL(&ctx->unload_entries, entry,
			    dmamap_link);
		}
		DMAR_CTX_UNLOCK(ctx);
		taskqueue_enqueue(ctx->dmar->delayed_taskqueue,
		    &ctx->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		dmar_bus_schedule_dmamap(ctx->dmar, map);
	return (error);
}

static int
dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

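/*
 * Load a physically contiguous buffer: build a temporary vm_page
 * array covering the range and hand it to the common loader.
 */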
static int
dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma;
	vm_paddr_t pstart, pend;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	for (i = 0; i < ma_cnt; i++)
		ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE);
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	return (error);
}

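/*
 * Load a buffer described by a kernel or user virtual address:
 * translate it page by page into vm_page pointers, using fake pages
 * while dumping since PHYS_TO_VM_PAGE() cannot be trusted then, and
 * hand the array to the common loader.
 */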
static int
dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	if (dumping) {
		/*
		 * If dumping, do not attempt to call
		 * PHYS_TO_VM_PAGE() at all.  It may return non-NULL
		 * but the vm_page returned might not be initialized,
		 * e.g. for the kernel itself.
		 */
		KASSERT(pmap == kernel_pmap, ("non-kernel address write"));
		fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF,
		    M_ZERO | (map->cansleep ? M_WAITOK : M_NOWAIT));
		if (fma == NULL) {
			free(ma, M_DEVBUF);
			return (ENOMEM);
		}
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			paddr = pmap_kextract(pstart);
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	} else {
		fma = NULL;
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			if (pmap == kernel_pmap)
				paddr = pmap_kextract(pstart);
			else
				paddr = pmap_extract(pmap, pstart);
			ma[i] = PHYS_TO_VM_PAGE(paddr);
			KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr,
			    ("PHYS_TO_VM_PAGE failed %jx %jx m %p",
			    (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]),
			    ma[i]));
		}
	}
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

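/*
 * Record the memory descriptor and callback, so that a load deferred
 * with EINPROGRESS can be replayed later from dmar_bus_task_dmamap().
 */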
static void
dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_dmar *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_dmar *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_dmar *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the DMAR code to perform
 * the actual unload, i.e. the unmapping of the map entries' page
 * tables, from the delayed context on i386, since mapping a page
 * table page might require a sleep to succeed.  The unfortunate
 * consequence is that DMA requests may still be served for some time
 * after the bus_dmamap_unload() call has returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_ctx *ctx;
#if defined(__amd64__)
	struct dmar_map_entries_tailq entries;
#endif

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	ctx = tag->ctx;
	atomic_add_long(&ctx->unloads, 1);

#if defined(__i386__)
	DMAR_CTX_LOCK(ctx);
	TAILQ_CONCAT(&ctx->unload_entries, &map->map_entries, dmamap_link);
	DMAR_CTX_UNLOCK(ctx);
	taskqueue_enqueue(ctx->dmar->delayed_taskqueue, &ctx->unload_task);
#else /* defined(__amd64__) */
	TAILQ_INIT(&entries);
	DMAR_CTX_LOCK(ctx);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	DMAR_CTX_UNLOCK(ctx);
	THREAD_NO_SLEEPING();
	dmar_ctx_unload(ctx, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx));
#endif
}

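/*
 * Sync is a no-op: this implementation maps the original buffer pages
 * directly and never uses bounce pages, so there is no copy to
 * synchronize.
 */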
static void
dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dmasync_op_t op)
{
}

struct bus_dma_impl bus_dma_dmar_impl = {
	.tag_create = dmar_bus_dma_tag_create,
	.tag_destroy = dmar_bus_dma_tag_destroy,
	.map_create = dmar_bus_dmamap_create,
	.map_destroy = dmar_bus_dmamap_destroy,
	.mem_alloc = dmar_bus_dmamem_alloc,
	.mem_free = dmar_bus_dmamem_free,
	.load_phys = dmar_bus_dmamap_load_phys,
	.load_buffer = dmar_bus_dmamap_load_buffer,
	.load_ma = dmar_bus_dmamap_load_ma,
	.map_waitok = dmar_bus_dmamap_waitok,
	.map_complete = dmar_bus_dmamap_complete,
	.map_unload = dmar_bus_dmamap_unload,
	.map_sync = dmar_bus_dmamap_sync
};

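/*
 * Taskqueue handler that replays dmamap loads which could not be
 * completed in a non-sleepable context.  Each queued map is reloaded
 * with sleeping allowed; bus_dmamap_load_mem() invokes the driver
 * callback, and the driver lock taken around the callback by
 * dmar_bus_dmamap_complete() is dropped again afterwards.
 */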
static void
dmar_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_unit *unit;

	unit = arg;
	DMAR_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		DMAR_UNLOCK(unit);
		tag = map->tag;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		DMAR_LOCK(unit);
	}
	DMAR_UNLOCK(unit);
}

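/*
 * Queue a map whose load returned EINPROGRESS for the unit's delayed
 * taskqueue to complete.
 */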
static void
dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map)
{

	map->locked = false;
	DMAR_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	DMAR_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

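/*
 * Create the per-unit taskqueue that serves both the delayed dmamap
 * loads scheduled above and the per-context unload tasks enqueued by
 * the map unload paths.
 */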
int
dmar_init_busdma(struct dmar_unit *unit)
{

	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "dmar%d busdma taskq", unit->unit);
	return (0);
}

void
dmar_fini_busdma(struct dmar_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}