/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/x86/iommu/busdma_dmar.c 263747 2014-03-25 20:17:57Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/busdma_dmar.h>
#include <x86/iommu/intel_dmar.h>

/*
 * busdma_dmar.c, the implementation of the busdma(9) interface using
 * DMAR units from Intel VT-d.
 */

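/*
 * Check whether the administrator excluded the device from DMAR
 * translation with the hw.busdma.pci<domain>.<bus>.<slot>.<func>.bounce
 * loader tunable.  Such a device is given an identity mapping instead,
 * see dmar_instantiate_ctx() below.
 */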
static bool
dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d.bounce",
	    domain, bus, slot, func);
	env = getenv(str);
	if (env == NULL)
		return (false);
	freeenv(env);
	return (true);
}

/*
 * Given original device, find the requester ID that will be seen by
 * the DMAR unit and used for page table lookup.  PCI bridges may take
 * ownership of transactions from downstream devices, so it may not be
 * the same as the BSF of the target device.  In those cases, all
 * devices downstream of the bridge must share a single mapping
 * domain, and must collectively be assigned to use either DMAR or
 * bounce mapping.
 */
static device_t
dmar_get_requester(device_t dev, int *bus, int *slot, int *func)
{
	devclass_t pci_class;
	device_t pci, pcib, requester;
	int cap_offset;

	pci_class = devclass_find("pci");
	requester = dev;

	*bus = pci_get_bus(dev);
	*slot = pci_get_slot(dev);
	*func = pci_get_function(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the DMAR
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(dev);
		KASSERT(pci != NULL, ("NULL parent for pci%d:%d:%d:%d",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("Non-pci parent for pci%d:%d:%d:%d",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("NULL bridge for pci%d:%d:%d:%d",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
			break;

		if (pci_find_cap(dev, PCIY_EXPRESS, &cap_offset) != 0) {
			/*
			 * The device is not PCIe, so it cannot be seen
			 * as a requester by the DMAR unit.
			 */
			requester = pcib;

			/* Check whether the bus above is PCIe. */
			if (pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: This does not handle the case
				 * of a PCIe->PCI-X bridge, where the
				 * bridge takes ownership of requests
				 * only in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*bus = pci_get_bus(dev);
				*slot = *func = 0;
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it is PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*bus = pci_get_bus(pcib);
				*slot = pci_get_slot(pcib);
				*func = pci_get_function(pcib);
			}
		}
		/*
		 * Do not stop the loop even if the target device is
		 * PCIe, because it is possible (but unlikely) to have
		 * a PCI->PCIe bridge somewhere in the hierarchy.
		 */

		dev = pcib;
	}
	return (requester);
}

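/*
 * Look up or create the DMAR context that translates DMA for the
 * given device, keyed by the requester ID computed above.  If the
 * device was excluded from translation by the administrator, the
 * context is marked disabled, its first reference is kept so that the
 * identity mapping stays in place, and NULL is returned so that the
 * caller falls back to the regular busdma implementation.
 */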
struct dmar_ctx *
dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
{
	device_t requester;
	struct dmar_ctx *ctx;
	bool disabled;
	int bus, slot, func;

	requester = dmar_get_requester(dev, &bus, &slot, &func);

	/*
	 * If the user requested that the IOMMU be disabled for the
	 * device, we cannot disable the DMAR itself, due to the
	 * possibility of other devices on the same DMAR still
	 * requiring translation.  Instead provide the identity
	 * mapping for the device context.
	 */
	disabled = dmar_bus_dma_is_dev_disabled(pci_get_domain(dev), bus,
	    slot, func);
	ctx = dmar_get_ctx(dmar, requester, bus, slot, func, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on the context, release
		 * the later refs.
		 */
		DMAR_LOCK(dmar);
		if ((ctx->flags & DMAR_CTX_DISABLED) == 0) {
			ctx->flags |= DMAR_CTX_DISABLED;
			DMAR_UNLOCK(dmar);
		} else {
			dmar_free_ctx_locked(dmar, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

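/*
 * Return the busdma tag that routes the child device's DMA through
 * the DMAR, or NULL when the child is not within the scope of any
 * DMAR unit, in which case the caller uses the regular busdma
 * implementation.
 */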
bus_dma_tag_t
dmar_get_dma_tag(device_t dev, device_t child)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;
	bus_dma_tag_t res;

	dmar = dmar_find(child);
	/* Not in scope of any DMAR? */
	if (dmar == NULL)
		return (NULL);
	dmar_quirks_pre_use(dmar);
	dmar_instantiate_rmrr_ctxs(dmar);

	ctx = dmar_instantiate_ctx(dmar, child, false);
	res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag;
	return (res);
}

static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");

static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
    struct bus_dmamap_dmar *map);

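/*
 * All DMAR busdma tags are descendants of the ctx_tag embedded in the
 * context handed out by dmar_get_dma_tag(); child tags inherit the
 * context and owner from their parent.
 */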
static int
dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_dmar *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_dmar *)parent;
	newtag->common.impl = &bus_dma_dmar_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

static int
dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == &dmat->ctx->ctx_tag)
					dmar_free_ctx(dmat->ctx);
				free(dmat->segments, M_DMAR_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

static int
dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = malloc(sizeof(*map), M_DMAR_DMAMAP, M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_DMAR_DMAMAP, M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_DMAR_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

static int
dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	if (map != NULL) {
		DMAR_CTX_LOCK(tag->ctx);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			DMAR_CTX_UNLOCK(tag->ctx);
			return (EBUSY);
		}
		DMAR_CTX_UNLOCK(tag->ctx);
		free(map, M_DMAR_DMAMAP);
	}
	tag->map_count--;
	return (0);
}


static int
dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	int error, mflags;
	vm_memattr_t attr;

	error = dmar_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc(tag->common.maxsize, M_DEVBUF, mflags);
		map->flags |= BUS_DMAMAP_DMAR_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr(kernel_arena,
		    tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR,
		    attr);
		map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		dmar_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

static void
dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
		    ("dmar_bus_dmamem_free for non-allocated map %p", map));
		kmem_free(kernel_arena, (vm_offset_t)vaddr,
		    tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}

	dmar_bus_dmamap_destroy(dmat, map1);
}

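/*
 * Map the run of pages ma[] with the given offset and length into the
 * context address space: allocate address ranges with dmar_gas_map(),
 * record each new entry on the map and on the caller's unroll list,
 * and fill in the busdma segment array.
 */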
static int
dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct dmar_map_entries_tailq *unroll_list)
{
	struct dmar_ctx *ctx;
	struct dmar_map_entry *entry;
	dmar_gaddr_t size;
	bus_size_t buflen1;
	int error, idx, gas_flags, seg;

	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;
		buflen -= buflen1;
		size = round_page(offset + buflen1);

		/*
		 * (Too) optimistically allow a split if more than one
		 * segment is left.
		 */
		gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= DMAR_GM_CANSPLIT;

		error = dmar_gas_map(ctx, &tag->common, size,
		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
		    gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
			KASSERT(size >= entry->end - entry->start,
			    ("split increased entry size %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
			size = entry->end - entry->start;
			if (buflen1 > size)
				buflen1 = size;
		} else {
			KASSERT(entry->end - entry->start == size,
			    ("no split allowed %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
		}

		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
		    == 0,
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(dmar_test_boundary(entry->start, entry->end -
		    entry->start, tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "maxsegsz 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.maxsegsz));

		DMAR_CTX_LOCK(ctx);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= DMAR_MAP_ENTRY_MAP;
		DMAR_CTX_UNLOCK(ctx);
		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
		offset += buflen1;
		offset &= DMAR_PAGE_MASK;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

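/*
 * Load the page run into the context.  If the load fails part way
 * through, undo all entries created by this attempt and, when the
 * request may be deferred, schedule the map for a retry from the
 * sleepable taskqueue context.
 */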
static int
dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct dmar_ctx *ctx;
	struct dmar_map_entry *entry, *entry1;
	struct dmar_map_entries_tailq unroll_list;
	int error;

	ctx = tag->ctx;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&unroll_list);
	error = dmar_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &unroll_list);
	if (error != 0) {
		/*
		 * The busdma interface does not allow us to report a
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		DMAR_CTX_LOCK(ctx);
		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
		    entry1) {
			/*
			 * No entries other than those created during
			 * the failed run can have been inserted in
			 * the meantime, since we hold the ctx pglock.
			 */
			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
			TAILQ_INSERT_TAIL(&ctx->unload_entries, entry,
			    dmamap_link);
		}
		DMAR_CTX_UNLOCK(ctx);
		taskqueue_enqueue(ctx->dmar->delayed_taskqueue,
		    &ctx->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		dmar_bus_schedule_dmamap(ctx->dmar, map);
	return (error);
}

static int
dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

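/*
 * Load a physically contiguous buffer by building a temporary array
 * of the vm_pages backing it and handing it to the common loader.
 */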
static int
dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma;
	vm_paddr_t pstart, pend;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	for (i = 0; i < ma_cnt; i++)
		ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE);
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	return (error);
}

static int
dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	if (dumping) {
		/*
		 * If dumping, do not attempt to call
		 * PHYS_TO_VM_PAGE() at all.  It may return non-NULL,
		 * but the vm_page returned might not be initialized,
		 * e.g. for the kernel itself.
		 */
		KASSERT(pmap == kernel_pmap, ("non-kernel address write"));
		fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF,
		    M_ZERO | (map->cansleep ? M_WAITOK : M_NOWAIT));
		if (fma == NULL) {
			free(ma, M_DEVBUF);
			return (ENOMEM);
		}
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			paddr = pmap_kextract(pstart);
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	} else {
		fma = NULL;
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			if (pmap == kernel_pmap)
				paddr = pmap_kextract(pstart);
			else
				paddr = pmap_extract(pmap, pstart);
			ma[i] = PHYS_TO_VM_PAGE(paddr);
			KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr,
			    ("PHYS_TO_VM_PAGE failed %jx %jx m %p",
			    (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]),
			    ma[i]));
		}
	}
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

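/*
 * Remember the memory descriptor and callback so that the load can be
 * re-driven from the taskqueue if it has to be deferred.
 */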
static void
dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_dmar *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_dmar *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_dmar *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

static bus_dma_segment_t *
dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the DMAR driver to perform
 * the actual unload, i.e. the removal of the map entries from the
 * page tables, from a delayed context on i386, since mapping a page
 * table page might require a sleep to be successful.  The unfortunate
 * consequence is that DMA requests may still be served for some time
 * after the bus_dmamap_unload() call has returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_ctx *ctx;
#if defined(__amd64__)
	struct dmar_map_entries_tailq entries;
#endif

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	ctx = tag->ctx;
	atomic_add_long(&ctx->unloads, 1);

#if defined(__i386__)
	DMAR_CTX_LOCK(ctx);
	TAILQ_CONCAT(&ctx->unload_entries, &map->map_entries, dmamap_link);
	DMAR_CTX_UNLOCK(ctx);
	taskqueue_enqueue(ctx->dmar->delayed_taskqueue, &ctx->unload_task);
#else /* defined(__amd64__) */
	TAILQ_INIT(&entries);
	DMAR_CTX_LOCK(ctx);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	DMAR_CTX_UNLOCK(ctx);
	THREAD_NO_SLEEPING();
	dmar_ctx_unload(ctx, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx));
#endif
}

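/*
 * The sync operation is a no-op: the DMAR busdma implementation never
 * bounces, so the device always accesses the real buffer pages.
 */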
static void
dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dmasync_op_t op)
{
}

struct bus_dma_impl bus_dma_dmar_impl = {
	.tag_create = dmar_bus_dma_tag_create,
	.tag_destroy = dmar_bus_dma_tag_destroy,
	.map_create = dmar_bus_dmamap_create,
	.map_destroy = dmar_bus_dmamap_destroy,
	.mem_alloc = dmar_bus_dmamem_alloc,
	.mem_free = dmar_bus_dmamem_free,
	.load_phys = dmar_bus_dmamap_load_phys,
	.load_buffer = dmar_bus_dmamap_load_buffer,
	.load_ma = dmar_bus_dmamap_load_ma,
	.map_waitok = dmar_bus_dmamap_waitok,
	.map_complete = dmar_bus_dmamap_complete,
	.map_unload = dmar_bus_dmamap_unload,
	.map_sync = dmar_bus_dmamap_sync
};

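/*
 * Process maps whose load was deferred because it could not complete
 * in a non-sleepable context: redo each load with sleeping allowed,
 * then release the driver lock that dmar_bus_dmamap_complete() took
 * before running the callback.
 */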
static void
dmar_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_unit *unit;
	struct dmar_ctx *ctx;

	unit = arg;
	DMAR_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		DMAR_UNLOCK(unit);
		tag = map->tag;
		ctx = map->tag->ctx;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		DMAR_LOCK(unit);
	}
	DMAR_UNLOCK(unit);
}

static void
dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map)
{
	struct dmar_ctx *ctx;

	ctx = map->tag->ctx;
	map->locked = false;
	DMAR_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	DMAR_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

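/*
 * Create and tear down the per-unit taskqueue that runs the deferred
 * map loads and the context unload work.
 */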
int
dmar_init_busdma(struct dmar_unit *unit)
{

	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "dmar%d busdma taskq", unit->unit);
	return (0);
}

void
dmar_fini_busdma(struct dmar_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}