/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/x86/iommu/busdma_dmar.c 279470 2015-03-01 04:22:06Z rstone $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <x86/iommu/intel_reg.h>
#include <x86/iommu/busdma_dmar.h>
#include <x86/iommu/intel_dmar.h>

/*
 * busdma_dmar.c, the implementation of the busdma(9) interface using
 * DMAR units from Intel VT-d.
 */

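/*
 * Check whether the administrator requested plain bounce (non-DMAR)
 * busdma for the given device.  The check is keyed by a kernel
 * environment (loader tunable) variable whose name encodes the PCI
 * domain, bus, slot and function of the device; only the presence of
 * the variable matters, its value is ignored.  For example (the
 * address below is purely illustrative), booting with
 *	hw.busdma.pci0.0.31.2.bounce=1
 * set from loader.conf requests that pci0:0:31:2 not be translated.
 */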
static bool
dmar_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
	char str[128], *env;

	snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d.bounce",
	    domain, bus, slot, func);
	env = getenv(str);
	if (env == NULL)
		return (false);
	freeenv(env);
	return (true);
}

/*
 * Given the original device, find the requester ID that will be seen
 * by the DMAR unit and used for the page table lookup.  PCI bridges
 * may take ownership of transactions from downstream devices, so the
 * requester ID may not be the same as the BSF of the target device.
 * In those cases, all devices downstream of the bridge must share a
 * single mapping domain, and must collectively be assigned to use
 * either DMAR or bounce mapping.
 */
static device_t
dmar_get_requester(device_t dev, uint16_t *rid)
{
	devclass_t pci_class;
	device_t pci, pcib, requester;
	int cap_offset;

	pci_class = devclass_find("pci");
	requester = dev;

	*rid = pci_get_rid(dev);

	/*
	 * Walk the bridge hierarchy from the target device to the
	 * host port to find the translating bridge nearest the DMAR
	 * unit.
	 */
	for (;;) {
		pci = device_get_parent(dev);
		KASSERT(pci != NULL, ("NULL parent for pci%d:%d:%d:%d",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev)));
		KASSERT(device_get_devclass(pci) == pci_class,
		    ("Non-pci parent for pci%d:%d:%d:%d",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev)));

		pcib = device_get_parent(pci);
		KASSERT(pcib != NULL, ("NULL bridge for pci%d:%d:%d:%d",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev)));

		/*
		 * The parent of our "bridge" isn't another PCI bus,
		 * so pcib isn't a PCI->PCI bridge but rather a host
		 * port, and the requester ID won't be translated
		 * further.
		 */
		if (device_get_devclass(device_get_parent(pcib)) != pci_class)
			break;

		if (pci_find_cap(dev, PCIY_EXPRESS, &cap_offset) != 0) {
			/*
			 * The device is not PCIe, so it cannot be
			 * seen as a requester by the DMAR unit.
			 */
			requester = pcib;

			/* Check whether the bus above is PCIe. */
			if (pci_find_cap(pcib, PCIY_EXPRESS,
			    &cap_offset) == 0) {
				/*
				 * The current device is not PCIe, but
				 * the bridge above it is.  This is a
				 * PCIe->PCI bridge.  Assume that the
				 * requester ID will be the secondary
				 * bus number with slot and function
				 * set to zero.
				 *
				 * XXX: Doesn't handle the case where
				 * the bridge is PCIe->PCI-X, and the
				 * bridge will only take ownership of
				 * requests in some cases.  We should
				 * provide context entries with the
				 * same page tables for taken and
				 * non-taken transactions.
				 */
				*rid = PCI_RID(pci_get_bus(dev), 0, 0);
			} else {
				/*
				 * Neither the device nor the bridge
				 * above it is PCIe.  This is a
				 * conventional PCI->PCI bridge, which
				 * will use the bridge's BSF as the
				 * requester ID.
				 */
				*rid = pci_get_rid(pcib);
			}
		}
		/*
		 * Do not stop the loop even if the target device is
		 * PCIe, because it is possible (but unlikely) to have
		 * a PCI->PCIe bridge somewhere in the hierarchy.
		 */

		dev = pcib;
	}
	return (requester);
}

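/*
 * Create or look up the DMAR context for the given device.  The
 * translation is set up for the requester ID actually seen by the
 * DMAR unit, as determined by dmar_get_requester().  If the
 * administrator requested that translation be disabled for the
 * device, the context is still allocated (with an identity mapping),
 * but NULL is returned to the caller.
 */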
struct dmar_ctx *
dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
{
	device_t requester;
	struct dmar_ctx *ctx;
	bool disabled;
	uint16_t rid;

	requester = dmar_get_requester(dev, &rid);

	/*
	 * If the user requested that the IOMMU be disabled for the
	 * device, we cannot disable the whole DMAR unit, since other
	 * devices on the same unit may still require translation.
	 * Instead, provide an identity mapping for the device
	 * context.
	 */
	disabled = dmar_bus_dma_is_dev_disabled(pci_get_domain(requester),
	    pci_get_bus(requester), pci_get_slot(requester),
	    pci_get_function(requester));
	ctx = dmar_get_ctx(dmar, requester, rid, disabled, rmrr);
	if (ctx == NULL)
		return (NULL);
	if (disabled) {
		/*
		 * Keep the first reference on the context, release
		 * any later references.
		 */
		DMAR_LOCK(dmar);
		if ((ctx->flags & DMAR_CTX_DISABLED) == 0) {
			ctx->flags |= DMAR_CTX_DISABLED;
			DMAR_UNLOCK(dmar);
		} else {
			dmar_free_ctx_locked(dmar, ctx);
		}
		ctx = NULL;
	}
	return (ctx);
}

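/*
 * Bus method used to obtain the DMA tag for a child device.  Returns
 * NULL when the child is not within the scope of any DMAR unit or
 * when the translation context could not be set up, so that the
 * caller can fall back to the ordinary (non-DMAR) busdma tag.
 */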
bus_dma_tag_t
dmar_get_dma_tag(device_t dev, device_t child)
{
	struct dmar_unit *dmar;
	struct dmar_ctx *ctx;
	bus_dma_tag_t res;

	dmar = dmar_find(child);
	/* Not in the scope of any DMAR unit? */
	if (dmar == NULL)
		return (NULL);
	dmar_quirks_pre_use(dmar);
	dmar_instantiate_rmrr_ctxs(dmar);

	ctx = dmar_instantiate_ctx(dmar, child, false);
	res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag;
	return (res);
}

static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");

static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
    struct bus_dmamap_dmar *map);

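/*
 * Create a DMA tag backed by the DMAR.  The common tag fields are
 * filled in by common_bus_dma_tag_create(); the DMAR context and
 * owner are inherited from the parent tag, the root of which is the
 * context's own ctx_tag.  The parent is assumed to be non-NULL on
 * this path, since ctx and owner are copied from it.
 */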
static int
dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_dmar *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_dmar *)parent;
	newtag->common.impl = &bus_dma_dmar_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

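/*
 * Destroy a tag, walking up the parent chain and releasing one
 * reference at each level until a tag with remaining references is
 * found.  Freeing the context's own ctx_tag drops the context
 * reference via dmar_free_ctx().  EBUSY is returned if the tag still
 * has maps.
 */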
static int
dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == &dmat->ctx->ctx_tag)
					dmar_free_ctx(dmat->ctx);
				free(dmat->segments, M_DMAR_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

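/*
 * Allocate a DMA map for the tag.  The segments array shared by all
 * maps of the tag is allocated lazily on first use.  The map is
 * created "locked", i.e. loads are expected to run in the driver's
 * own context unless they are later deferred to the taskqueue.
 */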
static int
dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = malloc(sizeof(*map), M_DMAR_DMAMAP, M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_DMAR_DMAMAP, M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_DMAR_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

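/*
 * Free a DMA map.  The map must not have any active entries, i.e. it
 * must have been unloaded first; otherwise EBUSY is returned.
 */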
static int
dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	if (map != NULL) {
		DMAR_CTX_LOCK(tag->ctx);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			DMAR_CTX_UNLOCK(tag->ctx);
			return (EBUSY);
		}
		DMAR_CTX_UNLOCK(tag->ctx);
		free(map, M_DMAR_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

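/*
 * Allocate memory suitable for mapping through the DMAR.  Small
 * requests with compatible alignment and default memory attributes
 * are served from malloc(9); everything else falls back to
 * kmem_alloc_attr().  The choice is recorded in the map flags so
 * that dmar_bus_dmamem_free() releases the memory the same way.
 */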
static int
dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	int error, mflags;
	vm_memattr_t attr;

	error = dmar_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc(tag->common.maxsize, M_DEVBUF, mflags);
		map->flags |= BUS_DMAMAP_DMAR_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr(kernel_arena,
		    tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR,
		    attr);
		map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		dmar_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

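/*
 * Release memory obtained from dmar_bus_dmamem_alloc(), using the
 * allocator recorded in the map flags, and destroy the map.
 */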
static void
dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
		    ("dmar_bus_dmamem_free for non alloced map %p", map));
		kmem_free(kernel_arena, (vm_offset_t)vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
	}

	dmar_bus_dmamap_destroy(dmat, map1);
}

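/*
 * Build the bus address mapping for a buffer described by the
 * vm_page array ma.  Each iteration allocates a chunk of the DMAR
 * address space with dmar_gas_map(), which also installs the page
 * table entries, and records the result as one busdma segment.
 * Entries are queued both on the map (for later unload) and on
 * unroll_list so that the caller can revert everything if the load
 * fails midway.
 */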
static int
dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct dmar_map_entries_tailq *unroll_list)
{
	struct dmar_ctx *ctx;
	struct dmar_map_entry *entry;
	dmar_gaddr_t size;
	bus_size_t buflen1;
	int error, idx, gas_flags, seg;

	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;
		buflen -= buflen1;
		size = round_page(offset + buflen1);

		/*
		 * (Too) optimistically allow a split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= DMAR_GM_CANSPLIT;

		error = dmar_gas_map(ctx, &tag->common, size,
		    DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
		    gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
			KASSERT(size >= entry->end - entry->start,
			    ("split increased entry size %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
			size = entry->end - entry->start;
			if (buflen1 > size)
				buflen1 = size;
		} else {
			KASSERT(entry->end - entry->start == size,
			    ("no split allowed %jx %jx %jx",
			    (uintmax_t)size, (uintmax_t)entry->start,
			    (uintmax_t)entry->end));
		}

		KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
		    == 0,
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(dmar_test_boundary(entry->start, entry->end -
		    entry->start, tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "maxsegsz 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end, (uintmax_t)tag->common.maxsegsz));

		DMAR_CTX_LOCK(ctx);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= DMAR_MAP_ENTRY_MAP;
		DMAR_CTX_UNLOCK(ctx);
		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
		offset += buflen1;
		offset &= DMAR_PAGE_MASK;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

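/*
 * Wrapper around dmar_bus_dmamap_load_something1() that handles
 * failure: all entries created during the failed attempt are
 * unrolled and queued for unload, and, when the allocation failed
 * with ENOMEM, the caller did not pass BUS_DMA_NOWAIT and the map
 * cannot sleep, the load is deferred to the taskqueue and
 * EINPROGRESS is returned.
 */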
static int
dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag,
    struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct dmar_ctx *ctx;
	struct dmar_map_entry *entry, *entry1;
	struct dmar_map_entries_tailq unroll_list;
	int error;

	ctx = tag->ctx;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&unroll_list);
	error = dmar_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &unroll_list);
	if (error != 0) {
		/*
		 * The busdma interface does not allow us to report a
		 * partial buffer load, so unfortunately we have to
		 * revert all the work done.
		 */
		DMAR_CTX_LOCK(ctx);
		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
		    entry1) {
			/*
			 * No entries other than the ones we created
			 * during the failed run could have been
			 * inserted there in between, since we own the
			 * ctx pglock.
			 */
			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
			TAILQ_INSERT_TAIL(&ctx->unload_entries, entry,
			    dmamap_link);
		}
		DMAR_CTX_UNLOCK(ctx);
		taskqueue_enqueue(ctx->dmar->delayed_taskqueue,
		    &ctx->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		dmar_bus_schedule_dmamap(ctx->dmar, map);
	return (error);
}

static int
dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

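/*
 * Load a physically contiguous buffer.  A temporary vm_page pointer
 * array covering the buffer is constructed and handed to the common
 * load path.
 */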
static int
dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma;
	vm_paddr_t pstart, pend;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	for (i = 0; i < ma_cnt; i++)
		ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE);
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	return (error);
}

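/*
 * Load a kernel or user virtual address buffer.  The buffer is
 * translated page by page into a vm_page pointer array.  During a
 * kernel dump, fake pages are used instead, since the real vm_page
 * structures may not be initialized for all of the dumped memory.
 */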
static int
dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
    bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, offset;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	pstart = trunc_page((vm_offset_t)buf);
	pend = round_page((vm_offset_t)buf + buflen);
	offset = (vm_offset_t)buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL)
		return (ENOMEM);
	if (dumping) {
		/*
		 * If dumping, do not attempt to call
		 * PHYS_TO_VM_PAGE() at all.  It may return non-NULL
		 * but the vm_page returned might not be initialized,
		 * e.g. for the kernel itself.
		 */
		KASSERT(pmap == kernel_pmap, ("non-kernel address write"));
		fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF,
		    M_ZERO | (map->cansleep ? M_WAITOK : M_NOWAIT));
		if (fma == NULL) {
			free(ma, M_DEVBUF);
			return (ENOMEM);
		}
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			paddr = pmap_kextract(pstart);
			vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
			ma[i] = &fma[i];
		}
	} else {
		fma = NULL;
		for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
			if (pmap == kernel_pmap)
				paddr = pmap_kextract(pstart);
			else
				paddr = pmap_extract(pmap, pstart);
			ma[i] = PHYS_TO_VM_PAGE(paddr);
			KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr,
			    ("PHYS_TO_VM_PAGE failed %jx %jx m %p",
			    (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]),
			    ma[i]));
		}
	}
	error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
	    flags, segs, segp);
	free(ma, M_DEVBUF);
	free(fma, M_DEVBUF);
	return (error);
}

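/*
 * Record the memory descriptor and callback of a deferred request so
 * that the load can be retried later from the taskqueue context.
 */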
static void
dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
	struct bus_dmamap_dmar *map;

	if (map1 == NULL)
		return;
	map = (struct bus_dmamap_dmar *)map1;
	map->mem = *mem;
	map->tag = (struct bus_dma_tag_dmar *)dmat;
	map->callback = callback;
	map->callback_arg = callback_arg;
}

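/*
 * Finish a load.  If the load was performed from the delayed
 * (taskqueue) context, reacquire the driver lock before returning
 * the segment array to the caller.
 */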
static bus_dma_segment_t *
dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
    bus_dma_segment_t *segs, int nsegs, int error)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;

	if (!map->locked) {
		KASSERT(map->cansleep,
		    ("map not locked and not sleepable context %p", map));

		/*
		 * We are called from the delayed context.  Relock the
		 * driver.
		 */
		(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
		map->locked = true;
	}

	if (segs == NULL)
		segs = tag->segments;
	return (segs);
}

/*
 * The limitations of the busdma KPI force the DMAR code to perform
 * the actual unload, i.e. the removal of the map entries from the
 * page tables, from the delayed context on i386, since mapping a
 * page table page might require a sleep to be successful.  The
 * unfortunate consequence is that DMA requests can still be served
 * for some time after the bus_dmamap_unload() call has returned.
 *
 * On amd64, we assume that sf allocation cannot fail.
 */
static void
dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_ctx *ctx;
#if defined(__amd64__)
	struct dmar_map_entries_tailq entries;
#endif

	tag = (struct bus_dma_tag_dmar *)dmat;
	map = (struct bus_dmamap_dmar *)map1;
	ctx = tag->ctx;
	atomic_add_long(&ctx->unloads, 1);

#if defined(__i386__)
	DMAR_CTX_LOCK(ctx);
	TAILQ_CONCAT(&ctx->unload_entries, &map->map_entries, dmamap_link);
	DMAR_CTX_UNLOCK(ctx);
	taskqueue_enqueue(ctx->dmar->delayed_taskqueue, &ctx->unload_task);
#else /* defined(__amd64__) */
	TAILQ_INIT(&entries);
	DMAR_CTX_LOCK(ctx);
	TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
	DMAR_CTX_UNLOCK(ctx);
	THREAD_NO_SLEEPING();
	dmar_ctx_unload(ctx, &entries, false);
	THREAD_SLEEPING_OK();
	KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx));
#endif
}

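/*
 * No bounce buffers are involved in DMAR-translated DMA, so there is
 * nothing to do here and the sync method is empty.
 */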
static void
dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
    bus_dmasync_op_t op)
{
}

struct bus_dma_impl bus_dma_dmar_impl = {
	.tag_create = dmar_bus_dma_tag_create,
	.tag_destroy = dmar_bus_dma_tag_destroy,
	.map_create = dmar_bus_dmamap_create,
	.map_destroy = dmar_bus_dmamap_destroy,
	.mem_alloc = dmar_bus_dmamem_alloc,
	.mem_free = dmar_bus_dmamem_free,
	.load_phys = dmar_bus_dmamap_load_phys,
	.load_buffer = dmar_bus_dmamap_load_buffer,
	.load_ma = dmar_bus_dmamap_load_ma,
	.map_waitok = dmar_bus_dmamap_waitok,
	.map_complete = dmar_bus_dmamap_complete,
	.map_unload = dmar_bus_dmamap_unload,
	.map_sync = dmar_bus_dmamap_sync
};

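/*
 * Taskqueue handler which retries the deferred map load requests in
 * a context that is allowed to sleep.  If the load completed, the
 * driver lock taken by dmar_bus_dmamap_complete() around the
 * callback is dropped again after bus_dmamap_load_mem() returns.
 */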
static void
dmar_bus_task_dmamap(void *arg, int pending)
{
	struct bus_dma_tag_dmar *tag;
	struct bus_dmamap_dmar *map;
	struct dmar_unit *unit;
	struct dmar_ctx *ctx;

	unit = arg;
	DMAR_LOCK(unit);
	while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
		TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
		DMAR_UNLOCK(unit);
		tag = map->tag;
		ctx = map->tag->ctx;
		map->cansleep = true;
		map->locked = false;
		bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
		    &map->mem, map->callback, map->callback_arg,
		    BUS_DMA_WAITOK);
		map->cansleep = false;
		if (map->locked) {
			(tag->common.lockfunc)(tag->common.lockfuncarg,
			    BUS_DMA_UNLOCK);
		} else
			map->locked = true;
		map->cansleep = false;
		DMAR_LOCK(unit);
	}
	DMAR_UNLOCK(unit);
}

static void
dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map)
{
	struct dmar_ctx *ctx;

	ctx = map->tag->ctx;
	map->locked = false;
	DMAR_LOCK(unit);
	TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
	DMAR_UNLOCK(unit);
	taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}

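/*
 * Create the taskqueue used for the deferred busdma work (delayed
 * map loads and unloads) of the DMAR unit.
 */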
int
dmar_init_busdma(struct dmar_unit *unit)
{

	TAILQ_INIT(&unit->delayed_maps);
	TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit);
	unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK,
	    taskqueue_thread_enqueue, &unit->delayed_taskqueue);
	taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
	    "dmar%d busdma taskq", unit->unit);
	return (0);
}

void
dmar_fini_busdma(struct dmar_unit *unit)
{

	if (unit->delayed_taskqueue == NULL)
		return;

	taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
	taskqueue_free(unit->delayed_taskqueue);
	unit->delayed_taskqueue = NULL;
}
853