/* at91_mci.c revision 318198 */
1/*-
2 * Copyright (c) 2006 Bernd Walter.  All rights reserved.
3 * Copyright (c) 2006 M. Warner Losh.  All rights reserved.
4 * Copyright (c) 2010 Greg Ansley.  All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include "opt_platform.h"
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/10/sys/arm/at91/at91_mci.c 318198 2017-05-11 21:01:02Z marius $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/bus.h>
36#include <sys/endian.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/module.h>
41#include <sys/mutex.h>
42#include <sys/resource.h>
43#include <sys/rman.h>
44#include <sys/sysctl.h>
45
46#include <machine/bus.h>
47#include <machine/cpu.h>
48#include <machine/cpufunc.h>
49#include <machine/resource.h>
50#include <machine/intr.h>
51
52#include <arm/at91/at91var.h>
53#include <arm/at91/at91_mcireg.h>
54#include <arm/at91/at91_pdcreg.h>
55
56#include <dev/mmc/bridge.h>
57#include <dev/mmc/mmcbrvar.h>
58
59#ifdef FDT
60#include <dev/fdt/fdt_common.h>
61#include <dev/ofw/ofw_bus.h>
62#include <dev/ofw/ofw_bus_subr.h>
63#endif
64
65#include "mmcbr_if.h"
66
67#include "opt_at91.h"
68
69/*
70 * About running the MCI bus above 25MHz
71 *
72 * Historically, the MCI bus has been run at 30MHz on systems with a 60MHz
73 * master clock, in part due to a bug in dev/mmc.c making always request
74 * 30MHz, and in part over clocking the bus because 15MHz was too slow.
75 * Fixing that bug causes the mmc driver to request a 25MHz clock (as it
76 * should) and the logic in at91_mci_update_ios() picks the highest speed that
77 * doesn't exceed that limit.  With a 60MHz MCK that would be 15MHz, and
78 * that's a real performance buzzkill when you've been getting away with 30MHz
79 * all along.
80 *
81 * By defining AT91_MCI_ALLOW_OVERCLOCK (or setting the allow_overclock=1
82 * device hint or sysctl) you can enable logic in at91_mci_update_ios() to
 * overclock the SD bus a little by running it at MCK / 2 when the requested
84 * speed is 25MHz and the next highest speed is 15MHz or less.  This appears
85 * to work on virtually all SD cards, since it is what this driver has been
86 * doing prior to the introduction of this option, where the overclocking vs
 * underclocking decision was automatically "overclock".  Modern SD cards can
88 * run at 45mhz/1-bit in standard mode (high speed mode enable commands not
89 * sent) without problems.
90 *
91 * Speaking of high-speed mode, the rm9200 manual says the MCI device supports
92 * the SD v1.0 specification and can run up to 50MHz.  This is interesting in
93 * that the SD v1.0 spec caps the speed at 25MHz; high speed mode was added in
94 * the v1.10 spec.  Furthermore, high speed mode doesn't just crank up the
95 * clock, it alters the signal timing.  The rm9200 MCI device doesn't support
96 * these altered timings.  So while speeds over 25MHz may work, they only work
97 * in what the SD spec calls "default" speed mode, and it amounts to violating
98 * the spec by overclocking the bus.
99 *
 * If you also enable 4-wire mode it's possible that transfers faster than
 * 25MHz will fail.  On the AT91RM9200, due to bugs in the bus contention
 * logic, transfers will fail if you have the USB host device and OHCI driver
 * enabled.  Even underclocking to 15MHz, intermittent overrun and underrun
 * errors occur.
104 * Note that you don't even need to have usb devices attached to the system,
105 * the errors begin to occur as soon as the OHCI driver sets the register bit
106 * to enable periodic transfers.  It appears (based on brief investigation)
107 * that the usb host controller uses so much ASB bandwidth that sometimes the
108 * DMA for MCI transfers doesn't get a bus grant in time and data gets
109 * dropped.  Adding even a modicum of network activity changes the symptom
 * from intermittent to very frequent.  Members of the AT91SAM9 family have
111 * corrected this problem, or are at least better about their use of the bus.
112 */
113#ifndef AT91_MCI_ALLOW_OVERCLOCK
114#define AT91_MCI_ALLOW_OVERCLOCK 1
115#endif
116
117/*
118 * Allocate 2 bounce buffers we'll use to endian-swap the data due to the rm9200
119 * erratum.  We use a pair of buffers because when reading that lets us begin
120 * endian-swapping the data in the first buffer while the DMA is reading into
121 * the second buffer.  (We can't use the same trick for writing because we might
122 * not get all the data in the 2nd buffer swapped before the hardware needs it;
123 * dealing with that would add complexity to the driver.)
124 *
125 * The buffers are sized at 16K each due to the way the busdma cache sync
126 * operations work on arm.  A dcache_inv_range() operation on a range larger
127 * than 16K gets turned into a dcache_wbinv_all().  That needlessly flushes the
128 * entire data cache, impacting overall system performance.
129 */
130#define BBCOUNT     2
131#define BBSIZE      (16*1024)
132#define MAX_BLOCKS  ((BBSIZE*BBCOUNT)/512)
133
134static int mci_debug;
135
/* Per-device state for one MCI controller instance. */
struct at91_mci_softc {
	void *intrhand;			/* Interrupt handle */
	device_t dev;			/* Our newbus device */
	int sc_cap;			/* Capability flags, CAP_* below */
#define	CAP_HAS_4WIRE		1	/* Has 4 wire bus */
#define	CAP_NEEDS_BYTESWAP	2	/* broken hardware needing bounce */
#define	CAP_MCI1_REV2XX		4	/* MCI 1 rev 2.x */
	int flags;			/* State of the in-progress request */
#define PENDING_CMD	0x01		/* Initial command not yet issued */
#define PENDING_STOP	0x02		/* Stop command still to be issued */
#define CMD_MULTIREAD	0x10		/* Current cmd is a multi-block read */
#define CMD_MULTIWRITE	0x20		/* Current cmd is a multi-block write */
	int has_4wire;			/* Hint/sysctl: 4-wire bus is wired up */
	int allow_overclock;		/* Hint/sysctl: allow MCK/2 for 25MHz */
	struct resource *irq_res;	/* IRQ resource */
	struct resource	*mem_res;	/* Memory resource */
	struct mtx sc_mtx;		/* Protects softc state */
	bus_dma_tag_t dmatag;		/* Tag for the bounce buffers */
	struct mmc_host host;		/* Parameters shared with mmc layer */
	int bus_busy;			/* Non-zero while a client owns us */
	struct mmc_request *req;	/* Request currently being processed */
	struct mmc_command *curcmd;	/* Command currently on the wire */
	bus_dmamap_t bbuf_map[BBCOUNT];
	char      *  bbuf_vaddr[BBCOUNT]; /* bounce bufs in KVA space */
	uint32_t     bbuf_len[BBCOUNT];	  /* len currently queued for bounce buf */
	uint32_t     bbuf_curidx;	  /* which bbuf is the active DMA buffer */
	uint32_t     xfer_offset;	  /* offset so far into caller's buf */
};
164
165/* bus entry points */
166static int at91_mci_probe(device_t dev);
167static int at91_mci_attach(device_t dev);
168static int at91_mci_detach(device_t dev);
169static void at91_mci_intr(void *);
170
171/* helper routines */
172static int at91_mci_activate(device_t dev);
173static void at91_mci_deactivate(device_t dev);
174static int at91_mci_is_mci1rev2xx(void);
175
176#define AT91_MCI_LOCK(_sc)		mtx_lock(&(_sc)->sc_mtx)
177#define	AT91_MCI_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_mtx)
178#define AT91_MCI_LOCK_INIT(_sc) \
179	mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \
180	    "mci", MTX_DEF)
181#define AT91_MCI_LOCK_DESTROY(_sc)	mtx_destroy(&_sc->sc_mtx);
182#define AT91_MCI_ASSERT_LOCKED(_sc)	mtx_assert(&_sc->sc_mtx, MA_OWNED);
183#define AT91_MCI_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED);
184
/* Read a 32-bit MCI/PDC register at byte offset 'off'. */
static inline uint32_t
RD4(struct at91_mci_softc *sc, bus_size_t off)
{
	return (bus_read_4(sc->mem_res, off));
}
190
/* Write a 32-bit MCI/PDC register at byte offset 'off'. */
static inline void
WR4(struct at91_mci_softc *sc, bus_size_t off, uint32_t val)
{
	bus_write_4(sc->mem_res, off, val);
}
196
197static void
198at91_bswap_buf(struct at91_mci_softc *sc, void * dptr, void * sptr, uint32_t memsize)
199{
200	uint32_t * dst = (uint32_t *)dptr;
201	uint32_t * src = (uint32_t *)sptr;
202	uint32_t   i;
203
204	/*
205	 * If the hardware doesn't need byte-swapping, let bcopy() do the
206	 * work.  Use bounce buffer even if we don't need byteswap, since
207	 * buffer may straddle a page boundry, and we don't handle
208	 * multi-segment transfers in hardware.  Seen from 'bsdlabel -w' which
209	 * uses raw geom access to the volume.  Greg Ansley (gja (at)
210	 * ansley.com)
211	 */
212	if (!(sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
213		memcpy(dptr, sptr, memsize);
214		return;
215	}
216
217	/*
218	 * Nice performance boost for slightly unrolling this loop.
219	 * (But very little extra boost for further unrolling it.)
220	 */
221	for (i = 0; i < memsize; i += 16) {
222		*dst++ = bswap32(*src++);
223		*dst++ = bswap32(*src++);
224		*dst++ = bswap32(*src++);
225		*dst++ = bswap32(*src++);
226	}
227
228	/* Mop up the last 1-3 words, if any. */
229	for (i = 0; i < (memsize & 0x0F); i += 4) {
230		*dst++ = bswap32(*src++);
231	}
232}
233
234static void
235at91_mci_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
236{
237	if (error != 0)
238		return;
239	*(bus_addr_t *)arg = segs[0].ds_addr;
240}
241
/*
 * Quiesce the peripheral DMA controller: disable both the transmit and
 * receive channels, then zero all pointer/counter register pairs (current
 * and next, for both directions) so no stale transfer can resume.
 */
static void
at91_mci_pdc_disable(struct at91_mci_softc *sc)
{
	WR4(sc, PDC_PTCR, PDC_PTCR_TXTDIS | PDC_PTCR_RXTDIS);
	WR4(sc, PDC_RPR, 0);
	WR4(sc, PDC_RCR, 0);
	WR4(sc, PDC_RNPR, 0);
	WR4(sc, PDC_RNCR, 0);
	WR4(sc, PDC_TPR, 0);
	WR4(sc, PDC_TCR, 0);
	WR4(sc, PDC_TNPR, 0);
	WR4(sc, PDC_TNCR, 0);
}
255
256/*
257 * Reset the controller, then restore most of the current state.
258 *
259 * This is called after detecting an error.  It's also called after stopping a
260 * multi-block write, to un-wedge the device so that it will handle the NOTBUSY
261 * signal correctly.  See comments in at91_mci_stop_done() for more details.
262 */
static void at91_mci_reset(struct at91_mci_softc *sc)
{
	uint32_t mr;
	uint32_t sdcr;
	uint32_t dtor;
	uint32_t imr;

	at91_mci_pdc_disable(sc);

	/*
	 * Save current state.  For MR only the low 15 bits are kept
	 * (clock/power-save dividers); the block-length field in the high
	 * bits is reprogrammed per-command in at91_mci_start_cmd().
	 */

	imr  = RD4(sc, MCI_IMR);
	mr   = RD4(sc, MCI_MR) & 0x7fff;
	sdcr = RD4(sc, MCI_SDCR);
	dtor = RD4(sc, MCI_DTOR);

	/* reset the controller */

	WR4(sc, MCI_IDR, 0xffffffff);
	WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST);

	/* restore state */

	WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
	WR4(sc, MCI_MR, mr);
	WR4(sc, MCI_SDCR, sdcr);
	WR4(sc, MCI_DTOR, dtor);
	WR4(sc, MCI_IER, imr);

	/*
	 * Make sure sdio interrupts will fire.  Not sure why reading
	 * SR ensures that, but this is in the linux driver.
	 */

	RD4(sc, MCI_SR);
}
299
/*
 * Bring the controller out of reset into a known-good idle configuration:
 * maximum data timeout, PDC mode, a conservative initial clock divisor,
 * the compile-time-selected slot, and power-save enabled.
 */
static void
at91_mci_init(device_t dev)
{
	struct at91_mci_softc *sc = device_get_softc(dev);
	uint32_t val;

	WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* device into reset */
	WR4(sc, MCI_IDR, 0xffffffff);		/* Turn off interrupts */
	WR4(sc, MCI_DTOR, MCI_DTOR_DTOMUL_1M | 1);
	val = MCI_MR_PDCMODE;
	val |= 0x34a;				/* PWSDIV = 3; CLKDIV = 74 */
//	if (sc->sc_cap & CAP_MCI1_REV2XX)
//		val |= MCI_MR_RDPROOF | MCI_MR_WRPROOF;
	WR4(sc, MCI_MR, val);
#ifndef  AT91_MCI_SLOT_B
	WR4(sc, MCI_SDCR, 0);			/* SLOT A, 1 bit bus */
#else
	/*
	 * XXX Really should add second "unit" but nobody we know of is
	 * using a two slot card. XXX
	 */
	WR4(sc, MCI_SDCR, 1);			/* SLOT B, 1 bit bus */
#endif
	/*
	 * Enable controller, including power-save.  The slower clock
	 * of the power-save mode is only in effect when there is no
	 * transfer in progress, so it can be left in this mode all
	 * the time.
	 */
	WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
}
331
/*
 * Shut the controller down: mask all interrupts, quiesce the PDC, and hold
 * the device disabled in software reset.
 */
static void
at91_mci_fini(device_t dev)
{
	struct at91_mci_softc *sc = device_get_softc(dev);

	WR4(sc, MCI_IDR, 0xffffffff);		/* Turn off interrupts */
	at91_mci_pdc_disable(sc);
	WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* device into reset */
}
341
342static int
343at91_mci_probe(device_t dev)
344{
345#ifdef FDT
346	if (!ofw_bus_is_compatible(dev, "atmel,hsmci"))
347		return (ENXIO);
348#endif
349	device_set_desc(dev, "MCI mmc/sd host bridge");
350	return (0);
351}
352
/*
 * Newbus attach: detect hardware quirks, map resources, set up DMA bounce
 * buffers and the interrupt handler, expose hints/sysctls, fill in the
 * mmc_host parameters, and attach the mmc bus layer as our child.
 * On any failure, at91_mci_deactivate() releases whatever was acquired.
 */
static int
at91_mci_attach(device_t dev)
{
	struct at91_mci_softc *sc = device_get_softc(dev);
	struct sysctl_ctx_list *sctx;
	struct sysctl_oid *soid;
	device_t child;
	int err, i;

	sctx = device_get_sysctl_ctx(dev);
	soid = device_get_sysctl_tree(dev);

	sc->dev = dev;
	sc->sc_cap = 0;
	/* The rm9200 presents data in the wrong byte order; see the comments
	 * above at91_bswap_buf() and at the top of this file. */
	if (at91_is_rm92())
		sc->sc_cap |= CAP_NEEDS_BYTESWAP;
	/*
	 * MCI1 Rev 2 controllers need some workarounds, flag if so.
	 */
	if (at91_mci_is_mci1rev2xx())
		sc->sc_cap |= CAP_MCI1_REV2XX;

	/* Allocate register window and IRQ resources. */
	err = at91_mci_activate(dev);
	if (err)
		goto out;

	AT91_MCI_LOCK_INIT(sc);

	/* Force the hardware into a known-idle state, then reinitialize. */
	at91_mci_fini(dev);
	at91_mci_init(dev);

	/*
	 * Allocate DMA tags and maps and bounce buffers.
	 *
	 * The parms in the tag_create call cause the dmamem_alloc call to
	 * create each bounce buffer as a single contiguous buffer of BBSIZE
	 * bytes aligned to a 4096 byte boundary.
	 *
	 * Do not use DMA_COHERENT for these buffers because that maps the
	 * memory as non-cachable, which prevents cache line burst fills/writes,
	 * which is something we need since we're trying to overlap the
	 * byte-swapping with the DMA operations.
	 */
	err = bus_dma_tag_create(bus_get_dma_tag(dev), 4096, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
	    BBSIZE, 1, BBSIZE, 0, NULL, NULL, &sc->dmatag);
	if (err != 0)
		goto out;

	for (i = 0; i < BBCOUNT; ++i) {
		err = bus_dmamem_alloc(sc->dmatag, (void **)&sc->bbuf_vaddr[i],
		    BUS_DMA_NOWAIT, &sc->bbuf_map[i]);
		if (err != 0)
			goto out;
	}

	/*
	 * Activate the interrupt
	 */
	err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE,
	    NULL, at91_mci_intr, sc, &sc->intrhand);
	if (err) {
		AT91_MCI_LOCK_DESTROY(sc);
		goto out;
	}

	/*
	 * Allow 4-wire to be initially set via #define.
	 * Allow a device hint to override that.
	 * Allow a sysctl to override that.
	 */
#if defined(AT91_MCI_HAS_4WIRE) && AT91_MCI_HAS_4WIRE != 0
	sc->has_4wire = 1;
#endif
	resource_int_value(device_get_name(dev), device_get_unit(dev),
			   "4wire", &sc->has_4wire);
	SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "4wire",
	    CTLFLAG_RW, &sc->has_4wire, 0, "has 4 wire SD Card bus");
	if (sc->has_4wire)
		sc->sc_cap |= CAP_HAS_4WIRE;

	sc->allow_overclock = AT91_MCI_ALLOW_OVERCLOCK;
	resource_int_value(device_get_name(dev), device_get_unit(dev),
			   "allow_overclock", &sc->allow_overclock);
	SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "allow_overclock",
	    CTLFLAG_RW, &sc->allow_overclock, 0,
	    "Allow up to 30MHz clock for 25MHz request when next highest speed 15MHz or less.");

	SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "debug",
	    CTLFLAG_RWTUN, &mci_debug, 0, "enable debug output");

	/*
	 * Our real min freq is master_clock/512, but upper driver layers are
	 * going to set the min speed during card discovery, and the right speed
	 * for that is 400kHz, so advertise a safe value just under that.
	 *
	 * For max speed, while the rm9200 manual says the max is 50mhz, it also
	 * says it supports only the SD v1.0 spec, which means the real limit is
	 * 25mhz. On the other hand, historical use has been to slightly violate
	 * the standard by running the bus at 30MHz.  For more information on
	 * that, see the comments at the top of this file.
	 */
	sc->host.f_min = 375000;
	sc->host.f_max = at91_master_clock / 2;
	if (sc->host.f_max > 25000000)
		sc->host.f_max = 25000000;
	sc->host.host_ocr = MMC_OCR_320_330 | MMC_OCR_330_340;
	sc->host.caps = 0;
	if (sc->sc_cap & CAP_HAS_4WIRE)
		sc->host.caps |= MMC_CAP_4_BIT_DATA;

	/* The mmc child does card discovery against our mmc_host ivars. */
	child = device_add_child(dev, "mmc", 0);
	device_set_ivars(dev, &sc->host);
	err = bus_generic_attach(dev);
out:
	if (err)
		at91_mci_deactivate(dev);
	return (err);
}
472
/*
 * Newbus detach.  Quiesces the hardware and frees DMA resources but then
 * deliberately reports EBUSY (see XXX) to veto the detach; full detach
 * support is not implemented.
 */
static int
at91_mci_detach(device_t dev)
{
	struct at91_mci_softc *sc = device_get_softc(dev);

	at91_mci_fini(dev);
	at91_mci_deactivate(dev);

	bus_dmamem_free(sc->dmatag, sc->bbuf_vaddr[0], sc->bbuf_map[0]);
	bus_dmamem_free(sc->dmatag, sc->bbuf_vaddr[1], sc->bbuf_map[1]);
	bus_dma_tag_destroy(sc->dmatag);

	return (EBUSY);	/* XXX */
}
487
488static int
489at91_mci_activate(device_t dev)
490{
491	struct at91_mci_softc *sc;
492	int rid;
493
494	sc = device_get_softc(dev);
495	rid = 0;
496	sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
497	    RF_ACTIVE);
498	if (sc->mem_res == NULL)
499		goto errout;
500
501	rid = 0;
502	sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
503	    RF_ACTIVE);
504	if (sc->irq_res == NULL)
505		goto errout;
506
507	return (0);
508errout:
509	at91_mci_deactivate(dev);
510	return (ENOMEM);
511}
512
513static void
514at91_mci_deactivate(device_t dev)
515{
516	struct at91_mci_softc *sc;
517
518	sc = device_get_softc(dev);
519	if (sc->intrhand)
520		bus_teardown_intr(dev, sc->irq_res, sc->intrhand);
521	sc->intrhand = 0;
522	bus_generic_detach(sc->dev);
523	if (sc->mem_res)
524		bus_release_resource(dev, SYS_RES_MEMORY,
525		    rman_get_rid(sc->mem_res), sc->mem_res);
526	sc->mem_res = 0;
527	if (sc->irq_res)
528		bus_release_resource(dev, SYS_RES_IRQ,
529		    rman_get_rid(sc->irq_res), sc->irq_res);
530	sc->irq_res = 0;
531	return;
532}
533
/*
 * Return non-zero when running on a SoC whose MCI peripheral is the
 * "MCI1 rev 2.x" variant, which needs workarounds (see CAP_MCI1_REV2XX).
 */
static int
at91_mci_is_mci1rev2xx(void)
{

	switch (soc_info.type) {
	case AT91_T_SAM9260:
	case AT91_T_SAM9263:
	case AT91_T_CAP9:
	case AT91_T_SAM9G10:
	case AT91_T_SAM9G20:
	case AT91_T_SAM9RL:
		return(1);
	default:
		return (0);
	}
}
550
/*
 * mmcbr update_ios method: program the controller's clock divisor and bus
 * width from the host's current ios settings.
 */
static int
at91_mci_update_ios(device_t brdev, device_t reqdev)
{
	struct at91_mci_softc *sc;
	struct mmc_ios *ios;
	uint32_t clkdiv;
	uint32_t freq;

	sc = device_get_softc(brdev);
	ios = &sc->host.ios;

	/*
	 * Calculate our closest available clock speed that doesn't exceed the
	 * requested speed.
	 *
	 * When overclocking is allowed, the requested clock is 25MHz, the
	 * computed frequency is 15MHz or smaller and clockdiv is 1, use
	 * clockdiv of 0 to double that.  If less than 12.5MHz, double
	 * regardless of the overclocking setting.
	 *
	 * Whatever we come up with, store it back into ios->clock so that the
	 * upper layer drivers can report the actual speed of the bus.
	 */
	if (ios->clock == 0) {
		/* Clock gated off; disable the controller. */
		WR4(sc, MCI_CR, MCI_CR_MCIDIS);
		clkdiv = 0;
	} else {
		WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
		/* Bus clock is MCK / ((clkdiv + 1) * 2); round divisor up
		 * so the result never exceeds the requested speed. */
		if ((at91_master_clock % (ios->clock * 2)) == 0)
			clkdiv = ((at91_master_clock / ios->clock) / 2) - 1;
		else
			clkdiv = (at91_master_clock / ios->clock) / 2;
		freq = at91_master_clock / ((clkdiv+1) * 2);
		if (clkdiv == 1 && ios->clock == 25000000 && freq <= 15000000) {
			if (sc->allow_overclock || freq <= 12500000) {
				clkdiv = 0;
				freq = at91_master_clock / ((clkdiv+1) * 2);
			}
		}
		ios->clock = freq;
	}
	if (ios->bus_width == bus_width_4)
		WR4(sc, MCI_SDCR, RD4(sc, MCI_SDCR) | MCI_SDCR_SDCBUS);
	else
		WR4(sc, MCI_SDCR, RD4(sc, MCI_SDCR) & ~MCI_SDCR_SDCBUS);
	WR4(sc, MCI_MR, (RD4(sc, MCI_MR) & ~MCI_MR_CLKDIV) | clkdiv);
	/* Do we need a settle time here? */
	/* XXX We need to turn the device on/off here with a GPIO pin */
	return (0);
}
601
/*
 * Issue one mmc_command to the hardware.  Builds the CMDR value, and for
 * data commands stages the transfer through the bounce buffers and the PDC
 * before writing ARGR/CMDR and unmasking the relevant interrupts.  Called
 * with the softc lock held (from at91_mci_next_operation()); completion is
 * driven from the interrupt handler.
 */
static void
at91_mci_start_cmd(struct at91_mci_softc *sc, struct mmc_command *cmd)
{
	uint32_t cmdr, mr;
	struct mmc_data *data;

	sc->curcmd = cmd;
	data = cmd->data;

	/* XXX Upper layers don't always set this */
	cmd->mrq = sc->req;

	/* Begin setting up command register. */

	cmdr = cmd->opcode;

	if (sc->host.ios.bus_mode == opendrain)
		cmdr |= MCI_CMDR_OPDCMD;

	/* Set up response handling.  Allow max timeout for responses. */

	if (MMC_RSP(cmd->flags) == MMC_RSP_NONE)
		cmdr |= MCI_CMDR_RSPTYP_NO;
	else {
		cmdr |= MCI_CMDR_MAXLAT;
		if (cmd->flags & MMC_RSP_136)
			cmdr |= MCI_CMDR_RSPTYP_136;
		else
			cmdr |= MCI_CMDR_RSPTYP_48;
	}

	/*
	 * If there is no data transfer, just set up the right interrupt mask
	 * and start the command.
	 *
	 * The interrupt mask needs to be CMDRDY plus all non-data-transfer
	 * errors. It's important to leave the transfer-related errors out, to
	 * avoid spurious timeout or crc errors on a STOP command following a
	 * multiblock read.  When a multiblock read is in progress, sending a
	 * STOP in the middle of a block occasionally triggers such errors, but
	 * we're totally disinterested in them because we've already gotten all
	 * the data we wanted without error before sending the STOP command.
	 */

	if (data == NULL) {
		uint32_t ier = MCI_SR_CMDRDY |
		    MCI_SR_RTOE | MCI_SR_RENDE |
		    MCI_SR_RCRCE | MCI_SR_RDIRE | MCI_SR_RINDE;

		at91_mci_pdc_disable(sc);

		if (cmd->opcode == MMC_STOP_TRANSMISSION)
			cmdr |= MCI_CMDR_TRCMD_STOP;

		/* Ignore response CRC on CMD2 and ACMD41, per standard. */

		if (cmd->opcode == MMC_SEND_OP_COND ||
		    cmd->opcode == ACMD_SD_SEND_OP_COND)
			ier &= ~MCI_SR_RCRCE;

		if (mci_debug)
			printf("CMDR %x (opcode %d) ARGR %x no data\n",
			    cmdr, cmd->opcode, cmd->arg);

		WR4(sc, MCI_ARGR, cmd->arg);
		WR4(sc, MCI_CMDR, cmdr);
		WR4(sc, MCI_IDR, 0xffffffff);
		WR4(sc, MCI_IER, ier);
		return;
	}

	/* There is data, set up the transfer-related parts of the command. */

	if (data->flags & MMC_DATA_READ)
		cmdr |= MCI_CMDR_TRDIR;

	if (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE))
		cmdr |= MCI_CMDR_TRCMD_START;

	if (data->flags & MMC_DATA_STREAM)
		cmdr |= MCI_CMDR_TRTYP_STREAM;
	else if (data->flags & MMC_DATA_MULTI) {
		cmdr |= MCI_CMDR_TRTYP_MULTIPLE;
		/* Remember direction so the ISR and stop handling know how
		 * to finish this request. */
		sc->flags |= (data->flags & MMC_DATA_READ) ?
		    CMD_MULTIREAD : CMD_MULTIWRITE;
	}

	/*
	 * Disable PDC until we're ready.
	 *
	 * Set block size and turn on PDC mode for dma xfer.
	 * Note that the block size is the smaller of the amount of data to be
	 * transferred, or 512 bytes.  The 512 size is fixed by the standard;
	 * smaller blocks are possible, but never larger.
	 */

	WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);

	/* BLKLEN lives in the upper half of MR on this hardware. */
	mr = RD4(sc,MCI_MR) & ~MCI_MR_BLKLEN;
	mr |=  min(data->len, 512) << 16;
	WR4(sc, MCI_MR, mr | MCI_MR_PDCMODE|MCI_MR_PDCPADV);

	/*
	 * Set up DMA.
	 *
	 * Use bounce buffers even if we don't need to byteswap, because doing
	 * multi-block IO with large DMA buffers is way fast (compared to
	 * single-block IO), even after incurring the overhead of also copying
	 * from/to the caller's buffers (which may be in non-contiguous physical
	 * pages).
	 *
	 * In an ideal non-byteswap world we could create a dma tag that allows
	 * for discontiguous segments and do the IO directly from/to the
	 * caller's buffer(s), using ENDRX/ENDTX interrupts to chain the
	 * discontiguous buffers through the PDC. Someday.
	 *
	 * If a read is bigger than 2k, split it in half so that we can start
	 * byte-swapping the first half while the second half is on the wire.
	 * It would be best if we could split it into 8k chunks, but we can't
	 * always keep up with the byte-swapping due to other system activity,
	 * and if an RXBUFF interrupt happens while we're still handling the
	 * byte-swap from the prior buffer (IE, we haven't returned from
	 * handling the prior interrupt yet), then data will get dropped on the
	 * floor and we can't easily recover from that.  The right fix for that
	 * would be to have the interrupt handling only keep the DMA flowing and
	 * enqueue filled buffers to be byte-swapped in a non-interrupt context.
	 * Even that won't work on the write side of things though; in that
	 * context we have to have all the data ready to go before starting the
	 * dma.
	 *
	 * XXX what about stream transfers?
	 */
	sc->xfer_offset = 0;
	sc->bbuf_curidx = 0;

	if (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE)) {
		uint32_t len;
		uint32_t remaining = data->len;
		bus_addr_t paddr;
		int err;

		if (remaining > (BBCOUNT*BBSIZE))
			panic("IO read size exceeds MAXDATA\n");

		if (data->flags & MMC_DATA_READ) {
			if (remaining > 2048) // XXX
				len = remaining / 2;
			else
				len = remaining;
			err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0],
			    sc->bbuf_vaddr[0], len, at91_mci_getaddr,
			    &paddr, BUS_DMA_NOWAIT);
			if (err != 0)
				panic("IO read dmamap_load failed\n");
			bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0],
			    BUS_DMASYNC_PREREAD);
			WR4(sc, PDC_RPR, paddr);
			WR4(sc, PDC_RCR, len / 4);
			sc->bbuf_len[0] = len;
			remaining -= len;
			if (remaining == 0) {
				sc->bbuf_len[1] = 0;
			} else {
				/* Queue the second half as the PDC "next"
				 * buffer so it starts without CPU help. */
				len = remaining;
				err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[1],
				    sc->bbuf_vaddr[1], len, at91_mci_getaddr,
				    &paddr, BUS_DMA_NOWAIT);
				if (err != 0)
					panic("IO read dmamap_load failed\n");
				bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1],
				    BUS_DMASYNC_PREREAD);
				WR4(sc, PDC_RNPR, paddr);
				WR4(sc, PDC_RNCR, len / 4);
				sc->bbuf_len[1] = len;
				remaining -= len;
			}
			WR4(sc, PDC_PTCR, PDC_PTCR_RXTEN);
		} else {
			/* Writes must be fully swapped into the bounce
			 * buffers before the DMA starts. */
			len = min(BBSIZE, remaining);
			at91_bswap_buf(sc, sc->bbuf_vaddr[0], data->data, len);
			err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0],
			    sc->bbuf_vaddr[0], len, at91_mci_getaddr,
			    &paddr, BUS_DMA_NOWAIT);
			if (err != 0)
				panic("IO write dmamap_load failed\n");
			bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0],
			    BUS_DMASYNC_PREWRITE);
			/*
			 * Erratum workaround:  PDC transfer length on a write
			 * must not be smaller than 12 bytes (3 words); only
			 * blklen bytes (set above) are actually transferred.
			 */
			WR4(sc, PDC_TPR,paddr);
			WR4(sc, PDC_TCR, (len < 12) ? 3 : len / 4);
			sc->bbuf_len[0] = len;
			remaining -= len;
			if (remaining == 0) {
				sc->bbuf_len[1] = 0;
			} else {
				len = remaining;
				at91_bswap_buf(sc, sc->bbuf_vaddr[1],
				    ((char *)data->data)+BBSIZE, len);
				err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[1],
				    sc->bbuf_vaddr[1], len, at91_mci_getaddr,
				    &paddr, BUS_DMA_NOWAIT);
				if (err != 0)
					panic("IO write dmamap_load failed\n");
				bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1],
				    BUS_DMASYNC_PREWRITE);
				WR4(sc, PDC_TNPR, paddr);
				WR4(sc, PDC_TNCR, (len < 12) ? 3 : len / 4);
				sc->bbuf_len[1] = len;
				remaining -= len;
			}
			/* do not enable PDC xfer until CMDRDY asserted */
		}
		data->xfer_len = 0; /* XXX what's this? appears to be unused. */
	}

	if (mci_debug)
		printf("CMDR %x (opcode %d) ARGR %x with data len %d\n",
		       cmdr, cmd->opcode, cmd->arg, cmd->data->len);

	WR4(sc, MCI_ARGR, cmd->arg);
	WR4(sc, MCI_CMDR, cmdr);
	WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_CMDRDY);
}
829
/*
 * Advance the current request's state machine: issue the initial command if
 * still pending, then the stop command if one was queued, and finally mask
 * interrupts, clear our request state, and invoke the request's completion
 * callback.  No-op when no request is active.
 */
static void
at91_mci_next_operation(struct at91_mci_softc *sc)
{
	struct mmc_request *req;

	req = sc->req;
	if (req == NULL)
		return;

	if (sc->flags & PENDING_CMD) {
		sc->flags &= ~PENDING_CMD;
		at91_mci_start_cmd(sc, req->cmd);
		return;
	} else if (sc->flags & PENDING_STOP) {
		sc->flags &= ~PENDING_STOP;
		at91_mci_start_cmd(sc, req->stop);
		return;
	}

	WR4(sc, MCI_IDR, 0xffffffff);
	sc->req = NULL;
	sc->curcmd = NULL;
	//printf("req done\n");
	req->done(req);
}
855
856static int
857at91_mci_request(device_t brdev, device_t reqdev, struct mmc_request *req)
858{
859	struct at91_mci_softc *sc = device_get_softc(brdev);
860
861	AT91_MCI_LOCK(sc);
862	if (sc->req != NULL) {
863		AT91_MCI_UNLOCK(sc);
864		return (EBUSY);
865	}
866	//printf("new req\n");
867	sc->req = req;
868	sc->flags = PENDING_CMD;
869	if (sc->req->stop)
870		sc->flags |= PENDING_STOP;
871	at91_mci_next_operation(sc);
872	AT91_MCI_UNLOCK(sc);
873	return (0);
874}
875
/*
 * mmcbr get_ro method.  No write-protect switch is wired to this
 * controller, so always report the card as writable.
 */
static int
at91_mci_get_ro(device_t brdev, device_t reqdev)
{
	return (0);
}
881
/*
 * mmcbr acquire_host method: sleep until no other client owns the host,
 * then claim it.  The periodic msleep timeout guards against a missed
 * wakeup; paired with at91_mci_release_host().
 */
static int
at91_mci_acquire_host(device_t brdev, device_t reqdev)
{
	struct at91_mci_softc *sc = device_get_softc(brdev);
	int err = 0;

	AT91_MCI_LOCK(sc);
	while (sc->bus_busy)
		msleep(sc, &sc->sc_mtx, PZERO, "mciah", hz / 5);
	sc->bus_busy++;
	AT91_MCI_UNLOCK(sc);
	return (err);
}
895
/*
 * mmcbr release_host method: drop our claim on the host and wake any
 * thread sleeping in at91_mci_acquire_host().
 */
static int
at91_mci_release_host(device_t brdev, device_t reqdev)
{
	struct at91_mci_softc *sc = device_get_softc(brdev);

	AT91_MCI_LOCK(sc);
	sc->bus_busy--;
	wakeup(sc);
	AT91_MCI_UNLOCK(sc);
	return (0);
}
907
/*
 * Handle completion of the DMA into one bounce buffer during a read:
 * byte-swap/copy the filled buffer out to the caller, then either finish
 * the operation or arm for the next ENDRX.
 */
static void
at91_mci_read_done(struct at91_mci_softc *sc, uint32_t sr)
{
	struct mmc_command *cmd = sc->curcmd;
	char * dataptr = (char *)cmd->data->data;
	uint32_t curidx = sc->bbuf_curidx;
	uint32_t len = sc->bbuf_len[curidx];

	/*
	 * We arrive here when a DMA transfer for a read is done, whether it's
	 * a single or multi-block read.
	 *
	 * We byte-swap the buffer that just completed, and if that is the
	 * last buffer that's part of this read then we move on to the next
	 * operation, otherwise we wait for another ENDRX for the next buffer.
	 */

	bus_dmamap_sync(sc->dmatag, sc->bbuf_map[curidx], BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(sc->dmatag, sc->bbuf_map[curidx]);

	at91_bswap_buf(sc, dataptr + sc->xfer_offset, sc->bbuf_vaddr[curidx], len);

	if (mci_debug) {
		printf("read done sr %x curidx %d len %d xfer_offset %d\n",
		       sr, curidx, len, sc->xfer_offset);
	}

	sc->xfer_offset += len;
	sc->bbuf_curidx = !curidx; /* swap buffers */

	/*
	 * If we've transferred all the data, move on to the next operation.
	 *
	 * If we're still transferring the last buffer, RNCR is already zero but
	 * we have to write a zero anyway to clear the ENDRX status so we don't
	 * re-interrupt until the last buffer is done.
	 */
	if (sc->xfer_offset == cmd->data->len) {
		WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);
		cmd->error = MMC_ERR_NONE;
		at91_mci_next_operation(sc);
	} else {
		WR4(sc, PDC_RNCR, 0);
		WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_ENDRX);
	}
}
954
/*
 * Handle completion of the entire DMA for a write: stop the PDC, release
 * the active bounce buffer, and either finish the operation now or wait
 * for NOTBUSY, per the logic described below.
 */
static void
at91_mci_write_done(struct at91_mci_softc *sc, uint32_t sr)
{
	struct mmc_command *cmd = sc->curcmd;

	/*
	 * We arrive here when the entire DMA transfer for a write is done,
	 * whether it's a single or multi-block write.  If it's multi-block we
	 * have to immediately move on to the next operation which is to send
	 * the stop command.  If it's a single-block transfer we need to wait
	 * for NOTBUSY, but if that's already asserted we can avoid another
	 * interrupt and just move on to completing the request right away.
	 */

	WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);

	bus_dmamap_sync(sc->dmatag, sc->bbuf_map[sc->bbuf_curidx],
	    BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(sc->dmatag, sc->bbuf_map[sc->bbuf_curidx]);

	if ((cmd->data->flags & MMC_DATA_MULTI) || (sr & MCI_SR_NOTBUSY)) {
		cmd->error = MMC_ERR_NONE;
		at91_mci_next_operation(sc);
	} else {
		WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
	}
}
982
983static void
984at91_mci_notbusy(struct at91_mci_softc *sc)
985{
986	struct mmc_command *cmd = sc->curcmd;
987
988	/*
989	 * We arrive here by either completion of a single-block write, or
990	 * completion of the stop command that ended a multi-block write (and,
991	 * I suppose, after a card-select or erase, but I haven't tested
992	 * those).  Anyway, we're done and it's time to move on to the next
993	 * command.
994	 */
995
996	cmd->error = MMC_ERR_NONE;
997	at91_mci_next_operation(sc);
998}
999
1000static void
1001at91_mci_stop_done(struct at91_mci_softc *sc, uint32_t sr)
1002{
1003	struct mmc_command *cmd = sc->curcmd;
1004
1005	/*
1006	 * We arrive here after receiving CMDRDY for a MMC_STOP_TRANSMISSION
1007	 * command.  Depending on the operation being stopped, we may have to
1008	 * do some unusual things to work around hardware bugs.
1009	 */
1010
1011	/*
1012	 * This is known to be true of at91rm9200 hardware; it may or may not
1013	 * apply to more recent chips:
1014	 *
1015	 * After stopping a multi-block write, the NOTBUSY bit in MCI_SR does
1016	 * not properly reflect the actual busy state of the card as signaled
1017	 * on the DAT0 line; it always claims the card is not-busy.  If we
1018	 * believe that and let operations continue, following commands will
1019	 * fail with response timeouts (except of course MMC_SEND_STATUS -- it
1020	 * indicates the card is busy in the PRG state, which was the smoking
1021	 * gun that showed MCI_SR NOTBUSY was not tracking DAT0 correctly).
1022	 *
1023	 * The atmel docs are emphatic: "This flag [NOTBUSY] must be used only
1024	 * for Write Operations."  I guess technically since we sent a stop
1025	 * it's not a write operation anymore.  But then just what did they
1026	 * think it meant for the stop command to have "...an optional busy
1027	 * signal transmitted on the data line" according to the SD spec?
1028	 *
1029	 * I tried a variety of things to un-wedge the MCI and get the status
1030	 * register to reflect NOTBUSY correctly again, but the only thing
1031	 * that worked was a full device reset.  It feels like an awfully big
1032	 * hammer, but doing a full reset after every multiblock write is
1033	 * still faster than doing single-block IO (by almost two orders of
1034	 * magnitude: 20KB/sec improves to about 1.8MB/sec best case).
1035	 *
1036	 * After doing the reset, wait for a NOTBUSY interrupt before
1037	 * continuing with the next operation.
1038	 *
1039	 * This workaround breaks multiwrite on the rev2xx parts, but some other
1040	 * workaround is needed.
1041	 */
1042	if ((sc->flags & CMD_MULTIWRITE) && (sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
1043		at91_mci_reset(sc);
1044		WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
1045		return;
1046	}
1047
1048	/*
1049	 * This is known to be true of at91rm9200 hardware; it may or may not
1050	 * apply to more recent chips:
1051	 *
1052	 * After stopping a multi-block read, loop to read and discard any
1053	 * data that coasts in after we sent the stop command.  The docs don't
1054	 * say anything about it, but empirical testing shows that 1-3
1055	 * additional words of data get buffered up in some unmentioned
1056	 * internal fifo and if we don't read and discard them here they end
1057	 * up on the front of the next read DMA transfer we do.
1058	 *
1059	 * This appears to be unnecessary for rev2xx parts.
1060	 */
1061	if ((sc->flags & CMD_MULTIREAD) && (sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
1062		uint32_t sr;
1063		int count = 0;
1064
1065		do {
1066			sr = RD4(sc, MCI_SR);
1067			if (sr & MCI_SR_RXRDY) {
1068				RD4(sc,  MCI_RDR);
1069				++count;
1070			}
1071		} while (sr & MCI_SR_RXRDY);
1072		at91_mci_reset(sc);
1073	}
1074
1075	cmd->error = MMC_ERR_NONE;
1076	at91_mci_next_operation(sc);
1077
1078}
1079
/*
 * Handle a CMDRDY interrupt: retrieve the response registers for the command
 * that just finished, then arm whichever interrupt the command needs next
 * (NOTBUSY for R1B commands, ENDRX/TXBUFE for data transfers), or complete
 * the command immediately when nothing more is expected.
 */
static void
at91_mci_cmdrdy(struct at91_mci_softc *sc, uint32_t sr)
{
	struct mmc_command *cmd = sc->curcmd;
	int i;

	/* Presumably a spurious CMDRDY with no command outstanding; ignore. */
	if (cmd == NULL)
		return;

	/*
	 * We get here at the end of EVERY command.  We retrieve the command
	 * response (if any) then decide what to do next based on the command.
	 */

	if (cmd->flags & MMC_RSP_PRESENT) {
		/* R2 (136-bit) responses occupy all four RSPR words. */
		for (i = 0; i < ((cmd->flags & MMC_RSP_136) ? 4 : 1); i++) {
			cmd->resp[i] = RD4(sc, MCI_RSPR + i * 4);
			if (mci_debug)
				printf("RSPR[%d] = %x sr=%x\n", i, cmd->resp[i],  sr);
		}
	}

	/*
	 * If this was a stop command, go handle the various special
	 * conditions (read: bugs) that have to be dealt with following a stop.
	 */
	if (cmd->opcode == MMC_STOP_TRANSMISSION) {
		at91_mci_stop_done(sc, sr);
		return;
	}

	/*
	 * If this command can continue to assert BUSY beyond the response then
	 * we need to wait for NOTBUSY before the command is really done.
	 *
	 * Note that this may not work properly on the at91rm9200.  It certainly
	 * doesn't work for the STOP command that follows a multi-block write,
	 * so post-stop CMDRDY is handled separately; see the special handling
	 * in at91_mci_stop_done().
	 *
	 * Beside STOP, there are other R1B-type commands that use the busy
	 * signal after CMDRDY: CMD7 (card select), CMD28-29 (write protect),
	 * CMD38 (erase). I haven't tested any of them, but I rather expect
	 * them all to have the same sort of problem with MCI_SR not actually
	 * reflecting the state of the DAT0-line busy indicator.  So this code
	 * may need to grow some sort of special handling for them too. (This
	 * just in: CMD7 isn't a problem right now because dev/mmc.c incorrectly
	 * sets the response flags to R1 rather than R1B.) XXX
	 */
	if ((cmd->flags & MMC_RSP_BUSY)) {
		WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
		return;
	}

	/*
	 * If there is a data transfer with this command, then...
	 * - If it's a read, we need to wait for ENDRX.
	 * - If it's a write, now is the time to enable the PDC, and we need
	 *   to wait for a BLKE that follows a TXBUFE, because if we're doing
	 *   a split transfer we get a BLKE after the first half (when TPR/TCR
	 *   get loaded from TNPR/TNCR).  So first we wait for the TXBUFE, and
	 *   the handling for that interrupt will then invoke the wait for the
	 *   subsequent BLKE which indicates actual completion.
	 */
	if (cmd->data) {
		uint32_t ier;
		if (cmd->data->flags & MMC_DATA_READ) {
			ier = MCI_SR_ENDRX;
		} else {
			ier = MCI_SR_TXBUFE;
			WR4(sc, PDC_PTCR, PDC_PTCR_TXTEN);
		}
		WR4(sc, MCI_IER, MCI_SR_ERROR | ier);
		return;
	}

	/*
	 * If we made it to here, we don't need to wait for anything more for
	 * the current command, move on to the next command (will complete the
	 * request if there is no next command).
	 */
	cmd->error = MMC_ERR_NONE;
	at91_mci_next_operation(sc);
}
1164
1165static void
1166at91_mci_intr(void *arg)
1167{
1168	struct at91_mci_softc *sc = (struct at91_mci_softc*)arg;
1169	struct mmc_command *cmd = sc->curcmd;
1170	uint32_t sr, isr;
1171
1172	AT91_MCI_LOCK(sc);
1173
1174	sr = RD4(sc, MCI_SR);
1175	isr = sr & RD4(sc, MCI_IMR);
1176
1177	if (mci_debug)
1178		printf("i 0x%x sr 0x%x\n", isr, sr);
1179
1180	/*
1181	 * All interrupts are one-shot; disable it now.
1182	 * The next operation will re-enable whatever interrupts it wants.
1183	 */
1184	WR4(sc, MCI_IDR, isr);
1185	if (isr & MCI_SR_ERROR) {
1186		if (isr & (MCI_SR_RTOE | MCI_SR_DTOE))
1187			cmd->error = MMC_ERR_TIMEOUT;
1188		else if (isr & (MCI_SR_RCRCE | MCI_SR_DCRCE))
1189			cmd->error = MMC_ERR_BADCRC;
1190		else if (isr & (MCI_SR_OVRE | MCI_SR_UNRE))
1191			cmd->error = MMC_ERR_FIFO;
1192		else
1193			cmd->error = MMC_ERR_FAILED;
1194		/*
1195		 * CMD8 is used to probe for SDHC cards, a standard SD card
1196		 * will get a response timeout; don't report it because it's a
1197		 * normal and expected condition.  One might argue that all
1198		 * error reporting should be left to higher levels, but when
1199		 * they report at all it's always EIO, which isn't very
1200		 * helpful. XXX bootverbose?
1201		 */
1202		if (cmd->opcode != 8) {
1203			device_printf(sc->dev,
1204			    "IO error; status MCI_SR = 0x%x cmd opcode = %d%s\n",
1205			    sr, cmd->opcode,
1206			    (cmd->opcode != 12) ? "" :
1207			    (sc->flags & CMD_MULTIREAD) ? " after read" : " after write");
1208			at91_mci_reset(sc);
1209		}
1210		at91_mci_next_operation(sc);
1211	} else {
1212		if (isr & MCI_SR_TXBUFE) {
1213//			printf("TXBUFE\n");
1214			/*
1215			 * We need to wait for a BLKE that follows TXBUFE
1216			 * (intermediate BLKEs might happen after ENDTXes if
1217			 * we're chaining multiple buffers).  If BLKE is also
1218			 * asserted at the time we get TXBUFE, we can avoid
1219			 * another interrupt and process it right away, below.
1220			 */
1221			if (sr & MCI_SR_BLKE)
1222				isr |= MCI_SR_BLKE;
1223			else
1224				WR4(sc, MCI_IER, MCI_SR_BLKE);
1225		}
1226		if (isr & MCI_SR_RXBUFF) {
1227//			printf("RXBUFF\n");
1228		}
1229		if (isr & MCI_SR_ENDTX) {
1230//			printf("ENDTX\n");
1231		}
1232		if (isr & MCI_SR_ENDRX) {
1233//			printf("ENDRX\n");
1234			at91_mci_read_done(sc, sr);
1235		}
1236		if (isr & MCI_SR_NOTBUSY) {
1237//			printf("NOTBUSY\n");
1238			at91_mci_notbusy(sc);
1239		}
1240		if (isr & MCI_SR_DTIP) {
1241//			printf("Data transfer in progress\n");
1242		}
1243		if (isr & MCI_SR_BLKE) {
1244//			printf("Block transfer end\n");
1245			at91_mci_write_done(sc, sr);
1246		}
1247		if (isr & MCI_SR_TXRDY) {
1248//			printf("Ready to transmit\n");
1249		}
1250		if (isr & MCI_SR_RXRDY) {
1251//			printf("Ready to receive\n");
1252		}
1253		if (isr & MCI_SR_CMDRDY) {
1254//			printf("Command ready\n");
1255			at91_mci_cmdrdy(sc, sr);
1256		}
1257	}
1258	AT91_MCI_UNLOCK(sc);
1259}
1260
1261static int
1262at91_mci_read_ivar(device_t bus, device_t child, int which, uintptr_t *result)
1263{
1264	struct at91_mci_softc *sc = device_get_softc(bus);
1265
1266	switch (which) {
1267	default:
1268		return (EINVAL);
1269	case MMCBR_IVAR_BUS_MODE:
1270		*(int *)result = sc->host.ios.bus_mode;
1271		break;
1272	case MMCBR_IVAR_BUS_WIDTH:
1273		*(int *)result = sc->host.ios.bus_width;
1274		break;
1275	case MMCBR_IVAR_CHIP_SELECT:
1276		*(int *)result = sc->host.ios.chip_select;
1277		break;
1278	case MMCBR_IVAR_CLOCK:
1279		*(int *)result = sc->host.ios.clock;
1280		break;
1281	case MMCBR_IVAR_F_MIN:
1282		*(int *)result = sc->host.f_min;
1283		break;
1284	case MMCBR_IVAR_F_MAX:
1285		*(int *)result = sc->host.f_max;
1286		break;
1287	case MMCBR_IVAR_HOST_OCR:
1288		*(int *)result = sc->host.host_ocr;
1289		break;
1290	case MMCBR_IVAR_MODE:
1291		*(int *)result = sc->host.mode;
1292		break;
1293	case MMCBR_IVAR_OCR:
1294		*(int *)result = sc->host.ocr;
1295		break;
1296	case MMCBR_IVAR_POWER_MODE:
1297		*(int *)result = sc->host.ios.power_mode;
1298		break;
1299	case MMCBR_IVAR_VDD:
1300		*(int *)result = sc->host.ios.vdd;
1301		break;
1302	case MMCBR_IVAR_CAPS:
1303		if (sc->has_4wire) {
1304			sc->sc_cap |= CAP_HAS_4WIRE;
1305			sc->host.caps |= MMC_CAP_4_BIT_DATA;
1306		} else {
1307			sc->sc_cap &= ~CAP_HAS_4WIRE;
1308			sc->host.caps &= ~MMC_CAP_4_BIT_DATA;
1309		}
1310		*(int *)result = sc->host.caps;
1311		break;
1312	case MMCBR_IVAR_MAX_DATA:
1313		/*
1314		 * Something is wrong with the 2x parts and multiblock, so
1315		 * just do 1 block at a time for now, which really kills
1316		 * performance.
1317		 */
1318		if (sc->sc_cap & CAP_MCI1_REV2XX)
1319			*(int *)result = 1;
1320		else
1321			*(int *)result = MAX_BLOCKS;
1322		break;
1323	}
1324	return (0);
1325}
1326
1327static int
1328at91_mci_write_ivar(device_t bus, device_t child, int which, uintptr_t value)
1329{
1330	struct at91_mci_softc *sc = device_get_softc(bus);
1331
1332	switch (which) {
1333	default:
1334		return (EINVAL);
1335	case MMCBR_IVAR_BUS_MODE:
1336		sc->host.ios.bus_mode = value;
1337		break;
1338	case MMCBR_IVAR_BUS_WIDTH:
1339		sc->host.ios.bus_width = value;
1340		break;
1341	case MMCBR_IVAR_CHIP_SELECT:
1342		sc->host.ios.chip_select = value;
1343		break;
1344	case MMCBR_IVAR_CLOCK:
1345		sc->host.ios.clock = value;
1346		break;
1347	case MMCBR_IVAR_MODE:
1348		sc->host.mode = value;
1349		break;
1350	case MMCBR_IVAR_OCR:
1351		sc->host.ocr = value;
1352		break;
1353	case MMCBR_IVAR_POWER_MODE:
1354		sc->host.ios.power_mode = value;
1355		break;
1356	case MMCBR_IVAR_VDD:
1357		sc->host.ios.vdd = value;
1358		break;
1359	/* These are read-only */
1360	case MMCBR_IVAR_CAPS:
1361	case MMCBR_IVAR_HOST_OCR:
1362	case MMCBR_IVAR_F_MIN:
1363	case MMCBR_IVAR_F_MAX:
1364	case MMCBR_IVAR_MAX_DATA:
1365		return (EINVAL);
1366	}
1367	return (0);
1368}
1369
1370static device_method_t at91_mci_methods[] = {
1371	/* device_if */
1372	DEVMETHOD(device_probe, at91_mci_probe),
1373	DEVMETHOD(device_attach, at91_mci_attach),
1374	DEVMETHOD(device_detach, at91_mci_detach),
1375
1376	/* Bus interface */
1377	DEVMETHOD(bus_read_ivar,	at91_mci_read_ivar),
1378	DEVMETHOD(bus_write_ivar,	at91_mci_write_ivar),
1379
1380	/* mmcbr_if */
1381	DEVMETHOD(mmcbr_update_ios, at91_mci_update_ios),
1382	DEVMETHOD(mmcbr_request, at91_mci_request),
1383	DEVMETHOD(mmcbr_get_ro, at91_mci_get_ro),
1384	DEVMETHOD(mmcbr_acquire_host, at91_mci_acquire_host),
1385	DEVMETHOD(mmcbr_release_host, at91_mci_release_host),
1386
1387	DEVMETHOD_END
1388};
1389
static driver_t at91_mci_driver = {
	"at91_mci",			/* driver/device name */
	at91_mci_methods,
	sizeof(struct at91_mci_softc),	/* per-instance softc size */
};

static devclass_t at91_mci_devclass;

/*
 * Register on simplebus when the kernel is built with FDT support,
 * otherwise on the legacy atmelarm bus.
 */
#ifdef FDT
DRIVER_MODULE(at91_mci, simplebus, at91_mci_driver, at91_mci_devclass, NULL,
    NULL);
#else
DRIVER_MODULE(at91_mci, atmelarm, at91_mci_driver, at91_mci_devclass, NULL,
    NULL);
#endif

/* Expose this driver as an MMC bridge so mmc(4) attaches beneath it. */
MMC_DECLARE_BRIDGE(at91_mci);
1407