1129198Scognet/*	$NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $	*/
2129198Scognet
3139735Simp/*-
4129198Scognet * Copyright (c) 2001 Ben Harris.
5129198Scognet * Copyright (c) 1994 Mark Brinicombe.
6129198Scognet * Copyright (c) 1994 Brini.
7129198Scognet * All rights reserved.
8129198Scognet *
9129198Scognet * This code is derived from software written for Brini by Mark Brinicombe
10129198Scognet *
11129198Scognet * Redistribution and use in source and binary forms, with or without
12129198Scognet * modification, are permitted provided that the following conditions
13129198Scognet * are met:
14129198Scognet * 1. Redistributions of source code must retain the above copyright
15129198Scognet *    notice, this list of conditions and the following disclaimer.
16129198Scognet * 2. Redistributions in binary form must reproduce the above copyright
17129198Scognet *    notice, this list of conditions and the following disclaimer in the
18129198Scognet *    documentation and/or other materials provided with the distribution.
19129198Scognet * 3. All advertising materials mentioning features or use of this software
20129198Scognet *    must display the following acknowledgement:
21129198Scognet *	This product includes software developed by Brini.
22129198Scognet * 4. The name of the company nor the name of the author may be used to
23129198Scognet *    endorse or promote products derived from this software without specific
24129198Scognet *    prior written permission.
25129198Scognet *
26129198Scognet * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
27129198Scognet * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
28129198Scognet * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29129198Scognet * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
30129198Scognet * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
31129198Scognet * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32129198Scognet * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33129198Scognet * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34129198Scognet * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35129198Scognet * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36129198Scognet * SUCH DAMAGE.
37129198Scognet *
38129198Scognet * RiscBSD kernel project
39129198Scognet *
40129198Scognet * blockio.S
41129198Scognet *
42129198Scognet * optimised block read/write from/to IO routines.
43129198Scognet *
44129198Scognet * Created      : 08/10/94
45129198Scognet * Modified	: 22/01/99  -- R.Earnshaw
46129198Scognet *			       Faster, and small tweaks for StrongARM
47129198Scognet */
48129198Scognet
49129198Scognet#include <machine/asm.h>
50129198Scognet
51129198Scognet__FBSDID("$FreeBSD$");
52129198Scognet
/*
 * Read bytes from an I/O address into a block of memory
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length in bytes
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(read_multi_1)
	/*
	 * Register use after the prologue:
	 *   r0      I/O address; every byte is read from this one location
	 *   r1      destination pointer, post-incremented by each store
	 *   r2      byte count, held as (remaining - 4) while the word loop
	 *           is still a candidate
	 *   r3      data in flight
	 *   r12/ip  scratch (alignment count, then the byte being merged)
	 */
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}	/* save fp, incoming sp, lr and pc */
	sub	fp, ip, #4		/* fp = address of the saved pc slot */
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lrm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lrm1_main		/* aligned destination */
	rsb	r12, r12, #4		/* r12 = 1..3 bytes to reach alignment */
	cmp	r12, #2
	ldrb	r3, [r0]		/* always at least one byte */
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]		/* second byte when r12 >= 2 */
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]		/* third byte when r12 == 3 */
	strgtb	r3, [r1], #1
	subs	r2, r2, r12		/* account for the alignment bytes */
	blt	.Lrm1_l4
.Lrm1_main:
.Lrm1loop:
	/*
	 * Four byte reads are merged into one word, the first read landing
	 * in bits 7:0 and the last in bits 31:24, then stored with a single
	 * word store.
	 */
	ldrb	r3, [r0]
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #8
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #16
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #24
	str	r3, [r1], #4
	subs	r2, r2, #4
	bge	.Lrm1loop
.Lrm1_l4:
	adds	r2, r2, #4		/* r2 = length again */
	/*
	 * Nothing left: unwind the frame and return.  No further conditional
	 * return is needed here, since this load already writes pc whenever
	 * Z is set.
	 */
	ldmeqdb	fp, {fp, sp, pc}
	cmp	r2, #2			/* 1, 2 or 3 trailing bytes */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]
	strgtb	r3, [r1], #1
	ldmdb	fp, {fp, sp, pc}	/* restore fp and sp, return via saved lr */
END(read_multi_1)
105129198Scognet
/*
 * Write bytes to an I/O address from a block of memory
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length in bytes
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(write_multi_1)
	/*
	 * Prologue: push fp, the incoming sp, lr and pc, then point fp at
	 * the saved pc slot.  ip is free for scratch use afterwards.
	 */
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4

	subs	r2, r2, #4		/* keep the count biased by -4 */
	blt	.Lwm1_tail		/* fewer than 4 bytes in total */

	/* Send 1..3 single bytes until the source pointer is word aligned. */
	ands	ip, r1, #3
	beq	.Lwm1_words		/* source already aligned */
	rsb	ip, ip, #4		/* ip = bytes needed to align r1 */
	cmp	ip, #2
	ldrb	r3, [r1], #1		/* first byte: unconditional */
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1		/* second byte: ip >= 2 */
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1		/* third byte: ip == 3 */
	strgtb	r3, [r0]
	subs	r2, r2, ip
	blt	.Lwm1_tail

	/*
	 * Word loop: one aligned word load from memory, then four byte
	 * stores to the I/O address, lowest byte of the word first.
	 */
.Lwm1_words:
	ldr	r3, [r1], #4
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	subs	r2, r2, #4
	bge	.Lwm1_words

.Lwm1_tail:
	adds	r2, r2, #4		/* undo the bias: r2 = bytes left */
	ldmeqdb	fp, {fp, sp, pc}	/* none left: unwind and return */
	cmp	r2, #2			/* 1, 2 or 3 bytes remain */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0]
	ldmdb	fp, {fp, sp, pc}	/* unwind and return */
END(write_multi_1)
157129198Scognet
/*
 * Reads short ints (16 bits) from an I/O address into a block of memory
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (number of short ints)
 */
165129198Scognet
ENTRY(insw)
	/* Return at once when the count is zero or negative. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * The word-at-a-time path needs an even count and a 4-byte aligned
	 * destination; anything else goes through the byte-store loop.
	 */
	tst	r2, #1
	tsteq	r1, #3
	beq	.Linsw_pairs

	/*
	 * General case: one I/O read per short int, stored to memory as two
	 * separate bytes (low byte first).
	 */
.Linsw_single:
	ldr	r3, [r0]
	subs	r2, r2, #1		/* flags consumed by the bgt below */
	strb	r3, [r1], #1
	mov	r3, r3, lsr #8
	strb	r3, [r1], #1
	bgt	.Linsw_single

	RET

	/*
	 * Aligned case: two I/O reads per iteration, packed into one word.
	 * The first read is a word load from r0 + 2 whose upper halfword is
	 * kept (the original author's note: this takes advantage of
	 * nonaligned word accesses); it becomes the low halfword of the
	 * result.  The second read, from r0, supplies the high halfword.
	 */
.Linsw_pairs:
	ldr	r3, [r0, #2]
	ldr	ip, [r0]
	mov	r3, r3, lsr #16
	orr	r3, r3, ip, lsl #16
	str	r3, [r1], #4
	subs	r2, r2, #2
	bgt	.Linsw_pairs

	RET
END(insw)
205129198Scognet
/*
 * Writes short ints (16 bits) from a block of memory to an I/O address
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (number of short ints)
 */
213129198Scognet
ENTRY(outsw)
	/* Return at once when the count is zero or negative. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * The word-at-a-time path needs an even count and a 4-byte aligned
	 * source pointer; anything else goes through the byte-load loop.
	 */
	tst	r2, #1
	tsteq	r1, #3
	beq	.Loutsw_pairs

	/*
	 * General case: build each short int from two byte loads, copy it
	 * into both halves of the word, and write that word to the I/O
	 * address.
	 */
.Loutsw_single:
	ldrb	r3, [r1], #1
	ldrb	ip, [r1], #1
	subs	r2, r2, #1		/* flags consumed by the bgt below */
	orr	r3, r3, ip, lsl #8
	orr	r3, r3, r3, lsl #16
	str	r3, [r0]
	bgt	.Loutsw_single

	RET

	/*
	 * Aligned case: each word loaded from memory holds two short ints,
	 * written here as (H)(L).  Three exclusive-ors turn it into the two
	 * words (L)(L) and (H)(H), which are written to the I/O address in
	 * that order.  This does the same job as a mov/orr pair per output
	 * word, with one instruction fewer.
	 */
.Loutsw_pairs:
	ldr	r3, [r1], #4		/* r3 = (H)(L) */
	subs	r2, r2, #2		/* flags consumed by the bgt below */

	eor	ip, r3, r3, lsr #16	/* ip = (H)(H^L) */
	eor	r3, r3, ip, lsl #16	/* r3 = (L)(L) */
	eor	ip, ip, r3, lsr #16	/* ip = (H)(H) */

	str	r3, [r0]
	str	ip, [r0]

	bgt	.Loutsw_pairs

	RET
END(outsw)
266129198Scognet
/*
 * Reads short ints (16 bits) from an I/O address into a block of memory.
 * The fast path expects a length that is a multiple of 16 bytes
 * (8 short ints) and a word aligned destination address; any other
 * request is passed on to insw.
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (number of short ints)
 */
276129198Scognet
ENTRY(insw16)
	/* Return at once when the count is zero or negative. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * Unless the count is a multiple of 8 and the destination is 4-byte
	 * aligned, branch to insw and let it do the whole transfer.
	 */
	tst	r2, #7
	tsteq	r1, #3

	bne	_C_LABEL(insw)

	/* r4 and r5 are used below; lr doubles as a scratch register. */
	stmfd	sp!, {r4, r5, lr}

	/*
	 * Each pass makes eight I/O reads and packs them into four words.
	 * Per word: the upper halfword of a word load from r0 + 2 (the
	 * original author's note: this takes advantage of nonaligned word
	 * accesses) becomes the low halfword, and a following load from r0
	 * supplies the high halfword.
	 */
.Linsw16_burst:
	ldr	r3, [r0, #2]		/* word 0 */
	ldr	lr, [r0]
	mov	r3, r3, lsr #16
	orr	r3, r3, lr, lsl #16

	ldr	r4, [r0, #2]		/* word 1 */
	ldr	lr, [r0]
	mov	r4, r4, lsr #16
	orr	r4, r4, lr, lsl #16

	ldr	r5, [r0, #2]		/* word 2 */
	ldr	lr, [r0]
	mov	r5, r5, lsr #16
	orr	r5, r5, lr, lsl #16

	ldr	ip, [r0, #2]		/* word 3 */
	ldr	lr, [r0]
	mov	ip, ip, lsr #16
	orr	ip, ip, lr, lsl #16

	stmia	r1!, {r3-r5, ip}	/* store all four words at once */
	subs	r2, r2, #8
	bgt	.Linsw16_burst

	ldmfd	sp!, {r4, r5, pc}	/* restore r4/r5 and return */
END(insw16)
325129198Scognet
/*
 * Writes short ints (16 bits) from a block of memory to an I/O address
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (number of short ints)
 */
333129198Scognet
ENTRY(outsw16)
	/* Return at once when the count is zero or negative. */
	cmp	r2, #0
	movle	pc, lr

	/*
	 * Unless the count is a multiple of 8 and the source pointer is
	 * 4-byte aligned, branch to outsw and let it do the whole transfer.
	 */
	tst	r2, #7
	tsteq	r1, #3

	bne	_C_LABEL(outsw)

	/* r4 and r5 are used below; lr doubles as a data register. */
	stmfd	sp!, {r4, r5, lr}

	/*
	 * Each pass loads four words (eight short ints) from memory.  A
	 * loaded word (A)(B) is split by three exclusive-ors into (B)(B)
	 * and (A)(A), which are written to the I/O address in that order,
	 * i.e. the low halfword goes out first.
	 */
.Loutsw16_burst:
	ldmia	r1!, {r4, r5, ip, lr}

	eor	r3, r4, r4, lsl #16	/* r3 = (A^B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(A) */
	eor	r3, r3, r4, lsl #16	/* r3 = (B)(B) */
	str	r3, [r0]
	str	r4, [r0]

	eor	r3, r5, r5, lsl #16	/* r3 = (A^B)(B) */
	eor	r5, r5, r3, lsr #16	/* r5 = (A)(A) */
	eor	r3, r3, r5, lsl #16	/* r3 = (B)(B) */
	str	r3, [r0]
	str	r5, [r0]

	eor	r3, ip, ip, lsl #16	/* r3 = (A^B)(B) */
	eor	ip, ip, r3, lsr #16	/* ip = (A)(A) */
	eor	r3, r3, ip, lsl #16	/* r3 = (B)(B) */
	str	r3, [r0]
	str	ip, [r0]

	eor	r3, lr, lr, lsl #16	/* r3 = (A^B)(B) */
	eor	lr, lr, r3, lsr #16	/* lr = (A)(A) */
	eor	r3, r3, lr, lsl #16	/* r3 = (B)(B) */
	str	r3, [r0]
	str	lr, [r0]

	subs	r2, r2, #8
	bgt	.Loutsw16_burst

	ldmfd	sp!, {r4, r5, pc}	/* restore r4/r5 and return */
END(outsw16)
392129198Scognet
/*
 * Reads short ints (16 bits) from an I/O address into a block of memory.
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words.
 * The destination address should be word aligned.
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (number of short ints)
 */
403129198Scognet
ENTRY(inswm8)
	/* Return at once when the count is zero or negative. */
	cmp	r2, #0
	movle	pc, lr

	/* A destination that is not 4-byte aligned is handed to insw. */
	tst	r1, #3

	bne	_C_LABEL(insw)

	stmfd	sp!, {r4-r9, lr}

	/* lr = 0xffff0000: mask used to clear the upper halfword of a read. */
	mov	lr, #0xff000000
	orr	lr, lr, #0x00ff0000

	/*
	 * Eight at a time: one ldmia reads the eight consecutive words at
	 * r0 (r0 is not advanced).  The low halfwords of each pair of words
	 * are packed into one output word, the earlier word in the low half.
	 */
.Linswm8_x8:
	cmp	r2, #8
	bcc	.Linswm8_x4

	ldmia	r0, {r3-r9, ip}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16
	bic	r7, r7, lr
	orr	r5, r7, r8, lsl #16
	bic	r9, r9, lr
	orr	r6, r9, ip, lsl #16

	stmia	r1!, {r3-r6}

	subs	r2, r2, #8
	bne	.Linswm8_x8
	beq	.Linswm8_done		/* count reached exactly zero */

	/* Four short ints: same packing from the first four words at r0. */
.Linswm8_x4:
	cmp	r2, #4
	bcc	.Linswm8_x2

	ldmia	r0, {r3-r6}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16

	stmia	r1!, {r3-r4}

	subs	r2, r2, #4
	beq	.Linswm8_done

	/* Two short ints: one packed word. */
.Linswm8_x2:
	cmp	r2, #2
	bcc	.Linswm8_x1

	ldmia	r0, {r3-r4}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	str	r3, [r1], #4

	subs	r2, r2, #2
	beq	.Linswm8_done

	/* Last short int: stored as two bytes, low byte first. */
.Linswm8_x1:
	cmp	r2, #1
	bcc	.Linswm8_done

	ldr	r3, [r0]
	subs	r2, r2, #1		/* flags from this are not consumed */

	strb	r3, [r1], #1
	mov	r3, r3, lsr #8
	strb	r3, [r1], #1

.Linswm8_done:
	ldmfd	sp!, {r4-r9, pc}	/* restore r4-r9 and return */
END(inswm8)
489129198Scognet
/*
 * Writes short ints (16 bits) to an I/O address from a block of memory.
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words.
 * The source address should be word aligned.
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (number of short ints)
 */
500129198Scognet
ENTRY(outswm8)
	/* Return at once when the count is zero or negative. */
	cmp	r2, #0
	movle	pc, lr

	/* A source pointer that is not 4-byte aligned is handed to outsw. */
	tst	r1, #3

	bne	_C_LABEL(outsw)

	stmfd	sp!, {r4-r8, lr}

	/*
	 * Eight at a time: four words are loaded from memory, and each
	 * word (A)(B) is expanded by three exclusive-ors into the pair
	 * (B)(B), (A)(A).  One stmia then writes the eight resulting words
	 * to the eight consecutive word addresses at r0 (r0 is not
	 * advanced).
	 */
.Loutswm8_x8:
	cmp	r2, #8
	bcc	.Loutswm8_x4

	ldmia	r1!, {r3, r5, r7, ip}

	eor	r4, r3, r3, lsr #16	/* r4 = (A)(A^B) */
	eor	r3, r3, r4, lsl #16	/* r3 = (B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(A) */

	eor	r6, r5, r5, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r5, r6, lsl #16	/* r5 = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(A) */

	eor	r8, r7, r7, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r7, r8, lsl #16	/* r7 = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(A) */

	eor	lr, ip, ip, lsr #16	/* lr = (A)(A^B) */
	eor	ip, ip, lr, lsl #16	/* ip = (B)(B) */
	eor	lr, lr, ip, lsr #16	/* lr = (A)(A) */

	stmia	r0, {r3-r8, ip, lr}

	subs	r2, r2, #8
	bne	.Loutswm8_x8
	beq	.Loutswm8_done		/* count reached exactly zero */

	/* Four short ints: two source words become four output words. */
.Loutswm8_x4:
	cmp	r2, #4
	bcc	.Loutswm8_x2

	ldmia	r1!, {r3-r4}

	eor	r6, r3, r3, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r3, r6, lsl #16	/* r5 = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(A) */

	eor	r8, r4, r4, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r4, r8, lsl #16	/* r7 = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(A) */

	stmia	r0, {r5-r8}

	subs	r2, r2, #4
	beq	.Loutswm8_done

	/* Two short ints: one source word becomes two output words. */
.Loutswm8_x2:
	cmp	r2, #2
	bcc	.Loutswm8_x1

	ldr	r3, [r1], #4		/* r3 = (A)(B) */
	subs	r2, r2, #2		/* flags consumed by the beq below */

	eor	r5, r3, r3, lsr #16	/* r5 = (A)(A^B) */
	eor	r4, r3, r5, lsl #16	/* r4 = (B)(B) */
	eor	r5, r5, r4, lsr #16	/* r5 = (A)(A) */

	stmia	r0, {r4, r5}

	beq	.Loutswm8_done

	/*
	 * Last short int: assembled from two byte loads, copied into both
	 * halves of the word and written to r0.
	 */
.Loutswm8_x1:
	cmp	r2, #1
	bcc	.Loutswm8_done

	ldrb	r3, [r1], #1
	ldrb	r4, [r1], #1
	subs	r2, r2, #1		/* flags from this are not consumed */
	orr	r3, r3, r4, lsl #8
	orr	r3, r3, r3, lsl #16
	str	r3, [r0]

.Loutswm8_done:
	ldmfd	sp!, {r4-r8, pc}	/* restore r4-r8 and return */
END(outswm8)
594248361Sandrew
595