1129198Scognet/*-
2129198Scognet * Copyright (c) 2004 Olivier Houchard
3129198Scognet * All rights reserved.
4129198Scognet *
5129198Scognet * Redistribution and use in source and binary forms, with or without
6129198Scognet * modification, are permitted provided that the following conditions
7129198Scognet * are met:
8129198Scognet * 1. Redistributions of source code must retain the above copyright
9129198Scognet *    notice, this list of conditions and the following disclaimer.
10129198Scognet * 2. Redistributions in binary form must reproduce the above copyright
11129198Scognet *    notice, this list of conditions and the following disclaimer in the
12129198Scognet *    documentation and/or other materials provided with the distribution.
13129198Scognet *
14129198Scognet * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15129198Scognet * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16129198Scognet * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17129198Scognet * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18129198Scognet * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19129198Scognet * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20129198Scognet * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21129198Scognet * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22129198Scognet * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23129198Scognet * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24129198Scognet * SUCH DAMAGE.
25129198Scognet */
26175255Scognet/*
27175255Scognet * Copyright 2003 Wasabi Systems, Inc.
28175255Scognet * All rights reserved.
29175255Scognet *
30175255Scognet * Written by Steve C. Woodford for Wasabi Systems, Inc.
31175255Scognet *
32175255Scognet * Redistribution and use in source and binary forms, with or without
33175255Scognet * modification, are permitted provided that the following conditions
34175255Scognet * are met:
35175255Scognet * 1. Redistributions of source code must retain the above copyright
36175255Scognet *    notice, this list of conditions and the following disclaimer.
37175255Scognet * 2. Redistributions in binary form must reproduce the above copyright
38175255Scognet *    notice, this list of conditions and the following disclaimer in the
39175255Scognet *    documentation and/or other materials provided with the distribution.
40175255Scognet * 3. All advertising materials mentioning features or use of this software
41175255Scognet *    must display the following acknowledgement:
42175255Scognet *      This product includes software developed for the NetBSD Project by
43175255Scognet *      Wasabi Systems, Inc.
44175255Scognet * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45175255Scognet *    or promote products derived from this software without specific prior
46175255Scognet *    written permission.
47175255Scognet *
48175255Scognet * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49175255Scognet * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50175255Scognet * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51175255Scognet * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
52175255Scognet * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53175255Scognet * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54175255Scognet * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55175255Scognet * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56175255Scognet * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57175255Scognet * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58175255Scognet * POSSIBILITY OF SUCH DAMAGE.
59175255Scognet */
60175255Scognet/*
61175255Scognet * Copyright (c) 1997 The NetBSD Foundation, Inc.
62175255Scognet * All rights reserved.
63175255Scognet *
64175255Scognet * This code is derived from software contributed to The NetBSD Foundation
65175255Scognet * by Neil A. Carson and Mark Brinicombe
66175255Scognet *
67175255Scognet * Redistribution and use in source and binary forms, with or without
68175255Scognet * modification, are permitted provided that the following conditions
69175255Scognet * are met:
70175255Scognet * 1. Redistributions of source code must retain the above copyright
71175255Scognet *    notice, this list of conditions and the following disclaimer.
72175255Scognet * 2. Redistributions in binary form must reproduce the above copyright
73175255Scognet *    notice, this list of conditions and the following disclaimer in the
74175255Scognet *    documentation and/or other materials provided with the distribution.
75175255Scognet *
76175255Scognet * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77175255Scognet * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78175255Scognet * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79175255Scognet * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80175255Scognet * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81175255Scognet * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82175255Scognet * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83175255Scognet * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84175255Scognet * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85175255Scognet * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86175255Scognet * POSSIBILITY OF SUCH DAMAGE.
87175255Scognet */
88129198Scognet
89129198Scognet#include <machine/asm.h>
90129198Scognet#include <machine/asmacros.h>
91129198Scognet__FBSDID("$FreeBSD$");
92129198Scognet
93129198Scognet#include "assym.s"
94129198Scognet
/*
 * Literal pool: each word below holds the address of the named C symbol,
 * so code in this file can fetch that address with a pc-relative ldr and
 * then load the symbol's current value through it.
 *
 * bzero (below) reads the word stored at _arm_bzero and, when it is
 * non-zero, calls through it; it reads the word stored at _min_bzero_size
 * as the smallest length for which it does so.
 *
 * NOTE(review): _arm_memcpy and _min_memcpy_size are presumably the
 * matching hook and threshold for memcpy; their users are not in this part
 * of the file -- confirm.
 */
95150864Scognet.L_arm_memcpy:
96150864Scognet	.word	_C_LABEL(_arm_memcpy)
97150864Scognet.L_arm_bzero:
98150864Scognet	.word	_C_LABEL(_arm_bzero)
99150864Scognet.L_min_memcpy_size:
100150864Scognet	.word	_C_LABEL(_min_memcpy_size)
101150864Scognet.L_min_bzero_size:
102150864Scognet	.word	_C_LABEL(_min_bzero_size)
103129198Scognet/*
104129250Scognet * memset: Sets a block of memory to the specified value
105129250Scognet *
106129250Scognet * On entry:
107129250Scognet *   r0 - dest address
108129250Scognet *   r1 - byte to write
109129250Scognet *   r2 - number of bytes to write
110129250Scognet *
111129250Scognet * On exit:
112129250Scognet *   r0 - dest address
113129250Scognet */
/*
 * bzero and memset share one fill routine, do_memset.
 *
 * bzero entry:
 *   r0 - dest address
 *   r1 - number of bytes to clear
 *
 * If the word stored at _arm_bzero is non-zero and the length is not less
 * than the word stored at _min_bzero_size, that routine is called first
 * with r0 = dest, r1 = length, r2 = 0.  When it returns 0 in r0, bzero
 * returns straight away (r0 restored to dest); otherwise the generic fill
 * runs with the original dest and length.
 *
 * do_memset expects:
 *   r0 - dest address (never written, so it is still dest on return)
 *   r1 - number of bytes to fill
 *   r3 - fill byte in bits 0-7
 * and writes only r1-r3, ip and the flags.
 *
 * NOTE(review): the length tests use signed conditions (blt/bge), so a
 * length with bit 31 set is treated as negative.
 * NOTE(review): three registers are pushed before the indirect call, so sp
 * moves by 12 bytes; confirm the callee does not need 8-byte stack
 * alignment.
 */
114129250Scognet/* LINTSTUB: Func: void bzero(void *, size_t) */
115129250ScognetENTRY(bzero)
116150864Scognet	ldr	r3, .L_arm_bzero	/* r3 = &_arm_bzero */
117150864Scognet	ldr	r3, [r3]		/* r3 = routine pointer stored there */
118150864Scognet	cmp	r3, #0
119150864Scognet	beq	.Lnormal0		/* None installed: generic fill */
120150864Scognet	ldr	r2, .L_min_bzero_size
121150864Scognet	ldr	r2, [r2]		/* r2 = minimum length for the routine */
122150864Scognet	cmp	r1, r2
123150864Scognet	blt	.Lnormal0		/* Shorter than that: generic fill */
124150864Scognet	stmfd	sp!, {r0, r1, lr}	/* Keep dest, length, return address */
125150864Scognet	mov	r2, #0			/* Third argument is 0 */
126150864Scognet	mov	lr, pc			/* pc reads 8 ahead: lr = the cmp below */
127150864Scognet	mov	pc, r3			/* Call the routine */
128150864Scognet	cmp	r0, #0			/* Did it return 0? */
129150864Scognet	ldmfd	sp!, {r0, r1, lr}	/* Restore; ldm leaves the flags alone */
130150864Scognet	RETeq				/* Yes: nothing left to do */
131150864Scognet.Lnormal0:
132129250Scognet	mov	r3, #0x00		/* Fill byte is zero */
133129250Scognet	b	do_memset
134129250Scognet
/*
 * memset entry:
 *   r0 - dest address
 *   r1 - byte to write (only bits 0-7 are used)
 *   r2 - number of bytes to write
 * The arguments are moved into the do_memset layout (r1 = length,
 * r3 = byte) and execution falls through.
 */
135129250Scognet/* LINTSTUB: Func: void *memset(void *, int, size_t) */
136129250ScognetENTRY(memset)
137129250Scognet	and	r3, r1, #0xff		/* We deal with bytes */
138129250Scognet	mov	r1, r2			/* r1 = length from here on */
139129250Scognetdo_memset:
140129250Scognet	cmp	r1, #0x04		/* Do we have less than 4 bytes */
141129250Scognet	mov	ip, r0			/* ip walks the buffer; r0 stays = dest */
142129250Scognet	blt	.Lmemset_lessthanfour
143129250Scognet
144129250Scognet	/* Ok first we will word align the address */
145129250Scognet	ands	r2, ip, #0x03		/* Get the bottom two bits */
146129250Scognet	bne	.Lmemset_wordunaligned	/* The address is not word aligned */
147129250Scognet
148129250Scognet	/* We are now word aligned */
149129250Scognet.Lmemset_wordaligned:
150129250Scognet	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
	/*
	 * The flag-setting test is placed between the two orr instructions
	 * (which do not touch the flags); its result is consumed after the
	 * second orr.
	 */
151172614Scognet#ifdef _ARM_ARCH_5E
152172614Scognet	tst	ip, #0x04		/* Quad-align for armv5e */
153129250Scognet#else
154129250Scognet	cmp	r1, #0x10
155129250Scognet#endif
156129250Scognet	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
157172614Scognet#ifdef _ARM_ARCH_5E
	/*
	 * ip is word aligned but not 8-byte aligned: store one word first.
	 * This can leave r1 at 0..3, which the tail code after
	 * .Lmemset_loop4 accounts for.
	 */
158129250Scognet	subne	r1, r1, #0x04		/* Quad-align if necessary */
159129250Scognet	strne	r3, [ip], #0x04
160129250Scognet	cmp	r1, #0x10
161129250Scognet#endif
162129250Scognet	blt	.Lmemset_loop4		/* If less than 16 then use words */
163129250Scognet	mov	r2, r3			/* Duplicate data */
164129250Scognet	cmp	r1, #0x80		/* If < 128 then skip the big loop */
165129250Scognet	blt	.Lmemset_loop32
166129250Scognet
167129250Scognet	/* Do 128 bytes at a time */
	/*
	 * Each conditional store writes the r2/r3 pair (8 bytes).  The stores
	 * do not change the flags, so the result of the subs still governs
	 * the bgt and RETeq that follow them.
	 */
168129250Scognet.Lmemset_loop128:
169129250Scognet	subs	r1, r1, #0x80
170172614Scognet#ifdef _ARM_ARCH_5E
171129250Scognet	strged	r2, [ip], #0x08
172129250Scognet	strged	r2, [ip], #0x08
173129250Scognet	strged	r2, [ip], #0x08
174129250Scognet	strged	r2, [ip], #0x08
175129250Scognet	strged	r2, [ip], #0x08
176129250Scognet	strged	r2, [ip], #0x08
177129250Scognet	strged	r2, [ip], #0x08
178129250Scognet	strged	r2, [ip], #0x08
179129250Scognet	strged	r2, [ip], #0x08
180129250Scognet	strged	r2, [ip], #0x08
181129250Scognet	strged	r2, [ip], #0x08
182129250Scognet	strged	r2, [ip], #0x08
183129250Scognet	strged	r2, [ip], #0x08
184129250Scognet	strged	r2, [ip], #0x08
185129250Scognet	strged	r2, [ip], #0x08
186129250Scognet	strged	r2, [ip], #0x08
187129250Scognet#else
188129250Scognet	stmgeia	ip!, {r2-r3}
189129250Scognet	stmgeia	ip!, {r2-r3}
190129250Scognet	stmgeia	ip!, {r2-r3}
191129250Scognet	stmgeia	ip!, {r2-r3}
192129250Scognet	stmgeia	ip!, {r2-r3}
193129250Scognet	stmgeia	ip!, {r2-r3}
194129250Scognet	stmgeia	ip!, {r2-r3}
195129250Scognet	stmgeia	ip!, {r2-r3}
196129250Scognet	stmgeia	ip!, {r2-r3}
197129250Scognet	stmgeia	ip!, {r2-r3}
198129250Scognet	stmgeia	ip!, {r2-r3}
199129250Scognet	stmgeia	ip!, {r2-r3}
200129250Scognet	stmgeia	ip!, {r2-r3}
201129250Scognet	stmgeia	ip!, {r2-r3}
202129250Scognet	stmgeia	ip!, {r2-r3}
203129250Scognet	stmgeia	ip!, {r2-r3}
204129250Scognet#endif
205129250Scognet	bgt	.Lmemset_loop128
206137463Scognet	RETeq			/* Nothing left so just exit */
207129250Scognet
208129250Scognet	add	r1, r1, #0x80		/* Adjust for extra sub */
209129250Scognet
210129250Scognet	/* Do 32 bytes at a time */
211129250Scognet.Lmemset_loop32:
212129250Scognet	subs	r1, r1, #0x20
213172614Scognet#ifdef _ARM_ARCH_5E
214129250Scognet	strged	r2, [ip], #0x08
215129250Scognet	strged	r2, [ip], #0x08
216129250Scognet	strged	r2, [ip], #0x08
217129250Scognet	strged	r2, [ip], #0x08
218129250Scognet#else
219129250Scognet	stmgeia	ip!, {r2-r3}
220129250Scognet	stmgeia	ip!, {r2-r3}
221129250Scognet	stmgeia	ip!, {r2-r3}
222129250Scognet	stmgeia	ip!, {r2-r3}
223129250Scognet#endif
224129250Scognet	bgt	.Lmemset_loop32
225137463Scognet	RETeq			/* Nothing left so just exit */
226129250Scognet
	/*
	 * r1 is now (bytes left - 32).  Adding 16 makes it (bytes left - 16),
	 * so "ge" below means at least 16 bytes remain.
	 */
227129250Scognet	adds	r1, r1, #0x10		/* Partially adjust for extra sub */
228129250Scognet
229129250Scognet	/* Deal with 16 bytes or more */
230172614Scognet#ifdef _ARM_ARCH_5E
231129250Scognet	strged	r2, [ip], #0x08
232129250Scognet	strged	r2, [ip], #0x08
233129250Scognet#else
234129250Scognet	stmgeia	ip!, {r2-r3}
235129250Scognet	stmgeia	ip!, {r2-r3}
236129250Scognet#endif
237137463Scognet	RETeq			/* Exactly 16 were left so just exit */
238129250Scognet
239129250Scognet	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */
240129250Scognet
241129250Scognet	/* Set the remainder a word at a time */
242129250Scognet.Lmemset_loop4:
243129250Scognet	subs	r1, r1, #0x04
244129250Scognet	strge	r3, [ip], #0x04
245129250Scognet	bgt	.Lmemset_loop4
246137463Scognet	RETeq			/* Nothing left so just exit */
247129250Scognet
	/*
	 * Falling out of the loop, r1 holds (bytes left - 4).  The armv5e
	 * variant restores the true count (which can be 0 after the
	 * quad-align store above) and compares it with 2; the other variant
	 * compares the biased count with -2, which orders the same way.
	 * Either way the flags select 1, 2 or 3 trailing byte stores.
	 */
248172614Scognet#ifdef _ARM_ARCH_5E
249129250Scognet	/* Compensate for 64-bit alignment check */
250129250Scognet	adds	r1, r1, #0x04
251137463Scognet	RETeq
252129250Scognet	cmp	r1, #2
253129250Scognet#else
254129250Scognet	cmp	r1, #-2
255129250Scognet#endif
256129250Scognet
257129250Scognet	strb	r3, [ip], #0x01		/* Set 1 byte */
258129250Scognet	strgeb	r3, [ip], #0x01		/* Set another byte */
259129250Scognet	strgtb	r3, [ip]		/* and a third */
260137463Scognet	RET			/* Exit */
261129250Scognet
	/*
	 * Reached with r2 = ip & 3 (non-zero) and r1 >= 4.  Store 1-3 bytes
	 * to bring ip up to a word boundary and take them off the count.
	 */
262129250Scognet.Lmemset_wordunaligned:
263129250Scognet	rsb	r2, r2, #0x004		/* r2 = bytes up to the word boundary */
264129250Scognet	strb	r3, [ip], #0x01		/* Set 1 byte */
265129250Scognet	cmp	r2, #0x02
266129250Scognet	strgeb	r3, [ip], #0x01		/* Set another byte */
267129250Scognet	sub	r1, r1, r2		/* No flag update: cmp result still live */
268129250Scognet	strgtb	r3, [ip], #0x01		/* and a third */
269129250Scognet	cmp	r1, #0x04		/* At least 4 bytes left? */
270129250Scognet	bge	.Lmemset_wordaligned	/* Yup */
271129250Scognet
	/* Fewer than 4 bytes to set: write 0-3 individual bytes */
272129250Scognet.Lmemset_lessthanfour:
273129250Scognet	cmp	r1, #0x00
274137463Scognet	RETeq			/* Zero length so exit */
275129250Scognet	strb	r3, [ip], #0x01		/* Set 1 byte */
276129250Scognet	cmp	r1, #0x02
277129250Scognet	strgeb	r3, [ip], #0x01		/* Set another byte */
278129250Scognet	strgtb	r3, [ip]		/* and a third */
279137463Scognet	RET			/* Exit */
280248361SandrewEND(bzero)
281248361SandrewEND(memset)
282129254Scognet
/*
 * bcmp: compare two blocks of memory
 *
 * On entry:
 *   r0 - address of the first block  (b1)
 *   r1 - address of the second block (b2)
 *   r2 - number of bytes to compare
 *
 * On exit:
 *   r0 - 0 if the blocks match (also when the length is 0 or both
 *        addresses are the same), otherwise the non-zero difference
 *        b1[i] - b2[i] of the first byte pair that differs
 *
 * Writes r0-r3, ip and the flags.  The labels are named .Lmemcmp_*.
 *
 * Strategy: a length of exactly 6 takes the unrolled path at the end.
 * Otherwise, if the two addresses have the same low two bits, the leading
 * bytes up to a word boundary are compared one at a time, then whole words;
 * any tail, a word that differs, or mutually misaligned pointers are handled
 * by the byte loop.
 */
283144967ScognetENTRY(bcmp)
284129254Scognet	mov	ip, r0			/* ip = b1; r0 becomes the result */
285129254Scognet	cmp	r2, #0x06
286129254Scognet	beq	.Lmemcmp_6bytes
287129254Scognet	mov	r0, #0x00
288129254Scognet
289129254Scognet	/* Are both addresses aligned the same way? */
290129254Scognet	cmp	r2, #0x00
291129254Scognet	eornes	r3, ip, r1		/* Only if len != 0: r3 = b1 ^ b2 */
292137463Scognet	RETeq			/* len == 0, or same addresses! */
293129254Scognet	tst	r3, #0x03		/* Do the low two address bits differ? */
294129254Scognet	subne	r2, r2, #0x01		/* Byte loop counts with r2 = bytes - 1 */
295129254Scognet	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */
296129254Scognet
297129254Scognet	/* Word-align the addresses, if necessary */
	/*
	 * Computed jump.  With k = (r1 - 5) & 3, r3 becomes 3 * k and the
	 * addne adds (3 * k) << 3 = 24 * k bytes to pc.  pc reads as the
	 * address of the instruction after the nop, and each byte-compare
	 * block below is six instructions (24 bytes), so:
	 *   k = 0 (r1 & 3 == 1): no jump, compare 3 bytes
	 *   k = 1 (r1 & 3 == 2): skip one block, compare 2 bytes
	 *   k = 2 (r1 & 3 == 3): skip two blocks, compare 1 byte
	 *   k = 3 (r1 & 3 == 0): skip all three, go straight to the word loop
	 * Do not add or remove instructions between the nop and the word
	 * loop.
	 * NOTE(review): this relies on RETne/RETeq each expanding to exactly
	 * one instruction; the macros come from the included headers --
	 * confirm.
	 */
298129254Scognet	sub	r3, r1, #0x05
299129254Scognet	ands	r3, r3, #0x03
300129254Scognet	add	r3, r3, r3, lsl #1
301129254Scognet	addne	pc, pc, r3, lsl #3
302129254Scognet	nop
303129254Scognet
304129254Scognet	/* Compare up to 3 bytes */
305129254Scognet	ldrb	r0, [ip], #0x01
306129254Scognet	ldrb	r3, [r1], #0x01
307129254Scognet	subs	r0, r0, r3
308137463Scognet	RETne
309129254Scognet	subs	r2, r2, #0x01
310137463Scognet	RETeq
311129254Scognet
312129254Scognet	/* Compare up to 2 bytes */
313129254Scognet	ldrb	r0, [ip], #0x01
314129254Scognet	ldrb	r3, [r1], #0x01
315129254Scognet	subs	r0, r0, r3
316137463Scognet	RETne
317129254Scognet	subs	r2, r2, #0x01
318137463Scognet	RETeq
319129254Scognet
320129254Scognet	/* Compare 1 byte */
321129254Scognet	ldrb	r0, [ip], #0x01
322129254Scognet	ldrb	r3, [r1], #0x01
323129254Scognet	subs	r0, r0, r3
324137463Scognet	RETne
325129254Scognet	subs	r2, r2, #0x01
326137463Scognet	RETeq
327129254Scognet
328129254Scognet	/* Compare 4 bytes at a time, if possible */
329129254Scognet	subs	r2, r2, #0x04
330129254Scognet	bcc	.Lmemcmp_bytewise	/* Fewer than 4 bytes left */
	/*
	 * Loop while the words match and at least 4 more bytes follow.  When
	 * the subs borrows, the cmpcs is skipped and the non-zero result of
	 * the subs ends the loop.
	 */
331129254Scognet.Lmemcmp_word_aligned:
332129254Scognet	ldr	r0, [ip], #0x04
333129254Scognet	ldr	r3, [r1], #0x04
334129254Scognet	subs	r2, r2, #0x04
335129254Scognet	cmpcs	r0, r3
336129254Scognet	beq	.Lmemcmp_word_aligned
337129254Scognet	sub	r0, r0, r3
338129254Scognet
339129254Scognet	/* Correct for extra subtraction, and check if done */
340129254Scognet	adds	r2, r2, #0x04
341129254Scognet	cmpeq	r0, #0x00		/* If done, did all bytes match? */
342137463Scognet	RETeq			/* Yup. Just return */
343129254Scognet
344129254Scognet	/* Re-do the final word byte-wise */
345129254Scognet	sub	ip, ip, #0x04
346129254Scognet	sub	r1, r1, #0x04
347129254Scognet
	/*
	 * On arrival r2 = (bytes left - 4); adding 3 gives the
	 * (bytes left - 1) count that the loop below expects.
	 */
348129254Scognet.Lmemcmp_bytewise:
349129254Scognet	add	r2, r2, #0x03
	/*
	 * Byte loop, r2 = bytes left - 1.  Continues while the bytes match
	 * and the subs does not borrow; returns the difference of the last
	 * pair loaded.
	 */
350129254Scognet.Lmemcmp_bytewise2:
351129254Scognet	ldrb	r0, [ip], #0x01
352129254Scognet	ldrb	r3, [r1], #0x01
353129254Scognet	subs	r2, r2, #0x01
354129254Scognet	cmpcs	r0, r3
355129254Scognet	beq	.Lmemcmp_bytewise2
356129254Scognet	sub	r0, r0, r3
357137463Scognet	RET
358129254Scognet
359129254Scognet	/*
360129254Scognet	 * 6 byte compares are very common, thanks to the network stack.
361129254Scognet	 * This code is hand-scheduled to reduce the number of stalls for
362129254Scognet	 * load results. Everything else being equal, this will be ~32%
363129254Scognet	 * faster than a byte-wise memcmp.
364129254Scognet	 */
	/*
	 * Loads for the next byte pair are issued before the result of the
	 * current pair is tested; the conditional (eq) loads run only while
	 * every pair so far has matched.  Each RETne returns with r0 holding
	 * the non-zero difference just computed.
	 */
365129254Scognet	.align	5
366129254Scognet.Lmemcmp_6bytes:
367129254Scognet	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
368129254Scognet	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
369129254Scognet	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
370129254Scognet	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
371129254Scognet	ldreqb	r3, [ip, #0x01]		/* r3 = b1#1 */
372137463Scognet	RETne			/* Return if mismatch on #0 */
373129254Scognet	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
374129254Scognet	ldreqb	r3, [r1, #0x02]		/* r3 = b2#2 */
375129254Scognet	ldreqb	r0, [ip, #0x02]		/* r0 = b1#2 */
376137463Scognet	RETne			/* Return if mismatch on #1 */
377129254Scognet	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
378129254Scognet	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
379129254Scognet	ldreqb	r3, [ip, #0x03]		/* r3 = b1#3 */
380137463Scognet	RETne			/* Return if mismatch on #2 */
381129254Scognet	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
382129254Scognet	ldreqb	r3, [r1, #0x04]		/* r3 = b2#4 */
383129254Scognet	ldreqb	r0, [ip, #0x04]		/* r0 = b1#4 */
384137463Scognet	RETne			/* Return if mismatch on #3 */
385129254Scognet	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
386129254Scognet	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
387129254Scognet	ldreqb	r3, [ip, #0x05]		/* r3 = b1#5 */
388137463Scognet	RETne			/* Return if mismatch on #4 */
389129254Scognet	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
390137463Scognet	RET
391248361SandrewEND(bcmp)
392129254Scognet
393129254ScognetENTRY(bcopy)
394143175Scognet	/* switch the source and destination registers */
395236991Simp	eor     r0, r1, r0
396236991Simp	eor     r1, r0, r1
397236991Simp	eor     r0, r1, r0
398143175ScognetENTRY(memmove)
399143175Scognet	/* Do the buffers overlap? */
400143175Scognet	cmp	r0, r1
401143175Scognet	RETeq		/* Bail now if src/dst are the same */
402143175Scognet	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
403143175Scognet	subcs	r3, r1, r0	/* if (src > dsr) r3 = src - dst */
404143175Scognet	cmp	r3, r2		/* if (r3 < len) we have an overlap */
405143175Scognet	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)
406143175Scognet
407143175Scognet	/* Determine copy direction */
408143175Scognet	cmp	r1, r0
409143175Scognet	bcc	.Lmemmove_backwards
410143175Scognet
411143175Scognet	moveq	r0, #0			/* Quick abort for len=0 */
412143175Scognet	RETeq
413143175Scognet
414143175Scognet	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
415143175Scognet	subs	r2, r2, #4
416143175Scognet	blt	.Lmemmove_fl4		/* less than 4 bytes */
417143175Scognet	ands	r12, r0, #3
418143175Scognet	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
419143175Scognet	ands	r12, r1, #3
420143175Scognet	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */
421143175Scognet
422143175Scognet.Lmemmove_ft8:
423143175Scognet	/* We have aligned source and destination */
424143175Scognet	subs	r2, r2, #8
425143175Scognet	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
426236991Simp	subs	r2, r2, #0x14
427143175Scognet	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
428143175Scognet	stmdb	sp!, {r4}		/* borrow r4 */
429143175Scognet
430143175Scognet	/* blat 32 bytes at a time */
431143175Scognet	/* XXX for really big copies perhaps we should use more registers */
432143175Scognet.Lmemmove_floop32:
433143175Scognet	ldmia	r1!, {r3, r4, r12, lr}
434143175Scognet	stmia	r0!, {r3, r4, r12, lr}
435143175Scognet	ldmia	r1!, {r3, r4, r12, lr}
436143175Scognet	stmia	r0!, {r3, r4, r12, lr}
437236991Simp	subs	r2, r2, #0x20
438143175Scognet	bge	.Lmemmove_floop32
439143175Scognet
440143175Scognet	cmn	r2, #0x10
441143175Scognet	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
442143175Scognet	stmgeia	r0!, {r3, r4, r12, lr}
443236991Simp	subge	r2, r2, #0x10
444143175Scognet	ldmia	sp!, {r4}		/* return r4 */
445143175Scognet
446143175Scognet.Lmemmove_fl32:
447236991Simp	adds	r2, r2, #0x14
448143175Scognet
449143175Scognet	/* blat 12 bytes at a time */
450143175Scognet.Lmemmove_floop12:
451143175Scognet	ldmgeia	r1!, {r3, r12, lr}
452143175Scognet	stmgeia	r0!, {r3, r12, lr}
453236991Simp	subges	r2, r2, #0x0c
454143175Scognet	bge	.Lmemmove_floop12
455143175Scognet
456143175Scognet.Lmemmove_fl12:
457143175Scognet	adds	r2, r2, #8
458143175Scognet	blt	.Lmemmove_fl4
459143175Scognet
460143175Scognet	subs	r2, r2, #4
461143175Scognet	ldrlt	r3, [r1], #4
462143175Scognet	strlt	r3, [r0], #4
463143175Scognet	ldmgeia	r1!, {r3, r12}
464143175Scognet	stmgeia	r0!, {r3, r12}
465143175Scognet	subge	r2, r2, #4
466143175Scognet
467143175Scognet.Lmemmove_fl4:
468143175Scognet	/* less than 4 bytes to go */
469143175Scognet	adds	r2, r2, #4
470143175Scognet	ldmeqia	sp!, {r0, pc}		/* done */
471143175Scognet
472143175Scognet	/* copy the crud byte at a time */
473143175Scognet	cmp	r2, #2
474143175Scognet	ldrb	r3, [r1], #1
475143175Scognet	strb	r3, [r0], #1
476143175Scognet	ldrgeb	r3, [r1], #1
477143175Scognet	strgeb	r3, [r0], #1
478143175Scognet	ldrgtb	r3, [r1], #1
479143175Scognet	strgtb	r3, [r0], #1
480143175Scognet	ldmia	sp!, {r0, pc}
481143175Scognet
482143175Scognet	/* erg - unaligned destination */
483143175Scognet.Lmemmove_fdestul:
484143175Scognet	rsb	r12, r12, #4
485143175Scognet	cmp	r12, #2
486143175Scognet
487143175Scognet	/* align destination with byte copies */
488143175Scognet	ldrb	r3, [r1], #1
489143175Scognet	strb	r3, [r0], #1
490143175Scognet	ldrgeb	r3, [r1], #1
491143175Scognet	strgeb	r3, [r0], #1
492143175Scognet	ldrgtb	r3, [r1], #1
493143175Scognet	strgtb	r3, [r0], #1
494143175Scognet	subs	r2, r2, r12
495143175Scognet	blt	.Lmemmove_fl4		/* less the 4 bytes */
496143175Scognet
497143175Scognet	ands	r12, r1, #3
498143175Scognet	beq	.Lmemmove_ft8		/* we have an aligned source */
499143175Scognet
500143175Scognet	/* erg - unaligned source */
501143175Scognet	/* This is where it gets nasty ... */
502143175Scognet.Lmemmove_fsrcul:
503143175Scognet	bic	r1, r1, #3
504143175Scognet	ldr	lr, [r1], #4
505143175Scognet	cmp	r12, #2
506143175Scognet	bgt	.Lmemmove_fsrcul3
507143175Scognet	beq	.Lmemmove_fsrcul2
508236991Simp	cmp	r2, #0x0c
509143175Scognet	blt	.Lmemmove_fsrcul1loop4
510236991Simp	sub	r2, r2, #0x0c
511143175Scognet	stmdb	sp!, {r4, r5}
512143175Scognet
513143175Scognet.Lmemmove_fsrcul1loop16:
514143175Scognet#ifdef __ARMEB__
515143175Scognet	mov	r3, lr, lsl #8
516143175Scognet#else
517143175Scognet	mov	r3, lr, lsr #8
518143175Scognet#endif
519143175Scognet	ldmia	r1!, {r4, r5, r12, lr}
520143175Scognet#ifdef __ARMEB__
521143175Scognet	orr	r3, r3, r4, lsr #24
522143175Scognet	mov	r4, r4, lsl #8
523143175Scognet	orr	r4, r4, r5, lsr #24
524143175Scognet	mov	r5, r5, lsl #8
525143175Scognet	orr	r5, r5, r12, lsr #24
526143175Scognet	mov	r12, r12, lsl #8
527143175Scognet	orr	r12, r12, lr, lsr #24
528143175Scognet#else
529143175Scognet	orr	r3, r3, r4, lsl #24
530143175Scognet	mov	r4, r4, lsr #8
531143175Scognet	orr	r4, r4, r5, lsl #24
532143175Scognet	mov	r5, r5, lsr #8
533143175Scognet	orr	r5, r5, r12, lsl #24
534143175Scognet	mov	r12, r12, lsr #8
535143175Scognet	orr	r12, r12, lr, lsl #24
536143175Scognet#endif
537143175Scognet	stmia	r0!, {r3-r5, r12}
538236991Simp	subs	r2, r2, #0x10
539143175Scognet	bge	.Lmemmove_fsrcul1loop16
540143175Scognet	ldmia	sp!, {r4, r5}
541236991Simp	adds	r2, r2, #0x0c
542143175Scognet	blt	.Lmemmove_fsrcul1l4
543143175Scognet
544143175Scognet.Lmemmove_fsrcul1loop4:
545143175Scognet#ifdef __ARMEB__
546143175Scognet	mov	r12, lr, lsl #8
547143175Scognet#else
548143175Scognet	mov	r12, lr, lsr #8
549143175Scognet#endif
550143175Scognet	ldr	lr, [r1], #4
551143175Scognet#ifdef __ARMEB__
552143175Scognet	orr	r12, r12, lr, lsr #24
553143175Scognet#else
554143175Scognet	orr	r12, r12, lr, lsl #24
555143175Scognet#endif
556143175Scognet	str	r12, [r0], #4
557143175Scognet	subs	r2, r2, #4
558143175Scognet	bge	.Lmemmove_fsrcul1loop4
559143175Scognet
560143175Scognet.Lmemmove_fsrcul1l4:
561143175Scognet	sub	r1, r1, #3
562143175Scognet	b	.Lmemmove_fl4
563143175Scognet
564143175Scognet.Lmemmove_fsrcul2:
565236991Simp	cmp	r2, #0x0c
566143175Scognet	blt	.Lmemmove_fsrcul2loop4
567236991Simp	sub	r2, r2, #0x0c
568143175Scognet	stmdb	sp!, {r4, r5}
569143175Scognet
570143175Scognet.Lmemmove_fsrcul2loop16:
571143175Scognet#ifdef __ARMEB__
572143175Scognet	mov	r3, lr, lsl #16
573143175Scognet#else
574143175Scognet	mov	r3, lr, lsr #16
575143175Scognet#endif
576143175Scognet	ldmia	r1!, {r4, r5, r12, lr}
577143175Scognet#ifdef __ARMEB__
578143175Scognet	orr	r3, r3, r4, lsr #16
579143175Scognet	mov	r4, r4, lsl #16
580143175Scognet	orr	r4, r4, r5, lsr #16
581143175Scognet	mov	r5, r5, lsl #16
582143175Scognet	orr	r5, r5, r12, lsr #16
583143175Scognet	mov	r12, r12, lsl #16
584143175Scognet	orr	r12, r12, lr, lsr #16
585143175Scognet#else
586143175Scognet	orr	r3, r3, r4, lsl #16
587143175Scognet	mov	r4, r4, lsr #16
588143175Scognet	orr	r4, r4, r5, lsl #16
589143175Scognet	mov	r5, r5, lsr #16
590143175Scognet	orr	r5, r5, r12, lsl #16
591143175Scognet	mov	r12, r12, lsr #16
592143175Scognet	orr	r12, r12, lr, lsl #16
593143175Scognet#endif
594143175Scognet	stmia	r0!, {r3-r5, r12}
595236991Simp	subs	r2, r2, #0x10
596143175Scognet	bge	.Lmemmove_fsrcul2loop16
597143175Scognet	ldmia	sp!, {r4, r5}
598236991Simp	adds	r2, r2, #0x0c
599143175Scognet	blt	.Lmemmove_fsrcul2l4
600143175Scognet
601143175Scognet.Lmemmove_fsrcul2loop4:
602143175Scognet#ifdef __ARMEB__
603143175Scognet	mov	r12, lr, lsl #16
604143175Scognet#else
605143175Scognet	mov	r12, lr, lsr #16
606143175Scognet#endif
607143175Scognet	ldr	lr, [r1], #4
608143175Scognet#ifdef __ARMEB__
609143175Scognet	orr	r12, r12, lr, lsr #16
610143175Scognet#else
611143175Scognet	orr	r12, r12, lr, lsl #16
612143175Scognet#endif
613143175Scognet	str	r12, [r0], #4
614143175Scognet	subs	r2, r2, #4
615143175Scognet	bge	.Lmemmove_fsrcul2loop4
616143175Scognet
617143175Scognet.Lmemmove_fsrcul2l4:
618143175Scognet	sub	r1, r1, #2
619143175Scognet	b	.Lmemmove_fl4
620143175Scognet
621143175Scognet.Lmemmove_fsrcul3:
622236991Simp	cmp	r2, #0x0c
623143175Scognet	blt	.Lmemmove_fsrcul3loop4
624236991Simp	sub	r2, r2, #0x0c
625143175Scognet	stmdb	sp!, {r4, r5}
626143175Scognet
627143175Scognet.Lmemmove_fsrcul3loop16:
628143175Scognet#ifdef __ARMEB__
629143175Scognet	mov	r3, lr, lsl #24
630143175Scognet#else
631143175Scognet	mov	r3, lr, lsr #24
632143175Scognet#endif
633143175Scognet	ldmia	r1!, {r4, r5, r12, lr}
634143175Scognet#ifdef __ARMEB__
635143175Scognet	orr	r3, r3, r4, lsr #8
636143175Scognet	mov	r4, r4, lsl #24
637143175Scognet	orr	r4, r4, r5, lsr #8
638143175Scognet	mov	r5, r5, lsl #24
639143175Scognet	orr	r5, r5, r12, lsr #8
640143175Scognet	mov	r12, r12, lsl #24
641143175Scognet	orr	r12, r12, lr, lsr #8
642143175Scognet#else
643143175Scognet	orr	r3, r3, r4, lsl #8
644143175Scognet	mov	r4, r4, lsr #24
645143175Scognet	orr	r4, r4, r5, lsl #8
646143175Scognet	mov	r5, r5, lsr #24
647143175Scognet	orr	r5, r5, r12, lsl #8
648143175Scognet	mov	r12, r12, lsr #24
649143175Scognet	orr	r12, r12, lr, lsl #8
650143175Scognet#endif
651143175Scognet	stmia	r0!, {r3-r5, r12}
652236991Simp	subs	r2, r2, #0x10
653143175Scognet	bge	.Lmemmove_fsrcul3loop16
654143175Scognet	ldmia	sp!, {r4, r5}
655236991Simp	adds	r2, r2, #0x0c
656143175Scognet	blt	.Lmemmove_fsrcul3l4
657143175Scognet
658143175Scognet.Lmemmove_fsrcul3loop4:
659143175Scognet#ifdef __ARMEB__
660143175Scognet	mov	r12, lr, lsl #24
661143175Scognet#else
662143175Scognet	mov	r12, lr, lsr #24
663143175Scognet#endif
664143175Scognet	ldr	lr, [r1], #4
665143175Scognet#ifdef __ARMEB__
666143175Scognet	orr	r12, r12, lr, lsr #8
667143175Scognet#else
668143175Scognet	orr	r12, r12, lr, lsl #8
669143175Scognet#endif
670143175Scognet	str	r12, [r0], #4
671143175Scognet	subs	r2, r2, #4
672143175Scognet	bge	.Lmemmove_fsrcul3loop4
673143175Scognet
674143175Scognet.Lmemmove_fsrcul3l4:
675143175Scognet	sub	r1, r1, #1
676143175Scognet	b	.Lmemmove_fl4
677143175Scognet
678143175Scognet.Lmemmove_backwards:
679143175Scognet	add	r1, r1, r2
680143175Scognet	add	r0, r0, r2
681143175Scognet	subs	r2, r2, #4
682143175Scognet	blt	.Lmemmove_bl4		/* less than 4 bytes */
683143175Scognet	ands	r12, r0, #3
684143175Scognet	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
685143175Scognet	ands	r12, r1, #3
686143175Scognet	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */
687143175Scognet
688143175Scognet.Lmemmove_bt8:
689143175Scognet	/* We have aligned source and destination */
690143175Scognet	subs	r2, r2, #8
691143175Scognet	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
692143175Scognet	stmdb	sp!, {r4, lr}
693143175Scognet	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
694143175Scognet	blt	.Lmemmove_bl32
695143175Scognet
696143175Scognet	/* blat 32 bytes at a time */
697143175Scognet	/* XXX for really big copies perhaps we should use more registers */
698143175Scognet.Lmemmove_bloop32:
699143175Scognet	ldmdb	r1!, {r3, r4, r12, lr}
700143175Scognet	stmdb	r0!, {r3, r4, r12, lr}
701143175Scognet	ldmdb	r1!, {r3, r4, r12, lr}
702143175Scognet	stmdb	r0!, {r3, r4, r12, lr}
703236991Simp	subs	r2, r2, #0x20
704143175Scognet	bge	.Lmemmove_bloop32
705143175Scognet
706143175Scognet.Lmemmove_bl32:
707236991Simp	cmn	r2, #0x10
708143175Scognet	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
709143175Scognet	stmgedb	r0!, {r3, r4, r12, lr}
710236991Simp	subge	r2, r2, #0x10
711236991Simp	adds	r2, r2, #0x14
712143175Scognet	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
713143175Scognet	stmgedb	r0!, {r3, r12, lr}
714236991Simp	subge	r2, r2, #0x0c
715143175Scognet	ldmia	sp!, {r4, lr}
716143175Scognet
717143175Scognet.Lmemmove_bl12:
718143175Scognet	adds	r2, r2, #8
719143175Scognet	blt	.Lmemmove_bl4
720143175Scognet	subs	r2, r2, #4
721143175Scognet	ldrlt	r3, [r1, #-4]!
722143175Scognet	strlt	r3, [r0, #-4]!
723143175Scognet	ldmgedb	r1!, {r3, r12}
724143175Scognet	stmgedb	r0!, {r3, r12}
725143175Scognet	subge	r2, r2, #4
726143175Scognet
727143175Scognet.Lmemmove_bl4:
728143175Scognet	/* less than 4 bytes to go */
729143175Scognet	adds	r2, r2, #4
730143175Scognet	RETeq			/* done */
731143175Scognet
732143175Scognet	/* copy the crud byte at a time */
733143175Scognet	cmp	r2, #2
734143175Scognet	ldrb	r3, [r1, #-1]!
735143175Scognet	strb	r3, [r0, #-1]!
736143175Scognet	ldrgeb	r3, [r1, #-1]!
737143175Scognet	strgeb	r3, [r0, #-1]!
738143175Scognet	ldrgtb	r3, [r1, #-1]!
739143175Scognet	strgtb	r3, [r0, #-1]!
740143175Scognet	RET
741143175Scognet
742143175Scognet	/* erg - unaligned destination */
743143175Scognet.Lmemmove_bdestul:
744143175Scognet	cmp	r12, #2
745143175Scognet
746143175Scognet	/* align destination with byte copies */
747143175Scognet	ldrb	r3, [r1, #-1]!
748143175Scognet	strb	r3, [r0, #-1]!
749143175Scognet	ldrgeb	r3, [r1, #-1]!
750143175Scognet	strgeb	r3, [r0, #-1]!
751143175Scognet	ldrgtb	r3, [r1, #-1]!
752143175Scognet	strgtb	r3, [r0, #-1]!
753143175Scognet	subs	r2, r2, r12
754143175Scognet	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
755143175Scognet	ands	r12, r1, #3
756143175Scognet	beq	.Lmemmove_bt8		/* we have an aligned source */
757143175Scognet
758143175Scognet	/* erg - unaligned source */
759143175Scognet	/* This is where it gets nasty ... */
760143175Scognet.Lmemmove_bsrcul:
761143175Scognet	bic	r1, r1, #3
762143175Scognet	ldr	r3, [r1, #0]
763143175Scognet	cmp	r12, #2
764143175Scognet	blt	.Lmemmove_bsrcul1
765143175Scognet	beq	.Lmemmove_bsrcul2
766236991Simp	cmp	r2, #0x0c
767143175Scognet	blt	.Lmemmove_bsrcul3loop4
768236991Simp	sub	r2, r2, #0x0c
769143175Scognet	stmdb	sp!, {r4, r5, lr}
770143175Scognet
771143175Scognet.Lmemmove_bsrcul3loop16:
772143175Scognet#ifdef __ARMEB__
773143175Scognet	mov	lr, r3, lsr #8
774143175Scognet#else
775143175Scognet	mov	lr, r3, lsl #8
776143175Scognet#endif
777143175Scognet	ldmdb	r1!, {r3-r5, r12}
778143175Scognet#ifdef __ARMEB__
779143175Scognet	orr	lr, lr, r12, lsl #24
780143175Scognet	mov	r12, r12, lsr #8
781143175Scognet	orr	r12, r12, r5, lsl #24
782143175Scognet	mov	r5, r5, lsr #8
783143175Scognet	orr	r5, r5, r4, lsl #24
784143175Scognet	mov	r4, r4, lsr #8
785143175Scognet	orr	r4, r4, r3, lsl #24
786143175Scognet#else
787143175Scognet	orr	lr, lr, r12, lsr #24
788143175Scognet	mov	r12, r12, lsl #8
789143175Scognet	orr	r12, r12, r5, lsr #24
790143175Scognet	mov	r5, r5, lsl #8
791143175Scognet	orr	r5, r5, r4, lsr #24
792143175Scognet	mov	r4, r4, lsl #8
793143175Scognet	orr	r4, r4, r3, lsr #24
794143175Scognet#endif
795143175Scognet	stmdb	r0!, {r4, r5, r12, lr}
796236991Simp	subs	r2, r2, #0x10
797143175Scognet	bge	.Lmemmove_bsrcul3loop16
798143175Scognet	ldmia	sp!, {r4, r5, lr}
799236991Simp	adds	r2, r2, #0x0c
800143175Scognet	blt	.Lmemmove_bsrcul3l4
801143175Scognet
802143175Scognet.Lmemmove_bsrcul3loop4:
803143175Scognet#ifdef __ARMEB__
804143175Scognet	mov	r12, r3, lsr #8
805143175Scognet#else
806143175Scognet	mov	r12, r3, lsl #8
807143175Scognet#endif
808143175Scognet	ldr	r3, [r1, #-4]!
809143175Scognet#ifdef __ARMEB__
810143175Scognet	orr	r12, r12, r3, lsl #24
811143175Scognet#else
812143175Scognet	orr	r12, r12, r3, lsr #24
813143175Scognet#endif
814143175Scognet	str	r12, [r0, #-4]!
815143175Scognet	subs	r2, r2, #4
816143175Scognet	bge	.Lmemmove_bsrcul3loop4
817143175Scognet
818143175Scognet.Lmemmove_bsrcul3l4:
819143175Scognet	add	r1, r1, #3
820143175Scognet	b	.Lmemmove_bl4
821143175Scognet
822143175Scognet.Lmemmove_bsrcul2:
823236991Simp	cmp	r2, #0x0c
824143175Scognet	blt	.Lmemmove_bsrcul2loop4
825236991Simp	sub	r2, r2, #0x0c
826143175Scognet	stmdb	sp!, {r4, r5, lr}
827143175Scognet
828143175Scognet.Lmemmove_bsrcul2loop16:
829143175Scognet#ifdef __ARMEB__
830143175Scognet	mov	lr, r3, lsr #16
831143175Scognet#else
832143175Scognet	mov	lr, r3, lsl #16
833143175Scognet#endif
834143175Scognet	ldmdb	r1!, {r3-r5, r12}
835143175Scognet#ifdef __ARMEB__
836143175Scognet	orr	lr, lr, r12, lsl #16
837143175Scognet	mov	r12, r12, lsr #16
838143175Scognet	orr	r12, r12, r5, lsl #16
839143175Scognet	mov	r5, r5, lsr #16
840143175Scognet	orr	r5, r5, r4, lsl #16
841143175Scognet	mov	r4, r4, lsr #16
842143175Scognet	orr	r4, r4, r3, lsl #16
843143175Scognet#else
844143175Scognet	orr	lr, lr, r12, lsr #16
845143175Scognet	mov	r12, r12, lsl #16
846143175Scognet	orr	r12, r12, r5, lsr #16
847143175Scognet	mov	r5, r5, lsl #16
848143175Scognet	orr	r5, r5, r4, lsr #16
849143175Scognet	mov	r4, r4, lsl #16
850143175Scognet	orr	r4, r4, r3, lsr #16
851143175Scognet#endif
852143175Scognet	stmdb	r0!, {r4, r5, r12, lr}
853236991Simp	subs	r2, r2, #0x10
854143175Scognet	bge	.Lmemmove_bsrcul2loop16
855143175Scognet	ldmia	sp!, {r4, r5, lr}
856236991Simp	adds	r2, r2, #0x0c
857143175Scognet	blt	.Lmemmove_bsrcul2l4
858143175Scognet
859143175Scognet.Lmemmove_bsrcul2loop4:
860143175Scognet#ifdef __ARMEB__
861143175Scognet	mov	r12, r3, lsr #16
862143175Scognet#else
863143175Scognet	mov	r12, r3, lsl #16
864143175Scognet#endif
865143175Scognet	ldr	r3, [r1, #-4]!
866143175Scognet#ifdef __ARMEB__
867143175Scognet	orr	r12, r12, r3, lsl #16
868143175Scognet#else
869143175Scognet	orr	r12, r12, r3, lsr #16
870143175Scognet#endif
871143175Scognet	str	r12, [r0, #-4]!
872143175Scognet	subs	r2, r2, #4
873143175Scognet	bge	.Lmemmove_bsrcul2loop4
874143175Scognet
875143175Scognet.Lmemmove_bsrcul2l4:
876143175Scognet	add	r1, r1, #2
877143175Scognet	b	.Lmemmove_bl4
878143175Scognet
879143175Scognet.Lmemmove_bsrcul1:
880236991Simp	cmp	r2, #0x0c
881143175Scognet	blt	.Lmemmove_bsrcul1loop4
882236991Simp	sub	r2, r2, #0x0c
883143175Scognet	stmdb	sp!, {r4, r5, lr}
884143175Scognet
885143175Scognet.Lmemmove_bsrcul1loop32:
886143175Scognet#ifdef __ARMEB__
887143175Scognet	mov	lr, r3, lsr #24
888143175Scognet#else
889143175Scognet	mov	lr, r3, lsl #24
890143175Scognet#endif
891143175Scognet	ldmdb	r1!, {r3-r5, r12}
892143175Scognet#ifdef __ARMEB__
893143175Scognet	orr	lr, lr, r12, lsl #8
894143175Scognet	mov	r12, r12, lsr #24
895143175Scognet	orr	r12, r12, r5, lsl #8
896143175Scognet	mov	r5, r5, lsr #24
897143175Scognet	orr	r5, r5, r4, lsl #8
898143175Scognet	mov	r4, r4, lsr #24
899143175Scognet	orr	r4, r4, r3, lsl #8
900143175Scognet#else
901143175Scognet	orr	lr, lr, r12, lsr #8
902143175Scognet	mov	r12, r12, lsl #24
903143175Scognet	orr	r12, r12, r5, lsr #8
904143175Scognet	mov	r5, r5, lsl #24
905143175Scognet	orr	r5, r5, r4, lsr #8
906143175Scognet	mov	r4, r4, lsl #24
907143175Scognet	orr	r4, r4, r3, lsr #8
908143175Scognet#endif
909143175Scognet	stmdb	r0!, {r4, r5, r12, lr}
910236991Simp	subs	r2, r2, #0x10
911143175Scognet	bge	.Lmemmove_bsrcul1loop32
912143175Scognet	ldmia	sp!, {r4, r5, lr}
913236991Simp	adds	r2, r2, #0x0c
914143175Scognet	blt	.Lmemmove_bsrcul1l4
915143175Scognet
916143175Scognet.Lmemmove_bsrcul1loop4:
917143175Scognet#ifdef __ARMEB__
918143175Scognet	mov	r12, r3, lsr #24
919143175Scognet#else
920143175Scognet	mov	r12, r3, lsl #24
921143175Scognet#endif
922143175Scognet	ldr	r3, [r1, #-4]!
923143175Scognet#ifdef __ARMEB__
924143175Scognet	orr	r12, r12, r3, lsl #8
925143175Scognet#else
926143175Scognet	orr	r12, r12, r3, lsr #8
927143175Scognet#endif
928143175Scognet	str	r12, [r0, #-4]!
929143175Scognet	subs	r2, r2, #4
930143175Scognet	bge	.Lmemmove_bsrcul1loop4
931143175Scognet
932143175Scognet.Lmemmove_bsrcul1l4:
933143175Scognet	add	r1, r1, #1
934143175Scognet	b	.Lmemmove_bl4
935248361SandrewEND(bcopy)
936248361SandrewEND(memmove)
937143175Scognet
938172614Scognet#if !defined(_ARM_ARCH_5E)
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * Forward copy used when _ARM_ARCH_5E is not defined.
 *
 * In:	r0 = dst, r1 = src, r2 = len
 * Out:	r0 = dst (pushed at .Lnormal and reloaded by every return)
 * Uses:	r3, r12 and lr as scratch.  r4 (and r5 on the unaligned-source
 *	paths) are saved on the stack while borrowed and restored after.
 *
 * While copying, r2 is kept biased below the real number of bytes left
 * (by 4, 12, 16 or 32 depending on the stage) so that the sign of r2
 * after a subs/adds tells whether another chunk of that size remains.
 *
 * The unaligned-source paths read whole aligned words and merge two
 * neighbouring words with shifts.  The shift directions depend on the
 * byte order, so both __ARMEB__ and little-endian forms are provided,
 * in the same way as the memmove code in this file.
 */
ENTRY(memcpy)
	/* Do not check arm_memcpy if we're running from flash */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	/* Is a hook function installed?  A zero pointer means no. */
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	/* Copies shorter than the stored minimum size are done here. */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	/*
	 * Call the hook with r0-r2 unchanged and r3 = 0.  The arguments
	 * are saved so that they can be reused if the hook returns a
	 * non-zero value; a zero return means the copy is finished and
	 * we return with r0 restored to dst.  ldmfd leaves the flags
	 * from the cmp intact for RETeq.
	 */
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0
	ldr	r4, .L_arm_memcpy
	mov	lr, pc
	ldr	pc, [r4]
	cmp	r0, #0
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq

.Lnormal:
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4		/* r2 = len - 4 */
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8		/* r2 = remaining - 12 */
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14		/* r2 = remaining - 32 */
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	/* r2 is now in [-32, -1]; ge after cmn means 16 or more remain */
	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* r2 = remaining - 12 */

	/* blat 12 bytes at a time */
	/* (entered with the flags from the adds above: ge = 12+ left) */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8		/* r2 = remaining - 4 */
	blt	.Lmemcpy_l4

	/* 4..11 bytes left: copy one word (lt) or two words (ge) */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = remaining (0..3) */
#ifdef __APCS_26_
	ldmeqia sp!, {r0, pc}^		/* done */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	/* one byte always, a second if r2 >= 2, a third if r2 > 2 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * r12 = src & 3 (1..3).  r1 is rounded down to a word boundary
	 * and lr always holds the most recently loaded source word, part
	 * of which still has to be stored.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	/* source is one byte past a word boundary */
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	/* r1 is one word ahead; step back to the next uncopied byte */
	sub	r1, r1, #3
	b	.Lmemcpy_l4

	/* source is two bytes past a word boundary */
.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	/* r1 is one word ahead; step back to the next uncopied byte */
	sub	r1, r1, #2
	b	.Lmemcpy_l4

	/* source is three bytes past a word boundary */
.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c		/* r2 = remaining - 16 */
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c		/* r2 = remaining - 4 */
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	/* r1 is one word ahead; step back to the next uncopied byte */
	sub	r1, r1, #1
	b	.Lmemcpy_l4
END(memcpy)

1173248361Sandrew
1174129254Scognet#else
1175129254Scognet/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1176129254ScognetENTRY(memcpy)
1177129254Scognet	pld	[r1]
1178129254Scognet	cmp	r2, #0x0c
1179129254Scognet	ble	.Lmemcpy_short		/* <= 12 bytes */
1180167003Scognet#ifdef FLASHADDR
1181167003Scognet#if FLASHADDR > PHYSADDR
1182167003Scognet	ldr	r3, =FLASHADDR
1183167003Scognet	cmp	r3, pc
1184167003Scognet	bls	.Lnormal
1185167003Scognet#else
1186167003Scognet	ldr	r3, =FLASHADDR
1187167003Scognet	cmp	r3, pc
1188167003Scognet	bhi	.Lnormal
1189167003Scognet#endif
1190167003Scognet#endif
1191150864Scognet	ldr	r3, .L_arm_memcpy
1192150864Scognet	ldr	r3, [r3]
1193150864Scognet	cmp	r3, #0
1194150864Scognet	beq	.Lnormal
1195150864Scognet	ldr	r3, .L_min_memcpy_size
1196150864Scognet	ldr	r3, [r3]
1197150864Scognet	cmp	r2, r3
1198150864Scognet	blt	.Lnormal
1199150864Scognet	stmfd	sp!, {r0-r2, r4, lr}
1200150864Scognet	mov	r3, #0
1201150864Scognet	ldr	r4, .L_arm_memcpy
1202150864Scognet	mov	lr, pc
1203150864Scognet	ldr	pc, [r4]
1204150864Scognet	cmp	r0, #0
1205150864Scognet	ldmfd	sp!, {r0-r2, r4, lr}
1206150864Scognet	RETeq
1207150864Scognet.Lnormal:
1208129254Scognet	mov	r3, r0			/* We must not clobber r0 */
1209129254Scognet
1210129254Scognet	/* Word-align the destination buffer */
1211129254Scognet	ands	ip, r3, #0x03		/* Already word aligned? */
1212129254Scognet	beq	.Lmemcpy_wordaligned	/* Yup */
1213129254Scognet	cmp	ip, #0x02
1214129254Scognet	ldrb	ip, [r1], #0x01
1215129254Scognet	sub	r2, r2, #0x01
1216129254Scognet	strb	ip, [r3], #0x01
1217129254Scognet	ldrleb	ip, [r1], #0x01
1218129254Scognet	suble	r2, r2, #0x01
1219129254Scognet	strleb	ip, [r3], #0x01
1220129254Scognet	ldrltb	ip, [r1], #0x01
1221129254Scognet	sublt	r2, r2, #0x01
1222129254Scognet	strltb	ip, [r3], #0x01
1223129254Scognet
1224129254Scognet	/* Destination buffer is now word aligned */
1225129254Scognet.Lmemcpy_wordaligned:
1226129254Scognet	ands	ip, r1, #0x03		/* Is src also word-aligned? */
1227129254Scognet	bne	.Lmemcpy_bad_align	/* Nope. Things just got bad */
1228129254Scognet
1229129254Scognet	/* Quad-align the destination buffer */
1230129254Scognet	tst	r3, #0x07		/* Already quad aligned? */
1231129254Scognet	ldrne	ip, [r1], #0x04
1232129254Scognet	stmfd	sp!, {r4-r9}		/* Free up some registers */
1233129254Scognet	subne	r2, r2, #0x04
1234129254Scognet	strne	ip, [r3], #0x04
1235129254Scognet
1236129254Scognet	/* Destination buffer quad aligned, source is at least word aligned */
1237129254Scognet	subs	r2, r2, #0x80
1238129254Scognet	blt	.Lmemcpy_w_lessthan128
1239129254Scognet
1240129254Scognet	/* Copy 128 bytes at a time */
1241129254Scognet.Lmemcpy_w_loop128:
1242129254Scognet	ldr	r4, [r1], #0x04		/* LD:00-03 */
1243129254Scognet	ldr	r5, [r1], #0x04		/* LD:04-07 */
1244129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x20 */
1245129254Scognet	ldr	r6, [r1], #0x04		/* LD:08-0b */
1246129254Scognet	ldr	r7, [r1], #0x04		/* LD:0c-0f */
1247129254Scognet	ldr	r8, [r1], #0x04		/* LD:10-13 */
1248129254Scognet	ldr	r9, [r1], #0x04		/* LD:14-17 */
1249129254Scognet	strd	r4, [r3], #0x08		/* ST:00-07 */
1250129254Scognet	ldr	r4, [r1], #0x04		/* LD:18-1b */
1251129254Scognet	ldr	r5, [r1], #0x04		/* LD:1c-1f */
1252129254Scognet	strd	r6, [r3], #0x08		/* ST:08-0f */
1253129254Scognet	ldr	r6, [r1], #0x04		/* LD:20-23 */
1254129254Scognet	ldr	r7, [r1], #0x04		/* LD:24-27 */
1255129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x40 */
1256129254Scognet	strd	r8, [r3], #0x08		/* ST:10-17 */
1257129254Scognet	ldr	r8, [r1], #0x04		/* LD:28-2b */
1258129254Scognet	ldr	r9, [r1], #0x04		/* LD:2c-2f */
1259129254Scognet	strd	r4, [r3], #0x08		/* ST:18-1f */
1260129254Scognet	ldr	r4, [r1], #0x04		/* LD:30-33 */
1261129254Scognet	ldr	r5, [r1], #0x04		/* LD:34-37 */
1262129254Scognet	strd	r6, [r3], #0x08		/* ST:20-27 */
1263129254Scognet	ldr	r6, [r1], #0x04		/* LD:38-3b */
1264129254Scognet	ldr	r7, [r1], #0x04		/* LD:3c-3f */
1265129254Scognet	strd	r8, [r3], #0x08		/* ST:28-2f */
1266129254Scognet	ldr	r8, [r1], #0x04		/* LD:40-43 */
1267129254Scognet	ldr	r9, [r1], #0x04		/* LD:44-47 */
1268129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x60 */
1269129254Scognet	strd	r4, [r3], #0x08		/* ST:30-37 */
1270129254Scognet	ldr	r4, [r1], #0x04		/* LD:48-4b */
1271129254Scognet	ldr	r5, [r1], #0x04		/* LD:4c-4f */
1272129254Scognet	strd	r6, [r3], #0x08		/* ST:38-3f */
1273129254Scognet	ldr	r6, [r1], #0x04		/* LD:50-53 */
1274129254Scognet	ldr	r7, [r1], #0x04		/* LD:54-57 */
1275129254Scognet	strd	r8, [r3], #0x08		/* ST:40-47 */
1276129254Scognet	ldr	r8, [r1], #0x04		/* LD:58-5b */
1277129254Scognet	ldr	r9, [r1], #0x04		/* LD:5c-5f */
1278129254Scognet	strd	r4, [r3], #0x08		/* ST:48-4f */
1279129254Scognet	ldr	r4, [r1], #0x04		/* LD:60-63 */
1280129254Scognet	ldr	r5, [r1], #0x04		/* LD:64-67 */
1281129254Scognet	pld	[r1, #0x18]		/* Prefetch 0x80 */
1282129254Scognet	strd	r6, [r3], #0x08		/* ST:50-57 */
1283129254Scognet	ldr	r6, [r1], #0x04		/* LD:68-6b */
1284129254Scognet	ldr	r7, [r1], #0x04		/* LD:6c-6f */
1285129254Scognet	strd	r8, [r3], #0x08		/* ST:58-5f */
1286129254Scognet	ldr	r8, [r1], #0x04		/* LD:70-73 */
1287129254Scognet	ldr	r9, [r1], #0x04		/* LD:74-77 */
1288129254Scognet	strd	r4, [r3], #0x08		/* ST:60-67 */
1289129254Scognet	ldr	r4, [r1], #0x04		/* LD:78-7b */
1290129254Scognet	ldr	r5, [r1], #0x04		/* LD:7c-7f */
1291129254Scognet	strd	r6, [r3], #0x08		/* ST:68-6f */
1292129254Scognet	strd	r8, [r3], #0x08		/* ST:70-77 */
1293129254Scognet	subs	r2, r2, #0x80
1294129254Scognet	strd	r4, [r3], #0x08		/* ST:78-7f */
1295129254Scognet	bge	.Lmemcpy_w_loop128
1296129254Scognet
1297129254Scognet.Lmemcpy_w_lessthan128:
1298129254Scognet	adds	r2, r2, #0x80		/* Adjust for extra sub */
1299129254Scognet	ldmeqfd	sp!, {r4-r9}
1300137463Scognet	RETeq			/* Return now if done */
1301129254Scognet	subs	r2, r2, #0x20
1302129254Scognet	blt	.Lmemcpy_w_lessthan32
1303129254Scognet
1304129254Scognet	/* Copy 32 bytes at a time */
1305129254Scognet.Lmemcpy_w_loop32:
1306129254Scognet	ldr	r4, [r1], #0x04
1307129254Scognet	ldr	r5, [r1], #0x04
1308129254Scognet	pld	[r1, #0x18]
1309129254Scognet	ldr	r6, [r1], #0x04
1310129254Scognet	ldr	r7, [r1], #0x04
1311129254Scognet	ldr	r8, [r1], #0x04
1312129254Scognet	ldr	r9, [r1], #0x04
1313129254Scognet	strd	r4, [r3], #0x08
1314129254Scognet	ldr	r4, [r1], #0x04
1315129254Scognet	ldr	r5, [r1], #0x04
1316129254Scognet	strd	r6, [r3], #0x08
1317129254Scognet	strd	r8, [r3], #0x08
1318129254Scognet	subs	r2, r2, #0x20
1319129254Scognet	strd	r4, [r3], #0x08
1320129254Scognet	bge	.Lmemcpy_w_loop32
1321129254Scognet
1322129254Scognet.Lmemcpy_w_lessthan32:
1323129254Scognet	adds	r2, r2, #0x20		/* Adjust for extra sub */
1324129254Scognet	ldmeqfd	sp!, {r4-r9}
1325137463Scognet	RETeq			/* Return now if done */
1326129254Scognet
1327129254Scognet	and	r4, r2, #0x18
1328129254Scognet	rsbs	r4, r4, #0x18
1329129254Scognet	addne	pc, pc, r4, lsl #1
1330129254Scognet	nop
1331129254Scognet
1332129254Scognet	/* At least 24 bytes remaining */
1333129254Scognet	ldr	r4, [r1], #0x04
1334129254Scognet	ldr	r5, [r1], #0x04
1335129254Scognet	sub	r2, r2, #0x08
1336129254Scognet	strd	r4, [r3], #0x08
1337129254Scognet
1338129254Scognet	/* At least 16 bytes remaining */
1339129254Scognet	ldr	r4, [r1], #0x04
1340129254Scognet	ldr	r5, [r1], #0x04
1341129254Scognet	sub	r2, r2, #0x08
1342129254Scognet	strd	r4, [r3], #0x08
1343129254Scognet
1344129254Scognet	/* At least 8 bytes remaining */
1345129254Scognet	ldr	r4, [r1], #0x04
1346129254Scognet	ldr	r5, [r1], #0x04
1347129254Scognet	subs	r2, r2, #0x08
1348129254Scognet	strd	r4, [r3], #0x08
1349129254Scognet
1350129254Scognet	/* Less than 8 bytes remaining */
1351129254Scognet	ldmfd	sp!, {r4-r9}
1352137463Scognet	RETeq			/* Return now if done */
1353129254Scognet	subs	r2, r2, #0x04
1354129254Scognet	ldrge	ip, [r1], #0x04
1355129254Scognet	strge	ip, [r3], #0x04
1356137463Scognet	RETeq			/* Return now if done */
1357129254Scognet	addlt	r2, r2, #0x04
1358129254Scognet	ldrb	ip, [r1], #0x01
1359129254Scognet	cmp	r2, #0x02
1360129254Scognet	ldrgeb	r2, [r1], #0x01
1361129254Scognet	strb	ip, [r3], #0x01
1362129254Scognet	ldrgtb	ip, [r1]
1363129254Scognet	strgeb	r2, [r3], #0x01
1364129254Scognet	strgtb	ip, [r3]
1365137463Scognet	RET
1366129254Scognet
1367129254Scognet
1368129254Scognet/*
1369129254Scognet * At this point, it has not been possible to word align both buffers.
1370129254Scognet * The destination buffer is word aligned, but the source buffer is not.
1371129254Scognet */
1372129254Scognet.Lmemcpy_bad_align:
1373129254Scognet	stmfd	sp!, {r4-r7}
1374129254Scognet	bic	r1, r1, #0x03
1375129254Scognet	cmp	ip, #2
1376129254Scognet	ldr	ip, [r1], #0x04
1377129254Scognet	bgt	.Lmemcpy_bad3
1378129254Scognet	beq	.Lmemcpy_bad2
1379129254Scognet	b	.Lmemcpy_bad1
1380129254Scognet
1381129254Scognet.Lmemcpy_bad1_loop16:
1382129254Scognet#ifdef __ARMEB__
1383129254Scognet	mov	r4, ip, lsl #8
1384129254Scognet#else
1385129254Scognet	mov	r4, ip, lsr #8
1386129254Scognet#endif
1387129254Scognet	ldr	r5, [r1], #0x04
1388129254Scognet	pld	[r1, #0x018]
1389129254Scognet	ldr	r6, [r1], #0x04
1390129254Scognet	ldr	r7, [r1], #0x04
1391129254Scognet	ldr	ip, [r1], #0x04
1392129254Scognet#ifdef __ARMEB__
1393129254Scognet	orr	r4, r4, r5, lsr #24
1394129254Scognet	mov	r5, r5, lsl #8
1395129254Scognet	orr	r5, r5, r6, lsr #24
1396129254Scognet	mov	r6, r6, lsl #8
1397129254Scognet	orr	r6, r6, r7, lsr #24
1398129254Scognet	mov	r7, r7, lsl #8
1399129254Scognet	orr	r7, r7, ip, lsr #24
1400129254Scognet#else
1401129254Scognet	orr	r4, r4, r5, lsl #24
1402129254Scognet	mov	r5, r5, lsr #8
1403129254Scognet	orr	r5, r5, r6, lsl #24
1404129254Scognet	mov	r6, r6, lsr #8
1405129254Scognet	orr	r6, r6, r7, lsl #24
1406129254Scognet	mov	r7, r7, lsr #8
1407129254Scognet	orr	r7, r7, ip, lsl #24
1408129254Scognet#endif
1409129254Scognet	str	r4, [r3], #0x04
1410129254Scognet	str	r5, [r3], #0x04
1411129254Scognet	str	r6, [r3], #0x04
1412129254Scognet	str	r7, [r3], #0x04
1413129254Scognet.Lmemcpy_bad1:
1414236991Simp	subs	r2, r2, #0x10
1415129254Scognet	bge	.Lmemcpy_bad1_loop16
1416129254Scognet
1417236991Simp	adds	r2, r2, #0x10
1418129254Scognet	ldmeqfd	sp!, {r4-r7}
1419137463Scognet	RETeq			/* Return now if done */
1420129254Scognet	subs	r2, r2, #0x04
1421129254Scognet	sublt	r1, r1, #0x03
1422129254Scognet	blt	.Lmemcpy_bad_done
1423129254Scognet
1424129254Scognet.Lmemcpy_bad1_loop4:
1425129254Scognet#ifdef __ARMEB__
1426129254Scognet	mov	r4, ip, lsl #8
1427129254Scognet#else
1428129254Scognet	mov	r4, ip, lsr #8
1429129254Scognet#endif
1430129254Scognet	ldr	ip, [r1], #0x04
1431129254Scognet	subs	r2, r2, #0x04
1432129254Scognet#ifdef __ARMEB__
1433129254Scognet	orr	r4, r4, ip, lsr #24
1434129254Scognet#else
1435129254Scognet	orr	r4, r4, ip, lsl #24
1436129254Scognet#endif
1437129254Scognet	str	r4, [r3], #0x04
1438129254Scognet	bge	.Lmemcpy_bad1_loop4
1439129254Scognet	sub	r1, r1, #0x03
1440129254Scognet	b	.Lmemcpy_bad_done
1441129254Scognet
1442129254Scognet.Lmemcpy_bad2_loop16:
1443129254Scognet#ifdef __ARMEB__
1444129254Scognet	mov	r4, ip, lsl #16
1445129254Scognet#else
1446129254Scognet	mov	r4, ip, lsr #16
1447129254Scognet#endif
1448129254Scognet	ldr	r5, [r1], #0x04
1449129254Scognet	pld	[r1, #0x018]
1450129254Scognet	ldr	r6, [r1], #0x04
1451129254Scognet	ldr	r7, [r1], #0x04
1452129254Scognet	ldr	ip, [r1], #0x04
1453129254Scognet#ifdef __ARMEB__
1454129254Scognet	orr	r4, r4, r5, lsr #16
1455129254Scognet	mov	r5, r5, lsl #16
1456129254Scognet	orr	r5, r5, r6, lsr #16
1457129254Scognet	mov	r6, r6, lsl #16
1458129254Scognet	orr	r6, r6, r7, lsr #16
1459129254Scognet	mov	r7, r7, lsl #16
1460129254Scognet	orr	r7, r7, ip, lsr #16
1461129254Scognet#else
1462129254Scognet	orr	r4, r4, r5, lsl #16
1463129254Scognet	mov	r5, r5, lsr #16
1464129254Scognet	orr	r5, r5, r6, lsl #16
1465129254Scognet	mov	r6, r6, lsr #16
1466129254Scognet	orr	r6, r6, r7, lsl #16
1467129254Scognet	mov	r7, r7, lsr #16
1468129254Scognet	orr	r7, r7, ip, lsl #16
1469129254Scognet#endif
1470129254Scognet	str	r4, [r3], #0x04
1471129254Scognet	str	r5, [r3], #0x04
1472129254Scognet	str	r6, [r3], #0x04
1473129254Scognet	str	r7, [r3], #0x04
1474129254Scognet.Lmemcpy_bad2:
1475236991Simp	subs	r2, r2, #0x10
1476129254Scognet	bge	.Lmemcpy_bad2_loop16
1477129254Scognet
1478236991Simp	adds	r2, r2, #0x10
1479129254Scognet	ldmeqfd	sp!, {r4-r7}
1480137463Scognet	RETeq			/* Return now if done */
1481129254Scognet	subs	r2, r2, #0x04
1482129254Scognet	sublt	r1, r1, #0x02
1483129254Scognet	blt	.Lmemcpy_bad_done
1484129254Scognet
1485129254Scognet.Lmemcpy_bad2_loop4:
1486129254Scognet#ifdef __ARMEB__
1487129254Scognet	mov	r4, ip, lsl #16
1488129254Scognet#else
1489129254Scognet	mov	r4, ip, lsr #16
1490129254Scognet#endif
1491129254Scognet	ldr	ip, [r1], #0x04
1492129254Scognet	subs	r2, r2, #0x04
1493129254Scognet#ifdef __ARMEB__
1494129254Scognet	orr	r4, r4, ip, lsr #16
1495129254Scognet#else
1496129254Scognet	orr	r4, r4, ip, lsl #16
1497129254Scognet#endif
1498129254Scognet	str	r4, [r3], #0x04
1499129254Scognet	bge	.Lmemcpy_bad2_loop4
1500129254Scognet	sub	r1, r1, #0x02
1501129254Scognet	b	.Lmemcpy_bad_done
1502129254Scognet
1503129254Scognet.Lmemcpy_bad3_loop16:
1504129254Scognet#ifdef __ARMEB__
1505129254Scognet	mov	r4, ip, lsl #24
1506129254Scognet#else
1507129254Scognet	mov	r4, ip, lsr #24
1508129254Scognet#endif
1509129254Scognet	ldr	r5, [r1], #0x04
1510129254Scognet	pld	[r1, #0x018]
1511129254Scognet	ldr	r6, [r1], #0x04
1512129254Scognet	ldr	r7, [r1], #0x04
1513129254Scognet	ldr	ip, [r1], #0x04
1514129254Scognet#ifdef __ARMEB__
1515129254Scognet	orr	r4, r4, r5, lsr #8
1516129254Scognet	mov	r5, r5, lsl #24
1517129254Scognet	orr	r5, r5, r6, lsr #8
1518129254Scognet	mov	r6, r6, lsl #24
1519129254Scognet	orr	r6, r6, r7, lsr #8
1520129254Scognet	mov	r7, r7, lsl #24
1521129254Scognet	orr	r7, r7, ip, lsr #8
1522129254Scognet#else
1523129254Scognet	orr	r4, r4, r5, lsl #8
1524129254Scognet	mov	r5, r5, lsr #24
1525129254Scognet	orr	r5, r5, r6, lsl #8
1526129254Scognet	mov	r6, r6, lsr #24
1527129254Scognet	orr	r6, r6, r7, lsl #8
1528129254Scognet	mov	r7, r7, lsr #24
1529129254Scognet	orr	r7, r7, ip, lsl #8
1530129254Scognet#endif
1531129254Scognet	str	r4, [r3], #0x04
1532129254Scognet	str	r5, [r3], #0x04
1533129254Scognet	str	r6, [r3], #0x04
1534129254Scognet	str	r7, [r3], #0x04
1535129254Scognet.Lmemcpy_bad3:
1536236991Simp	subs	r2, r2, #0x10
1537129254Scognet	bge	.Lmemcpy_bad3_loop16
1538129254Scognet
1539236991Simp	adds	r2, r2, #0x10
1540129254Scognet	ldmeqfd	sp!, {r4-r7}
1541137463Scognet	RETeq			/* Return now if done */
1542129254Scognet	subs	r2, r2, #0x04
1543129254Scognet	sublt	r1, r1, #0x01
1544129254Scognet	blt	.Lmemcpy_bad_done
1545129254Scognet
1546129254Scognet.Lmemcpy_bad3_loop4:
1547129254Scognet#ifdef __ARMEB__
1548129254Scognet	mov	r4, ip, lsl #24
1549129254Scognet#else
1550129254Scognet	mov	r4, ip, lsr #24
1551129254Scognet#endif
1552129254Scognet	ldr	ip, [r1], #0x04
1553129254Scognet	subs	r2, r2, #0x04
1554129254Scognet#ifdef __ARMEB__
1555129254Scognet	orr	r4, r4, ip, lsr #8
1556129254Scognet#else
1557129254Scognet	orr	r4, r4, ip, lsl #8
1558129254Scognet#endif
1559129254Scognet	str	r4, [r3], #0x04
1560129254Scognet	bge	.Lmemcpy_bad3_loop4
1561129254Scognet	sub	r1, r1, #0x01
1562129254Scognet
1563129254Scognet.Lmemcpy_bad_done:
1564129254Scognet	ldmfd	sp!, {r4-r7}
1565129254Scognet	adds	r2, r2, #0x04
1566137463Scognet	RETeq
1567129254Scognet	ldrb	ip, [r1], #0x01
1568129254Scognet	cmp	r2, #0x02
1569129254Scognet	ldrgeb	r2, [r1], #0x01
1570129254Scognet	strb	ip, [r3], #0x01
1571129254Scognet	ldrgtb	ip, [r1]
1572129254Scognet	strgeb	r2, [r3], #0x01
1573129254Scognet	strgtb	ip, [r3]
1574137463Scognet	RET
1575129254Scognet
1576129254Scognet
1577129254Scognet/*
1578129254Scognet * Handle short copies (less than 16 bytes), possibly misaligned.
1579129254Scognet * Some of these are *very* common, thanks to the network stack,
1580129254Scognet * and so are handled specially.
1581129254Scognet */
1582129254Scognet.Lmemcpy_short:
1583129254Scognet	add	pc, pc, r2, lsl #2
1584129254Scognet	nop
1585137463Scognet	RET			/* 0x00 */
1586129254Scognet	b	.Lmemcpy_bytewise	/* 0x01 */
1587129254Scognet	b	.Lmemcpy_bytewise	/* 0x02 */
1588129254Scognet	b	.Lmemcpy_bytewise	/* 0x03 */
1589129254Scognet	b	.Lmemcpy_4		/* 0x04 */
1590129254Scognet	b	.Lmemcpy_bytewise	/* 0x05 */
1591129254Scognet	b	.Lmemcpy_6		/* 0x06 */
1592129254Scognet	b	.Lmemcpy_bytewise	/* 0x07 */
1593129254Scognet	b	.Lmemcpy_8		/* 0x08 */
1594129254Scognet	b	.Lmemcpy_bytewise	/* 0x09 */
1595129254Scognet	b	.Lmemcpy_bytewise	/* 0x0a */
1596129254Scognet	b	.Lmemcpy_bytewise	/* 0x0b */
1597129254Scognet	b	.Lmemcpy_c		/* 0x0c */
1598129254Scognet.Lmemcpy_bytewise:
1599129254Scognet	mov	r3, r0			/* We must not clobber r0 */
1600129254Scognet	ldrb	ip, [r1], #0x01
1601129254Scognet1:	subs	r2, r2, #0x01
1602129254Scognet	strb	ip, [r3], #0x01
1603129254Scognet	ldrneb	ip, [r1], #0x01
1604129254Scognet	bne	1b
1605137463Scognet	RET
1606129254Scognet
1607129254Scognet/******************************************************************************
1608129254Scognet * Special case for 4 byte copies
 *
 * r0 = dst, r1 = src.  r2, r3 (and r1 in some slots) are used as scratch;
 * r0 is never written.
 *
 * A 4-bit index, ((dst & 3) << 2) | (src & 3), selects one of 16 code
 * slots.  LMEMCPY_4_PAD aligns the label and the end of every slot to
 * (1 << LMEMCPY_4_LOG2) bytes, so slot N starts at
 * .Lmemcpy_4 + (N << LMEMCPY_4_LOG2).  Slot 0000 shares its space with the
 * dispatch code and is reached by falling through.  No slot may grow past
 * that size, or every later slot would move.
 *
 * In the per-line comments the digits 0-3 name the source bytes in memory
 * order, '.' is a zero byte and 'x' is a byte that is not part of the copy.
1609129254Scognet */
1610129254Scognet#define	LMEMCPY_4_LOG2	6	/* 64 bytes */
1611129254Scognet#define	LMEMCPY_4_PAD	.align LMEMCPY_4_LOG2
1612129254Scognet	LMEMCPY_4_PAD
1613129254Scognet.Lmemcpy_4:
1614129254Scognet	and	r2, r1, #0x03		/* r2 = src & 3 */
1615129254Scognet	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
1616129254Scognet	ands	r2, r2, #0x0f		/* r2 = slot index, Z set for slot 0 */
1617129254Scognet	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_4 (pc reads as here + 8) */
1618129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_4_LOG2	/* index != 0: jump to its slot */
1619129254Scognet
1620129254Scognet/*
1621129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
1622129254Scognet */
1623129254Scognet	ldr	r2, [r1]
1624129254Scognet	str	r2, [r0]
1625137463Scognet	RET
1626129254Scognet	LMEMCPY_4_PAD
1627129254Scognet
1628129254Scognet/*
1629129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned (src & 3 = 1)
 *
 * src - 1 and src + 3 are word aligned, so two aligned word loads cover
 * the four source bytes; shifts and an orr merge them into one word.
1630129254Scognet */
1631129254Scognet	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
1632129254Scognet	ldr	r2, [r1, #3]		/* BE:r2 = 3xxx  LE:r2 = xxx3 */
1633129254Scognet#ifdef __ARMEB__
1634129254Scognet	mov	r3, r3, lsl #8		/* r3 = 012. */
1635129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
1636129254Scognet#else
1637129254Scognet	mov	r3, r3, lsr #8		/* r3 = .210 */
1638129254Scognet	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
1639129254Scognet#endif
1640129254Scognet	str	r3, [r0]
1641137463Scognet	RET
1642129254Scognet	LMEMCPY_4_PAD
1643129254Scognet
1644129254Scognet/*
1645129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned (src & 3 = 2)
1646129254Scognet */
1647129254Scognet#ifdef __ARMEB__
1648129254Scognet	ldrh	r3, [r1]		/* r3 = ..01 */
1649129254Scognet	ldrh	r2, [r1, #0x02]		/* r2 = ..23 */
1650129254Scognet#else
1651129254Scognet	ldrh	r3, [r1, #0x02]		/* r3 = ..32 */
1652129254Scognet	ldrh	r2, [r1]		/* r2 = ..10 */
1653129254Scognet#endif
1654129254Scognet	orr	r3, r2, r3, lsl #16	/* BE:r3 = 0123  LE:r3 = 3210 */
1655129254Scognet	str	r3, [r0]
1656137463Scognet	RET
1657129254Scognet	LMEMCPY_4_PAD
1658129254Scognet
1659129254Scognet/*
1660129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned (src & 3 = 3)
 *
 * src - 3 and src + 1 are word aligned; same load-and-merge scheme as
 * slot 0001 with the shift amounts swapped.
1661129254Scognet */
1662129254Scognet	ldr	r3, [r1, #-3]		/* BE:r3 = xxx0  LE:r3 = 0xxx */
1663129254Scognet	ldr	r2, [r1, #1]		/* BE:r2 = 123x  LE:r2 = x321 */
1664129254Scognet#ifdef __ARMEB__
1665129254Scognet	mov	r3, r3, lsl #24		/* r3 = 0... */
1666129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
1667129254Scognet#else
1668129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...0 */
1669129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
1670129254Scognet#endif
1671129254Scognet	str	r3, [r0]
1672137463Scognet	RET
1673129254Scognet	LMEMCPY_4_PAD
1674129254Scognet
1675129254Scognet/*
1676129254Scognet * 0100: dst is 8-bit aligned (dst & 3 = 1), src is 32-bit aligned
 *
 * dst + 1 is halfword aligned, so the store is split byte/halfword/byte.
1677129254Scognet */
1678129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1679129254Scognet#ifdef __ARMEB__
1680129254Scognet	strb	r2, [r0, #0x03]		/* byte 3 */
1681129254Scognet	mov	r3, r2, lsr #8		/* r3 = .012 */
1682129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...0 */
1683129254Scognet	strb	r1, [r0]		/* byte 0 */
1684129254Scognet#else
1685129254Scognet	strb	r2, [r0]		/* byte 0 */
1686129254Scognet	mov	r3, r2, lsr #8		/* r3 = .321 */
1687129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...3 */
1688129254Scognet	strb	r1, [r0, #0x03]		/* byte 3 */
1689129254Scognet#endif
1690129254Scognet	strh	r3, [r0, #0x01]		/* bytes 1-2 (low halfword of r3) */
1691137463Scognet	RET
1692129254Scognet	LMEMCPY_4_PAD
1693129254Scognet
1694129254Scognet/*
1695129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned (dst & 3 = 1, src & 3 = 1)
 *
 * src + 1 and dst + 1 are both halfword aligned, so the copy is done as
 * byte, halfword, byte with no shifting.
1696129254Scognet */
1697129254Scognet	ldrb	r2, [r1]
1698129254Scognet	ldrh	r3, [r1, #0x01]
1699129254Scognet	ldrb	r1, [r1, #0x03]
1700129254Scognet	strb	r2, [r0]
1701129254Scognet	strh	r3, [r0, #0x01]
1702129254Scognet	strb	r1, [r0, #0x03]
1703137463Scognet	RET
1704129254Scognet	LMEMCPY_4_PAD
1705129254Scognet
1706129254Scognet/*
1707129254Scognet * 0110: dst is 8-bit aligned (dst & 3 = 1), src is 16-bit aligned (src & 3 = 2)
1708129254Scognet */
1709129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1710129254Scognet	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1711129254Scognet#ifdef __ARMEB__
1712129254Scognet	mov	r1, r2, lsr #8		/* r1 = ...0 */
1713129254Scognet	strb	r1, [r0]
1714129254Scognet	mov	r2, r2, lsl #8		/* r2 = .01. */
1715129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = .012 */
1716129254Scognet#else
1717129254Scognet	strb	r2, [r0]
1718129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
1719129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
1720129254Scognet	mov	r3, r3, lsr #8		/* r3 = ...3 */
1721129254Scognet#endif
1722129254Scognet	strh	r2, [r0, #0x01]		/* bytes 1-2 (low halfword of r2) */
1723129254Scognet	strb	r3, [r0, #0x03]		/* byte 3 (low byte of r3) */
1724137463Scognet	RET
1725129254Scognet	LMEMCPY_4_PAD
1726129254Scognet
1727129254Scognet/*
1728129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned (dst & 3 = 1, src & 3 = 3)
 *
 * src + 1 and dst + 1 are both halfword aligned: byte, halfword, byte.
1729129254Scognet */
1730129254Scognet	ldrb	r2, [r1]
1731129254Scognet	ldrh	r3, [r1, #0x01]
1732129254Scognet	ldrb	r1, [r1, #0x03]
1733129254Scognet	strb	r2, [r0]
1734129254Scognet	strh	r3, [r0, #0x01]
1735129254Scognet	strb	r1, [r0, #0x03]
1736137463Scognet	RET
1737129254Scognet	LMEMCPY_4_PAD
1738129254Scognet
1739129254Scognet/*
1740129254Scognet * 1000: dst is 16-bit aligned (dst & 3 = 2), src is 32-bit aligned
1741129254Scognet */
1742129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1743129254Scognet#ifdef __ARMEB__
1744129254Scognet	strh	r2, [r0, #0x02]		/* bytes 2-3 */
1745129254Scognet	mov	r3, r2, lsr #16		/* r3 = ..01 */
1746129254Scognet	strh	r3, [r0]		/* bytes 0-1 */
1747129254Scognet#else
1748129254Scognet	strh	r2, [r0]		/* bytes 0-1 */
1749129254Scognet	mov	r3, r2, lsr #16		/* r3 = ..32 */
1750129254Scognet	strh	r3, [r0, #0x02]		/* bytes 2-3 */
1751129254Scognet#endif
1752137463Scognet	RET
1753129254Scognet	LMEMCPY_4_PAD
1754129254Scognet
1755129254Scognet/*
1756129254Scognet * 1001: dst is 16-bit aligned (dst & 3 = 2), src is 8-bit aligned (src & 3 = 1)
1757129254Scognet */
1758129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
1759129254Scognet	ldr	r3, [r1, #3]		/* BE:r3 = 3xxx  LE:r3 = xxx3 */
1760129254Scognet	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
1761129254Scognet	strh	r1, [r0]		/* bytes 0-1 (low halfword of r1) */
1762129254Scognet#ifdef __ARMEB__
1763129254Scognet	mov	r2, r2, lsl #8		/* r2 = 012. */
1764129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
1765129254Scognet#else
1766129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...2 */
1767129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = xx32 */
1768129254Scognet#endif
1769129254Scognet	strh	r2, [r0, #0x02]		/* bytes 2-3 (low halfword of r2) */
1770137463Scognet	RET
1771129254Scognet	LMEMCPY_4_PAD
1772129254Scognet
1773129254Scognet/*
1774129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned (dst & 3 = 2, src & 3 = 2)
1775129254Scognet */
1776129254Scognet	ldrh	r2, [r1]
1777129254Scognet	ldrh	r3, [r1, #0x02]
1778129254Scognet	strh	r2, [r0]
1779129254Scognet	strh	r3, [r0, #0x02]
1780137463Scognet	RET
1781129254Scognet	LMEMCPY_4_PAD
1782129254Scognet
1783129254Scognet/*
1784129254Scognet * 1011: dst is 16-bit aligned (dst & 3 = 2), src is 8-bit aligned (src & 3 = 3)
1785129254Scognet */
1786129254Scognet	ldr	r3, [r1, #1]		/* BE:r3 = 123x  LE:r3 = x321 */
1787129254Scognet	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
1788129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .123  LE:r1 = .x32 */
1789129254Scognet	strh	r1, [r0, #0x02]		/* bytes 2-3 (low halfword of r1) */
1790129254Scognet#ifdef __ARMEB__
1791129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...1 */
1792129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = xx01 */
1793129254Scognet#else
1794129254Scognet	mov	r3, r3, lsl #8		/* r3 = 321. */
1795129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = 3210 */
1796129254Scognet#endif
1797129254Scognet	strh	r3, [r0]		/* bytes 0-1 (low halfword of r3) */
1798137463Scognet	RET
1799129254Scognet	LMEMCPY_4_PAD
1800129254Scognet
1801129254Scognet/*
1802129254Scognet * 1100: dst is 8-bit aligned (dst & 3 = 3), src is 32-bit aligned
 *
 * dst + 1 is word aligned, so the halfword store at dst + 1 is aligned.
1803129254Scognet */
1804129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
1805129254Scognet#ifdef __ARMEB__
1806129254Scognet	strb	r2, [r0, #0x03]		/* byte 3 */
1807129254Scognet	mov	r3, r2, lsr #8		/* r3 = .012 */
1808129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...0 */
1809129254Scognet	strh	r3, [r0, #0x01]		/* bytes 1-2 */
1810129254Scognet	strb	r1, [r0]		/* byte 0 */
1811129254Scognet#else
1812129254Scognet	strb	r2, [r0]		/* byte 0 */
1813129254Scognet	mov	r3, r2, lsr #8		/* r3 = .321 */
1814129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...3 */
1815129254Scognet	strh	r3, [r0, #0x01]		/* bytes 1-2 */
1816129254Scognet	strb	r1, [r0, #0x03]		/* byte 3 */
1817129254Scognet#endif
1818137463Scognet	RET
1819129254Scognet	LMEMCPY_4_PAD
1820129254Scognet
1821129254Scognet/*
1822129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned (dst & 3 = 3, src & 3 = 1)
 *
 * src + 1 and dst + 1 are both halfword aligned: byte, halfword, byte.
1823129254Scognet */
1824129254Scognet	ldrb	r2, [r1]
1825129254Scognet	ldrh	r3, [r1, #0x01]
1826129254Scognet	ldrb	r1, [r1, #0x03]
1827129254Scognet	strb	r2, [r0]
1828129254Scognet	strh	r3, [r0, #0x01]
1829129254Scognet	strb	r1, [r0, #0x03]
1830137463Scognet	RET
1831129254Scognet	LMEMCPY_4_PAD
1832129254Scognet
1833129254Scognet/*
1834129254Scognet * 1110: dst is 8-bit aligned (dst & 3 = 3), src is 16-bit aligned (src & 3 = 2)
1835129254Scognet */
1836129254Scognet#ifdef __ARMEB__
1837129254Scognet	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1838129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1839129254Scognet	strb	r3, [r0, #0x03]
1840129254Scognet	mov	r3, r3, lsr #8		/* r3 = ...2 */
1841129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = .012 */
1842129254Scognet	strh	r3, [r0, #0x01]
1843129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...0 */
1844129254Scognet	strb	r2, [r0]
1845129254Scognet#else
1846129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1847129254Scognet	ldrh	r3, [r1, #0x02]		/* BE:r3 = ..23  LE:r3 = ..32 */
1848129254Scognet	strb	r2, [r0]
1849129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
1850129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = .321 */
1851129254Scognet	strh	r2, [r0, #0x01]
1852129254Scognet	mov	r3, r3, lsr #8		/* r3 = ...3 */
1853129254Scognet	strb	r3, [r0, #0x03]
1854129254Scognet#endif
1855137463Scognet	RET
1856129254Scognet	LMEMCPY_4_PAD
1857129254Scognet
1858129254Scognet/*
1859129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned (dst & 3 = 3, src & 3 = 3)
 *
 * src + 1 and dst + 1 are both halfword aligned: byte, halfword, byte.
1860129254Scognet */
1861129254Scognet	ldrb	r2, [r1]
1862129254Scognet	ldrh	r3, [r1, #0x01]
1863129254Scognet	ldrb	r1, [r1, #0x03]
1864129254Scognet	strb	r2, [r0]
1865129254Scognet	strh	r3, [r0, #0x01]
1866129254Scognet	strb	r1, [r0, #0x03]
1867137463Scognet	RET
1868129254Scognet	LMEMCPY_4_PAD
1869129254Scognet
1870129254Scognet
1871129254Scognet/******************************************************************************
1872129254Scognet * Special case for 6 byte copies
 *
 * r0 = dst, r1 = src.  r2, r3 (and r1, ip in some slots) are used as
 * scratch; r0 is never written.
 *
 * A 4-bit index, ((dst & 3) << 2) | (src & 3), selects one of 16 code
 * slots.  LMEMCPY_6_PAD aligns the label and the end of every slot to
 * (1 << LMEMCPY_6_LOG2) bytes, so slot N starts at
 * .Lmemcpy_6 + (N << LMEMCPY_6_LOG2).  Slot 0000 shares its space with the
 * dispatch code and is reached by falling through.  No slot may grow past
 * that size, or every later slot would move.
 *
 * In the per-line comments the digits 0-5 name the source bytes in memory
 * order, '.' is a zero byte and 'x' is a byte that is not part of the copy.
1873129254Scognet */
1874129254Scognet#define	LMEMCPY_6_LOG2	6	/* 64 bytes */
1875129254Scognet#define	LMEMCPY_6_PAD	.align LMEMCPY_6_LOG2
1876129254Scognet	LMEMCPY_6_PAD
1877129254Scognet.Lmemcpy_6:
1878129254Scognet	and	r2, r1, #0x03		/* r2 = src & 3 */
1879129254Scognet	orr	r2, r2, r0, lsl #2	/* r2 |= dst << 2 */
1880129254Scognet	ands	r2, r2, #0x0f		/* r2 = slot index, Z set for slot 0 */
1881129254Scognet	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_6 (pc reads as here + 8) */
1882129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_6_LOG2	/* index != 0: jump to its slot */
1883129254Scognet
1884129254Scognet/*
1885129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
1886129254Scognet */
1887129254Scognet	ldr	r2, [r1]		/* bytes 0-3 */
1888129254Scognet	ldrh	r3, [r1, #0x04]		/* bytes 4-5 */
1889129254Scognet	str	r2, [r0]
1890129254Scognet	strh	r3, [r0, #0x04]
1891137463Scognet	RET
1892129254Scognet	LMEMCPY_6_PAD
1893129254Scognet
1894129254Scognet/*
1895129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned (src & 3 = 1)
1896129254Scognet */
1897129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
1898129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 345x  LE:r3 = x543 */
1899129254Scognet#ifdef __ARMEB__
1900129254Scognet	mov	r2, r2, lsl #8		/* r2 = 012. */
1901129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 0123 */
1902129254Scognet#else
1903129254Scognet	mov	r2, r2, lsr #8		/* r2 = .210 */
1904129254Scognet	orr	r2, r2, r3, lsl #24	/* r2 = 3210 */
1905129254Scognet#endif
1906129254Scognet	mov	r3, r3, lsr #8		/* BE:r3 = .345  LE:r3 = .x54 */
1907129254Scognet	str	r2, [r0]
1908129254Scognet	strh	r3, [r0, #0x04]		/* bytes 4-5 (low halfword of r3) */
1909137463Scognet	RET
1910129254Scognet	LMEMCPY_6_PAD
1911129254Scognet
1912129254Scognet/*
1913129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned (src & 3 = 2)
1914129254Scognet */
1915129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
1916129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1917129254Scognet#ifdef __ARMEB__
1918129254Scognet	mov	r1, r3, lsr #16		/* r1 = ..23 */
1919129254Scognet	orr	r1, r1, r2, lsl #16	/* r1 = 0123 */
1920129254Scognet	str	r1, [r0]
1921129254Scognet	strh	r3, [r0, #0x04]
1922129254Scognet#else
1923129254Scognet	mov	r1, r3, lsr #16		/* r1 = ..54 */
1924129254Scognet	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
1925129254Scognet	str	r2, [r0]
1926129254Scognet	strh	r1, [r0, #0x04]
1927129254Scognet#endif
1928137463Scognet	RET
1929129254Scognet	LMEMCPY_6_PAD
1930129254Scognet
1931129254Scognet/*
1932129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned (src & 3 = 3)
1933129254Scognet */
1934129254Scognet	ldr	r2, [r1, #-3]		/* BE:r2 = xxx0  LE:r2 = 0xxx */
1935129254Scognet	ldr	r3, [r1, #1]		/* BE:r3 = 1234  LE:r3 = 4321 */
1936129254Scognet	ldr	r1, [r1, #5]		/* BE:r1 = 5xxx  LE:r1 = xxx5 */
1937129254Scognet#ifdef __ARMEB__
1938129254Scognet	mov	r2, r2, lsl #24		/* r2 = 0... */
1939129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
1940129254Scognet	mov	r3, r3, lsl #8		/* r3 = 234. */
1941129254Scognet	orr	r1, r3, r1, lsr #24	/* r1 = 2345 */
1942129254Scognet#else
1943129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...0 */
1944129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
1945129254Scognet	mov	r1, r1, lsl #8		/* r1 = xx5. */
1946129254Scognet	orr	r1, r1, r3, lsr #24	/* r1 = xx54 */
1947129254Scognet#endif
1948129254Scognet	str	r2, [r0]
1949129254Scognet	strh	r1, [r0, #0x04]		/* bytes 4-5 (low halfword of r1) */
1950137463Scognet	RET
1951129254Scognet	LMEMCPY_6_PAD
1952129254Scognet
1953129254Scognet/*
1954129254Scognet * 0100: dst is 8-bit aligned (dst & 3 = 1), src is 32-bit aligned
1955129254Scognet */
1956129254Scognet	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
1957129254Scognet	ldrh	r2, [r1, #0x04]		/* BE:r2 = ..45  LE:r2 = ..54 */
1958129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
1959129254Scognet	strh	r1, [r0, #0x01]		/* bytes 1-2 (low halfword of r1) */
1960129254Scognet#ifdef __ARMEB__
1961129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...0 */
1962129254Scognet	strb	r1, [r0]
1963129254Scognet	mov	r3, r3, lsl #8		/* r3 = 123. */
1964129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 1234 */
1965129254Scognet#else
1966129254Scognet	strb	r3, [r0]
1967129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...3 */
1968129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = .543 */
1969129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...5 */
1970129254Scognet#endif
1971129254Scognet	strh	r3, [r0, #0x03]		/* bytes 3-4 (low halfword of r3) */
1972129254Scognet	strb	r2, [r0, #0x05]		/* byte 5 (low byte of r2) */
1973137463Scognet	RET
1974129254Scognet	LMEMCPY_6_PAD
1975129254Scognet
1976129254Scognet/*
1977129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned (dst & 3 = 1, src & 3 = 1)
 *
 * src + 1 and dst + 1 are halfword aligned, so the copy is done as
 * byte, halfword, halfword, byte with no shifting.
1978129254Scognet */
1979129254Scognet	ldrb	r2, [r1]
1980129254Scognet	ldrh	r3, [r1, #0x01]
1981129254Scognet	ldrh	ip, [r1, #0x03]
1982129254Scognet	ldrb	r1, [r1, #0x05]
1983129254Scognet	strb	r2, [r0]
1984129254Scognet	strh	r3, [r0, #0x01]
1985129254Scognet	strh	ip, [r0, #0x03]
1986129254Scognet	strb	r1, [r0, #0x05]
1987137463Scognet	RET
1988129254Scognet	LMEMCPY_6_PAD
1989129254Scognet
1990129254Scognet/*
1991129254Scognet * 0110: dst is 8-bit aligned (dst & 3 = 1), src is 16-bit aligned (src & 3 = 2)
1992129254Scognet */
1993129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
1994129254Scognet	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
1995129254Scognet#ifdef __ARMEB__
1996129254Scognet	mov	r3, r2, lsr #8		/* r3 = ...0 */
1997129254Scognet	strb	r3, [r0]
1998129254Scognet	strb	r1, [r0, #0x05]
1999129254Scognet	mov	r3, r1, lsr #8		/* r3 = .234 */
2000129254Scognet	strh	r3, [r0, #0x03]
2001129254Scognet	mov	r3, r2, lsl #8		/* r3 = .01. */
2002129254Scognet	orr	r3, r3, r1, lsr #24	/* r3 = .012 */
2003129254Scognet	strh	r3, [r0, #0x01]
2004129254Scognet#else
2005129254Scognet	strb	r2, [r0]
2006129254Scognet	mov	r3, r1, lsr #24		/* r3 = ...5 */
2007129254Scognet	strb	r3, [r0, #0x05]
2008129254Scognet	mov	r3, r1, lsr #8		/* r3 = .543 */
2009129254Scognet	strh	r3, [r0, #0x03]
2010129254Scognet	mov	r3, r2, lsr #8		/* r3 = ...1 */
2011129254Scognet	orr	r3, r3, r1, lsl #8	/* r3 = 4321 */
2012129254Scognet	strh	r3, [r0, #0x01]
2013129254Scognet#endif
2014137463Scognet	RET
2015129254Scognet	LMEMCPY_6_PAD
2016129254Scognet
2017129254Scognet/*
2018129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned (dst & 3 = 1, src & 3 = 3)
 *
 * src + 1 and dst + 1 are halfword aligned: byte, halfword, halfword, byte.
2019129254Scognet */
2020129254Scognet	ldrb	r2, [r1]
2021129254Scognet	ldrh	r3, [r1, #0x01]
2022129254Scognet	ldrh	ip, [r1, #0x03]
2023129254Scognet	ldrb	r1, [r1, #0x05]
2024129254Scognet	strb	r2, [r0]
2025129254Scognet	strh	r3, [r0, #0x01]
2026129254Scognet	strh	ip, [r0, #0x03]
2027129254Scognet	strb	r1, [r0, #0x05]
2028137463Scognet	RET
2029129254Scognet	LMEMCPY_6_PAD
2030129254Scognet
2031129254Scognet/*
2032129254Scognet * 1000: dst is 16-bit aligned (dst & 3 = 2), src is 32-bit aligned
 *
 * dst + 2 is word aligned, so the copy is stored as halfword then word.
2033129254Scognet */
2034129254Scognet#ifdef __ARMEB__
2035129254Scognet	ldr	r2, [r1]		/* r2 = 0123 */
2036129254Scognet	ldrh	r3, [r1, #0x04]		/* r3 = ..45 */
2037129254Scognet	mov	r1, r2, lsr #16		/* r1 = ..01 */
2038129254Scognet	orr	r3, r3, r2, lsl#16	/* r3 = 2345 */
2039129254Scognet	strh	r1, [r0]
2040129254Scognet	str	r3, [r0, #0x02]
2041129254Scognet#else
2042129254Scognet	ldrh	r2, [r1, #0x04]		/* r2 = ..54 */
2043129254Scognet	ldr	r3, [r1]		/* r3 = 3210 */
2044129254Scognet	mov	r2, r2, lsl #16		/* r2 = 54.. */
2045129254Scognet	orr	r2, r2, r3, lsr #16	/* r2 = 5432 */
2046129254Scognet	strh	r3, [r0]
2047129254Scognet	str	r2, [r0, #0x02]
2048129254Scognet#endif
2049137463Scognet	RET
2050129254Scognet	LMEMCPY_6_PAD
2051129254Scognet
2052129254Scognet/*
2053129254Scognet * 1001: dst is 16-bit aligned (dst & 3 = 2), src is 8-bit aligned (src & 3 = 1)
2054129254Scognet */
2055129254Scognet	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
2056129254Scognet	ldr	r2, [r1, #3]		/* BE:r2 = 345x  LE:r2 = x543 */
2057129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
2058129254Scognet#ifdef __ARMEB__
2059129254Scognet	mov	r2, r2, lsr #8		/* r2 = .345 */
2060129254Scognet	orr	r2, r2, r3, lsl #24	/* r2 = 2345 */
2061129254Scognet#else
2062129254Scognet	mov	r2, r2, lsl #8		/* r2 = 543. */
2063129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 5432 */
2064129254Scognet#endif
2065129254Scognet	strh	r1, [r0]		/* bytes 0-1 (low halfword of r1) */
2066129254Scognet	str	r2, [r0, #0x02]
2067137463Scognet	RET
2068129254Scognet	LMEMCPY_6_PAD
2069129254Scognet
2070129254Scognet/*
2071129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned (dst & 3 = 2, src & 3 = 2)
 *
 * src + 2 and dst + 2 are word aligned: halfword then word, no shifting.
2072129254Scognet */
2073129254Scognet	ldrh	r2, [r1]
2074129254Scognet	ldr	r3, [r1, #0x02]
2075129254Scognet	strh	r2, [r0]
2076129254Scognet	str	r3, [r0, #0x02]
2077137463Scognet	RET
2078129254Scognet	LMEMCPY_6_PAD
2079129254Scognet
2080129254Scognet/*
2081129254Scognet * 1011: dst is 16-bit aligned (dst & 3 = 2), src is 8-bit aligned (src & 3 = 3)
2082129254Scognet */
2083129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2084129254Scognet	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2085129254Scognet	ldrb	r1, [r1, #0x05]		/* r1 = ...5 */
2086129254Scognet#ifdef __ARMEB__
2087129254Scognet	mov	r3, r3, lsl #8		/* r3 = ..0. */
2088129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = ..01 */
2089129254Scognet	orr	r1, r1, r2, lsl #8	/* r1 = 2345 */
2090129254Scognet#else
2091129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
2092129254Scognet	mov	r1, r1, lsl #24		/* r1 = 5... */
2093129254Scognet	orr	r1, r1, r2, lsr #8	/* r1 = 5432 */
2094129254Scognet#endif
2095129254Scognet	strh	r3, [r0]		/* bytes 0-1 (low halfword of r3) */
2096129254Scognet	str	r1, [r0, #0x02]
2097137463Scognet	RET
2098129254Scognet	LMEMCPY_6_PAD
2099129254Scognet
2100129254Scognet/*
2101129254Scognet * 1100: dst is 8-bit aligned (dst & 3 = 3), src is 32-bit aligned
 *
 * dst + 1 is word aligned, so the copy is stored as byte, word, byte.
2102129254Scognet */
2103129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2104129254Scognet	ldrh	r1, [r1, #0x04]		/* BE:r1 = ..45  LE:r1 = ..54 */
2105129254Scognet#ifdef __ARMEB__
2106129254Scognet	mov	r3, r2, lsr #24		/* r3 = ...0 */
2107129254Scognet	strb	r3, [r0]
2108129254Scognet	mov	r2, r2, lsl #8		/* r2 = 123. */
2109129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
2110129254Scognet#else
2111129254Scognet	strb	r2, [r0]
2112129254Scognet	mov	r2, r2, lsr #8		/* r2 = .321 */
2113129254Scognet	orr	r2, r2, r1, lsl #24	/* r2 = 4321 */
2114129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...5 */
2115129254Scognet#endif
2116129254Scognet	str	r2, [r0, #0x01]
2117129254Scognet	strb	r1, [r0, #0x05]		/* byte 5 (low byte of r1) */
2118137463Scognet	RET
2119129254Scognet	LMEMCPY_6_PAD
2120129254Scognet
2121129254Scognet/*
2122129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned (dst & 3 = 3, src & 3 = 1)
 *
 * src + 1 and dst + 1 are halfword aligned: byte, halfword, halfword, byte.
2123129254Scognet */
2124129254Scognet	ldrb	r2, [r1]
2125129254Scognet	ldrh	r3, [r1, #0x01]
2126129254Scognet	ldrh	ip, [r1, #0x03]
2127129254Scognet	ldrb	r1, [r1, #0x05]
2128129254Scognet	strb	r2, [r0]
2129129254Scognet	strh	r3, [r0, #0x01]
2130129254Scognet	strh	ip, [r0, #0x03]
2131129254Scognet	strb	r1, [r0, #0x05]
2132137463Scognet	RET
2133129254Scognet	LMEMCPY_6_PAD
2134129254Scognet
2135129254Scognet/*
2136129254Scognet * 1110: dst is 8-bit aligned (dst & 3 = 3), src is 16-bit aligned (src & 3 = 2)
2137129254Scognet */
2138129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2139129254Scognet	ldr	r1, [r1, #0x02]		/* BE:r1 = 2345  LE:r1 = 5432 */
2140129254Scognet#ifdef __ARMEB__
2141129254Scognet	mov	r3, r2, lsr #8		/* r3 = ...0 */
2142129254Scognet	strb	r3, [r0]
2143129254Scognet	mov	r2, r2, lsl #24		/* r2 = 1... */
2144129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 1234 */
2145129254Scognet#else
2146129254Scognet	strb	r2, [r0]
2147129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
2148129254Scognet	orr	r2, r2, r1, lsl #8	/* r2 = 4321 */
2149129254Scognet	mov	r1, r1, lsr #24		/* r1 = ...5 */
2150129254Scognet#endif
2151129254Scognet	str	r2, [r0, #0x01]
2152129254Scognet	strb	r1, [r0, #0x05]		/* byte 5 (low byte of r1) */
2153137463Scognet	RET
2154129254Scognet	LMEMCPY_6_PAD
2155129254Scognet
2156129254Scognet/*
2157129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned (dst & 3 = 3, src & 3 = 3)
 *
 * src + 1 and dst + 1 are word aligned: byte, word, byte with no shifting.
2158129254Scognet */
2159129254Scognet	ldrb	r2, [r1]
2160129254Scognet	ldr	r3, [r1, #0x01]
2161129254Scognet	ldrb	r1, [r1, #0x05]
2162129254Scognet	strb	r2, [r0]
2163129254Scognet	str	r3, [r0, #0x01]
2164129254Scognet	strb	r1, [r0, #0x05]
2165137463Scognet	RET
2166129254Scognet	LMEMCPY_6_PAD
2167129254Scognet
2168129254Scognet
2169129254Scognet/******************************************************************************
2170129254Scognet * Special case for 8 byte copies
2171129254Scognet */
2172129254Scognet#define	LMEMCPY_8_LOG2	6	/* 64 bytes */
2173129254Scognet#define	LMEMCPY_8_PAD	.align LMEMCPY_8_LOG2
2174129254Scognet	LMEMCPY_8_PAD
2175129254Scognet.Lmemcpy_8:
2176129254Scognet	and	r2, r1, #0x03
2177129254Scognet	orr	r2, r2, r0, lsl #2
2178129254Scognet	ands	r2, r2, #0x0f
2179129254Scognet	sub	r3, pc, #0x14
2180129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_8_LOG2
2181129254Scognet
2182129254Scognet/*
2183129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
2184129254Scognet */
2185129254Scognet	ldr	r2, [r1]
2186129254Scognet	ldr	r3, [r1, #0x04]
2187129254Scognet	str	r2, [r0]
2188129254Scognet	str	r3, [r0, #0x04]
2189137463Scognet	RET
2190129254Scognet	LMEMCPY_8_PAD
2191129254Scognet
2192129254Scognet/*
2193129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned
2194129254Scognet */
2195129254Scognet	ldr	r3, [r1, #-1]		/* BE:r3 = x012  LE:r3 = 210x */
2196129254Scognet	ldr	r2, [r1, #0x03]		/* BE:r2 = 3456  LE:r2 = 6543 */
2197129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2198129254Scognet#ifdef __ARMEB__
2199129254Scognet	mov	r3, r3, lsl #8		/* r3 = 012. */
2200129254Scognet	orr	r3, r3, r2, lsr #24	/* r3 = 0123 */
2201129254Scognet	orr	r2, r1, r2, lsl #8	/* r2 = 4567 */
2202129254Scognet#else
2203129254Scognet	mov	r3, r3, lsr #8		/* r3 = .210 */
2204129254Scognet	orr	r3, r3, r2, lsl #24	/* r3 = 3210 */
2205129254Scognet	mov	r1, r1, lsl #24		/* r1 = 7... */
2206129254Scognet	orr	r2, r1, r2, lsr #8	/* r2 = 7654 */
2207129254Scognet#endif
2208129254Scognet	str	r3, [r0]
2209129254Scognet	str	r2, [r0, #0x04]
2210137463Scognet	RET
2211129254Scognet	LMEMCPY_8_PAD
2212129254Scognet
2213129254Scognet/*
2214129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned
2215129254Scognet */
2216129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2217129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2218129254Scognet	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2219129254Scognet#ifdef __ARMEB__
2220129254Scognet	mov	r2, r2, lsl #16		/* r2 = 01.. */
2221129254Scognet	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
2222129254Scognet	orr	r3, r1, r3, lsl #16	/* r3 = 4567 */
2223129254Scognet#else
2224129254Scognet	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
2225129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..54 */
2226129254Scognet	orr	r3, r3, r1, lsl #16	/* r3 = 7654 */
2227129254Scognet#endif
2228129254Scognet	str	r2, [r0]
2229129254Scognet	str	r3, [r0, #0x04]
2230137463Scognet	RET
2231129254Scognet	LMEMCPY_8_PAD
2232129254Scognet
2233129254Scognet/*
2234129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned
2235129254Scognet */
2236129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2237129254Scognet	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2238129254Scognet	ldr	r1, [r1, #0x05]		/* BE:r1 = 567x  LE:r1 = x765 */
2239129254Scognet#ifdef __ARMEB__
2240129254Scognet	mov	r3, r3, lsl #24		/* r3 = 0... */
2241129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 0123 */
2242129254Scognet	mov	r2, r2, lsl #24		/* r2 = 4... */
2243129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 4567 */
2244129254Scognet#else
2245129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 3210 */
2246129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...4 */
2247129254Scognet	orr	r2, r2, r1, lsl #8	/* r2 = 7654 */
2248129254Scognet#endif
2249129254Scognet	str	r3, [r0]
2250129254Scognet	str	r2, [r0, #0x04]
2251137463Scognet	RET
2252129254Scognet	LMEMCPY_8_PAD
2253129254Scognet
2254129254Scognet/*
2255129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned
2256129254Scognet */
2257129254Scognet	ldr	r3, [r1]		/* BE:r3 = 0123  LE:r3 = 3210 */
2258129254Scognet	ldr	r2, [r1, #0x04]		/* BE:r2 = 4567  LE:r2 = 7654 */
2259129254Scognet#ifdef __ARMEB__
2260129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...0 */
2261129254Scognet	strb	r1, [r0]
2262129254Scognet	mov	r1, r3, lsr #8		/* r1 = .012 */
2263129254Scognet	strb	r2, [r0, #0x07]
2264129254Scognet	mov	r3, r3, lsl #24		/* r3 = 3... */
2265129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 3456 */
2266129254Scognet#else
2267129254Scognet	strb	r3, [r0]
2268129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...7 */
2269129254Scognet	strb	r1, [r0, #0x07]
2270129254Scognet	mov	r1, r3, lsr #8		/* r1 = .321 */
2271129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...3 */
2272129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 6543 */
2273129254Scognet#endif
2274129254Scognet	strh	r1, [r0, #0x01]
2275129254Scognet	str	r3, [r0, #0x03]
2276137463Scognet	RET
2277129254Scognet	LMEMCPY_8_PAD
2278129254Scognet
2279129254Scognet/*
2280129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned
2281129254Scognet */
2282129254Scognet	ldrb	r2, [r1]
2283129254Scognet	ldrh	r3, [r1, #0x01]
2284129254Scognet	ldr	ip, [r1, #0x03]
2285129254Scognet	ldrb	r1, [r1, #0x07]
2286129254Scognet	strb	r2, [r0]
2287129254Scognet	strh	r3, [r0, #0x01]
2288129254Scognet	str	ip, [r0, #0x03]
2289129254Scognet	strb	r1, [r0, #0x07]
2290137463Scognet	RET
2291129254Scognet	LMEMCPY_8_PAD
2292129254Scognet
2293129254Scognet/*
2294129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned
2295129254Scognet */
2296129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2297129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2298129254Scognet	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2299129254Scognet#ifdef __ARMEB__
2300129254Scognet	mov	ip, r2, lsr #8		/* ip = ...0 */
2301129254Scognet	strb	ip, [r0]
2302129254Scognet	mov	ip, r2, lsl #8		/* ip = .01. */
2303129254Scognet	orr	ip, ip, r3, lsr #24	/* ip = .012 */
2304129254Scognet	strb	r1, [r0, #0x07]
2305129254Scognet	mov	r3, r3, lsl #8		/* r3 = 345. */
2306129254Scognet	orr	r3, r3, r1, lsr #8	/* r3 = 3456 */
2307129254Scognet#else
2308129254Scognet	strb	r2, [r0]		/* 0 */
2309129254Scognet	mov	ip, r1, lsr #8		/* ip = ...7 */
2310129254Scognet	strb	ip, [r0, #0x07]		/* 7 */
2311129254Scognet	mov	ip, r2, lsr #8		/* ip = ...1 */
2312129254Scognet	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
2313129254Scognet	mov	r3, r3, lsr #8		/* r3 = .543 */
2314129254Scognet	orr	r3, r3, r1, lsl #24	/* r3 = 6543 */
2315129254Scognet#endif
2316129254Scognet	strh	ip, [r0, #0x01]
2317129254Scognet	str	r3, [r0, #0x03]
2318137463Scognet	RET
2319129254Scognet	LMEMCPY_8_PAD
2320129254Scognet
2321129254Scognet/*
2322129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned
2323129254Scognet */
2324129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2325129254Scognet	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
2326129254Scognet	ldrh	r2, [r1, #0x05]		/* BE:r2 = ..56  LE:r2 = ..65 */
2327129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2328129254Scognet	strb	r3, [r0]
2329129254Scognet	mov	r3, ip, lsr #16		/* BE:r3 = ..12  LE:r3 = ..43 */
2330129254Scognet#ifdef __ARMEB__
2331129254Scognet	strh	r3, [r0, #0x01]
2332129254Scognet	orr	r2, r2, ip, lsl #16	/* r2 = 3456 */
2333129254Scognet#else
2334129254Scognet	strh	ip, [r0, #0x01]
2335129254Scognet	orr	r2, r3, r2, lsl #16	/* r2 = 6543 */
2336129254Scognet#endif
2337129254Scognet	str	r2, [r0, #0x03]
2338129254Scognet	strb	r1, [r0, #0x07]
2339137463Scognet	RET
2340129254Scognet	LMEMCPY_8_PAD
2341129254Scognet
2342129254Scognet/*
2343129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned
2344129254Scognet */
2345129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2346129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2347129254Scognet	mov	r1, r2, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2348129254Scognet#ifdef __ARMEB__
2349129254Scognet	strh	r1, [r0]
2350129254Scognet	mov	r1, r3, lsr #16		/* r1 = ..45 */
2351129254Scognet	orr	r2, r1 ,r2, lsl #16	/* r2 = 2345 */
2352129254Scognet#else
2353129254Scognet	strh	r2, [r0]
2354129254Scognet	orr	r2, r1, r3, lsl #16	/* r2 = 5432 */
2355129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..76 */
2356129254Scognet#endif
2357129254Scognet	str	r2, [r0, #0x02]
2358129254Scognet	strh	r3, [r0, #0x06]
2359137463Scognet	RET
2360129254Scognet	LMEMCPY_8_PAD
2361129254Scognet
2362129254Scognet/*
2363129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned
2364129254Scognet */
2365129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2366129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2367129254Scognet	ldrb	ip, [r1, #0x07]		/* ip = ...7 */
2368129254Scognet	mov	r1, r2, lsr #8		/* BE:r1 = .x01  LE:r1 = .210 */
2369129254Scognet	strh	r1, [r0]
2370129254Scognet#ifdef __ARMEB__
2371129254Scognet	mov	r1, r2, lsl #24		/* r1 = 2... */
2372129254Scognet	orr	r1, r1, r3, lsr #8	/* r1 = 2345 */
2373129254Scognet	orr	r3, ip, r3, lsl #8	/* r3 = 4567 */
2374129254Scognet#else
2375129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...2 */
2376129254Scognet	orr	r1, r1, r3, lsl #8	/* r1 = 5432 */
2377129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...6 */
2378129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = ..76 */
2379129254Scognet#endif
2380129254Scognet	str	r1, [r0, #0x02]
2381129254Scognet	strh	r3, [r0, #0x06]
2382137463Scognet	RET
2383129254Scognet	LMEMCPY_8_PAD
2384129254Scognet
2385129254Scognet/*
2386129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned
2387129254Scognet */
2388129254Scognet	ldrh	r2, [r1]
2389129254Scognet	ldr	ip, [r1, #0x02]
2390129254Scognet	ldrh	r3, [r1, #0x06]
2391129254Scognet	strh	r2, [r0]
2392129254Scognet	str	ip, [r0, #0x02]
2393129254Scognet	strh	r3, [r0, #0x06]
2394137463Scognet	RET
2395129254Scognet	LMEMCPY_8_PAD
2396129254Scognet
2397129254Scognet/*
2398129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned
2399129254Scognet */
2400129254Scognet	ldr	r3, [r1, #0x05]		/* BE:r3 = 567x  LE:r3 = x765 */
2401129254Scognet	ldr	r2, [r1, #0x01]		/* BE:r2 = 1234  LE:r2 = 4321 */
2402129254Scognet	ldrb	ip, [r1]		/* ip = ...0 */
2403129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .567  LE:r1 = .x76 */
2404129254Scognet	strh	r1, [r0, #0x06]
2405129254Scognet#ifdef __ARMEB__
2406129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...5 */
2407129254Scognet	orr	r3, r3, r2, lsl #8	/* r3 = 2345 */
2408129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...1 */
2409129254Scognet	orr	r2, r2, ip, lsl #8	/* r2 = ..01 */
2410129254Scognet#else
2411129254Scognet	mov	r3, r3, lsl #24		/* r3 = 5... */
2412129254Scognet	orr	r3, r3, r2, lsr #8	/* r3 = 5432 */
2413129254Scognet	orr	r2, ip, r2, lsl #8	/* r2 = 3210 */
2414129254Scognet#endif
2415129254Scognet	str	r3, [r0, #0x02]
2416129254Scognet	strh	r2, [r0]
2417137463Scognet	RET
2418129254Scognet	LMEMCPY_8_PAD
2419129254Scognet
2420129254Scognet/*
2421129254Scognet * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 *
 * With dst at byte 3 of a word, dst+1 is word aligned and dst+5 is
 * halfword aligned, so the output is written as byte + word + halfword +
 * byte.
2422129254Scognet */
2423129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2424129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2425129254Scognet	mov	r1, r3, lsr #8		/* BE:r1 = .456  LE:r1 = .765 */
2426129254Scognet	strh	r1, [r0, #0x05]		/* store bytes 5-6 (low halfword) */
2427129254Scognet#ifdef __ARMEB__
2428129254Scognet	strb	r3, [r0, #0x07]		/* store byte 7 */
2429129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...0 */
2430129254Scognet	strb	r1, [r0]		/* store byte 0 */
2431129254Scognet	mov	r2, r2, lsl #8		/* r2 = 123. */
2432129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = 1234 */
2433129254Scognet	str	r2, [r0, #0x01]		/* store bytes 1-4 */
2434129254Scognet#else
2435129254Scognet	strb	r2, [r0]		/* store byte 0 */
2436129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...7 */
2437129254Scognet	strb	r1, [r0, #0x07]		/* store byte 7 */
2438129254Scognet	mov	r2, r2, lsr #8		/* r2 = .321 */
2439129254Scognet	orr	r2, r2, r3, lsl #24	/* r2 = 4321 */
2440129254Scognet	str	r2, [r0, #0x01]		/* store bytes 1-4 */
2441129254Scognet#endif
2442137463Scognet	RET
2443129254Scognet	LMEMCPY_8_PAD
2444129254Scognet
2445129254Scognet/*
2446129254Scognet * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 *
 * The source is read as byte + halfword + word + byte (src+1 is halfword
 * aligned, src+3 word aligned) and the destination is written as byte +
 * word + halfword + byte (dst+1 is word aligned), so the middle six bytes
 * have to be regrouped between the two layouts.
2447129254Scognet */
2448129254Scognet	ldrb	r3, [r1]		/* r3 = ...0 */
2449129254Scognet	ldrh	r2, [r1, #0x01]		/* BE:r2 = ..12  LE:r2 = ..21 */
2450129254Scognet	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2451129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = ...7 */
2452129254Scognet	strb	r3, [r0]		/* store byte 0 */
2453129254Scognet	mov	r3, ip, lsr #16		/* BE:r3 = ..34  LE:r3 = ..65 */
2454129254Scognet#ifdef __ARMEB__
2455129254Scognet	strh	ip, [r0, #0x05]		/* store bytes 5-6 (low halfword) */
2456129254Scognet	orr	r2, r3, r2, lsl #16	/* r2 = 1234 */
2457129254Scognet#else
2458129254Scognet	strh	r3, [r0, #0x05]		/* store bytes 5-6 */
2459129254Scognet	orr	r2, r2, ip, lsl #16	/* r2 = 4321 */
2460129254Scognet#endif
2461129254Scognet	str	r2, [r0, #0x01]		/* store bytes 1-4 */
2462129254Scognet	strb	r1, [r0, #0x07]		/* store byte 7 */
2463137463Scognet	RET
2464129254Scognet	LMEMCPY_8_PAD
2465129254Scognet
2466129254Scognet/*
2467129254Scognet * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 *
 * The source is read as halfword + word + halfword; the destination is
 * written as byte + word + halfword + byte, so every unit is rebuilt from
 * two neighbouring loads.
2468129254Scognet */
2469129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2470129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2471129254Scognet	ldrh	r1, [r1, #0x06]		/* BE:r1 = ..67  LE:r1 = ..76 */
2472129254Scognet#ifdef __ARMEB__
2473129254Scognet	mov	ip, r2, lsr #8		/* ip = ...0 */
2474129254Scognet	strb	ip, [r0]		/* store byte 0 */
2475129254Scognet	mov	ip, r2, lsl #24		/* ip = 1... */
2476129254Scognet	orr	ip, ip, r3, lsr #8	/* ip = 1234 */
2477129254Scognet	strb	r1, [r0, #0x07]		/* store byte 7 */
2478129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...6 */
2479129254Scognet	orr	r1, r1, r3, lsl #8	/* r1 = 3456 */
2480129254Scognet#else
2481129254Scognet	strb	r2, [r0]		/* store byte 0 */
2482129254Scognet	mov	ip, r2, lsr #8		/* ip = ...1 */
2483129254Scognet	orr	ip, ip, r3, lsl #8	/* ip = 4321 */
2484129254Scognet	mov	r2, r1, lsr #8		/* r2 = ...7 */
2485129254Scognet	strb	r2, [r0, #0x07]		/* store byte 7 */
2486129254Scognet	mov	r1, r1, lsl #8		/* r1 = .76. */
2487129254Scognet	orr	r1, r1, r3, lsr #24	/* r1 = .765 */
2488129254Scognet#endif
2489129254Scognet	str	ip, [r0, #0x01]		/* store bytes 1-4 */
2490129254Scognet	strh	r1, [r0, #0x05]		/* store bytes 5-6 (low halfword) */
2491137463Scognet	RET
2492129254Scognet	LMEMCPY_8_PAD
2493129254Scognet
2494129254Scognet/*
2495129254Scognet * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 *
 * Both pointers have the same offset within a word, so the 8 bytes are
 * moved as byte + word + halfword + byte with no shifting or merging.
2496129254Scognet */
2497129254Scognet	ldrb	r2, [r1]		/* r2 = byte 0 */
2498129254Scognet	ldr	ip, [r1, #0x01]		/* ip = bytes 1-4 */
2499129254Scognet	ldrh	r3, [r1, #0x05]		/* r3 = bytes 5-6 */
2500129254Scognet	ldrb	r1, [r1, #0x07]		/* r1 = byte 7 (src pointer is dead) */
2501129254Scognet	strb	r2, [r0]		/* store byte 0 */
2502129254Scognet	str	ip, [r0, #0x01]		/* store bytes 1-4 */
2503129254Scognet	strh	r3, [r0, #0x05]		/* store bytes 5-6 */
2504129254Scognet	strb	r1, [r0, #0x07]		/* store byte 7 */
2505137463Scognet	RET
2506129254Scognet	LMEMCPY_8_PAD
2507129254Scognet
2508129254Scognet/******************************************************************************
2509129254Scognet * Special case for 12 byte copies
 *
 * On entry r0 = dst and r1 = src.  The low two bits of each pointer are
 * combined into a 4-bit index, (dst & 3) << 2 | (src & 3), that selects one
 * of sixteen handlers placed at 128-byte (1 << LMEMCPY_C_LOG2) intervals
 * starting at .Lmemcpy_c; LMEMCPY_C_PAD after each handler pads out to the
 * next slot, so every handler has to fit in 128 bytes.  Index 0 (both
 * pointers word aligned) is not dispatched: it falls through into the
 * "0000" handler, which shares the first slot with the dispatch code.
 *
 * In the per-case comments the twelve source bytes are named 0-B and a
 * register's contents are written most significant byte first.
2510129254Scognet */
2511129254Scognet#define	LMEMCPY_C_LOG2	7	/* 128 bytes */
2512129254Scognet#define	LMEMCPY_C_PAD	.align LMEMCPY_C_LOG2
2513129254Scognet	LMEMCPY_C_PAD
2514129254Scognet.Lmemcpy_c:
2515129254Scognet	and	r2, r1, #0x03		/* r2 = src & 3 */
2516129254Scognet	orr	r2, r2, r0, lsl #2	/* merge in dst << 2 */
2517129254Scognet	ands	r2, r2, #0x0f		/* r2 = table index; Z set if 0 */
2518129254Scognet	sub	r3, pc, #0x14		/* r3 = .Lmemcpy_c (pc = this insn + 8) */
2519129254Scognet	addne	pc, r3, r2, lsl #LMEMCPY_C_LOG2	/* index != 0: jump to its slot */
2520129254Scognet
2521129254Scognet/*
2522129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned
 *
 * Reached by falling through the dispatch above; a plain three-word copy.
2523129254Scognet */
2524129254Scognet	ldr	r2, [r1]		/* r2 = bytes 0-3 */
2525129254Scognet	ldr	r3, [r1, #0x04]		/* r3 = bytes 4-7 */
2526129254Scognet	ldr	r1, [r1, #0x08]		/* r1 = bytes 8-B (src pointer is dead) */
2527129254Scognet	str	r2, [r0]		/* store bytes 0-3 */
2528129254Scognet	str	r3, [r0, #0x04]		/* store bytes 4-7 */
2529129254Scognet	str	r1, [r0, #0x08]		/* store bytes 8-B */
2530137463Scognet	RET
2531129254Scognet	LMEMCPY_C_PAD
2532129254Scognet
2533129254Scognet/*
2534129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1)
 *
 * The word loads at src-1, src+3 and src+7 are the aligned words that
 * overlap the source.  The first one also brings in the byte just below
 * src (the 'x'), which the shifts discard.  The output is three aligned
 * words, each merged from two neighbouring loads.
2535129254Scognet */
2536129254Scognet	ldrb	r2, [r1, #0xb]		/* r2 = ...B */
2537129254Scognet	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2538129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2539129254Scognet	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2540129254Scognet#ifdef __ARMEB__
2541129254Scognet	orr	r2, r2, ip, lsl #8	/* r2 = 89AB */
2542129254Scognet	str	r2, [r0, #0x08]		/* store bytes 8-B */
2543129254Scognet	mov	r2, ip, lsr #24		/* r2 = ...7 */
2544129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 4567 */
2545129254Scognet	mov	r1, r1, lsl #8		/* r1 = 012. */
2546129254Scognet	orr	r1, r1, r3, lsr #24	/* r1 = 0123 */
2547129254Scognet#else
2548129254Scognet	mov	r2, r2, lsl #24		/* r2 = B... */
2549129254Scognet	orr	r2, r2, ip, lsr #8	/* r2 = BA98 */
2550129254Scognet	str	r2, [r0, #0x08]		/* store bytes 8-B */
2551129254Scognet	mov	r2, ip, lsl #24		/* r2 = 7... */
2552129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 7654 */
2553129254Scognet	mov	r1, r1, lsr #8		/* r1 = .210 */
2554129254Scognet	orr	r1, r1, r3, lsl #24	/* r1 = 3210 */
2555129254Scognet#endif
2556129254Scognet	str	r2, [r0, #0x04]		/* store bytes 4-7 */
2557129254Scognet	str	r1, [r0]		/* store bytes 0-3 */
2558137463Scognet	RET
2559129254Scognet	LMEMCPY_C_PAD
2560129254Scognet
2561129254Scognet/*
2562129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned
 *
 * The source is read as halfword + word + word + halfword; each of the
 * three output words is merged from two neighbouring loads with 16-bit
 * shifts.
2563129254Scognet */
2564129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2565129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2566129254Scognet	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2567129254Scognet	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2568129254Scognet#ifdef __ARMEB__
2569129254Scognet	mov	r2, r2, lsl #16		/* r2 = 01.. */
2570129254Scognet	orr	r2, r2, r3, lsr #16	/* r2 = 0123 */
2571129254Scognet	str	r2, [r0]		/* store bytes 0-3 */
2572129254Scognet	mov	r3, r3, lsl #16		/* r3 = 45.. */
2573129254Scognet	orr	r3, r3, ip, lsr #16	/* r3 = 4567 */
2574129254Scognet	orr	r1, r1, ip, lsl #16	/* r1 = 89AB */
2575129254Scognet#else
2576129254Scognet	orr	r2, r2, r3, lsl #16	/* r2 = 3210 */
2577129254Scognet	str	r2, [r0]		/* store bytes 0-3 */
2578129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..54 */
2579129254Scognet	orr	r3, r3, ip, lsl #16	/* r3 = 7654 */
2580129254Scognet	mov	r1, r1, lsl #16		/* r1 = BA.. */
2581129254Scognet	orr	r1, r1, ip, lsr #16	/* r1 = BA98 */
2582129254Scognet#endif
2583129254Scognet	str	r3, [r0, #0x04]		/* store bytes 4-7 */
2584129254Scognet	str	r1, [r0, #0x08]		/* store bytes 8-B */
2585137463Scognet	RET
2586129254Scognet	LMEMCPY_C_PAD
2587129254Scognet
2588129254Scognet/*
2589129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3)
 *
 * The word loads at src+1, src+5 and src+9 are the aligned words that
 * follow the first source byte.  The last one also brings in the byte
 * just past the 12 being copied (the 'x'), which the shifts discard.
2590129254Scognet */
2591129254Scognet	ldrb	r2, [r1]		/* r2 = ...0 */
2592129254Scognet	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2593129254Scognet	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2594129254Scognet	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2595129254Scognet#ifdef __ARMEB__
2596129254Scognet	mov	r2, r2, lsl #24		/* r2 = 0... */
2597129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 0123 */
2598129254Scognet	str	r2, [r0]		/* store bytes 0-3 */
2599129254Scognet	mov	r3, r3, lsl #24		/* r3 = 4... */
2600129254Scognet	orr	r3, r3, ip, lsr #8	/* r3 = 4567 */
2601129254Scognet	mov	r1, r1, lsr #8		/* r1 = .9AB */
2602129254Scognet	orr	r1, r1, ip, lsl #24	/* r1 = 89AB */
2603129254Scognet#else
2604129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 3210 */
2605129254Scognet	str	r2, [r0]		/* store bytes 0-3 */
2606129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...4 */
2607129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 7654 */
2608129254Scognet	mov	r1, r1, lsl #8		/* r1 = BA9. */
2609129254Scognet	orr	r1, r1, ip, lsr #24	/* r1 = BA98 */
2610129254Scognet#endif
2611129254Scognet	str	r3, [r0, #0x04]		/* store bytes 4-7 */
2612129254Scognet	str	r1, [r0, #0x08]		/* store bytes 8-B */
2613137463Scognet	RET
2614129254Scognet	LMEMCPY_C_PAD
2615129254Scognet
2616129254Scognet/*
2617129254Scognet * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
 *
 * With dst at byte 1 of a word, dst+1 is halfword aligned and dst+3 and
 * dst+7 are word aligned, so the output is written as byte + halfword +
 * word + word + byte.
2618129254Scognet */
2619129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2620129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2621129254Scognet	ldr	ip, [r1, #0x08]		/* BE:ip = 89AB  LE:ip = BA98 */
2622129254Scognet	mov	r1, r2, lsr #8		/* BE:r1 = .012  LE:r1 = .321 */
2623129254Scognet	strh	r1, [r0, #0x01]		/* store bytes 1-2 (low halfword) */
2624129254Scognet#ifdef __ARMEB__
2625129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...0 */
2626129254Scognet	strb	r1, [r0]		/* store byte 0 */
2627129254Scognet	mov	r1, r2, lsl #24		/* r1 = 3... */
2628129254Scognet	orr	r2, r1, r3, lsr #8	/* r2 = 3456 */
2629129254Scognet	mov	r1, r3, lsl #24		/* r1 = 7... */
2630129254Scognet	orr	r1, r1, ip, lsr #8	/* r1 = 789A */
2631129254Scognet#else
2632129254Scognet	strb	r2, [r0]		/* store byte 0 */
2633129254Scognet	mov	r1, r2, lsr #24		/* r1 = ...3 */
2634129254Scognet	orr	r2, r1, r3, lsl #8	/* r2 = 6543 */
2635129254Scognet	mov	r1, r3, lsr #24		/* r1 = ...7 */
2636129254Scognet	orr	r1, r1, ip, lsl #8	/* r1 = A987 */
2637129254Scognet	mov	ip, ip, lsr #24		/* ip = ...B */
2638129254Scognet#endif
2639129254Scognet	str	r2, [r0, #0x03]		/* store bytes 3-6 */
2640129254Scognet	str	r1, [r0, #0x07]		/* store bytes 7-A */
2641129254Scognet	strb	ip, [r0, #0x0b]		/* store byte B (low byte of ip) */
2642137463Scognet	RET
2643129254Scognet	LMEMCPY_C_PAD
2644129254Scognet
2645129254Scognet/*
2646129254Scognet * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
 *
 * Both pointers have the same offset within a word, so the 12 bytes are
 * moved as byte + halfword + word + word + byte with no shifting.  r2 is
 * reused for the second word once byte 0 has been stored.
2647129254Scognet */
2648129254Scognet	ldrb	r2, [r1]		/* r2 = byte 0 */
2649129254Scognet	ldrh	r3, [r1, #0x01]		/* r3 = bytes 1-2 */
2650129254Scognet	ldr	ip, [r1, #0x03]		/* ip = bytes 3-6 */
2651129254Scognet	strb	r2, [r0]		/* store byte 0; frees r2 */
2652129254Scognet	ldr	r2, [r1, #0x07]		/* r2 = bytes 7-A */
2653129254Scognet	ldrb	r1, [r1, #0x0b]		/* r1 = byte B (src pointer is dead) */
2654129254Scognet	strh	r3, [r0, #0x01]		/* store bytes 1-2 */
2655129254Scognet	str	ip, [r0, #0x03]		/* store bytes 3-6 */
2656129254Scognet	str	r2, [r0, #0x07]		/* store bytes 7-A */
2657129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B */
2658137463Scognet	RET
2659129254Scognet	LMEMCPY_C_PAD
2660129254Scognet
2661129254Scognet/*
2662129254Scognet * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
 *
 * The source is read as halfword + word + word + halfword and the
 * destination is written as byte + halfword + word + word + byte, so each
 * stored unit is merged from two neighbouring loads with 8/24-bit shifts.
2663129254Scognet */
2664129254Scognet	ldrh	r2, [r1]		/* BE:r2 = ..01  LE:r2 = ..10 */
2665129254Scognet	ldr	r3, [r1, #0x02]		/* BE:r3 = 2345  LE:r3 = 5432 */
2666129254Scognet	ldr	ip, [r1, #0x06]		/* BE:ip = 6789  LE:ip = 9876 */
2667129254Scognet	ldrh	r1, [r1, #0x0a]		/* BE:r1 = ..AB  LE:r1 = ..BA */
2668129254Scognet#ifdef __ARMEB__
2669129254Scognet	mov	r2, r2, ror #8		/* r2 = 1..0 */
2670129254Scognet	strb	r2, [r0]		/* store byte 0 */
2671129254Scognet	mov	r2, r2, lsr #16		/* r2 = ..1. */
2672129254Scognet	orr	r2, r2, r3, lsr #24	/* r2 = ..12 */
2673129254Scognet	strh	r2, [r0, #0x01]		/* store bytes 1-2 */
2674129254Scognet	mov	r2, r3, lsl #8		/* r2 = 345. */
2675129254Scognet	orr	r3, r2, ip, lsr #24	/* r3 = 3456 */
2676129254Scognet	mov	r2, ip, lsl #8		/* r2 = 789. */
2677129254Scognet	orr	r2, r2, r1, lsr #8	/* r2 = 789A */
2678129254Scognet#else
2679129254Scognet	strb	r2, [r0]		/* store byte 0 */
2680129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
2681129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2682129254Scognet	strh	r2, [r0, #0x01]		/* store bytes 1-2 (low halfword) */
2683129254Scognet	mov	r2, r3, lsr #8		/* r2 = .543 */
2684129254Scognet	orr	r3, r2, ip, lsl #24	/* r3 = 6543 */
2685129254Scognet	mov	r2, ip, lsr #8		/* r2 = .987 */
2686129254Scognet	orr	r2, r2, r1, lsl #24	/* r2 = A987 */
2687129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...B */
2688129254Scognet#endif
2689129254Scognet	str	r3, [r0, #0x03]		/* store bytes 3-6 */
2690129254Scognet	str	r2, [r0, #0x07]		/* store bytes 7-A */
2691129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B (low byte of r1) */
2692137463Scognet	RET
2693129254Scognet	LMEMCPY_C_PAD
2694129254Scognet
2695129254Scognet/*
2696129254Scognet * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
 *
 * The source is read as byte + three aligned words (src+1, src+5, src+9);
 * the last word includes one byte past the 12 being copied (the 'x'),
 * which is never stored.  The destination is written as byte + halfword +
 * word + word + byte, merged with 16-bit shifts.
2697129254Scognet */
2698129254Scognet	ldrb	r2, [r1]		/* r2 = ...0 */
2699129254Scognet	ldr	r3, [r1, #0x01]		/* BE:r3 = 1234  LE:r3 = 4321 */
2700129254Scognet	ldr	ip, [r1, #0x05]		/* BE:ip = 5678  LE:ip = 8765 */
2701129254Scognet	ldr	r1, [r1, #0x09]		/* BE:r1 = 9ABx  LE:r1 = xBA9 */
2702129254Scognet	strb	r2, [r0]		/* store byte 0 */
2703129254Scognet#ifdef __ARMEB__
2704129254Scognet	mov	r2, r3, lsr #16		/* r2 = ..12 */
2705129254Scognet	strh	r2, [r0, #0x01]		/* store bytes 1-2 */
2706129254Scognet	mov	r3, r3, lsl #16		/* r3 = 34.. */
2707129254Scognet	orr	r3, r3, ip, lsr #16	/* r3 = 3456 */
2708129254Scognet	mov	ip, ip, lsl #16		/* ip = 78.. */
2709129254Scognet	orr	ip, ip, r1, lsr #16	/* ip = 789A */
2710129254Scognet	mov	r1, r1, lsr #8		/* r1 = .9AB */
2711129254Scognet#else
2712129254Scognet	strh	r3, [r0, #0x01]		/* store bytes 1-2 (low halfword) */
2713129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..43 */
2714129254Scognet	orr	r3, r3, ip, lsl #16	/* r3 = 6543 */
2715129254Scognet	mov	ip, ip, lsr #16		/* ip = ..87 */
2716129254Scognet	orr	ip, ip, r1, lsl #16	/* ip = A987 */
2717129254Scognet	mov	r1, r1, lsr #16		/* r1 = ..xB */
2718129254Scognet#endif
2719129254Scognet	str	r3, [r0, #0x03]		/* store bytes 3-6 */
2720129254Scognet	str	ip, [r0, #0x07]		/* store bytes 7-A */
2721129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B (low byte of r1) */
2722137463Scognet	RET
2723129254Scognet	LMEMCPY_C_PAD
2724129254Scognet
2725129254Scognet/*
2726129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned
 *
 * The source is read as three words; the destination is written as
 * halfword + word + word + halfword, merged with 16-bit shifts.
2727129254Scognet */
2728129254Scognet	ldr	ip, [r1]		/* BE:ip = 0123  LE:ip = 3210 */
2729129254Scognet	ldr	r3, [r1, #0x04]		/* BE:r3 = 4567  LE:r3 = 7654 */
2730129254Scognet	ldr	r2, [r1, #0x08]		/* BE:r2 = 89AB  LE:r2 = BA98 */
2731129254Scognet	mov	r1, ip, lsr #16		/* BE:r1 = ..01  LE:r1 = ..32 */
2732129254Scognet#ifdef __ARMEB__
2733129254Scognet	strh	r1, [r0]		/* store bytes 0-1 */
2734129254Scognet	mov	r1, ip, lsl #16		/* r1 = 23.. */
2735129254Scognet	orr	r1, r1, r3, lsr #16	/* r1 = 2345 */
2736129254Scognet	mov	r3, r3, lsl #16		/* r3 = 67.. */
2737129254Scognet	orr	r3, r3, r2, lsr #16	/* r3 = 6789 */
2738129254Scognet#else
2739129254Scognet	strh	ip, [r0]		/* store bytes 0-1 (low halfword) */
2740129254Scognet	orr	r1, r1, r3, lsl #16	/* r1 = 5432 */
2741129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..76 */
2742129254Scognet	orr	r3, r3, r2, lsl #16	/* r3 = 9876 */
2743129254Scognet	mov	r2, r2, lsr #16		/* r2 = ..BA */
2744129254Scognet#endif
2745129254Scognet	str	r1, [r0, #0x02]		/* store bytes 2-5 */
2746129254Scognet	str	r3, [r0, #0x06]		/* store bytes 6-9 */
2747129254Scognet	strh	r2, [r0, #0x0a]		/* store bytes A-B (low halfword) */
2748137463Scognet	RET
2749129254Scognet	LMEMCPY_C_PAD
2750129254Scognet
2751129254Scognet/*
2752129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
 *
 * The source is read as the aligned words at src-1, src+3 and src+7 plus
 * a final byte.  The first word includes the byte just below src (the
 * 'x'); it ends up above the halfword that is stored or is shifted out.
 * ip is used first as scratch for bytes 0-1, then reloaded.
2753129254Scognet */
2754129254Scognet	ldr	r2, [r1, #-1]		/* BE:r2 = x012  LE:r2 = 210x */
2755129254Scognet	ldr	r3, [r1, #0x03]		/* BE:r3 = 3456  LE:r3 = 6543 */
2756129254Scognet	mov	ip, r2, lsr #8		/* BE:ip = .x01  LE:ip = .210 */
2757129254Scognet	strh	ip, [r0]		/* store bytes 0-1 (low halfword) */
2758129254Scognet	ldr	ip, [r1, #0x07]		/* BE:ip = 789A  LE:ip = A987 */
2759129254Scognet	ldrb	r1, [r1, #0x0b]		/* r1 = ...B */
2760129254Scognet#ifdef __ARMEB__
2761129254Scognet	mov	r2, r2, lsl #24		/* r2 = 2... */
2762129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 2345 */
2763129254Scognet	mov	r3, r3, lsl #24		/* r3 = 6... */
2764129254Scognet	orr	r3, r3, ip, lsr #8	/* r3 = 6789 */
2765129254Scognet	orr	r1, r1, ip, lsl #8	/* r1 = 89AB */
2766129254Scognet#else
2767129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...2 */
2768129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 5432 */
2769129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...6 */
2770129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 9876 */
2771129254Scognet	mov	r1, r1, lsl #8		/* r1 = ..B. */
2772129254Scognet	orr	r1, r1, ip, lsr #24	/* r1 = ..BA */
2773129254Scognet#endif
2774129254Scognet	str	r2, [r0, #0x02]		/* store bytes 2-5 */
2775129254Scognet	str	r3, [r0, #0x06]		/* store bytes 6-9 */
2776129254Scognet	strh	r1, [r0, #0x0a]		/* store bytes A-B (low halfword) */
2777137463Scognet	RET
2778129254Scognet	LMEMCPY_C_PAD
2779129254Scognet
2780129254Scognet/*
2781129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned
 *
 * Both pointers have the same halfword alignment, so the 12 bytes are
 * moved as halfword + word + word + halfword with no shifting.
2782129254Scognet */
2783129254Scognet	ldrh	r2, [r1]		/* r2 = bytes 0-1 */
2784129254Scognet	ldr	r3, [r1, #0x02]		/* r3 = bytes 2-5 */
2785129254Scognet	ldr	ip, [r1, #0x06]		/* ip = bytes 6-9 */
2786129254Scognet	ldrh	r1, [r1, #0x0a]		/* r1 = bytes A-B (src pointer is dead) */
2787129254Scognet	strh	r2, [r0]		/* store bytes 0-1 */
2788129254Scognet	str	r3, [r0, #0x02]		/* store bytes 2-5 */
2789129254Scognet	str	ip, [r0, #0x06]		/* store bytes 6-9 */
2790129254Scognet	strh	r1, [r0, #0x0a]		/* store bytes A-B */
2791137463Scognet	RET
2792129254Scognet	LMEMCPY_C_PAD
2793129254Scognet
2794129254Scognet/*
2795129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
 *
 * The source is read back to front: the aligned words at src+9, src+5 and
 * src+1, then byte 0.  The word at src+9 includes one byte past the 12
 * being copied (the 'x'); it ends up above the halfword that is stored or
 * is shifted out.  ip is used first as scratch for bytes A-B, then
 * reloaded.
2796129254Scognet */
2797129254Scognet	ldr	r2, [r1, #0x09]		/* BE:r2 = 9ABx  LE:r2 = xBA9 */
2798129254Scognet	ldr	r3, [r1, #0x05]		/* BE:r3 = 5678  LE:r3 = 8765 */
2799129254Scognet	mov	ip, r2, lsr #8		/* BE:ip = .9AB  LE:ip = .xBA */
2800129254Scognet	strh	ip, [r0, #0x0a]		/* store bytes A-B (low halfword) */
2801129254Scognet	ldr	ip, [r1, #0x01]		/* BE:ip = 1234  LE:ip = 4321 */
2802129254Scognet	ldrb	r1, [r1]		/* r1 = ...0 */
2803129254Scognet#ifdef __ARMEB__
2804129254Scognet	mov	r2, r2, lsr #24		/* r2 = ...9 */
2805129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 6789 */
2806129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...5 */
2807129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 2345 */
2808129254Scognet	mov	r1, r1, lsl #8		/* r1 = ..0. */
2809129254Scognet	orr	r1, r1, ip, lsr #24	/* r1 = ..01 */
2810129254Scognet#else
2811129254Scognet	mov	r2, r2, lsl #24		/* r2 = 9... */
2812129254Scognet	orr	r2, r2, r3, lsr #8	/* r2 = 9876 */
2813129254Scognet	mov	r3, r3, lsl #24		/* r3 = 5... */
2814129254Scognet	orr	r3, r3, ip, lsr #8	/* r3 = 5432 */
2815129254Scognet	orr	r1, r1, ip, lsl #8	/* r1 = 3210 */
2816129254Scognet#endif
2817129254Scognet	str	r2, [r0, #0x06]		/* store bytes 6-9 */
2818129254Scognet	str	r3, [r0, #0x02]		/* store bytes 2-5 */
2819129254Scognet	strh	r1, [r0]		/* store bytes 0-1 (low halfword) */
2820137463Scognet	RET
2821129254Scognet	LMEMCPY_C_PAD
2822129254Scognet
2823129254Scognet/*
2824129254Scognet * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
 *
 * The source is read as three words.  With dst at byte 3 of a word, dst+1
 * and dst+5 are word aligned and dst+9 is halfword aligned, so the output
 * is written as byte + word + word + halfword + byte.
2825129254Scognet */
2826129254Scognet	ldr	r2, [r1]		/* BE:r2 = 0123  LE:r2 = 3210 */
2827129254Scognet	ldr	ip, [r1, #0x04]		/* BE:ip = 4567  LE:ip = 7654 */
2828129254Scognet	ldr	r1, [r1, #0x08]		/* BE:r1 = 89AB  LE:r1 = BA98 */
2829129254Scognet#ifdef __ARMEB__
2830129254Scognet	mov	r3, r2, lsr #24		/* r3 = ...0 */
2831129254Scognet	strb	r3, [r0]		/* store byte 0 */
2832129254Scognet	mov	r2, r2, lsl #8		/* r2 = 123. */
2833129254Scognet	orr	r2, r2, ip, lsr #24	/* r2 = 1234 */
2834129254Scognet	str	r2, [r0, #0x01]		/* store bytes 1-4 */
2835129254Scognet	mov	r2, ip, lsl #8		/* r2 = 567. */
2836129254Scognet	orr	r2, r2, r1, lsr #24	/* r2 = 5678 */
2837129254Scognet	str	r2, [r0, #0x05]		/* store bytes 5-8 */
2838129254Scognet	mov	r2, r1, lsr #8		/* r2 = .89A */
2839129254Scognet	strh	r2, [r0, #0x09]		/* store bytes 9-A (low halfword) */
2840129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B (low byte of r1) */
2841129254Scognet#else
2842129254Scognet	strb	r2, [r0]		/* store byte 0 (low byte of r2) */
2843129254Scognet	mov	r3, r2, lsr #8		/* r3 = .321 */
2844129254Scognet	orr	r3, r3, ip, lsl #24	/* r3 = 4321 */
2845129254Scognet	str	r3, [r0, #0x01]		/* store bytes 1-4 */
2846129254Scognet	mov	r3, ip, lsr #8		/* r3 = .765 */
2847129254Scognet	orr	r3, r3, r1, lsl #24	/* r3 = 8765 */
2848129254Scognet	str	r3, [r0, #0x05]		/* store bytes 5-8 */
2849129254Scognet	mov	r1, r1, lsr #8		/* r1 = .BA9 */
2850129254Scognet	strh	r1, [r0, #0x09]		/* store bytes 9-A (low halfword) */
2851129254Scognet	mov	r1, r1, lsr #16		/* r1 = ...B */
2852129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B */
2853129254Scognet#endif
2854137463Scognet	RET
2855129254Scognet	LMEMCPY_C_PAD
2856129254Scognet
2857129254Scognet/*
2858129254Scognet * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
 *
 * The source is read back to front: byte B, then the aligned words at
 * src+7, src+3 and src-1.  The word at src-1 includes the byte just below
 * src (the 'x'), which is never stored.  The output is written as byte +
 * word + word + halfword + byte, merged with 16-bit shifts.
2859129254Scognet */
2860129254Scognet	ldrb	r2, [r1, #0x0b]		/* r2 = ...B */
2861129254Scognet	ldr	r3, [r1, #0x07]		/* BE:r3 = 789A  LE:r3 = A987 */
2862129254Scognet	ldr	ip, [r1, #0x03]		/* BE:ip = 3456  LE:ip = 6543 */
2863129254Scognet	ldr	r1, [r1, #-1]		/* BE:r1 = x012  LE:r1 = 210x */
2864129254Scognet	strb	r2, [r0, #0x0b]		/* store byte B */
2865129254Scognet#ifdef __ARMEB__
2866129254Scognet	strh	r3, [r0, #0x09]		/* store bytes 9-A (low halfword) */
2867129254Scognet	mov	r3, r3, lsr #16		/* r3 = ..78 */
2868129254Scognet	orr	r3, r3, ip, lsl #16	/* r3 = 5678 */
2869129254Scognet	mov	ip, ip, lsr #16		/* ip = ..34 */
2870129254Scognet	orr	ip, ip, r1, lsl #16	/* ip = 1234 */
2871129254Scognet	mov	r1, r1, lsr #16		/* r1 = ..x0 */
2872129254Scognet#else
2873129254Scognet	mov	r2, r3, lsr #16		/* r2 = ..A9 */
2874129254Scognet	strh	r2, [r0, #0x09]		/* store bytes 9-A */
2875129254Scognet	mov	r3, r3, lsl #16		/* r3 = 87.. */
2876129254Scognet	orr	r3, r3, ip, lsr #16	/* r3 = 8765 */
2877129254Scognet	mov	ip, ip, lsl #16		/* ip = 43.. */
2878129254Scognet	orr	ip, ip, r1, lsr #16	/* ip = 4321 */
2879129254Scognet	mov	r1, r1, lsr #8		/* r1 = .210 */
2880129254Scognet#endif
2881129254Scognet	str	r3, [r0, #0x05]		/* store bytes 5-8 */
2882129254Scognet	str	ip, [r0, #0x01]		/* store bytes 1-4 */
2883129254Scognet	strb	r1, [r0]		/* store byte 0 (low byte of r1) */
2884137463Scognet	RET
2885129254Scognet	LMEMCPY_C_PAD
2886129254Scognet
2887129254Scognet/*
2888129254Scognet * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
 *
 * The source is read as halfword + word + word + halfword and the output
 * is written as byte + word + word + halfword + byte.  The two
 * endiannesses use entirely separate sequences here: the big-endian one
 * loads and merges from the last byte towards the first, the
 * little-endian one from the first towards the last.
2889129254Scognet */
2890129254Scognet#ifdef __ARMEB__
2891129254Scognet	ldrh	r2, [r1, #0x0a]		/* r2 = ..AB */
2892129254Scognet	ldr	ip, [r1, #0x06]		/* ip = 6789 */
2893129254Scognet	ldr	r3, [r1, #0x02]		/* r3 = 2345 */
2894129254Scognet	ldrh	r1, [r1]		/* r1 = ..01 */
2895129254Scognet	strb	r2, [r0, #0x0b]		/* store byte B (low byte of r2) */
2896129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...A */
2897129254Scognet	orr	r2, r2, ip, lsl #8	/* r2 = 789A */
2898129254Scognet	mov	ip, ip, lsr #8		/* ip = .678 */
2899129254Scognet	orr	ip, ip, r3, lsl #24	/* ip = 5678 */
2900129254Scognet	mov	r3, r3, lsr #8		/* r3 = .234 */
2901129254Scognet	orr	r3, r3, r1, lsl #24	/* r3 = 1234 */
2902129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...0 */
2903129254Scognet	strb	r1, [r0]		/* store byte 0 */
2904129254Scognet	str	r3, [r0, #0x01]		/* store bytes 1-4 */
2905129254Scognet	str	ip, [r0, #0x05]		/* store bytes 5-8 */
2906129254Scognet	strh	r2, [r0, #0x09]		/* store bytes 9-A (low halfword) */
2907129254Scognet#else
2908129254Scognet	ldrh	r2, [r1]		/* r2 = ..10 */
2909129254Scognet	ldr	r3, [r1, #0x02]		/* r3 = 5432 */
2910129254Scognet	ldr	ip, [r1, #0x06]		/* ip = 9876 */
2911129254Scognet	ldrh	r1, [r1, #0x0a]		/* r1 = ..BA */
2912129254Scognet	strb	r2, [r0]		/* store byte 0 (low byte of r2) */
2913129254Scognet	mov	r2, r2, lsr #8		/* r2 = ...1 */
2914129254Scognet	orr	r2, r2, r3, lsl #8	/* r2 = 4321 */
2915129254Scognet	mov	r3, r3, lsr #24		/* r3 = ...5 */
2916129254Scognet	orr	r3, r3, ip, lsl #8	/* r3 = 8765 */
2917129254Scognet	mov	ip, ip, lsr #24		/* ip = ...9 */
2918129254Scognet	orr	ip, ip, r1, lsl #8	/* ip = .BA9 */
2919129254Scognet	mov	r1, r1, lsr #8		/* r1 = ...B */
2920129254Scognet	str	r2, [r0, #0x01]		/* store bytes 1-4 */
2921129254Scognet	str	r3, [r0, #0x05]		/* store bytes 5-8 */
2922129254Scognet	strh	ip, [r0, #0x09]		/* store bytes 9-A (low halfword) */
2923129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B */
2924129254Scognet#endif
2925137463Scognet	RET
2926129254Scognet	LMEMCPY_C_PAD
2927129254Scognet
2928129254Scognet/*
2929129254Scognet * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
 *
 * Both pointers have the same offset within a word, so the 12 bytes are
 * moved as byte + word + word + halfword + byte with no shifting.  r2 is
 * reused for the halfword once byte 0 has been stored.  This is the last
 * handler in the table, so no padding follows it.
2930129254Scognet */
2931129254Scognet	ldrb	r2, [r1]		/* r2 = byte 0 */
2932129254Scognet	ldr	r3, [r1, #0x01]		/* r3 = bytes 1-4 */
2933129254Scognet	ldr	ip, [r1, #0x05]		/* ip = bytes 5-8 */
2934129254Scognet	strb	r2, [r0]		/* store byte 0; frees r2 */
2935129254Scognet	ldrh	r2, [r1, #0x09]		/* r2 = bytes 9-A */
2936129254Scognet	ldrb	r1, [r1, #0x0b]		/* r1 = byte B (src pointer is dead) */
2937129254Scognet	str	r3, [r0, #0x01]		/* store bytes 1-4 */
2938129254Scognet	str	ip, [r0, #0x05]		/* store bytes 5-8 */
2939129254Scognet	strh	r2, [r0, #0x09]		/* store bytes 9-A */
2940129254Scognet	strb	r1, [r0, #0x0b]		/* store byte B */
2941137463Scognet	RET
2942248361SandrewEND(memcpy)
2943172614Scognet#endif /* _ARM_ARCH_5E */
2944135654Scognet
2945135654Scognet#ifdef GPROF
/*
 * Assembled only when GPROF is defined.  Each ENTRY() below is followed by
 * a single nop and nothing else: there is no return and no END(), so the
 * five symbols simply follow one another.
 * NOTE(review): presumably these are marker symbols looked up by the
 * profiling code rather than callable routines -- confirm against the
 * ENTRY macro (defined outside this chunk) and the users of these names.
 */
2946135654Scognet
2947135654ScognetENTRY(user)
2948135654Scognet	nop
2949135654ScognetENTRY(btrap)
2950135654Scognet	nop
2951135654ScognetENTRY(etrap)
2952135654Scognet	nop
2953135654ScognetENTRY(bintr)
2954135654Scognet	nop
2955135654ScognetENTRY(eintr)
2956135654Scognet	nop
2957135654Scognet
2958135654Scognet#endif
2959