1129198Scognet/*- 2129198Scognet * Copyright (c) 2004 Olivier Houchard 3129198Scognet * All rights reserved. 4129198Scognet * 5129198Scognet * Redistribution and use in source and binary forms, with or without 6129198Scognet * modification, are permitted provided that the following conditions 7129198Scognet * are met: 8129198Scognet * 1. Redistributions of source code must retain the above copyright 9129198Scognet * notice, this list of conditions and the following disclaimer. 10129198Scognet * 2. Redistributions in binary form must reproduce the above copyright 11129198Scognet * notice, this list of conditions and the following disclaimer in the 12129198Scognet * documentation and/or other materials provided with the distribution. 13129198Scognet * 14129198Scognet * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15129198Scognet * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16129198Scognet * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17129198Scognet * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18129198Scognet * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19129198Scognet * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20129198Scognet * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21129198Scognet * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22129198Scognet * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23129198Scognet * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24129198Scognet * SUCH DAMAGE. 25129198Scognet */ 26175255Scognet/* 27175255Scognet * Copyright 2003 Wasabi Systems, Inc. 28175255Scognet * All rights reserved. 29175255Scognet * 30175255Scognet * Written by Steve C. Woodford for Wasabi Systems, Inc. 
31175255Scognet * 32175255Scognet * Redistribution and use in source and binary forms, with or without 33175255Scognet * modification, are permitted provided that the following conditions 34175255Scognet * are met: 35175255Scognet * 1. Redistributions of source code must retain the above copyright 36175255Scognet * notice, this list of conditions and the following disclaimer. 37175255Scognet * 2. Redistributions in binary form must reproduce the above copyright 38175255Scognet * notice, this list of conditions and the following disclaimer in the 39175255Scognet * documentation and/or other materials provided with the distribution. 40175255Scognet * 3. All advertising materials mentioning features or use of this software 41175255Scognet * must display the following acknowledgement: 42175255Scognet * This product includes software developed for the NetBSD Project by 43175255Scognet * Wasabi Systems, Inc. 44175255Scognet * 4. The name of Wasabi Systems, Inc. may not be used to endorse 45175255Scognet * or promote products derived from this software without specific prior 46175255Scognet * written permission. 47175255Scognet * 48175255Scognet * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 49175255Scognet * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 50175255Scognet * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 51175255Scognet * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC 52175255Scognet * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 53175255Scognet * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 54175255Scognet * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 55175255Scognet * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 56175255Scognet * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 57175255Scognet * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 58175255Scognet * POSSIBILITY OF SUCH DAMAGE. 59175255Scognet */ 60175255Scognet/* 61175255Scognet * Copyright (c) 1997 The NetBSD Foundation, Inc. 62175255Scognet * All rights reserved. 63175255Scognet * 64175255Scognet * This code is derived from software contributed to The NetBSD Foundation 65175255Scognet * by Neil A. Carson and Mark Brinicombe 66175255Scognet * 67175255Scognet * Redistribution and use in source and binary forms, with or without 68175255Scognet * modification, are permitted provided that the following conditions 69175255Scognet * are met: 70175255Scognet * 1. Redistributions of source code must retain the above copyright 71175255Scognet * notice, this list of conditions and the following disclaimer. 72175255Scognet * 2. Redistributions in binary form must reproduce the above copyright 73175255Scognet * notice, this list of conditions and the following disclaimer in the 74175255Scognet * documentation and/or other materials provided with the distribution. 75175255Scognet * 76175255Scognet * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 77175255Scognet * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 78175255Scognet * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 79175255Scognet * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
#include <machine/asmacros.h>
__FBSDID("$FreeBSD$");

#include "assym.s"

/*
 * Literal words holding the addresses of four external variables.
 * bzero below reads .L_arm_bzero and .L_min_bzero_size; the two memcpy
 * words are not referenced in this part of the file.
 */
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_arm_bzero:
	.word	_C_LABEL(_arm_bzero)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)
.L_min_bzero_size:
	.word	_C_LABEL(_min_bzero_size)

/*
 * bzero: Clears a block of memory
 *
 * On entry:
 *	r0 - dest address
 *	r1 - number of bytes to clear
 *
 * If the word stored in _arm_bzero is non-zero and the length is not
 * (signed) below the word stored in _min_bzero_size, that word is used
 * as a routine address and called with r0 = dest, r1 = length, r2 = 0.
 * A zero return value from it ends bzero; anything else falls back to
 * the generic fill shared with memset (do_memset) with r3 = 0.
 */
/* LINTSTUB: Func: void bzero(void *, size_t) */
ENTRY(bzero)
	ldr	r3, .L_arm_bzero
	ldr	r3, [r3]		/* r3 = current value of _arm_bzero */
	cmp	r3, #0
	beq	.Lnormal0		/* Nothing installed: generic path */
	ldr	r2, .L_min_bzero_size
	ldr	r2, [r2]		/* r2 = current value of _min_bzero_size */
	cmp	r1, r2
	/* NOTE(review): signed test on a length; confirm blt (not blo) is intended */
	blt	.Lnormal0		/* Below the threshold: generic path */
	/*
	 * Push four registers, not three, so that sp moves by 16 bytes and
	 * keeps whatever 8-byte alignment it had on entry across the call
	 * (AAPCS requires that at a public interface).  r2 is padding only:
	 * it is overwritten right below and is dead on both exits.
	 */
	stmfd	sp!, {r0, r1, r2, lr}
	mov	r2, #0			/* Third argument of the routine */
	mov	lr, pc			/* pc reads as "here + 8": the cmp below */
	mov	pc, r3			/* Indirect call */
	cmp	r0, #0			/* Zero return value means "done" */
	ldmfd	sp!, {r0, r1, r2, lr}	/* Load-multiple leaves the flags alone */
	RETeq
.Lnormal0:
	mov	r3, #0x00		/* Fill value for the shared code */
	b	do_memset

/*
 * memset: Sets a block of memory to the specified value
 *
 * On entry:
 *	r0 - dest address
 *	r1 - byte to write
 *	r2 - number of bytes to write
 *
 * On exit:
 *	r0 - dest address
 *
 * From do_memset on (also entered from bzero):
 *	ip - write cursor (r0 itself is never modified)
 *	r1 - bytes still to write
 *	r3 - fill value, replicated into all four bytes once the cursor
 *	     is word aligned
 *	r2 - scratch, then a second copy of r3 for two-word stores
 */
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	and	r3, r1, #0xff		/* We deal with bytes */
	mov	r1, r2
do_memset:
	cmp	r1, #0x04		/* Do we have less than 4 bytes */
	mov	ip, r0
	blt	.Lmemset_lessthanfour

	/* Ok first we will word align the address */
	ands	r2, ip, #0x03		/* Get the bottom two bits */
	bne	.Lmemset_wordunaligned	/* The address is not word aligned */

	/* We are now word aligned */
.Lmemset_wordaligned:
	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
#ifdef _ARM_ARCH_5E
	tst	ip, #0x04		/* Quad-align for armv5e */
#else
	cmp	r1, #0x10		/* Flags are consumed by the blt below */
#endif
	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
#ifdef _ARM_ARCH_5E
	subne	r1, r1, #0x04		/* Quad-align if necessary */
	strne	r3, [ip], #0x04
	cmp	r1, #0x10
#endif
	blt	.Lmemset_loop4		/* If less than 16 then use words */
	mov	r2, r3			/* Duplicate data */
	cmp	r1, #0x80		/* If < 128 then skip the big loop */
	blt	.Lmemset_loop32

	/* Do 128 bytes at a time: sixteen conditional 8-byte stores */
.Lmemset_loop128:
	subs	r1, r1, #0x80
#ifdef _ARM_ARCH_5E
	.rept	16
	strged	r2, [ip], #0x08
	.endr
#else
	.rept	16
	stmgeia	ip!, {r2-r3}
	.endr
#endif
	bgt	.Lmemset_loop128
	RETeq			/* Zero length so just exit */

	add	r1, r1, #0x80		/* Adjust for extra sub */

	/* Do 32 bytes at a time: four conditional 8-byte stores */
.Lmemset_loop32:
	subs	r1, r1, #0x20
#ifdef _ARM_ARCH_5E
	.rept	4
	strged	r2, [ip], #0x08
	.endr
#else
	.rept	4
	stmgeia	ip!, {r2-r3}
	.endr
#endif
	bgt	.Lmemset_loop32
	RETeq			/* Zero length so just exit */

	adds	r1, r1, #0x10		/* Partially adjust for extra sub */

	/* Deal with 16 bytes or more */
#ifdef _ARM_ARCH_5E
	strged	r2, [ip], #0x08
	strged	r2, [ip], #0x08
#else
	stmgeia	ip!, {r2-r3}
	stmgeia	ip!, {r2-r3}
#endif
	RETeq			/* Zero length so just exit */

	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */

	/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
	subs	r1, r1, #0x04
	strge	r3, [ip], #0x04
	bgt	.Lmemset_loop4
	RETeq			/* Zero length so just exit */

	/*
	 * One to three bytes are left.  Set the flags so that "ge" means
	 * at least two bytes and "gt" means three.
	 */
#ifdef _ARM_ARCH_5E
	/* Compensate for 64-bit alignment check */
	adds	r1, r1, #0x04
	RETeq
	cmp	r1, #2
#else
	cmp	r1, #-2			/* r1 is still biased by -4 here */
#endif

	strb	r3, [ip], #0x01		/* Set 1 byte */
	strgeb	r3, [ip], #0x01		/* Set another byte */
	strgtb	r3, [ip]		/* and a third */
	RET			/* Exit */

	/* r2 = dest & 3 (non-zero) on arrival; write 4 - r2 bytes to align */
.Lmemset_wordunaligned:
	rsb	r2, r2, #0x004
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r2, #0x02
	strgeb	r3, [ip], #0x01		/* Set another byte */
	sub	r1, r1, r2
	strgtb	r3, [ip], #0x01		/* and a third */
	cmp	r1, #0x04		/* More than 4 bytes left? */
	bge	.Lmemset_wordaligned	/* Yup */

.Lmemset_lessthanfour:
	cmp	r1, #0x00
	RETeq			/* Zero length so exit */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r1, #0x02
	strgeb	r3, [ip], #0x01		/* Set another byte */
	strgtb	r3, [ip]		/* and a third */
	RET			/* Exit */
END(bzero)
END(memset)

/*
 * bcmp: Compares two blocks of memory
 *
 * On entry:
 *	r0 - first block  (b1 in the comments below)
 *	r1 - second block (b2 in the comments below)
 *	r2 - number of bytes to compare
 *
 * On exit:
 *	r0 - zero if the blocks match (or the length is zero, or both
 *	     addresses are equal); otherwise the b1 byte minus the b2 byte
 *	     at the first mismatch
 *
 * The first pointer is moved to ip so that r0 can hold the result.
 */
ENTRY(bcmp)
	mov	ip, r0
	cmp	r2, #0x06
	beq	.Lmemcmp_6bytes		/* Dedicated 6-byte path */
	mov	r0, #0x00

	/* Are both addresses aligned the same way? */
	cmp	r2, #0x00
	eornes	r3, ip, r1
	RETeq			/* len == 0, or same addresses! */
	tst	r3, #0x03
	subne	r2, r2, #0x01		/* Byte loop counts from len - 1 */
	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */

	/*
	 * Word-align the addresses, if necessary.
	 *
	 * r3 = ((r1 - 5) & 3) * 3 is the number of 8-byte units to skip.
	 * pc reads as the address of the first ldrb below (addne + 8), and
	 * each compare stanza is six instructions (3 * 8 bytes), assuming
	 * RETne/RETeq each assemble to one instruction.  So:
	 *	r1 & 3 == 1: r3 = 0, no jump, compare 3 bytes
	 *	r1 & 3 == 2: skip one stanza, compare 2 bytes
	 *	r1 & 3 == 3: skip two stanzas, compare 1 byte
	 *	r1 & 3 == 0: skip all three stanzas
	 * Do not add or remove instructions between here and the word loop.
	 */
	sub	r3, r1, #0x05
	ands	r3, r3, #0x03
	add	r3, r3, r3, lsl #1
	addne	pc, pc, r3, lsl #3
	nop

	/* Compare up to 3 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare up to 2 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 1 byte */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 4 bytes at a time, if possible */
	subs	r2, r2, #0x04
	bcc	.Lmemcmp_bytewise
.Lmemcmp_word_aligned:
	ldr	r0, [ip], #0x04
	ldr	r3, [r1], #0x04
	subs	r2, r2, #0x04
	cmpcs	r0, r3			/* Only while the count has not underflowed */
	beq	.Lmemcmp_word_aligned
	sub	r0, r0, r3

	/* Correct for extra subtraction, and check if done */
	adds	r2, r2, #0x04
	cmpeq	r0, #0x00		/* If done, did all bytes match? */
	RETeq			/* Yup. Just return */

	/* Re-do the final word byte-wise */
	sub	ip, ip, #0x04
	sub	r1, r1, #0x04

.Lmemcmp_bytewise:
	add	r2, r2, #0x03		/* r2 = bytes left - 1 */
.Lmemcmp_bytewise2:
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r2, r2, #0x01
	cmpcs	r0, r3			/* Skipped once the count underflows */
	beq	.Lmemcmp_bytewise2
	sub	r0, r0, r3		/* Difference of the last pair read */
	RET

	/*
	 * 6 byte compares are very common, thanks to the network stack.
	 * This code is hand-scheduled to reduce the number of stalls for
	 * load results.  Everything else being equal, this will be ~32%
	 * faster than a byte-wise memcmp.
	 */
	.align 5
.Lmemcmp_6bytes:
	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
	ldreqb	r3, [ip, #0x01]		/* r3 = b1#1 */
	RETne			/* Return if mismatch on #0 */
	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
	ldreqb	r3, [r1, #0x02]		/* r3 = b2#2 */
	ldreqb	r0, [ip, #0x02]		/* r0 = b1#2 */
	RETne			/* Return if mismatch on #1 */
	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
	ldreqb	r3, [ip, #0x03]		/* r3 = b1#3 */
	RETne			/* Return if mismatch on #2 */
	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
	ldreqb	r3, [r1, #0x04]		/* r3 = b2#4 */
	ldreqb	r0, [ip, #0x04]		/* r0 = b1#4 */
	RETne			/* Return if mismatch on #3 */
	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
	ldreqb	r3, [ip, #0x05]		/* r3 = b1#5 */
	RETne			/* Return if mismatch on #4 */
	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
	RET
END(bcmp)

ENTRY(bcopy)
	/* switch the source and destination registers */
	eor	r0, r1, r0
	eor	r1, r0, r1
	eor	r0, r1, r0
ENTRY(memmove)
	/* Do the buffers overlap?
*/ 400143175Scognet cmp r0, r1 401143175Scognet RETeq /* Bail now if src/dst are the same */ 402143175Scognet subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */ 403143175Scognet subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */ 404143175Scognet cmp r3, r2 /* if (r3 < len) we have an overlap */ 405143175Scognet bcc PIC_SYM(_C_LABEL(memcpy), PLT) 406143175Scognet 407143175Scognet /* Determine copy direction */ 408143175Scognet cmp r1, r0 409143175Scognet bcc .Lmemmove_backwards 410143175Scognet 411143175Scognet moveq r0, #0 /* Quick abort for len=0 */ 412143175Scognet RETeq 413143175Scognet 414143175Scognet stmdb sp!, {r0, lr} /* memmove() returns dest addr */ 415143175Scognet subs r2, r2, #4 416143175Scognet blt .Lmemmove_fl4 /* less than 4 bytes */ 417143175Scognet ands r12, r0, #3 418143175Scognet bne .Lmemmove_fdestul /* oh unaligned destination addr */ 419143175Scognet ands r12, r1, #3 420143175Scognet bne .Lmemmove_fsrcul /* oh unaligned source addr */ 421143175Scognet 422143175Scognet.Lmemmove_ft8: 423143175Scognet /* We have aligned source and destination */ 424143175Scognet subs r2, r2, #8 425143175Scognet blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */ 426236991Simp subs r2, r2, #0x14 427143175Scognet blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */ 428143175Scognet stmdb sp!, {r4} /* borrow r4 */ 429143175Scognet 430143175Scognet /* blat 32 bytes at a time */ 431143175Scognet /* XXX for really big copies perhaps we should use more registers */ 432143175Scognet.Lmemmove_floop32: 433143175Scognet ldmia r1!, {r3, r4, r12, lr} 434143175Scognet stmia r0!, {r3, r4, r12, lr} 435143175Scognet ldmia r1!, {r3, r4, r12, lr} 436143175Scognet stmia r0!, {r3, r4, r12, lr} 437236991Simp subs r2, r2, #0x20 438143175Scognet bge .Lmemmove_floop32 439143175Scognet 440143175Scognet cmn r2, #0x10 441143175Scognet ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 442143175Scognet stmgeia r0!, {r3, r4, r12, lr} 443236991Simp subge r2, r2, 
#0x10 444143175Scognet ldmia sp!, {r4} /* return r4 */ 445143175Scognet 446143175Scognet.Lmemmove_fl32: 447236991Simp adds r2, r2, #0x14 448143175Scognet 449143175Scognet /* blat 12 bytes at a time */ 450143175Scognet.Lmemmove_floop12: 451143175Scognet ldmgeia r1!, {r3, r12, lr} 452143175Scognet stmgeia r0!, {r3, r12, lr} 453236991Simp subges r2, r2, #0x0c 454143175Scognet bge .Lmemmove_floop12 455143175Scognet 456143175Scognet.Lmemmove_fl12: 457143175Scognet adds r2, r2, #8 458143175Scognet blt .Lmemmove_fl4 459143175Scognet 460143175Scognet subs r2, r2, #4 461143175Scognet ldrlt r3, [r1], #4 462143175Scognet strlt r3, [r0], #4 463143175Scognet ldmgeia r1!, {r3, r12} 464143175Scognet stmgeia r0!, {r3, r12} 465143175Scognet subge r2, r2, #4 466143175Scognet 467143175Scognet.Lmemmove_fl4: 468143175Scognet /* less than 4 bytes to go */ 469143175Scognet adds r2, r2, #4 470143175Scognet ldmeqia sp!, {r0, pc} /* done */ 471143175Scognet 472143175Scognet /* copy the crud byte at a time */ 473143175Scognet cmp r2, #2 474143175Scognet ldrb r3, [r1], #1 475143175Scognet strb r3, [r0], #1 476143175Scognet ldrgeb r3, [r1], #1 477143175Scognet strgeb r3, [r0], #1 478143175Scognet ldrgtb r3, [r1], #1 479143175Scognet strgtb r3, [r0], #1 480143175Scognet ldmia sp!, {r0, pc} 481143175Scognet 482143175Scognet /* erg - unaligned destination */ 483143175Scognet.Lmemmove_fdestul: 484143175Scognet rsb r12, r12, #4 485143175Scognet cmp r12, #2 486143175Scognet 487143175Scognet /* align destination with byte copies */ 488143175Scognet ldrb r3, [r1], #1 489143175Scognet strb r3, [r0], #1 490143175Scognet ldrgeb r3, [r1], #1 491143175Scognet strgeb r3, [r0], #1 492143175Scognet ldrgtb r3, [r1], #1 493143175Scognet strgtb r3, [r0], #1 494143175Scognet subs r2, r2, r12 495143175Scognet blt .Lmemmove_fl4 /* less the 4 bytes */ 496143175Scognet 497143175Scognet ands r12, r1, #3 498143175Scognet beq .Lmemmove_ft8 /* we have an aligned source */ 499143175Scognet 500143175Scognet /* erg - 
unaligned source */ 501143175Scognet /* This is where it gets nasty ... */ 502143175Scognet.Lmemmove_fsrcul: 503143175Scognet bic r1, r1, #3 504143175Scognet ldr lr, [r1], #4 505143175Scognet cmp r12, #2 506143175Scognet bgt .Lmemmove_fsrcul3 507143175Scognet beq .Lmemmove_fsrcul2 508236991Simp cmp r2, #0x0c 509143175Scognet blt .Lmemmove_fsrcul1loop4 510236991Simp sub r2, r2, #0x0c 511143175Scognet stmdb sp!, {r4, r5} 512143175Scognet 513143175Scognet.Lmemmove_fsrcul1loop16: 514143175Scognet#ifdef __ARMEB__ 515143175Scognet mov r3, lr, lsl #8 516143175Scognet#else 517143175Scognet mov r3, lr, lsr #8 518143175Scognet#endif 519143175Scognet ldmia r1!, {r4, r5, r12, lr} 520143175Scognet#ifdef __ARMEB__ 521143175Scognet orr r3, r3, r4, lsr #24 522143175Scognet mov r4, r4, lsl #8 523143175Scognet orr r4, r4, r5, lsr #24 524143175Scognet mov r5, r5, lsl #8 525143175Scognet orr r5, r5, r12, lsr #24 526143175Scognet mov r12, r12, lsl #8 527143175Scognet orr r12, r12, lr, lsr #24 528143175Scognet#else 529143175Scognet orr r3, r3, r4, lsl #24 530143175Scognet mov r4, r4, lsr #8 531143175Scognet orr r4, r4, r5, lsl #24 532143175Scognet mov r5, r5, lsr #8 533143175Scognet orr r5, r5, r12, lsl #24 534143175Scognet mov r12, r12, lsr #8 535143175Scognet orr r12, r12, lr, lsl #24 536143175Scognet#endif 537143175Scognet stmia r0!, {r3-r5, r12} 538236991Simp subs r2, r2, #0x10 539143175Scognet bge .Lmemmove_fsrcul1loop16 540143175Scognet ldmia sp!, {r4, r5} 541236991Simp adds r2, r2, #0x0c 542143175Scognet blt .Lmemmove_fsrcul1l4 543143175Scognet 544143175Scognet.Lmemmove_fsrcul1loop4: 545143175Scognet#ifdef __ARMEB__ 546143175Scognet mov r12, lr, lsl #8 547143175Scognet#else 548143175Scognet mov r12, lr, lsr #8 549143175Scognet#endif 550143175Scognet ldr lr, [r1], #4 551143175Scognet#ifdef __ARMEB__ 552143175Scognet orr r12, r12, lr, lsr #24 553143175Scognet#else 554143175Scognet orr r12, r12, lr, lsl #24 555143175Scognet#endif 556143175Scognet str r12, [r0], #4 557143175Scognet 
subs r2, r2, #4 558143175Scognet bge .Lmemmove_fsrcul1loop4 559143175Scognet 560143175Scognet.Lmemmove_fsrcul1l4: 561143175Scognet sub r1, r1, #3 562143175Scognet b .Lmemmove_fl4 563143175Scognet 564143175Scognet.Lmemmove_fsrcul2: 565236991Simp cmp r2, #0x0c 566143175Scognet blt .Lmemmove_fsrcul2loop4 567236991Simp sub r2, r2, #0x0c 568143175Scognet stmdb sp!, {r4, r5} 569143175Scognet 570143175Scognet.Lmemmove_fsrcul2loop16: 571143175Scognet#ifdef __ARMEB__ 572143175Scognet mov r3, lr, lsl #16 573143175Scognet#else 574143175Scognet mov r3, lr, lsr #16 575143175Scognet#endif 576143175Scognet ldmia r1!, {r4, r5, r12, lr} 577143175Scognet#ifdef __ARMEB__ 578143175Scognet orr r3, r3, r4, lsr #16 579143175Scognet mov r4, r4, lsl #16 580143175Scognet orr r4, r4, r5, lsr #16 581143175Scognet mov r5, r5, lsl #16 582143175Scognet orr r5, r5, r12, lsr #16 583143175Scognet mov r12, r12, lsl #16 584143175Scognet orr r12, r12, lr, lsr #16 585143175Scognet#else 586143175Scognet orr r3, r3, r4, lsl #16 587143175Scognet mov r4, r4, lsr #16 588143175Scognet orr r4, r4, r5, lsl #16 589143175Scognet mov r5, r5, lsr #16 590143175Scognet orr r5, r5, r12, lsl #16 591143175Scognet mov r12, r12, lsr #16 592143175Scognet orr r12, r12, lr, lsl #16 593143175Scognet#endif 594143175Scognet stmia r0!, {r3-r5, r12} 595236991Simp subs r2, r2, #0x10 596143175Scognet bge .Lmemmove_fsrcul2loop16 597143175Scognet ldmia sp!, {r4, r5} 598236991Simp adds r2, r2, #0x0c 599143175Scognet blt .Lmemmove_fsrcul2l4 600143175Scognet 601143175Scognet.Lmemmove_fsrcul2loop4: 602143175Scognet#ifdef __ARMEB__ 603143175Scognet mov r12, lr, lsl #16 604143175Scognet#else 605143175Scognet mov r12, lr, lsr #16 606143175Scognet#endif 607143175Scognet ldr lr, [r1], #4 608143175Scognet#ifdef __ARMEB__ 609143175Scognet orr r12, r12, lr, lsr #16 610143175Scognet#else 611143175Scognet orr r12, r12, lr, lsl #16 612143175Scognet#endif 613143175Scognet str r12, [r0], #4 614143175Scognet subs r2, r2, #4 615143175Scognet bge 
.Lmemmove_fsrcul2loop4 616143175Scognet 617143175Scognet.Lmemmove_fsrcul2l4: 618143175Scognet sub r1, r1, #2 619143175Scognet b .Lmemmove_fl4 620143175Scognet 621143175Scognet.Lmemmove_fsrcul3: 622236991Simp cmp r2, #0x0c 623143175Scognet blt .Lmemmove_fsrcul3loop4 624236991Simp sub r2, r2, #0x0c 625143175Scognet stmdb sp!, {r4, r5} 626143175Scognet 627143175Scognet.Lmemmove_fsrcul3loop16: 628143175Scognet#ifdef __ARMEB__ 629143175Scognet mov r3, lr, lsl #24 630143175Scognet#else 631143175Scognet mov r3, lr, lsr #24 632143175Scognet#endif 633143175Scognet ldmia r1!, {r4, r5, r12, lr} 634143175Scognet#ifdef __ARMEB__ 635143175Scognet orr r3, r3, r4, lsr #8 636143175Scognet mov r4, r4, lsl #24 637143175Scognet orr r4, r4, r5, lsr #8 638143175Scognet mov r5, r5, lsl #24 639143175Scognet orr r5, r5, r12, lsr #8 640143175Scognet mov r12, r12, lsl #24 641143175Scognet orr r12, r12, lr, lsr #8 642143175Scognet#else 643143175Scognet orr r3, r3, r4, lsl #8 644143175Scognet mov r4, r4, lsr #24 645143175Scognet orr r4, r4, r5, lsl #8 646143175Scognet mov r5, r5, lsr #24 647143175Scognet orr r5, r5, r12, lsl #8 648143175Scognet mov r12, r12, lsr #24 649143175Scognet orr r12, r12, lr, lsl #8 650143175Scognet#endif 651143175Scognet stmia r0!, {r3-r5, r12} 652236991Simp subs r2, r2, #0x10 653143175Scognet bge .Lmemmove_fsrcul3loop16 654143175Scognet ldmia sp!, {r4, r5} 655236991Simp adds r2, r2, #0x0c 656143175Scognet blt .Lmemmove_fsrcul3l4 657143175Scognet 658143175Scognet.Lmemmove_fsrcul3loop4: 659143175Scognet#ifdef __ARMEB__ 660143175Scognet mov r12, lr, lsl #24 661143175Scognet#else 662143175Scognet mov r12, lr, lsr #24 663143175Scognet#endif 664143175Scognet ldr lr, [r1], #4 665143175Scognet#ifdef __ARMEB__ 666143175Scognet orr r12, r12, lr, lsr #8 667143175Scognet#else 668143175Scognet orr r12, r12, lr, lsl #8 669143175Scognet#endif 670143175Scognet str r12, [r0], #4 671143175Scognet subs r2, r2, #4 672143175Scognet bge .Lmemmove_fsrcul3loop4 673143175Scognet 
674143175Scognet.Lmemmove_fsrcul3l4: 675143175Scognet sub r1, r1, #1 676143175Scognet b .Lmemmove_fl4 677143175Scognet 678143175Scognet.Lmemmove_backwards: 679143175Scognet add r1, r1, r2 680143175Scognet add r0, r0, r2 681143175Scognet subs r2, r2, #4 682143175Scognet blt .Lmemmove_bl4 /* less than 4 bytes */ 683143175Scognet ands r12, r0, #3 684143175Scognet bne .Lmemmove_bdestul /* oh unaligned destination addr */ 685143175Scognet ands r12, r1, #3 686143175Scognet bne .Lmemmove_bsrcul /* oh unaligned source addr */ 687143175Scognet 688143175Scognet.Lmemmove_bt8: 689143175Scognet /* We have aligned source and destination */ 690143175Scognet subs r2, r2, #8 691143175Scognet blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */ 692143175Scognet stmdb sp!, {r4, lr} 693143175Scognet subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ 694143175Scognet blt .Lmemmove_bl32 695143175Scognet 696143175Scognet /* blat 32 bytes at a time */ 697143175Scognet /* XXX for really big copies perhaps we should use more registers */ 698143175Scognet.Lmemmove_bloop32: 699143175Scognet ldmdb r1!, {r3, r4, r12, lr} 700143175Scognet stmdb r0!, {r3, r4, r12, lr} 701143175Scognet ldmdb r1!, {r3, r4, r12, lr} 702143175Scognet stmdb r0!, {r3, r4, r12, lr} 703236991Simp subs r2, r2, #0x20 704143175Scognet bge .Lmemmove_bloop32 705143175Scognet 706143175Scognet.Lmemmove_bl32: 707236991Simp cmn r2, #0x10 708143175Scognet ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ 709143175Scognet stmgedb r0!, {r3, r4, r12, lr} 710236991Simp subge r2, r2, #0x10 711236991Simp adds r2, r2, #0x14 712143175Scognet ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */ 713143175Scognet stmgedb r0!, {r3, r12, lr} 714236991Simp subge r2, r2, #0x0c 715143175Scognet ldmia sp!, {r4, lr} 716143175Scognet 717143175Scognet.Lmemmove_bl12: 718143175Scognet adds r2, r2, #8 719143175Scognet blt .Lmemmove_bl4 720143175Scognet subs r2, r2, #4 721143175Scognet ldrlt r3, [r1, #-4]! 
722143175Scognet strlt r3, [r0, #-4]! 723143175Scognet ldmgedb r1!, {r3, r12} 724143175Scognet stmgedb r0!, {r3, r12} 725143175Scognet subge r2, r2, #4 726143175Scognet 727143175Scognet.Lmemmove_bl4: 728143175Scognet /* less than 4 bytes to go */ 729143175Scognet adds r2, r2, #4 730143175Scognet RETeq /* done */ 731143175Scognet 732143175Scognet /* copy the crud byte at a time */ 733143175Scognet cmp r2, #2 734143175Scognet ldrb r3, [r1, #-1]! 735143175Scognet strb r3, [r0, #-1]! 736143175Scognet ldrgeb r3, [r1, #-1]! 737143175Scognet strgeb r3, [r0, #-1]! 738143175Scognet ldrgtb r3, [r1, #-1]! 739143175Scognet strgtb r3, [r0, #-1]! 740143175Scognet RET 741143175Scognet 742143175Scognet /* erg - unaligned destination */ 743143175Scognet.Lmemmove_bdestul: 744143175Scognet cmp r12, #2 745143175Scognet 746143175Scognet /* align destination with byte copies */ 747143175Scognet ldrb r3, [r1, #-1]! 748143175Scognet strb r3, [r0, #-1]! 749143175Scognet ldrgeb r3, [r1, #-1]! 750143175Scognet strgeb r3, [r0, #-1]! 751143175Scognet ldrgtb r3, [r1, #-1]! 752143175Scognet strgtb r3, [r0, #-1]! 753143175Scognet subs r2, r2, r12 754143175Scognet blt .Lmemmove_bl4 /* less than 4 bytes to go */ 755143175Scognet ands r12, r1, #3 756143175Scognet beq .Lmemmove_bt8 /* we have an aligned source */ 757143175Scognet 758143175Scognet /* erg - unaligned source */ 759143175Scognet /* This is where it gets nasty ... 
*/ 760143175Scognet.Lmemmove_bsrcul: 761143175Scognet bic r1, r1, #3 762143175Scognet ldr r3, [r1, #0] 763143175Scognet cmp r12, #2 764143175Scognet blt .Lmemmove_bsrcul1 765143175Scognet beq .Lmemmove_bsrcul2 766236991Simp cmp r2, #0x0c 767143175Scognet blt .Lmemmove_bsrcul3loop4 768236991Simp sub r2, r2, #0x0c 769143175Scognet stmdb sp!, {r4, r5, lr} 770143175Scognet 771143175Scognet.Lmemmove_bsrcul3loop16: 772143175Scognet#ifdef __ARMEB__ 773143175Scognet mov lr, r3, lsr #8 774143175Scognet#else 775143175Scognet mov lr, r3, lsl #8 776143175Scognet#endif 777143175Scognet ldmdb r1!, {r3-r5, r12} 778143175Scognet#ifdef __ARMEB__ 779143175Scognet orr lr, lr, r12, lsl #24 780143175Scognet mov r12, r12, lsr #8 781143175Scognet orr r12, r12, r5, lsl #24 782143175Scognet mov r5, r5, lsr #8 783143175Scognet orr r5, r5, r4, lsl #24 784143175Scognet mov r4, r4, lsr #8 785143175Scognet orr r4, r4, r3, lsl #24 786143175Scognet#else 787143175Scognet orr lr, lr, r12, lsr #24 788143175Scognet mov r12, r12, lsl #8 789143175Scognet orr r12, r12, r5, lsr #24 790143175Scognet mov r5, r5, lsl #8 791143175Scognet orr r5, r5, r4, lsr #24 792143175Scognet mov r4, r4, lsl #8 793143175Scognet orr r4, r4, r3, lsr #24 794143175Scognet#endif 795143175Scognet stmdb r0!, {r4, r5, r12, lr} 796236991Simp subs r2, r2, #0x10 797143175Scognet bge .Lmemmove_bsrcul3loop16 798143175Scognet ldmia sp!, {r4, r5, lr} 799236991Simp adds r2, r2, #0x0c 800143175Scognet blt .Lmemmove_bsrcul3l4 801143175Scognet 802143175Scognet.Lmemmove_bsrcul3loop4: 803143175Scognet#ifdef __ARMEB__ 804143175Scognet mov r12, r3, lsr #8 805143175Scognet#else 806143175Scognet mov r12, r3, lsl #8 807143175Scognet#endif 808143175Scognet ldr r3, [r1, #-4]! 809143175Scognet#ifdef __ARMEB__ 810143175Scognet orr r12, r12, r3, lsl #24 811143175Scognet#else 812143175Scognet orr r12, r12, r3, lsr #24 813143175Scognet#endif 814143175Scognet str r12, [r0, #-4]! 
815143175Scognet subs r2, r2, #4 816143175Scognet bge .Lmemmove_bsrcul3loop4 817143175Scognet 818143175Scognet.Lmemmove_bsrcul3l4: 819143175Scognet add r1, r1, #3 820143175Scognet b .Lmemmove_bl4 821143175Scognet 822143175Scognet.Lmemmove_bsrcul2: 823236991Simp cmp r2, #0x0c 824143175Scognet blt .Lmemmove_bsrcul2loop4 825236991Simp sub r2, r2, #0x0c 826143175Scognet stmdb sp!, {r4, r5, lr} 827143175Scognet 828143175Scognet.Lmemmove_bsrcul2loop16: 829143175Scognet#ifdef __ARMEB__ 830143175Scognet mov lr, r3, lsr #16 831143175Scognet#else 832143175Scognet mov lr, r3, lsl #16 833143175Scognet#endif 834143175Scognet ldmdb r1!, {r3-r5, r12} 835143175Scognet#ifdef __ARMEB__ 836143175Scognet orr lr, lr, r12, lsl #16 837143175Scognet mov r12, r12, lsr #16 838143175Scognet orr r12, r12, r5, lsl #16 839143175Scognet mov r5, r5, lsr #16 840143175Scognet orr r5, r5, r4, lsl #16 841143175Scognet mov r4, r4, lsr #16 842143175Scognet orr r4, r4, r3, lsl #16 843143175Scognet#else 844143175Scognet orr lr, lr, r12, lsr #16 845143175Scognet mov r12, r12, lsl #16 846143175Scognet orr r12, r12, r5, lsr #16 847143175Scognet mov r5, r5, lsl #16 848143175Scognet orr r5, r5, r4, lsr #16 849143175Scognet mov r4, r4, lsl #16 850143175Scognet orr r4, r4, r3, lsr #16 851143175Scognet#endif 852143175Scognet stmdb r0!, {r4, r5, r12, lr} 853236991Simp subs r2, r2, #0x10 854143175Scognet bge .Lmemmove_bsrcul2loop16 855143175Scognet ldmia sp!, {r4, r5, lr} 856236991Simp adds r2, r2, #0x0c 857143175Scognet blt .Lmemmove_bsrcul2l4 858143175Scognet 859143175Scognet.Lmemmove_bsrcul2loop4: 860143175Scognet#ifdef __ARMEB__ 861143175Scognet mov r12, r3, lsr #16 862143175Scognet#else 863143175Scognet mov r12, r3, lsl #16 864143175Scognet#endif 865143175Scognet ldr r3, [r1, #-4]! 866143175Scognet#ifdef __ARMEB__ 867143175Scognet orr r12, r12, r3, lsl #16 868143175Scognet#else 869143175Scognet orr r12, r12, r3, lsr #16 870143175Scognet#endif 871143175Scognet str r12, [r0, #-4]! 
872143175Scognet subs r2, r2, #4 873143175Scognet bge .Lmemmove_bsrcul2loop4 874143175Scognet 875143175Scognet.Lmemmove_bsrcul2l4: 876143175Scognet add r1, r1, #2 877143175Scognet b .Lmemmove_bl4 878143175Scognet 879143175Scognet.Lmemmove_bsrcul1: 880236991Simp cmp r2, #0x0c 881143175Scognet blt .Lmemmove_bsrcul1loop4 882236991Simp sub r2, r2, #0x0c 883143175Scognet stmdb sp!, {r4, r5, lr} 884143175Scognet 885143175Scognet.Lmemmove_bsrcul1loop32: 886143175Scognet#ifdef __ARMEB__ 887143175Scognet mov lr, r3, lsr #24 888143175Scognet#else 889143175Scognet mov lr, r3, lsl #24 890143175Scognet#endif 891143175Scognet ldmdb r1!, {r3-r5, r12} 892143175Scognet#ifdef __ARMEB__ 893143175Scognet orr lr, lr, r12, lsl #8 894143175Scognet mov r12, r12, lsr #24 895143175Scognet orr r12, r12, r5, lsl #8 896143175Scognet mov r5, r5, lsr #24 897143175Scognet orr r5, r5, r4, lsl #8 898143175Scognet mov r4, r4, lsr #24 899143175Scognet orr r4, r4, r3, lsl #8 900143175Scognet#else 901143175Scognet orr lr, lr, r12, lsr #8 902143175Scognet mov r12, r12, lsl #24 903143175Scognet orr r12, r12, r5, lsr #8 904143175Scognet mov r5, r5, lsl #24 905143175Scognet orr r5, r5, r4, lsr #8 906143175Scognet mov r4, r4, lsl #24 907143175Scognet orr r4, r4, r3, lsr #8 908143175Scognet#endif 909143175Scognet stmdb r0!, {r4, r5, r12, lr} 910236991Simp subs r2, r2, #0x10 911143175Scognet bge .Lmemmove_bsrcul1loop32 912143175Scognet ldmia sp!, {r4, r5, lr} 913236991Simp adds r2, r2, #0x0c 914143175Scognet blt .Lmemmove_bsrcul1l4 915143175Scognet 916143175Scognet.Lmemmove_bsrcul1loop4: 917143175Scognet#ifdef __ARMEB__ 918143175Scognet mov r12, r3, lsr #24 919143175Scognet#else 920143175Scognet mov r12, r3, lsl #24 921143175Scognet#endif 922143175Scognet ldr r3, [r1, #-4]! 923143175Scognet#ifdef __ARMEB__ 924143175Scognet orr r12, r12, r3, lsl #8 925143175Scognet#else 926143175Scognet orr r12, r12, r3, lsr #8 927143175Scognet#endif 928143175Scognet str r12, [r0, #-4]! 
/* Remainder of memmove()'s .Lmemmove_bsrcul1loop4 loop; instructions unchanged. */
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4
END(bcopy)
END(memmove)

#if !defined(_ARM_ARCH_5E)
/*
 * void *memcpy(void *dst, const void *src, size_t len)
 *
 * Variant assembled when _ARM_ARCH_5E is not defined.
 *
 * In:	r0 = dst, r1 = src, r2 = len
 * Out:	r0 = dst (the entry value is saved on the stack and reloaded on return)
 * Scratch: r3, r12, flags.  lr is saved on entry and used as a data register;
 *	r4 and r5 are saved and restored around the loops that borrow them.
 *
 * If the word addressed by .L_arm_memcpy is non-zero and len is at least the
 * word addressed by .L_min_memcpy_size, that routine is called first with
 * (dst, src, len, 0); a zero return from it means the copy has been done.
 * Otherwise, or if it returns non-zero, the software copy below runs.
 *
 * The length is handled with signed arithmetic throughout: r2 is kept biased
 * by the size of the next chunk so that a single subs/blt decides whether
 * another chunk of that size remains.
 */
ENTRY(memcpy)
	/* Do not check arm_memcpy if we're running from flash */
#ifdef FLASHADDR
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	ldr	r3, .L_arm_memcpy
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal		/* no hook installed */
	ldr	r3, .L_min_memcpy_size
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal		/* too short to be worth the hook */
	/*
	 * NOTE(review): five registers are pushed here, so sp is only 4-byte
	 * aligned across the indirect call; confirm the hook tolerates that
	 * under the ABI this file is built for.
	 */
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0			/* fourth argument to the hook */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* return address for the indirect call */
	ldr	pc, [r4]
	cmp	r0, #0			/* flags survive the ldmfd below */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq				/* hook returned 0: nothing left to do */

.Lnormal:
	/* save leaf functions having to store this away */
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10		/* at least 16 of the 32 still left? */
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14		/* r2 = bytes left - 12 */

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8		/* r2 = bytes left - 4 */
	blt	.Lmemcpy_l4

	subs	r2, r2, #4		/* lt: 4..7 bytes left, ge: 8..11 */
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* r2 = bytes left (0..3) */
	/*
	 * NOTE(review): the macro tested below is spelled with a single
	 * trailing underscore; confirm that this is intentional.
	 */
#ifdef __APCS_26_
	ldmeqia	sp!, {r0, pc}^		/* done */
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2			/* ge: 2nd byte too, gt: 3rd byte too */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4		/* r12 = bytes needed to align dst */
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
	/*
	 * The destination is word aligned but the source is r12 (1..3) bytes
	 * past a word boundary.  Whole aligned words are loaded and each
	 * output word is assembled from the tail of one source word and the
	 * head of the next.  Which shift extracts the "tail" depends on the
	 * byte order, hence the __ARMEB__ alternatives in every loop below.
	 * lr always carries the most recently loaded source word.
	 */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* source is 1 byte past a word boundary: 16 bytes per iteration */
.Lmemcpy_srcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

	/* one word per iteration */
.Lmemcpy_srcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3		/* back to the first uncopied byte */
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* source is 2 bytes past a word boundary: 16 bytes per iteration */
.Lmemcpy_srcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

	/* one word per iteration */
.Lmemcpy_srcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2		/* back to the first uncopied byte */
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* source is 3 bytes past a word boundary: 16 bytes per iteration */
.Lmemcpy_srcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

	/* one word per iteration */
.Lmemcpy_srcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1		/* back to the first uncopied byte */
	b	.Lmemcpy_l4
END(memcpy)

1174129254Scognet#else 1175129254Scognet/* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */ 1176129254ScognetENTRY(memcpy) 1177129254Scognet pld [r1] 1178129254Scognet cmp r2, #0x0c 1179129254Scognet ble .Lmemcpy_short /* <= 12 bytes */ 1180167003Scognet#ifdef FLASHADDR 1181167003Scognet#if FLASHADDR > PHYSADDR 1182167003Scognet ldr r3, =FLASHADDR 1183167003Scognet cmp r3, pc 1184167003Scognet bls .Lnormal 1185167003Scognet#else 1186167003Scognet ldr r3, =FLASHADDR 1187167003Scognet cmp r3, pc 1188167003Scognet bhi .Lnormal 1189167003Scognet#endif 1190167003Scognet#endif 1191150864Scognet ldr r3, .L_arm_memcpy 1192150864Scognet ldr r3, [r3] 1193150864Scognet cmp r3, #0 1194150864Scognet beq .Lnormal 1195150864Scognet ldr r3, .L_min_memcpy_size 1196150864Scognet ldr r3, [r3] 1197150864Scognet cmp r2, r3 1198150864Scognet blt .Lnormal 1199150864Scognet stmfd sp!, {r0-r2, r4, lr} 1200150864Scognet mov r3, #0 1201150864Scognet ldr r4, .L_arm_memcpy 1202150864Scognet mov lr, pc 1203150864Scognet ldr pc, [r4] 1204150864Scognet cmp r0, #0 1205150864Scognet ldmfd sp!, {r0-r2, r4, lr} 1206150864Scognet RETeq 1207150864Scognet.Lnormal: 1208129254Scognet mov r3, r0 /* We must not clobber r0 */ 1209129254Scognet 1210129254Scognet /* Word-align the destination buffer */ 1211129254Scognet ands ip, r3, #0x03 /* Already word aligned? */ 1212129254Scognet beq .Lmemcpy_wordaligned /* Yup */ 1213129254Scognet cmp ip, #0x02 1214129254Scognet ldrb ip, [r1], #0x01 1215129254Scognet sub r2, r2, #0x01 1216129254Scognet strb ip, [r3], #0x01 1217129254Scognet ldrleb ip, [r1], #0x01 1218129254Scognet suble r2, r2, #0x01 1219129254Scognet strleb ip, [r3], #0x01 1220129254Scognet ldrltb ip, [r1], #0x01 1221129254Scognet sublt r2, r2, #0x01 1222129254Scognet strltb ip, [r3], #0x01 1223129254Scognet 1224129254Scognet /* Destination buffer is now word aligned */ 1225129254Scognet.Lmemcpy_wordaligned: 1226129254Scognet ands ip, r1, #0x03 /* Is src also word-aligned? 
*/ 1227129254Scognet bne .Lmemcpy_bad_align /* Nope. Things just got bad */ 1228129254Scognet 1229129254Scognet /* Quad-align the destination buffer */ 1230129254Scognet tst r3, #0x07 /* Already quad aligned? */ 1231129254Scognet ldrne ip, [r1], #0x04 1232129254Scognet stmfd sp!, {r4-r9} /* Free up some registers */ 1233129254Scognet subne r2, r2, #0x04 1234129254Scognet strne ip, [r3], #0x04 1235129254Scognet 1236129254Scognet /* Destination buffer quad aligned, source is at least word aligned */ 1237129254Scognet subs r2, r2, #0x80 1238129254Scognet blt .Lmemcpy_w_lessthan128 1239129254Scognet 1240129254Scognet /* Copy 128 bytes at a time */ 1241129254Scognet.Lmemcpy_w_loop128: 1242129254Scognet ldr r4, [r1], #0x04 /* LD:00-03 */ 1243129254Scognet ldr r5, [r1], #0x04 /* LD:04-07 */ 1244129254Scognet pld [r1, #0x18] /* Prefetch 0x20 */ 1245129254Scognet ldr r6, [r1], #0x04 /* LD:08-0b */ 1246129254Scognet ldr r7, [r1], #0x04 /* LD:0c-0f */ 1247129254Scognet ldr r8, [r1], #0x04 /* LD:10-13 */ 1248129254Scognet ldr r9, [r1], #0x04 /* LD:14-17 */ 1249129254Scognet strd r4, [r3], #0x08 /* ST:00-07 */ 1250129254Scognet ldr r4, [r1], #0x04 /* LD:18-1b */ 1251129254Scognet ldr r5, [r1], #0x04 /* LD:1c-1f */ 1252129254Scognet strd r6, [r3], #0x08 /* ST:08-0f */ 1253129254Scognet ldr r6, [r1], #0x04 /* LD:20-23 */ 1254129254Scognet ldr r7, [r1], #0x04 /* LD:24-27 */ 1255129254Scognet pld [r1, #0x18] /* Prefetch 0x40 */ 1256129254Scognet strd r8, [r3], #0x08 /* ST:10-17 */ 1257129254Scognet ldr r8, [r1], #0x04 /* LD:28-2b */ 1258129254Scognet ldr r9, [r1], #0x04 /* LD:2c-2f */ 1259129254Scognet strd r4, [r3], #0x08 /* ST:18-1f */ 1260129254Scognet ldr r4, [r1], #0x04 /* LD:30-33 */ 1261129254Scognet ldr r5, [r1], #0x04 /* LD:34-37 */ 1262129254Scognet strd r6, [r3], #0x08 /* ST:20-27 */ 1263129254Scognet ldr r6, [r1], #0x04 /* LD:38-3b */ 1264129254Scognet ldr r7, [r1], #0x04 /* LD:3c-3f */ 1265129254Scognet strd r8, [r3], #0x08 /* ST:28-2f */ 1266129254Scognet ldr r8, 
[r1], #0x04 /* LD:40-43 */ 1267129254Scognet ldr r9, [r1], #0x04 /* LD:44-47 */ 1268129254Scognet pld [r1, #0x18] /* Prefetch 0x60 */ 1269129254Scognet strd r4, [r3], #0x08 /* ST:30-37 */ 1270129254Scognet ldr r4, [r1], #0x04 /* LD:48-4b */ 1271129254Scognet ldr r5, [r1], #0x04 /* LD:4c-4f */ 1272129254Scognet strd r6, [r3], #0x08 /* ST:38-3f */ 1273129254Scognet ldr r6, [r1], #0x04 /* LD:50-53 */ 1274129254Scognet ldr r7, [r1], #0x04 /* LD:54-57 */ 1275129254Scognet strd r8, [r3], #0x08 /* ST:40-47 */ 1276129254Scognet ldr r8, [r1], #0x04 /* LD:58-5b */ 1277129254Scognet ldr r9, [r1], #0x04 /* LD:5c-5f */ 1278129254Scognet strd r4, [r3], #0x08 /* ST:48-4f */ 1279129254Scognet ldr r4, [r1], #0x04 /* LD:60-63 */ 1280129254Scognet ldr r5, [r1], #0x04 /* LD:64-67 */ 1281129254Scognet pld [r1, #0x18] /* Prefetch 0x80 */ 1282129254Scognet strd r6, [r3], #0x08 /* ST:50-57 */ 1283129254Scognet ldr r6, [r1], #0x04 /* LD:68-6b */ 1284129254Scognet ldr r7, [r1], #0x04 /* LD:6c-6f */ 1285129254Scognet strd r8, [r3], #0x08 /* ST:58-5f */ 1286129254Scognet ldr r8, [r1], #0x04 /* LD:70-73 */ 1287129254Scognet ldr r9, [r1], #0x04 /* LD:74-77 */ 1288129254Scognet strd r4, [r3], #0x08 /* ST:60-67 */ 1289129254Scognet ldr r4, [r1], #0x04 /* LD:78-7b */ 1290129254Scognet ldr r5, [r1], #0x04 /* LD:7c-7f */ 1291129254Scognet strd r6, [r3], #0x08 /* ST:68-6f */ 1292129254Scognet strd r8, [r3], #0x08 /* ST:70-77 */ 1293129254Scognet subs r2, r2, #0x80 1294129254Scognet strd r4, [r3], #0x08 /* ST:78-7f */ 1295129254Scognet bge .Lmemcpy_w_loop128 1296129254Scognet 1297129254Scognet.Lmemcpy_w_lessthan128: 1298129254Scognet adds r2, r2, #0x80 /* Adjust for extra sub */ 1299129254Scognet ldmeqfd sp!, {r4-r9} 1300137463Scognet RETeq /* Return now if done */ 1301129254Scognet subs r2, r2, #0x20 1302129254Scognet blt .Lmemcpy_w_lessthan32 1303129254Scognet 1304129254Scognet /* Copy 32 bytes at a time */ 1305129254Scognet.Lmemcpy_w_loop32: 1306129254Scognet ldr r4, [r1], #0x04 1307129254Scognet 
ldr r5, [r1], #0x04 1308129254Scognet pld [r1, #0x18] 1309129254Scognet ldr r6, [r1], #0x04 1310129254Scognet ldr r7, [r1], #0x04 1311129254Scognet ldr r8, [r1], #0x04 1312129254Scognet ldr r9, [r1], #0x04 1313129254Scognet strd r4, [r3], #0x08 1314129254Scognet ldr r4, [r1], #0x04 1315129254Scognet ldr r5, [r1], #0x04 1316129254Scognet strd r6, [r3], #0x08 1317129254Scognet strd r8, [r3], #0x08 1318129254Scognet subs r2, r2, #0x20 1319129254Scognet strd r4, [r3], #0x08 1320129254Scognet bge .Lmemcpy_w_loop32 1321129254Scognet 1322129254Scognet.Lmemcpy_w_lessthan32: 1323129254Scognet adds r2, r2, #0x20 /* Adjust for extra sub */ 1324129254Scognet ldmeqfd sp!, {r4-r9} 1325137463Scognet RETeq /* Return now if done */ 1326129254Scognet 1327129254Scognet and r4, r2, #0x18 1328129254Scognet rsbs r4, r4, #0x18 1329129254Scognet addne pc, pc, r4, lsl #1 1330129254Scognet nop 1331129254Scognet 1332129254Scognet /* At least 24 bytes remaining */ 1333129254Scognet ldr r4, [r1], #0x04 1334129254Scognet ldr r5, [r1], #0x04 1335129254Scognet sub r2, r2, #0x08 1336129254Scognet strd r4, [r3], #0x08 1337129254Scognet 1338129254Scognet /* At least 16 bytes remaining */ 1339129254Scognet ldr r4, [r1], #0x04 1340129254Scognet ldr r5, [r1], #0x04 1341129254Scognet sub r2, r2, #0x08 1342129254Scognet strd r4, [r3], #0x08 1343129254Scognet 1344129254Scognet /* At least 8 bytes remaining */ 1345129254Scognet ldr r4, [r1], #0x04 1346129254Scognet ldr r5, [r1], #0x04 1347129254Scognet subs r2, r2, #0x08 1348129254Scognet strd r4, [r3], #0x08 1349129254Scognet 1350129254Scognet /* Less than 8 bytes remaining */ 1351129254Scognet ldmfd sp!, {r4-r9} 1352137463Scognet RETeq /* Return now if done */ 1353129254Scognet subs r2, r2, #0x04 1354129254Scognet ldrge ip, [r1], #0x04 1355129254Scognet strge ip, [r3], #0x04 1356137463Scognet RETeq /* Return now if done */ 1357129254Scognet addlt r2, r2, #0x04 1358129254Scognet ldrb ip, [r1], #0x01 1359129254Scognet cmp r2, #0x02 1360129254Scognet ldrgeb 
r2, [r1], #0x01 1361129254Scognet strb ip, [r3], #0x01 1362129254Scognet ldrgtb ip, [r1] 1363129254Scognet strgeb r2, [r3], #0x01 1364129254Scognet strgtb ip, [r3] 1365137463Scognet RET 1366129254Scognet 1367129254Scognet 1368129254Scognet/* 1369129254Scognet * At this point, it has not been possible to word align both buffers. 1370129254Scognet * The destination buffer is word aligned, but the source buffer is not. 1371129254Scognet */ 1372129254Scognet.Lmemcpy_bad_align: 1373129254Scognet stmfd sp!, {r4-r7} 1374129254Scognet bic r1, r1, #0x03 1375129254Scognet cmp ip, #2 1376129254Scognet ldr ip, [r1], #0x04 1377129254Scognet bgt .Lmemcpy_bad3 1378129254Scognet beq .Lmemcpy_bad2 1379129254Scognet b .Lmemcpy_bad1 1380129254Scognet 1381129254Scognet.Lmemcpy_bad1_loop16: 1382129254Scognet#ifdef __ARMEB__ 1383129254Scognet mov r4, ip, lsl #8 1384129254Scognet#else 1385129254Scognet mov r4, ip, lsr #8 1386129254Scognet#endif 1387129254Scognet ldr r5, [r1], #0x04 1388129254Scognet pld [r1, #0x018] 1389129254Scognet ldr r6, [r1], #0x04 1390129254Scognet ldr r7, [r1], #0x04 1391129254Scognet ldr ip, [r1], #0x04 1392129254Scognet#ifdef __ARMEB__ 1393129254Scognet orr r4, r4, r5, lsr #24 1394129254Scognet mov r5, r5, lsl #8 1395129254Scognet orr r5, r5, r6, lsr #24 1396129254Scognet mov r6, r6, lsl #8 1397129254Scognet orr r6, r6, r7, lsr #24 1398129254Scognet mov r7, r7, lsl #8 1399129254Scognet orr r7, r7, ip, lsr #24 1400129254Scognet#else 1401129254Scognet orr r4, r4, r5, lsl #24 1402129254Scognet mov r5, r5, lsr #8 1403129254Scognet orr r5, r5, r6, lsl #24 1404129254Scognet mov r6, r6, lsr #8 1405129254Scognet orr r6, r6, r7, lsl #24 1406129254Scognet mov r7, r7, lsr #8 1407129254Scognet orr r7, r7, ip, lsl #24 1408129254Scognet#endif 1409129254Scognet str r4, [r3], #0x04 1410129254Scognet str r5, [r3], #0x04 1411129254Scognet str r6, [r3], #0x04 1412129254Scognet str r7, [r3], #0x04 1413129254Scognet.Lmemcpy_bad1: 1414236991Simp subs r2, r2, #0x10 1415129254Scognet 
bge .Lmemcpy_bad1_loop16 1416129254Scognet 1417236991Simp adds r2, r2, #0x10 1418129254Scognet ldmeqfd sp!, {r4-r7} 1419137463Scognet RETeq /* Return now if done */ 1420129254Scognet subs r2, r2, #0x04 1421129254Scognet sublt r1, r1, #0x03 1422129254Scognet blt .Lmemcpy_bad_done 1423129254Scognet 1424129254Scognet.Lmemcpy_bad1_loop4: 1425129254Scognet#ifdef __ARMEB__ 1426129254Scognet mov r4, ip, lsl #8 1427129254Scognet#else 1428129254Scognet mov r4, ip, lsr #8 1429129254Scognet#endif 1430129254Scognet ldr ip, [r1], #0x04 1431129254Scognet subs r2, r2, #0x04 1432129254Scognet#ifdef __ARMEB__ 1433129254Scognet orr r4, r4, ip, lsr #24 1434129254Scognet#else 1435129254Scognet orr r4, r4, ip, lsl #24 1436129254Scognet#endif 1437129254Scognet str r4, [r3], #0x04 1438129254Scognet bge .Lmemcpy_bad1_loop4 1439129254Scognet sub r1, r1, #0x03 1440129254Scognet b .Lmemcpy_bad_done 1441129254Scognet 1442129254Scognet.Lmemcpy_bad2_loop16: 1443129254Scognet#ifdef __ARMEB__ 1444129254Scognet mov r4, ip, lsl #16 1445129254Scognet#else 1446129254Scognet mov r4, ip, lsr #16 1447129254Scognet#endif 1448129254Scognet ldr r5, [r1], #0x04 1449129254Scognet pld [r1, #0x018] 1450129254Scognet ldr r6, [r1], #0x04 1451129254Scognet ldr r7, [r1], #0x04 1452129254Scognet ldr ip, [r1], #0x04 1453129254Scognet#ifdef __ARMEB__ 1454129254Scognet orr r4, r4, r5, lsr #16 1455129254Scognet mov r5, r5, lsl #16 1456129254Scognet orr r5, r5, r6, lsr #16 1457129254Scognet mov r6, r6, lsl #16 1458129254Scognet orr r6, r6, r7, lsr #16 1459129254Scognet mov r7, r7, lsl #16 1460129254Scognet orr r7, r7, ip, lsr #16 1461129254Scognet#else 1462129254Scognet orr r4, r4, r5, lsl #16 1463129254Scognet mov r5, r5, lsr #16 1464129254Scognet orr r5, r5, r6, lsl #16 1465129254Scognet mov r6, r6, lsr #16 1466129254Scognet orr r6, r6, r7, lsl #16 1467129254Scognet mov r7, r7, lsr #16 1468129254Scognet orr r7, r7, ip, lsl #16 1469129254Scognet#endif 1470129254Scognet str r4, [r3], #0x04 1471129254Scognet str r5, 
[r3], #0x04 1472129254Scognet str r6, [r3], #0x04 1473129254Scognet str r7, [r3], #0x04 1474129254Scognet.Lmemcpy_bad2: 1475236991Simp subs r2, r2, #0x10 1476129254Scognet bge .Lmemcpy_bad2_loop16 1477129254Scognet 1478236991Simp adds r2, r2, #0x10 1479129254Scognet ldmeqfd sp!, {r4-r7} 1480137463Scognet RETeq /* Return now if done */ 1481129254Scognet subs r2, r2, #0x04 1482129254Scognet sublt r1, r1, #0x02 1483129254Scognet blt .Lmemcpy_bad_done 1484129254Scognet 1485129254Scognet.Lmemcpy_bad2_loop4: 1486129254Scognet#ifdef __ARMEB__ 1487129254Scognet mov r4, ip, lsl #16 1488129254Scognet#else 1489129254Scognet mov r4, ip, lsr #16 1490129254Scognet#endif 1491129254Scognet ldr ip, [r1], #0x04 1492129254Scognet subs r2, r2, #0x04 1493129254Scognet#ifdef __ARMEB__ 1494129254Scognet orr r4, r4, ip, lsr #16 1495129254Scognet#else 1496129254Scognet orr r4, r4, ip, lsl #16 1497129254Scognet#endif 1498129254Scognet str r4, [r3], #0x04 1499129254Scognet bge .Lmemcpy_bad2_loop4 1500129254Scognet sub r1, r1, #0x02 1501129254Scognet b .Lmemcpy_bad_done 1502129254Scognet 1503129254Scognet.Lmemcpy_bad3_loop16: 1504129254Scognet#ifdef __ARMEB__ 1505129254Scognet mov r4, ip, lsl #24 1506129254Scognet#else 1507129254Scognet mov r4, ip, lsr #24 1508129254Scognet#endif 1509129254Scognet ldr r5, [r1], #0x04 1510129254Scognet pld [r1, #0x018] 1511129254Scognet ldr r6, [r1], #0x04 1512129254Scognet ldr r7, [r1], #0x04 1513129254Scognet ldr ip, [r1], #0x04 1514129254Scognet#ifdef __ARMEB__ 1515129254Scognet orr r4, r4, r5, lsr #8 1516129254Scognet mov r5, r5, lsl #24 1517129254Scognet orr r5, r5, r6, lsr #8 1518129254Scognet mov r6, r6, lsl #24 1519129254Scognet orr r6, r6, r7, lsr #8 1520129254Scognet mov r7, r7, lsl #24 1521129254Scognet orr r7, r7, ip, lsr #8 1522129254Scognet#else 1523129254Scognet orr r4, r4, r5, lsl #8 1524129254Scognet mov r5, r5, lsr #24 1525129254Scognet orr r5, r5, r6, lsl #8 1526129254Scognet mov r6, r6, lsr #24 1527129254Scognet orr r6, r6, r7, lsl #8 
1528129254Scognet mov r7, r7, lsr #24 1529129254Scognet orr r7, r7, ip, lsl #8 1530129254Scognet#endif 1531129254Scognet str r4, [r3], #0x04 1532129254Scognet str r5, [r3], #0x04 1533129254Scognet str r6, [r3], #0x04 1534129254Scognet str r7, [r3], #0x04 1535129254Scognet.Lmemcpy_bad3: 1536236991Simp subs r2, r2, #0x10 1537129254Scognet bge .Lmemcpy_bad3_loop16 1538129254Scognet 1539236991Simp adds r2, r2, #0x10 1540129254Scognet ldmeqfd sp!, {r4-r7} 1541137463Scognet RETeq /* Return now if done */ 1542129254Scognet subs r2, r2, #0x04 1543129254Scognet sublt r1, r1, #0x01 1544129254Scognet blt .Lmemcpy_bad_done 1545129254Scognet 1546129254Scognet.Lmemcpy_bad3_loop4: 1547129254Scognet#ifdef __ARMEB__ 1548129254Scognet mov r4, ip, lsl #24 1549129254Scognet#else 1550129254Scognet mov r4, ip, lsr #24 1551129254Scognet#endif 1552129254Scognet ldr ip, [r1], #0x04 1553129254Scognet subs r2, r2, #0x04 1554129254Scognet#ifdef __ARMEB__ 1555129254Scognet orr r4, r4, ip, lsr #8 1556129254Scognet#else 1557129254Scognet orr r4, r4, ip, lsl #8 1558129254Scognet#endif 1559129254Scognet str r4, [r3], #0x04 1560129254Scognet bge .Lmemcpy_bad3_loop4 1561129254Scognet sub r1, r1, #0x01 1562129254Scognet 1563129254Scognet.Lmemcpy_bad_done: 1564129254Scognet ldmfd sp!, {r4-r7} 1565129254Scognet adds r2, r2, #0x04 1566137463Scognet RETeq 1567129254Scognet ldrb ip, [r1], #0x01 1568129254Scognet cmp r2, #0x02 1569129254Scognet ldrgeb r2, [r1], #0x01 1570129254Scognet strb ip, [r3], #0x01 1571129254Scognet ldrgtb ip, [r1] 1572129254Scognet strgeb r2, [r3], #0x01 1573129254Scognet strgtb ip, [r3] 1574137463Scognet RET 1575129254Scognet 1576129254Scognet 1577129254Scognet/* 1578129254Scognet * Handle short copies (less than 16 bytes), possibly misaligned. 1579129254Scognet * Some of these are *very* common, thanks to the network stack, 1580129254Scognet * and so are handled specially. 
1581129254Scognet */ 1582129254Scognet.Lmemcpy_short: 1583129254Scognet add pc, pc, r2, lsl #2 1584129254Scognet nop 1585137463Scognet RET /* 0x00 */ 1586129254Scognet b .Lmemcpy_bytewise /* 0x01 */ 1587129254Scognet b .Lmemcpy_bytewise /* 0x02 */ 1588129254Scognet b .Lmemcpy_bytewise /* 0x03 */ 1589129254Scognet b .Lmemcpy_4 /* 0x04 */ 1590129254Scognet b .Lmemcpy_bytewise /* 0x05 */ 1591129254Scognet b .Lmemcpy_6 /* 0x06 */ 1592129254Scognet b .Lmemcpy_bytewise /* 0x07 */ 1593129254Scognet b .Lmemcpy_8 /* 0x08 */ 1594129254Scognet b .Lmemcpy_bytewise /* 0x09 */ 1595129254Scognet b .Lmemcpy_bytewise /* 0x0a */ 1596129254Scognet b .Lmemcpy_bytewise /* 0x0b */ 1597129254Scognet b .Lmemcpy_c /* 0x0c */ 1598129254Scognet.Lmemcpy_bytewise: 1599129254Scognet mov r3, r0 /* We must not clobber r0 */ 1600129254Scognet ldrb ip, [r1], #0x01 1601129254Scognet1: subs r2, r2, #0x01 1602129254Scognet strb ip, [r3], #0x01 1603129254Scognet ldrneb ip, [r1], #0x01 1604129254Scognet bne 1b 1605137463Scognet RET 1606129254Scognet 1607129254Scognet/****************************************************************************** 1608129254Scognet * Special case for 4 byte copies 1609129254Scognet */ 1610129254Scognet#define LMEMCPY_4_LOG2 6 /* 64 bytes */ 1611129254Scognet#define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2 1612129254Scognet LMEMCPY_4_PAD 1613129254Scognet.Lmemcpy_4: 1614129254Scognet and r2, r1, #0x03 1615129254Scognet orr r2, r2, r0, lsl #2 1616129254Scognet ands r2, r2, #0x0f 1617129254Scognet sub r3, pc, #0x14 1618129254Scognet addne pc, r3, r2, lsl #LMEMCPY_4_LOG2 1619129254Scognet 1620129254Scognet/* 1621129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 1622129254Scognet */ 1623129254Scognet ldr r2, [r1] 1624129254Scognet str r2, [r0] 1625137463Scognet RET 1626129254Scognet LMEMCPY_4_PAD 1627129254Scognet 1628129254Scognet/* 1629129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 1630129254Scognet */ 1631129254Scognet ldr r3, [r1, #-1] /* 
BE:r3 = x012 LE:r3 = 210x */ 1632129254Scognet ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */ 1633129254Scognet#ifdef __ARMEB__ 1634129254Scognet mov r3, r3, lsl #8 /* r3 = 012. */ 1635129254Scognet orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 1636129254Scognet#else 1637129254Scognet mov r3, r3, lsr #8 /* r3 = .210 */ 1638129254Scognet orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 1639129254Scognet#endif 1640129254Scognet str r3, [r0] 1641137463Scognet RET 1642129254Scognet LMEMCPY_4_PAD 1643129254Scognet 1644129254Scognet/* 1645129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 1646129254Scognet */ 1647129254Scognet#ifdef __ARMEB__ 1648129254Scognet ldrh r3, [r1] 1649129254Scognet ldrh r2, [r1, #0x02] 1650129254Scognet#else 1651129254Scognet ldrh r3, [r1, #0x02] 1652129254Scognet ldrh r2, [r1] 1653129254Scognet#endif 1654129254Scognet orr r3, r2, r3, lsl #16 1655129254Scognet str r3, [r0] 1656137463Scognet RET 1657129254Scognet LMEMCPY_4_PAD 1658129254Scognet 1659129254Scognet/* 1660129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 1661129254Scognet */ 1662129254Scognet ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */ 1663129254Scognet ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */ 1664129254Scognet#ifdef __ARMEB__ 1665129254Scognet mov r3, r3, lsl #24 /* r3 = 0... 
*/ 1666129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 1667129254Scognet#else 1668129254Scognet mov r3, r3, lsr #24 /* r3 = ...0 */ 1669129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 1670129254Scognet#endif 1671129254Scognet str r3, [r0] 1672137463Scognet RET 1673129254Scognet LMEMCPY_4_PAD 1674129254Scognet 1675129254Scognet/* 1676129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned * (dst mod 4 is 1: the word is stored as byte, halfword, byte) 1677129254Scognet */ 1678129254Scognet ldr r2, [r1] 1679129254Scognet#ifdef __ARMEB__ 1680129254Scognet strb r2, [r0, #0x03] 1681129254Scognet mov r3, r2, lsr #8 1682129254Scognet mov r1, r2, lsr #24 1683129254Scognet strb r1, [r0] 1684129254Scognet#else 1685129254Scognet strb r2, [r0] 1686129254Scognet mov r3, r2, lsr #8 1687129254Scognet mov r1, r2, lsr #24 1688129254Scognet strb r1, [r0, #0x03] 1689129254Scognet#endif 1690129254Scognet strh r3, [r0, #0x01] 1691137463Scognet RET 1692129254Scognet LMEMCPY_4_PAD 1693129254Scognet 1694129254Scognet/* 1695129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned 1696129254Scognet */ 1697129254Scognet ldrb r2, [r1] 1698129254Scognet ldrh r3, [r1, #0x01] 1699129254Scognet ldrb r1, [r1, #0x03] 1700129254Scognet strb r2, [r0] 1701129254Scognet strh r3, [r0, #0x01] 1702129254Scognet strb r1, [r0, #0x03] 1703137463Scognet RET 1704129254Scognet LMEMCPY_4_PAD 1705129254Scognet 1706129254Scognet/* 1707129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned 1708129254Scognet */ 1709129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1710129254Scognet ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1711129254Scognet#ifdef __ARMEB__ 1712129254Scognet mov r1, r2, lsr #8 /* r1 = ...0 */ 1713129254Scognet strb r1, [r0] 1714129254Scognet mov r2, r2, lsl #8 /* r2 = .01. 
*/ 1715129254Scognet orr r2, r2, r3, lsr #8 /* r2 = .012 */ 1716129254Scognet#else 1717129254Scognet strb r2, [r0] 1718129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 1719129254Scognet orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1720129254Scognet mov r3, r3, lsr #8 /* r3 = ...3 */ 1721129254Scognet#endif 1722129254Scognet strh r2, [r0, #0x01] 1723129254Scognet strb r3, [r0, #0x03] 1724137463Scognet RET 1725129254Scognet LMEMCPY_4_PAD 1726129254Scognet 1727129254Scognet/* 1728129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned 1729129254Scognet */ 1730129254Scognet ldrb r2, [r1] 1731129254Scognet ldrh r3, [r1, #0x01] 1732129254Scognet ldrb r1, [r1, #0x03] 1733129254Scognet strb r2, [r0] 1734129254Scognet strh r3, [r0, #0x01] 1735129254Scognet strb r1, [r0, #0x03] 1736137463Scognet RET 1737129254Scognet LMEMCPY_4_PAD 1738129254Scognet 1739129254Scognet/* 1740129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 1741129254Scognet */ 1742129254Scognet ldr r2, [r1] 1743129254Scognet#ifdef __ARMEB__ 1744129254Scognet strh r2, [r0, #0x02] 1745129254Scognet mov r3, r2, lsr #16 1746129254Scognet strh r3, [r0] 1747129254Scognet#else 1748129254Scognet strh r2, [r0] 1749129254Scognet mov r3, r2, lsr #16 1750129254Scognet strh r3, [r0, #0x02] 1751129254Scognet#endif 1752137463Scognet RET 1753129254Scognet LMEMCPY_4_PAD 1754129254Scognet 1755129254Scognet/* 1756129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned 1757129254Scognet */ 1758129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1759129254Scognet ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */ 1760129254Scognet mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 1761129254Scognet strh r1, [r0] 1762129254Scognet#ifdef __ARMEB__ 1763129254Scognet mov r2, r2, lsl #8 /* r2 = 012. 
*/ 1764129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1765129254Scognet#else 1766129254Scognet mov r2, r2, lsr #24 /* r2 = ...2 */ 1767129254Scognet orr r2, r2, r3, lsl #8 /* r2 = xx32 */ 1768129254Scognet#endif 1769129254Scognet strh r2, [r0, #0x02] 1770137463Scognet RET 1771129254Scognet LMEMCPY_4_PAD 1772129254Scognet 1773129254Scognet/* 1774129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned 1775129254Scognet */ 1776129254Scognet ldrh r2, [r1] 1777129254Scognet ldrh r3, [r1, #0x02] 1778129254Scognet strh r2, [r0] 1779129254Scognet strh r3, [r0, #0x02] 1780137463Scognet RET 1781129254Scognet LMEMCPY_4_PAD 1782129254Scognet 1783129254Scognet/* 1784129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned 1785129254Scognet */ 1786129254Scognet ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */ 1787129254Scognet ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1788129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */ 1789129254Scognet strh r1, [r0, #0x02] 1790129254Scognet#ifdef __ARMEB__ 1791129254Scognet mov r3, r3, lsr #24 /* r3 = ...1 */ 1792129254Scognet orr r3, r3, r2, lsl #8 /* r3 = xx01 */ 1793129254Scognet#else 1794129254Scognet mov r3, r3, lsl #8 /* r3 = 321. 
*/ 1795129254Scognet orr r3, r3, r2, lsr #24 /* r3 = 3210 */ 1796129254Scognet#endif 1797129254Scognet strh r3, [r0] 1798137463Scognet RET 1799129254Scognet LMEMCPY_4_PAD 1800129254Scognet 1801129254Scognet/* 1802129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned 1803129254Scognet */ 1804129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 1805129254Scognet#ifdef __ARMEB__ 1806129254Scognet strb r2, [r0, #0x03] 1807129254Scognet mov r3, r2, lsr #8 1808129254Scognet mov r1, r2, lsr #24 1809129254Scognet strh r3, [r0, #0x01] 1810129254Scognet strb r1, [r0] 1811129254Scognet#else 1812129254Scognet strb r2, [r0] 1813129254Scognet mov r3, r2, lsr #8 1814129254Scognet mov r1, r2, lsr #24 1815129254Scognet strh r3, [r0, #0x01] 1816129254Scognet strb r1, [r0, #0x03] 1817129254Scognet#endif 1818137463Scognet RET 1819129254Scognet LMEMCPY_4_PAD 1820129254Scognet 1821129254Scognet/* 1822129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned 1823129254Scognet */ 1824129254Scognet ldrb r2, [r1] 1825129254Scognet ldrh r3, [r1, #0x01] 1826129254Scognet ldrb r1, [r1, #0x03] 1827129254Scognet strb r2, [r0] 1828129254Scognet strh r3, [r0, #0x01] 1829129254Scognet strb r1, [r0, #0x03] 1830137463Scognet RET 1831129254Scognet LMEMCPY_4_PAD 1832129254Scognet 1833129254Scognet/* 1834129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned 1835129254Scognet */ 1836129254Scognet#ifdef __ARMEB__ 1837129254Scognet ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 1838129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1839129254Scognet strb r3, [r0, #0x03] 1840129254Scognet mov r3, r3, lsr #8 /* r3 = ...2 */ 1841129254Scognet orr r3, r3, r2, lsl #8 /* r3 = ..12 */ 1842129254Scognet strh r3, [r0, #0x01] 1843129254Scognet mov r2, r2, lsr #8 /* r2 = ...0 */ 1844129254Scognet strb r2, [r0] 1845129254Scognet#else 1846129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1847129254Scognet ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */ 
1848129254Scognet strb r2, [r0] 1849129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 1850129254Scognet orr r2, r2, r3, lsl #8 /* r2 = .321 */ 1851129254Scognet strh r2, [r0, #0x01] 1852129254Scognet mov r3, r3, lsr #8 /* r3 = ...3 */ 1853129254Scognet strb r3, [r0, #0x03] 1854129254Scognet#endif 1855137463Scognet RET 1856129254Scognet LMEMCPY_4_PAD 1857129254Scognet 1858129254Scognet/* 1859129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned 1860129254Scognet */ 1861129254Scognet ldrb r2, [r1] 1862129254Scognet ldrh r3, [r1, #0x01] 1863129254Scognet ldrb r1, [r1, #0x03] 1864129254Scognet strb r2, [r0] 1865129254Scognet strh r3, [r0, #0x01] 1866129254Scognet strb r1, [r0, #0x03] 1867137463Scognet RET 1868129254Scognet LMEMCPY_4_PAD 1869129254Scognet 1870129254Scognet 1871129254Scognet/****************************************************************************** 1872129254Scognet * Special case for 6 byte copies * * Dispatch: r2 = 4 * (dst mod 4) + (src mod 4), with dst in r0 and src in r1. * r3 = pc - 0x14 is the address of .Lmemcpy_6, because pc reads 8 bytes ahead * of the sub, which sits 0x0c bytes after the label. Every alignment case * below is padded to 64 bytes (1 shifted left by LMEMCPY_6_LOG2), so case n * starts at .Lmemcpy_6 + n * 64. Case 0000 is reached by falling through: * the addne is skipped when the index is zero. 1873129254Scognet */ 1874129254Scognet#define LMEMCPY_6_LOG2 6 /* 64 bytes per alignment case */ 1875129254Scognet#define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2 1876129254Scognet LMEMCPY_6_PAD 1877129254Scognet.Lmemcpy_6: 1878129254Scognet and r2, r1, #0x03 1879129254Scognet orr r2, r2, r0, lsl #2 1880129254Scognet ands r2, r2, #0x0f 1881129254Scognet sub r3, pc, #0x14 1882129254Scognet addne pc, r3, r2, lsl #LMEMCPY_6_LOG2 1883129254Scognet 1884129254Scognet/* 1885129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 1886129254Scognet */ 1887129254Scognet ldr r2, [r1] 1888129254Scognet ldrh r3, [r1, #0x04] 1889129254Scognet str r2, [r0] 1890129254Scognet strh r3, [r0, #0x04] 1891137463Scognet RET 1892129254Scognet LMEMCPY_6_PAD 1893129254Scognet 1894129254Scognet/* 1895129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 1896129254Scognet */ 1897129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 1898129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */ 1899129254Scognet#ifdef __ARMEB__ 1900129254Scognet mov r2, r2, 
lsl #8 /* r2 = 012. */ 1901129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 0123 */ 1902129254Scognet#else 1903129254Scognet mov r2, r2, lsr #8 /* r2 = .210 */ 1904129254Scognet orr r2, r2, r3, lsl #24 /* r2 = 3210 */ 1905129254Scognet#endif 1906129254Scognet mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */ 1907129254Scognet str r2, [r0] 1908129254Scognet strh r3, [r0, #0x04] 1909137463Scognet RET 1910129254Scognet LMEMCPY_6_PAD 1911129254Scognet 1912129254Scognet/* 1913129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 1914129254Scognet */ 1915129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 1916129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1917129254Scognet#ifdef __ARMEB__ 1918129254Scognet mov r1, r3, lsr #16 /* r1 = ..23 */ 1919129254Scognet orr r1, r1, r2, lsl #16 /* r1 = 0123 */ 1920129254Scognet str r1, [r0] 1921129254Scognet strh r3, [r0, #0x04] 1922129254Scognet#else 1923129254Scognet mov r1, r3, lsr #16 /* r1 = ..54 */ 1924129254Scognet orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 1925129254Scognet str r2, [r0] 1926129254Scognet strh r1, [r0, #0x04] 1927129254Scognet#endif 1928137463Scognet RET 1929129254Scognet LMEMCPY_6_PAD 1930129254Scognet 1931129254Scognet/* 1932129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 1933129254Scognet */ 1934129254Scognet ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */ 1935129254Scognet ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */ 1936129254Scognet ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */ 1937129254Scognet#ifdef __ARMEB__ 1938129254Scognet mov r2, r2, lsl #24 /* r2 = 0... */ 1939129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 1940129254Scognet mov r3, r3, lsl #8 /* r3 = 234. */ 1941129254Scognet orr r1, r3, r1, lsr #24 /* r1 = 2345 */ 1942129254Scognet#else 1943129254Scognet mov r2, r2, lsr #24 /* r2 = ...0 */ 1944129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 1945129254Scognet mov r1, r1, lsl #8 /* r1 = xx5. 
*/ 1946129254Scognet orr r1, r1, r3, lsr #24 /* r1 = xx54 */ 1947129254Scognet#endif 1948129254Scognet str r2, [r0] 1949129254Scognet strh r1, [r0, #0x04] 1950137463Scognet RET 1951129254Scognet LMEMCPY_6_PAD 1952129254Scognet 1953129254Scognet/* 1954129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned 1955129254Scognet */ 1956129254Scognet ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 1957129254Scognet ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */ 1958129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 1959129254Scognet strh r1, [r0, #0x01] 1960129254Scognet#ifdef __ARMEB__ 1961129254Scognet mov r1, r3, lsr #24 /* r1 = ...0 */ 1962129254Scognet strb r1, [r0] 1963129254Scognet mov r3, r3, lsl #8 /* r3 = 123. */ 1964129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 1234 */ 1965129254Scognet#else 1966129254Scognet strb r3, [r0] 1967129254Scognet mov r3, r3, lsr #24 /* r3 = ...3 */ 1968129254Scognet orr r3, r3, r2, lsl #8 /* r3 = .543 */ 1969129254Scognet mov r2, r2, lsr #8 /* r2 = ...5 */ 1970129254Scognet#endif 1971129254Scognet strh r3, [r0, #0x03] 1972129254Scognet strb r2, [r0, #0x05] 1973137463Scognet RET 1974129254Scognet LMEMCPY_6_PAD 1975129254Scognet 1976129254Scognet/* 1977129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned 1978129254Scognet */ 1979129254Scognet ldrb r2, [r1] 1980129254Scognet ldrh r3, [r1, #0x01] 1981129254Scognet ldrh ip, [r1, #0x03] 1982129254Scognet ldrb r1, [r1, #0x05] 1983129254Scognet strb r2, [r0] 1984129254Scognet strh r3, [r0, #0x01] 1985129254Scognet strh ip, [r0, #0x03] 1986129254Scognet strb r1, [r0, #0x05] 1987137463Scognet RET 1988129254Scognet LMEMCPY_6_PAD 1989129254Scognet 1990129254Scognet/* 1991129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned 1992129254Scognet */ 1993129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 1994129254Scognet ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 1995129254Scognet#ifdef __ARMEB__ 1996129254Scognet mov r3, r2, lsr #8 
/* r3 = ...0 */ 1997129254Scognet strb r3, [r0] 1998129254Scognet strb r1, [r0, #0x05] 1999129254Scognet mov r3, r1, lsr #8 /* r3 = .234 */ 2000129254Scognet strh r3, [r0, #0x03] 2001129254Scognet mov r3, r2, lsl #8 /* r3 = .01. */ 2002129254Scognet orr r3, r3, r1, lsr #24 /* r3 = .012 */ 2003129254Scognet strh r3, [r0, #0x01] 2004129254Scognet#else 2005129254Scognet strb r2, [r0] 2006129254Scognet mov r3, r1, lsr #24 2007129254Scognet strb r3, [r0, #0x05] 2008129254Scognet mov r3, r1, lsr #8 /* r3 = .543 */ 2009129254Scognet strh r3, [r0, #0x03] 2010129254Scognet mov r3, r2, lsr #8 /* r3 = ...1 */ 2011129254Scognet orr r3, r3, r1, lsl #8 /* r3 = 4321 */ 2012129254Scognet strh r3, [r0, #0x01] 2013129254Scognet#endif 2014137463Scognet RET 2015129254Scognet LMEMCPY_6_PAD 2016129254Scognet 2017129254Scognet/* 2018129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned 2019129254Scognet */ 2020129254Scognet ldrb r2, [r1] 2021129254Scognet ldrh r3, [r1, #0x01] 2022129254Scognet ldrh ip, [r1, #0x03] 2023129254Scognet ldrb r1, [r1, #0x05] 2024129254Scognet strb r2, [r0] 2025129254Scognet strh r3, [r0, #0x01] 2026129254Scognet strh ip, [r0, #0x03] 2027129254Scognet strb r1, [r0, #0x05] 2028137463Scognet RET 2029129254Scognet LMEMCPY_6_PAD 2030129254Scognet 2031129254Scognet/* 2032129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 2033129254Scognet */ 2034129254Scognet#ifdef __ARMEB__ 2035129254Scognet ldr r2, [r1] /* r2 = 0123 */ 2036129254Scognet ldrh r3, [r1, #0x04] /* r3 = ..45 */ 2037129254Scognet mov r1, r2, lsr #16 /* r1 = ..01 */ 2038129254Scognet orr r3, r3, r2, lsl#16 /* r3 = 2345 */ 2039129254Scognet strh r1, [r0] 2040129254Scognet str r3, [r0, #0x02] 2041129254Scognet#else 2042129254Scognet ldrh r2, [r1, #0x04] /* r2 = ..54 */ 2043129254Scognet ldr r3, [r1] /* r3 = 3210 */ 2044129254Scognet mov r2, r2, lsl #16 /* r2 = 54.. 
*/ 2045129254Scognet orr r2, r2, r3, lsr #16 /* r2 = 5432 */ 2046129254Scognet strh r3, [r0] 2047129254Scognet str r2, [r0, #0x02] 2048129254Scognet#endif 2049137463Scognet RET 2050129254Scognet LMEMCPY_6_PAD 2051129254Scognet 2052129254Scognet/* 2053129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned 2054129254Scognet */ 2055129254Scognet ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2056129254Scognet ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */ 2057129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2058129254Scognet#ifdef __ARMEB__ 2059129254Scognet mov r2, r2, lsr #8 /* r2 = .345 */ 2060129254Scognet orr r2, r2, r3, lsl #24 /* r2 = 2345 */ 2061129254Scognet#else 2062129254Scognet mov r2, r2, lsl #8 /* r2 = 543. */ 2063129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 5432 */ 2064129254Scognet#endif 2065129254Scognet strh r1, [r0] 2066129254Scognet str r2, [r0, #0x02] 2067137463Scognet RET 2068129254Scognet LMEMCPY_6_PAD 2069129254Scognet 2070129254Scognet/* 2071129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned 2072129254Scognet */ 2073129254Scognet ldrh r2, [r1] 2074129254Scognet ldr r3, [r1, #0x02] 2075129254Scognet strh r2, [r0] 2076129254Scognet str r3, [r0, #0x02] 2077137463Scognet RET 2078129254Scognet LMEMCPY_6_PAD 2079129254Scognet 2080129254Scognet/* 2081129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned 2082129254Scognet */ 2083129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2084129254Scognet ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2085129254Scognet ldrb r1, [r1, #0x05] /* r1 = ...5 */ 2086129254Scognet#ifdef __ARMEB__ 2087129254Scognet mov r3, r3, lsl #8 /* r3 = ..0. */ 2088129254Scognet orr r3, r3, r2, lsr #24 /* r3 = ..01 */ 2089129254Scognet orr r1, r1, r2, lsl #8 /* r1 = 2345 */ 2090129254Scognet#else 2091129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2092129254Scognet mov r1, r1, lsl #24 /* r1 = 5... 
*/ 2093129254Scognet orr r1, r1, r2, lsr #8 /* r1 = 5432 */ 2094129254Scognet#endif 2095129254Scognet strh r3, [r0] 2096129254Scognet str r1, [r0, #0x02] 2097137463Scognet RET 2098129254Scognet LMEMCPY_6_PAD 2099129254Scognet 2100129254Scognet/* 2101129254Scognet * 1100: dst is 8-bit aligned, src is 32-bit aligned 2102129254Scognet */ 2103129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2104129254Scognet ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */ 2105129254Scognet#ifdef __ARMEB__ 2106129254Scognet mov r3, r2, lsr #24 /* r3 = ...0 */ 2107129254Scognet strb r3, [r0] 2108129254Scognet mov r2, r2, lsl #8 /* r2 = 123. */ 2109129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2110129254Scognet#else 2111129254Scognet strb r2, [r0] 2112129254Scognet mov r2, r2, lsr #8 /* r2 = .321 */ 2113129254Scognet orr r2, r2, r1, lsl #24 /* r2 = 4321 */ 2114129254Scognet mov r1, r1, lsr #8 /* r1 = ...5 */ 2115129254Scognet#endif 2116129254Scognet str r2, [r0, #0x01] 2117129254Scognet strb r1, [r0, #0x05] 2118137463Scognet RET 2119129254Scognet LMEMCPY_6_PAD 2120129254Scognet 2121129254Scognet/* 2122129254Scognet * 1101: dst is 8-bit aligned, src is 8-bit aligned 2123129254Scognet */ 2124129254Scognet ldrb r2, [r1] 2125129254Scognet ldrh r3, [r1, #0x01] 2126129254Scognet ldrh ip, [r1, #0x03] 2127129254Scognet ldrb r1, [r1, #0x05] 2128129254Scognet strb r2, [r0] 2129129254Scognet strh r3, [r0, #0x01] 2130129254Scognet strh ip, [r0, #0x03] 2131129254Scognet strb r1, [r0, #0x05] 2132137463Scognet RET 2133129254Scognet LMEMCPY_6_PAD 2134129254Scognet 2135129254Scognet/* 2136129254Scognet * 1110: dst is 8-bit aligned, src is 16-bit aligned 2137129254Scognet */ 2138129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2139129254Scognet ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */ 2140129254Scognet#ifdef __ARMEB__ 2141129254Scognet mov r3, r2, lsr #8 /* r3 = ...0 */ 2142129254Scognet strb r3, [r0] 2143129254Scognet mov r2, r2, lsl #24 /* r2 = 1... 
*/ 2144129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 1234 */ 2145129254Scognet#else 2146129254Scognet strb r2, [r0] 2147129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 2148129254Scognet orr r2, r2, r1, lsl #8 /* r2 = 4321 */ 2149129254Scognet mov r1, r1, lsr #24 /* r1 = ...5 */ 2150129254Scognet#endif 2151129254Scognet str r2, [r0, #0x01] 2152129254Scognet strb r1, [r0, #0x05] 2153137463Scognet RET 2154129254Scognet LMEMCPY_6_PAD 2155129254Scognet 2156129254Scognet/* 2157129254Scognet * 1111: dst is 8-bit aligned, src is 8-bit aligned 2158129254Scognet */ 2159129254Scognet ldrb r2, [r1] 2160129254Scognet ldr r3, [r1, #0x01] 2161129254Scognet ldrb r1, [r1, #0x05] 2162129254Scognet strb r2, [r0] 2163129254Scognet str r3, [r0, #0x01] 2164129254Scognet strb r1, [r0, #0x05] 2165137463Scognet RET 2166129254Scognet LMEMCPY_6_PAD 2167129254Scognet 2168129254Scognet 2169129254Scognet/****************************************************************************** 2170129254Scognet * Special case for 8 byte copies * * Dispatch: r2 = 4 * (dst mod 4) + (src mod 4), with dst in r0 and src in r1. * r3 = pc - 0x14 is the address of .Lmemcpy_8, because pc reads 8 bytes ahead * of the sub, which sits 0x0c bytes after the label. Every alignment case * below is padded to 64 bytes (1 shifted left by LMEMCPY_8_LOG2), so case n * starts at .Lmemcpy_8 + n * 64. Case 0000 is reached by falling through: * the addne is skipped when the index is zero. 2171129254Scognet */ 2172129254Scognet#define LMEMCPY_8_LOG2 6 /* 64 bytes per alignment case */ 2173129254Scognet#define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2 2174129254Scognet LMEMCPY_8_PAD 2175129254Scognet.Lmemcpy_8: 2176129254Scognet and r2, r1, #0x03 2177129254Scognet orr r2, r2, r0, lsl #2 2178129254Scognet ands r2, r2, #0x0f 2179129254Scognet sub r3, pc, #0x14 2180129254Scognet addne pc, r3, r2, lsl #LMEMCPY_8_LOG2 2181129254Scognet 2182129254Scognet/* 2183129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 2184129254Scognet */ 2185129254Scognet ldr r2, [r1] 2186129254Scognet ldr r3, [r1, #0x04] 2187129254Scognet str r2, [r0] 2188129254Scognet str r3, [r0, #0x04] 2189137463Scognet RET 2190129254Scognet LMEMCPY_8_PAD 2191129254Scognet 2192129254Scognet/* 2193129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned 2194129254Scognet */ 2195129254Scognet ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */ 2196129254Scognet ldr r2, [r1, #0x03] /* BE:r2 = 3456 
LE:r2 = 6543 */ 2197129254Scognet ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2198129254Scognet#ifdef __ARMEB__ 2199129254Scognet mov r3, r3, lsl #8 /* r3 = 012. */ 2200129254Scognet orr r3, r3, r2, lsr #24 /* r3 = 0123 */ 2201129254Scognet orr r2, r1, r2, lsl #8 /* r2 = 4567 */ 2202129254Scognet#else 2203129254Scognet mov r3, r3, lsr #8 /* r3 = .210 */ 2204129254Scognet orr r3, r3, r2, lsl #24 /* r3 = 3210 */ 2205129254Scognet mov r1, r1, lsl #24 /* r1 = 7... */ 2206129254Scognet orr r2, r1, r2, lsr #8 /* r2 = 7654 */ 2207129254Scognet#endif 2208129254Scognet str r3, [r0] 2209129254Scognet str r2, [r0, #0x04] 2210137463Scognet RET 2211129254Scognet LMEMCPY_8_PAD 2212129254Scognet 2213129254Scognet/* 2214129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 2215129254Scognet */ 2216129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2217129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2218129254Scognet ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2219129254Scognet#ifdef __ARMEB__ 2220129254Scognet mov r2, r2, lsl #16 /* r2 = 01.. */ 2221129254Scognet orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2222129254Scognet orr r3, r1, r3, lsl #16 /* r3 = 4567 */ 2223129254Scognet#else 2224129254Scognet orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2225129254Scognet mov r3, r3, lsr #16 /* r3 = ..54 */ 2226129254Scognet orr r3, r3, r1, lsl #16 /* r3 = 7654 */ 2227129254Scognet#endif 2228129254Scognet str r2, [r0] 2229129254Scognet str r3, [r0, #0x04] 2230137463Scognet RET 2231129254Scognet LMEMCPY_8_PAD 2232129254Scognet 2233129254Scognet/* 2234129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned 2235129254Scognet */ 2236129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2237129254Scognet ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */ 2238129254Scognet ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */ 2239129254Scognet#ifdef __ARMEB__ 2240129254Scognet mov r3, r3, lsl #24 /* r3 = 0... 
*/ 2241129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 0123 */ 2242129254Scognet mov r2, r2, lsl #24 /* r2 = 4... */ 2243129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 4567 */ 2244129254Scognet#else 2245129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 3210 */ 2246129254Scognet mov r2, r2, lsr #24 /* r2 = ...4 */ 2247129254Scognet orr r2, r2, r1, lsl #8 /* r2 = 7654 */ 2248129254Scognet#endif 2249129254Scognet str r3, [r0] 2250129254Scognet str r2, [r0, #0x04] 2251137463Scognet RET 2252129254Scognet LMEMCPY_8_PAD 2253129254Scognet 2254129254Scognet/* 2255129254Scognet * 0100: dst is 8-bit aligned, src is 32-bit aligned 2256129254Scognet */ 2257129254Scognet ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */ 2258129254Scognet ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */ 2259129254Scognet#ifdef __ARMEB__ 2260129254Scognet mov r1, r3, lsr #24 /* r1 = ...0 */ 2261129254Scognet strb r1, [r0] 2262129254Scognet mov r1, r3, lsr #8 /* r1 = .012 */ 2263129254Scognet strb r2, [r0, #0x07] 2264129254Scognet mov r3, r3, lsl #24 /* r3 = 3... 
*/ 2265129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 3456 */ 2266129254Scognet#else 2267129254Scognet strb r3, [r0] 2268129254Scognet mov r1, r2, lsr #24 /* r1 = ...7 */ 2269129254Scognet strb r1, [r0, #0x07] 2270129254Scognet mov r1, r3, lsr #8 /* r1 = .321 */ 2271129254Scognet mov r3, r3, lsr #24 /* r3 = ...3 */ 2272129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 6543 */ 2273129254Scognet#endif 2274129254Scognet strh r1, [r0, #0x01] 2275129254Scognet str r3, [r0, #0x03] 2276137463Scognet RET 2277129254Scognet LMEMCPY_8_PAD 2278129254Scognet 2279129254Scognet/* 2280129254Scognet * 0101: dst is 8-bit aligned, src is 8-bit aligned 2281129254Scognet */ 2282129254Scognet ldrb r2, [r1] 2283129254Scognet ldrh r3, [r1, #0x01] 2284129254Scognet ldr ip, [r1, #0x03] 2285129254Scognet ldrb r1, [r1, #0x07] 2286129254Scognet strb r2, [r0] 2287129254Scognet strh r3, [r0, #0x01] 2288129254Scognet str ip, [r0, #0x03] 2289129254Scognet strb r1, [r0, #0x07] 2290137463Scognet RET 2291129254Scognet LMEMCPY_8_PAD 2292129254Scognet 2293129254Scognet/* 2294129254Scognet * 0110: dst is 8-bit aligned, src is 16-bit aligned 2295129254Scognet */ 2296129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2297129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2298129254Scognet ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2299129254Scognet#ifdef __ARMEB__ 2300129254Scognet mov ip, r2, lsr #8 /* ip = ...0 */ 2301129254Scognet strb ip, [r0] 2302129254Scognet mov ip, r2, lsl #8 /* ip = .01. */ 2303129254Scognet orr ip, ip, r3, lsr #24 /* ip = .012 */ 2304129254Scognet strb r1, [r0, #0x07] 2305129254Scognet mov r3, r3, lsl #8 /* r3 = 345. 
*/ 2306129254Scognet orr r3, r3, r1, lsr #8 /* r3 = 3456 */ 2307129254Scognet#else 2308129254Scognet strb r2, [r0] /* 0 */ 2309129254Scognet mov ip, r1, lsr #8 /* ip = ...7 */ 2310129254Scognet strb ip, [r0, #0x07] /* 7 */ 2311129254Scognet mov ip, r2, lsr #8 /* ip = ...1 */ 2312129254Scognet orr ip, ip, r3, lsl #8 /* ip = 4321 */ 2313129254Scognet mov r3, r3, lsr #8 /* r3 = .543 */ 2314129254Scognet orr r3, r3, r1, lsl #24 /* r3 = 6543 */ 2315129254Scognet#endif 2316129254Scognet strh ip, [r0, #0x01] 2317129254Scognet str r3, [r0, #0x03] 2318137463Scognet RET 2319129254Scognet LMEMCPY_8_PAD 2320129254Scognet 2321129254Scognet/* 2322129254Scognet * 0111: dst is 8-bit aligned, src is 8-bit aligned 2323129254Scognet */ 2324129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2325129254Scognet ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 2326129254Scognet ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */ 2327129254Scognet ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2328129254Scognet strb r3, [r0] 2329129254Scognet mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */ 2330129254Scognet#ifdef __ARMEB__ 2331129254Scognet strh r3, [r0, #0x01] 2332129254Scognet orr r2, r2, ip, lsl #16 /* r2 = 3456 */ 2333129254Scognet#else 2334129254Scognet strh ip, [r0, #0x01] 2335129254Scognet orr r2, r3, r2, lsl #16 /* r2 = 6543 */ 2336129254Scognet#endif 2337129254Scognet str r2, [r0, #0x03] 2338129254Scognet strb r1, [r0, #0x07] 2339137463Scognet RET 2340129254Scognet LMEMCPY_8_PAD 2341129254Scognet 2342129254Scognet/* 2343129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 2344129254Scognet */ 2345129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2346129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2347129254Scognet mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 2348129254Scognet#ifdef __ARMEB__ 2349129254Scognet strh r1, [r0] 2350129254Scognet mov r1, r3, lsr #16 /* r1 = ..45 */ 2351129254Scognet orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */ 
2352129254Scognet#else 2353129254Scognet strh r2, [r0] 2354129254Scognet orr r2, r1, r3, lsl #16 /* r2 = 5432 */ 2355129254Scognet mov r3, r3, lsr #16 /* r3 = ..76 */ 2356129254Scognet#endif 2357129254Scognet str r2, [r0, #0x02] 2358129254Scognet strh r3, [r0, #0x06] 2359137463Scognet RET 2360129254Scognet LMEMCPY_8_PAD 2361129254Scognet 2362129254Scognet/* 2363129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned 2364129254Scognet */ 2365129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */ 2366129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2367129254Scognet ldrb ip, [r1, #0x07] /* ip = ...7 */ 2368129254Scognet mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */ 2369129254Scognet strh r1, [r0] 2370129254Scognet#ifdef __ARMEB__ 2371129254Scognet mov r1, r2, lsl #24 /* r1 = 2... */ 2372129254Scognet orr r1, r1, r3, lsr #8 /* r1 = 2345 */ 2373129254Scognet orr r3, ip, r3, lsl #8 /* r3 = 4567 */ 2374129254Scognet#else 2375129254Scognet mov r1, r2, lsr #24 /* r1 = ...2 */ 2376129254Scognet orr r1, r1, r3, lsl #8 /* r1 = 5432 */ 2377129254Scognet mov r3, r3, lsr #24 /* r3 = ...6 */ 2378129254Scognet orr r3, r3, ip, lsl #8 /* r3 = ..76 */ 2379129254Scognet#endif 2380129254Scognet str r1, [r0, #0x02] 2381129254Scognet strh r3, [r0, #0x06] 2382137463Scognet RET 2383129254Scognet LMEMCPY_8_PAD 2384129254Scognet 2385129254Scognet/* 2386129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned 2387129254Scognet */ 2388129254Scognet ldrh r2, [r1] 2389129254Scognet ldr ip, [r1, #0x02] 2390129254Scognet ldrh r3, [r1, #0x06] 2391129254Scognet strh r2, [r0] 2392129254Scognet str ip, [r0, #0x02] 2393129254Scognet strh r3, [r0, #0x06] 2394137463Scognet RET 2395129254Scognet LMEMCPY_8_PAD 2396129254Scognet 2397129254Scognet/* 2398129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned 2399129254Scognet */ 2400129254Scognet ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */ 2401129254Scognet ldr r2, [r1, #0x01] /* BE:r2 = 
1234 LE:r2 = 4321 */ 2402129254Scognet ldrb ip, [r1] /* ip = ...0 */ 2403129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */ 2404129254Scognet strh r1, [r0, #0x06] 2405129254Scognet#ifdef __ARMEB__ 2406129254Scognet mov r3, r3, lsr #24 /* r3 = ...5 */ 2407129254Scognet orr r3, r3, r2, lsl #8 /* r3 = 2345 */ 2408129254Scognet mov r2, r2, lsr #24 /* r2 = ...1 */ 2409129254Scognet orr r2, r2, ip, lsl #8 /* r2 = ..01 */ 2410129254Scognet#else 2411129254Scognet mov r3, r3, lsl #24 /* r3 = 5... */ 2412129254Scognet orr r3, r3, r2, lsr #8 /* r3 = 5432 */ 2413129254Scognet orr r2, ip, r2, lsl #8 /* r2 = 3210 */ 2414129254Scognet#endif 2415129254Scognet str r3, [r0, #0x02] 2416129254Scognet strh r2, [r0] 2417137463Scognet RET 2418129254Scognet LMEMCPY_8_PAD 2419129254Scognet 2420129254Scognet/* 2421129254Scognet * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 2422129254Scognet */ 2423129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2424129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2425129254Scognet mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */ 2426129254Scognet strh r1, [r0, #0x05] /* strh writes only the low halfword, i.e. bytes 5 and 6 */ 2427129254Scognet#ifdef __ARMEB__ 2428129254Scognet strb r3, [r0, #0x07] 2429129254Scognet mov r1, r2, lsr #24 /* r1 = ...0 */ 2430129254Scognet strb r1, [r0] 2431129254Scognet mov r2, r2, lsl #8 /* r2 = 123. 
*/ 2432129254Scognet orr r2, r2, r3, lsr #24 /* r2 = 1234 */ 2433129254Scognet str r2, [r0, #0x01] 2434129254Scognet#else 2435129254Scognet strb r2, [r0] 2436129254Scognet mov r1, r3, lsr #24 /* r1 = ...7 */ 2437129254Scognet strb r1, [r0, #0x07] 2438129254Scognet mov r2, r2, lsr #8 /* r2 = .321 */ 2439129254Scognet orr r2, r2, r3, lsl #24 /* r2 = 4321 */ 2440129254Scognet str r2, [r0, #0x01] 2441129254Scognet#endif 2442137463Scognet RET 2443129254Scognet LMEMCPY_8_PAD 2444129254Scognet 2445129254Scognet/* 2446129254Scognet * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 2447129254Scognet */ 2448129254Scognet ldrb r3, [r1] /* r3 = ...0 */ 2449129254Scognet ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */ 2450129254Scognet ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2451129254Scognet ldrb r1, [r1, #0x07] /* r1 = ...7 */ 2452129254Scognet strb r3, [r0] 2453129254Scognet mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */ 2454129254Scognet#ifdef __ARMEB__ 2455129254Scognet strh ip, [r0, #0x05] /* low halfword of ip = 56 */ 2456129254Scognet orr r2, r3, r2, lsl #16 /* r2 = 1234 */ 2457129254Scognet#else 2458129254Scognet strh r3, [r0, #0x05] 2459129254Scognet orr r2, r2, ip, lsl #16 /* r2 = 4321 */ 2460129254Scognet#endif 2461129254Scognet str r2, [r0, #0x01] 2462129254Scognet strb r1, [r0, #0x07] 2463137463Scognet RET 2464129254Scognet LMEMCPY_8_PAD 2465129254Scognet 2466129254Scognet/* 2467129254Scognet * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 2468129254Scognet */ 2469129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2470129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2471129254Scognet ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */ 2472129254Scognet#ifdef __ARMEB__ 2473129254Scognet mov ip, r2, lsr #8 /* ip = ...0 */ 2474129254Scognet strb ip, [r0] 2475129254Scognet mov ip, r2, lsl #24 /* ip = 1... 
*/ 2476129254Scognet orr ip, ip, r3, lsr #8 /* ip = 1234 */ 2477129254Scognet strb r1, [r0, #0x07] 2478129254Scognet mov r1, r1, lsr #8 /* r1 = ...6 */ 2479129254Scognet orr r1, r1, r3, lsl #8 /* r1 = 3456 */ 2480129254Scognet#else 2481129254Scognet strb r2, [r0] 2482129254Scognet mov ip, r2, lsr #8 /* ip = ...1 */ 2483129254Scognet orr ip, ip, r3, lsl #8 /* ip = 4321 */ 2484129254Scognet mov r2, r1, lsr #8 /* r2 = ...7 */ 2485129254Scognet strb r2, [r0, #0x07] 2486129254Scognet mov r1, r1, lsl #8 /* r1 = .76. */ 2487129254Scognet orr r1, r1, r3, lsr #24 /* r1 = .765 */ 2488129254Scognet#endif 2489129254Scognet str ip, [r0, #0x01] 2490129254Scognet strh r1, [r0, #0x05] 2491137463Scognet RET 2492129254Scognet LMEMCPY_8_PAD 2493129254Scognet 2494129254Scognet/* 2495129254Scognet * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3) 2496129254Scognet */ 2497129254Scognet ldrb r2, [r1] 2498129254Scognet ldr ip, [r1, #0x01] 2499129254Scognet ldrh r3, [r1, #0x05] 2500129254Scognet ldrb r1, [r1, #0x07] 2501129254Scognet strb r2, [r0] 2502129254Scognet str ip, [r0, #0x01] 2503129254Scognet strh r3, [r0, #0x05] 2504129254Scognet strb r1, [r0, #0x07] 2505137463Scognet RET 2506129254Scognet LMEMCPY_8_PAD 2507129254Scognet 2508129254Scognet/****************************************************************************** 2509129254Scognet * Special case for 12 byte copies. Sixteen handlers follow, one per combination of the low two bits of dst (r0) and src (r1). Each handler is padded with LMEMCPY_C_PAD so that handler N starts at .Lmemcpy_c + (N << LMEMCPY_C_LOG2). In the register diagrams each character is one byte, most significant first - a hex digit is the source byte at that offset, a dot is a zero byte and x is a byte that is not part of the copy. 
2510129254Scognet */ 2511129254Scognet#define LMEMCPY_C_LOG2 7 /* 128 bytes */ 2512129254Scognet#define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2 2513129254Scognet LMEMCPY_C_PAD 2514129254Scognet.Lmemcpy_c: 2515129254Scognet and r2, r1, #0x03 /* r2 = src & 3 */ 2516129254Scognet orr r2, r2, r0, lsl #2 /* r2 |= dst << 2 */ 2517129254Scognet ands r2, r2, #0x0f /* r2 = ((dst & 3) << 2) | (src & 3), Z is set when both are word aligned */ 2518129254Scognet sub r3, pc, #0x14 /* r3 = .Lmemcpy_c, since pc reads 8 bytes past this instruction, which sits at .Lmemcpy_c + 0x0c (assumes ARM state) */ 2519129254Scognet addne pc, r3, r2, lsl #LMEMCPY_C_LOG2 /* nonzero index - jump to handler r2, index 0 falls through to 0000 below */ 2520129254Scognet 2521129254Scognet/* 2522129254Scognet * 0000: dst is 32-bit aligned, src is 32-bit aligned 2523129254Scognet */ 2524129254Scognet ldr r2, [r1] 2525129254Scognet ldr r3, [r1, #0x04] 
2526129254Scognet ldr r1, [r1, #0x08] 2527129254Scognet str r2, [r0] 2528129254Scognet str r3, [r0, #0x04] 2529129254Scognet str r1, [r0, #0x08] 2530137463Scognet RET 2531129254Scognet LMEMCPY_C_PAD 2532129254Scognet 2533129254Scognet/* 2534129254Scognet * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1) 2535129254Scognet */ 2536129254Scognet ldrb r2, [r1, #0xb] /* r2 = ...B */ 2537129254Scognet ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 2538129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2539129254Scognet ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x (x = the byte just before src, dropped by the shift below) */ 2540129254Scognet#ifdef __ARMEB__ 2541129254Scognet orr r2, r2, ip, lsl #8 /* r2 = 89AB */ 2542129254Scognet str r2, [r0, #0x08] 2543129254Scognet mov r2, ip, lsr #24 /* r2 = ...7 */ 2544129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 4567 */ 2545129254Scognet mov r1, r1, lsl #8 /* r1 = 012. */ 2546129254Scognet orr r1, r1, r3, lsr #24 /* r1 = 0123 */ 2547129254Scognet#else 2548129254Scognet mov r2, r2, lsl #24 /* r2 = B... */ 2549129254Scognet orr r2, r2, ip, lsr #8 /* r2 = BA98 */ 2550129254Scognet str r2, [r0, #0x08] 2551129254Scognet mov r2, ip, lsl #24 /* r2 = 7... */ 2552129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 7654 */ 2553129254Scognet mov r1, r1, lsr #8 /* r1 = .210 */ 2554129254Scognet orr r1, r1, r3, lsl #24 /* r1 = 3210 */ 2555129254Scognet#endif 2556129254Scognet str r2, [r0, #0x04] 2557129254Scognet str r1, [r0] 2558137463Scognet RET 2559129254Scognet LMEMCPY_C_PAD 2560129254Scognet 2561129254Scognet/* 2562129254Scognet * 0010: dst is 32-bit aligned, src is 16-bit aligned 2563129254Scognet */ 2564129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2565129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2566129254Scognet ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 2567129254Scognet ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 2568129254Scognet#ifdef __ARMEB__ 2569129254Scognet mov r2, r2, lsl #16 /* r2 = 01.. 
*/ 2570129254Scognet orr r2, r2, r3, lsr #16 /* r2 = 0123 */ 2571129254Scognet str r2, [r0] 2572129254Scognet mov r3, r3, lsl #16 /* r3 = 45.. */ 2573129254Scognet orr r3, r3, ip, lsr #16 /* r3 = 4567 */ 2574129254Scognet orr r1, r1, ip, lsl #16 /* r1 = 89AB */ 2575129254Scognet#else 2576129254Scognet orr r2, r2, r3, lsl #16 /* r2 = 3210 */ 2577129254Scognet str r2, [r0] 2578129254Scognet mov r3, r3, lsr #16 /* r3 = ..54 */ 2579129254Scognet orr r3, r3, ip, lsl #16 /* r3 = 7654 */ 2580129254Scognet mov r1, r1, lsl #16 /* r1 = BA.. */ 2581129254Scognet orr r1, r1, ip, lsr #16 /* r1 = BA98 */ 2582129254Scognet#endif 2583129254Scognet str r3, [r0, #0x04] 2584129254Scognet str r1, [r0, #0x08] 2585137463Scognet RET 2586129254Scognet LMEMCPY_C_PAD 2587129254Scognet 2588129254Scognet/* 2589129254Scognet * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3) 2590129254Scognet */ 2591129254Scognet ldrb r2, [r1] /* r2 = ...0 */ 2592129254Scognet ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 2593129254Scognet ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 2594129254Scognet ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 (x = the byte just past the 12 copied, dropped by the shift below) */ 2595129254Scognet#ifdef __ARMEB__ 2596129254Scognet mov r2, r2, lsl #24 /* r2 = 0... */ 2597129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 0123 */ 2598129254Scognet str r2, [r0] 2599129254Scognet mov r3, r3, lsl #24 /* r3 = 4... */ 2600129254Scognet orr r3, r3, ip, lsr #8 /* r3 = 4567 */ 2601129254Scognet mov r1, r1, lsr #8 /* r1 = .9AB */ 2602129254Scognet orr r1, r1, ip, lsl #24 /* r1 = 89AB */ 2603129254Scognet#else 2604129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 3210 */ 2605129254Scognet str r2, [r0] 2606129254Scognet mov r3, r3, lsr #24 /* r3 = ...4 */ 2607129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 7654 */ 2608129254Scognet mov r1, r1, lsl #8 /* r1 = BA9. 
*/ 2609129254Scognet orr r1, r1, ip, lsr #24 /* r1 = BA98 */ 2610129254Scognet#endif 2611129254Scognet str r3, [r0, #0x04] 2612129254Scognet str r1, [r0, #0x08] 2613137463Scognet RET 2614129254Scognet LMEMCPY_C_PAD 2615129254Scognet 2616129254Scognet/* 2617129254Scognet * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned 2618129254Scognet */ 2619129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2620129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2621129254Scognet ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */ 2622129254Scognet mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */ 2623129254Scognet strh r1, [r0, #0x01] 2624129254Scognet#ifdef __ARMEB__ 2625129254Scognet mov r1, r2, lsr #24 /* r1 = ...0 */ 2626129254Scognet strb r1, [r0] 2627129254Scognet mov r1, r2, lsl #24 /* r1 = 3... */ 2628129254Scognet orr r2, r1, r3, lsr #8 /* r2 = 3456 */ 2629129254Scognet mov r1, r3, lsl #24 /* r1 = 7... */ 2630129254Scognet orr r1, r1, ip, lsr #8 /* r1 = 789A */ 2631129254Scognet#else 2632129254Scognet strb r2, [r0] 2633129254Scognet mov r1, r2, lsr #24 /* r1 = ...3 */ 2634129254Scognet orr r2, r1, r3, lsl #8 /* r2 = 6543 */ 2635129254Scognet mov r1, r3, lsr #24 /* r1 = ...7 */ 2636129254Scognet orr r1, r1, ip, lsl #8 /* r1 = A987 */ 2637129254Scognet mov ip, ip, lsr #24 /* ip = ...B */ 2638129254Scognet#endif 2639129254Scognet str r2, [r0, #0x03] 2640129254Scognet str r1, [r0, #0x07] 2641129254Scognet strb ip, [r0, #0x0b] /* low byte of ip is B in both branches - BE never shifted ip, LE shifted B down */ 2642137463Scognet RET 2643129254Scognet LMEMCPY_C_PAD 2644129254Scognet 2645129254Scognet/* 2646129254Scognet * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1). Same misalignment on both sides, so plain loads and stores suffice and no shifting is needed. 
2647129254Scognet */ 2648129254Scognet ldrb r2, [r1] 2649129254Scognet ldrh r3, [r1, #0x01] 2650129254Scognet ldr ip, [r1, #0x03] 2651129254Scognet strb r2, [r0] 2652129254Scognet ldr r2, [r1, #0x07] 2653129254Scognet ldrb r1, [r1, #0x0b] 2654129254Scognet strh r3, [r0, #0x01] 2655129254Scognet str ip, [r0, #0x03] 2656129254Scognet str r2, 
[r0, #0x07] 2657129254Scognet strb r1, [r0, #0x0b] 2658137463Scognet RET 2659129254Scognet LMEMCPY_C_PAD 2660129254Scognet 2661129254Scognet/* 2662129254Scognet * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned 2663129254Scognet */ 2664129254Scognet ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */ 2665129254Scognet ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */ 2666129254Scognet ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */ 2667129254Scognet ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */ 2668129254Scognet#ifdef __ARMEB__ 2669129254Scognet mov r2, r2, ror #8 /* r2 = 1..0 */ 2670129254Scognet strb r2, [r0] 2671129254Scognet mov r2, r2, lsr #16 /* r2 = ..1. */ 2672129254Scognet orr r2, r2, r3, lsr #24 /* r2 = ..12 */ 2673129254Scognet strh r2, [r0, #0x01] 2674129254Scognet mov r2, r3, lsl #8 /* r2 = 345. */ 2675129254Scognet orr r3, r2, ip, lsr #24 /* r3 = 3456 */ 2676129254Scognet mov r2, ip, lsl #8 /* r2 = 789. */ 2677129254Scognet orr r2, r2, r1, lsr #8 /* r2 = 789A */ 2678129254Scognet#else 2679129254Scognet strb r2, [r0] 2680129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 2681129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 2682129254Scognet strh r2, [r0, #0x01] 2683129254Scognet mov r2, r3, lsr #8 /* r2 = .543 */ 2684129254Scognet orr r3, r2, ip, lsl #24 /* r3 = 6543 */ 2685129254Scognet mov r2, ip, lsr #8 /* r2 = .987 */ 2686129254Scognet orr r2, r2, r1, lsl #24 /* r2 = A987 */ 2687129254Scognet mov r1, r1, lsr #8 /* r1 = ...B */ 2688129254Scognet#endif 2689129254Scognet str r3, [r0, #0x03] 2690129254Scognet str r2, [r0, #0x07] 2691129254Scognet strb r1, [r0, #0x0b] /* low byte of r1 is B in both branches */ 2692137463Scognet RET 2693129254Scognet LMEMCPY_C_PAD 2694129254Scognet 2695129254Scognet/* 2696129254Scognet * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3) 2697129254Scognet */ 2698129254Scognet ldrb r2, [r1] 2699129254Scognet ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */ 2700129254Scognet ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */ 
2701129254Scognet ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 (x = the byte just past the 12 copied, never stored) */ 2702129254Scognet strb r2, [r0] 2703129254Scognet#ifdef __ARMEB__ 2704129254Scognet mov r2, r3, lsr #16 /* r2 = ..12 */ 2705129254Scognet strh r2, [r0, #0x01] 2706129254Scognet mov r3, r3, lsl #16 /* r3 = 34.. */ 2707129254Scognet orr r3, r3, ip, lsr #16 /* r3 = 3456 */ 2708129254Scognet mov ip, ip, lsl #16 /* ip = 78.. */ 2709129254Scognet orr ip, ip, r1, lsr #16 /* ip = 789A */ 2710129254Scognet mov r1, r1, lsr #8 /* r1 = .9AB */ 2711129254Scognet#else 2712129254Scognet strh r3, [r0, #0x01] 2713129254Scognet mov r3, r3, lsr #16 /* r3 = ..43 */ 2714129254Scognet orr r3, r3, ip, lsl #16 /* r3 = 6543 */ 2715129254Scognet mov ip, ip, lsr #16 /* ip = ..87 */ 2716129254Scognet orr ip, ip, r1, lsl #16 /* ip = A987 */ 2717129254Scognet mov r1, r1, lsr #16 /* r1 = ..xB */ 2718129254Scognet#endif 2719129254Scognet str r3, [r0, #0x03] 2720129254Scognet str ip, [r0, #0x07] 2721129254Scognet strb r1, [r0, #0x0b] 2722137463Scognet RET 2723129254Scognet LMEMCPY_C_PAD 2724129254Scognet 2725129254Scognet/* 2726129254Scognet * 1000: dst is 16-bit aligned, src is 32-bit aligned 2727129254Scognet */ 2728129254Scognet ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */ 2729129254Scognet ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */ 2730129254Scognet ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */ 2731129254Scognet mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */ 2732129254Scognet#ifdef __ARMEB__ 2733129254Scognet strh r1, [r0] 2734129254Scognet mov r1, ip, lsl #16 /* r1 = 23.. */ 2735129254Scognet orr r1, r1, r3, lsr #16 /* r1 = 2345 */ 2736129254Scognet mov r3, r3, lsl #16 /* r3 = 67.. 
*/ 2737129254Scognet orr r3, r3, r2, lsr #16 /* r3 = 6789 */ 2738129254Scognet#else 2739129254Scognet strh ip, [r0] 2740129254Scognet orr r1, r1, r3, lsl #16 /* r1 = 5432 */ 2741129254Scognet mov r3, r3, lsr #16 /* r3 = ..76 */ 2742129254Scognet orr r3, r3, r2, lsl #16 /* r3 = 9876 */ 2743129254Scognet mov r2, r2, lsr #16 /* r2 = ..BA */ 2744129254Scognet#endif 2745129254Scognet str r1, [r0, #0x02] 2746129254Scognet str r3, [r0, #0x06] 2747129254Scognet strh r2, [r0, #0x0a] 2748137463Scognet RET 2749129254Scognet LMEMCPY_C_PAD 2750129254Scognet 2751129254Scognet/* 2752129254Scognet * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1) 2753129254Scognet */ 2754129254Scognet ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x (x = the byte just before src, never stored) */ 2755129254Scognet ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */ 2756129254Scognet mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */ 2757129254Scognet strh ip, [r0] 2758129254Scognet ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */ 2759129254Scognet ldrb r1, [r1, #0x0b] /* r1 = ...B */ 2760129254Scognet#ifdef __ARMEB__ 2761129254Scognet mov r2, r2, lsl #24 /* r2 = 2... */ 2762129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 2345 */ 2763129254Scognet mov r3, r3, lsl #24 /* r3 = 6... */ 2764129254Scognet orr r3, r3, ip, lsr #8 /* r3 = 6789 */ 2765129254Scognet orr r1, r1, ip, lsl #8 /* r1 = 89AB */ 2766129254Scognet#else 2767129254Scognet mov r2, r2, lsr #24 /* r2 = ...2 */ 2768129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 5432 */ 2769129254Scognet mov r3, r3, lsr #24 /* r3 = ...6 */ 2770129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 9876 */ 2771129254Scognet mov r1, r1, lsl #8 /* r1 = ..B. 
*/ 2772129254Scognet orr r1, r1, ip, lsr #24 /* r1 = ..BA */ 2773129254Scognet#endif 2774129254Scognet str r2, [r0, #0x02] 2775129254Scognet str r3, [r0, #0x06] 2776129254Scognet strh r1, [r0, #0x0a] 2777137463Scognet RET 2778129254Scognet LMEMCPY_C_PAD 2779129254Scognet 2780129254Scognet/* 2781129254Scognet * 1010: dst is 16-bit aligned, src is 16-bit aligned. Same misalignment on both sides, so plain loads and stores suffice and no shifting is needed. 2782129254Scognet */ 2783129254Scognet ldrh r2, [r1] 2784129254Scognet ldr r3, [r1, #0x02] 2785129254Scognet ldr ip, [r1, #0x06] 2786129254Scognet ldrh r1, [r1, #0x0a] 2787129254Scognet strh r2, [r0] 2788129254Scognet str r3, [r0, #0x02] 2789129254Scognet str ip, [r0, #0x06] 2790129254Scognet strh r1, [r0, #0x0a] 2791137463Scognet RET 2792129254Scognet LMEMCPY_C_PAD 2793129254Scognet 2794129254Scognet/* 2795129254Scognet * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3) 2796129254Scognet */ 2797129254Scognet ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */ 2798129254Scognet ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */ 2799129254Scognet mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */ 2800129254Scognet strh ip, [r0, #0x0a] 2801129254Scognet ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */ 2802129254Scognet ldrb r1, [r1] /* r1 = ...0 */ 2803129254Scognet#ifdef __ARMEB__ 2804129254Scognet mov r2, r2, lsr #24 /* r2 = ...9 */ 2805129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 6789 */ 2806129254Scognet mov r3, r3, lsr #24 /* r3 = ...5 */ 2807129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 2345 */ 2808129254Scognet mov r1, r1, lsl #8 /* r1 = ..0. */ 2809129254Scognet orr r1, r1, ip, lsr #24 /* r1 = ..01 */ 2810129254Scognet#else 2811129254Scognet mov r2, r2, lsl #24 /* r2 = 9... */ 2812129254Scognet orr r2, r2, r3, lsr #8 /* r2 = 9876 */ 2813129254Scognet mov r3, r3, lsl #24 /* r3 = 5... 
*/ 2814129254Scognet orr r3, r3, ip, lsr #8 /* r3 = 5432 */ 2815129254Scognet orr r1, r1, ip, lsl #8 /* r1 = 3210 */ 2816129254Scognet#endif 2817129254Scognet str r2, [r0, #0x06] 2818129254Scognet str r3, [r0, #0x02] 2819129254Scognet strh r1, [r0] 2820137463Scognet RET 2821129254Scognet LMEMCPY_C_PAD 2822129254Scognet 2823129254Scognet/* 2824129254Scognet * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned 2825129254Scognet */ 2826129254Scognet ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */ 2827129254Scognet ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */ 2828129254Scognet ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */ 2829129254Scognet#ifdef __ARMEB__ 2830129254Scognet mov r3, r2, lsr #24 /* r3 = ...0 */ 2831129254Scognet strb r3, [r0] 2832129254Scognet mov r2, r2, lsl #8 /* r2 = 123. */ 2833129254Scognet orr r2, r2, ip, lsr #24 /* r2 = 1234 */ 2834129254Scognet str r2, [r0, #0x01] 2835129254Scognet mov r2, ip, lsl #8 /* r2 = 567. */ 2836129254Scognet orr r2, r2, r1, lsr #24 /* r2 = 5678 */ 2837129254Scognet str r2, [r0, #0x05] 2838129254Scognet mov r2, r1, lsr #8 /* r2 = .89A */ 2839129254Scognet strh r2, [r0, #0x09] 2840129254Scognet strb r1, [r0, #0x0b] 2841129254Scognet#else 2842129254Scognet strb r2, [r0] 2843129254Scognet mov r3, r2, lsr #8 /* r3 = .321 */ 2844129254Scognet orr r3, r3, ip, lsl #24 /* r3 = 4321 */ 2845129254Scognet str r3, [r0, #0x01] 2846129254Scognet mov r3, ip, lsr #8 /* r3 = .765 */ 2847129254Scognet orr r3, r3, r1, lsl #24 /* r3 = 8765 */ 2848129254Scognet str r3, [r0, #0x05] 2849129254Scognet mov r1, r1, lsr #8 /* r1 = .BA9 */ 2850129254Scognet strh r1, [r0, #0x09] 2851129254Scognet mov r1, r1, lsr #16 /* r1 = ...B */ 2852129254Scognet strb r1, [r0, #0x0b] 2853129254Scognet#endif 2854137463Scognet RET 2855129254Scognet LMEMCPY_C_PAD 2856129254Scognet 2857129254Scognet/* 2858129254Scognet * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1) 2859129254Scognet */ 2860129254Scognet ldrb r2, [r1, #0x0b] /* r2 
= ...B */ 2861129254Scognet ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */ 2862129254Scognet ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */ 2863129254Scognet ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */ 2864129254Scognet strb r2, [r0, #0x0b] 2865129254Scognet#ifdef __ARMEB__ 2866129254Scognet strh r3, [r0, #0x09] 2867129254Scognet mov r3, r3, lsr #16 /* r3 = ..78 */ 2868129254Scognet orr r3, r3, ip, lsl #16 /* r3 = 5678 */ 2869129254Scognet mov ip, ip, lsr #16 /* ip = ..34 */ 2870129254Scognet orr ip, ip, r1, lsl #16 /* ip = 1234 */ 2871129254Scognet mov r1, r1, lsr #16 /* r1 = ..x0 */ 2872129254Scognet#else 2873129254Scognet mov r2, r3, lsr #16 /* r2 = ..A9 */ 2874129254Scognet strh r2, [r0, #0x09] 2875129254Scognet mov r3, r3, lsl #16 /* r3 = 87.. */ 2876129254Scognet orr r3, r3, ip, lsr #16 /* r3 = 8765 */ 2877129254Scognet mov ip, ip, lsl #16 /* ip = 43.. */ 2878129254Scognet orr ip, ip, r1, lsr #16 /* ip = 4321 */ 2879129254Scognet mov r1, r1, lsr #8 /* r1 = .210 */ 2880129254Scognet#endif 2881129254Scognet str r3, [r0, #0x05] 2882129254Scognet str ip, [r0, #0x01] 2883129254Scognet strb r1, [r0] 2884137463Scognet RET 2885129254Scognet LMEMCPY_C_PAD 2886129254Scognet 2887129254Scognet/* 2888129254Scognet * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned 2889129254Scognet */ 2890129254Scognet#ifdef __ARMEB__ 2891129254Scognet ldrh r2, [r1, #0x0a] /* r2 = ..AB */ 2892129254Scognet ldr ip, [r1, #0x06] /* ip = 6789 */ 2893129254Scognet ldr r3, [r1, #0x02] /* r3 = 2345 */ 2894129254Scognet ldrh r1, [r1] /* r1 = ..01 */ 2895129254Scognet strb r2, [r0, #0x0b] 2896129254Scognet mov r2, r2, lsr #8 /* r2 = ...A */ 2897129254Scognet orr r2, r2, ip, lsl #8 /* r2 = 789A */ 2898129254Scognet mov ip, ip, lsr #8 /* ip = .678 */ 2899129254Scognet orr ip, ip, r3, lsl #24 /* ip = 5678 */ 2900129254Scognet mov r3, r3, lsr #8 /* r3 = .234 */ 2901129254Scognet orr r3, r3, r1, lsl #24 /* r3 = 1234 */ 2902129254Scognet mov r1, r1, lsr #8 /* r1 = ...0 */ 
2903129254Scognet strb r1, [r0] 2904129254Scognet str r3, [r0, #0x01] 2905129254Scognet str ip, [r0, #0x05] 2906129254Scognet strh r2, [r0, #0x09] 2907129254Scognet#else 2908129254Scognet ldrh r2, [r1] /* r2 = ..10 */ 2909129254Scognet ldr r3, [r1, #0x02] /* r3 = 5432 */ 2910129254Scognet ldr ip, [r1, #0x06] /* ip = 9876 */ 2911129254Scognet ldrh r1, [r1, #0x0a] /* r1 = ..BA */ 2912129254Scognet strb r2, [r0] 2913129254Scognet mov r2, r2, lsr #8 /* r2 = ...1 */ 2914129254Scognet orr r2, r2, r3, lsl #8 /* r2 = 4321 */ 2915129254Scognet mov r3, r3, lsr #24 /* r3 = ...5 */ 2916129254Scognet orr r3, r3, ip, lsl #8 /* r3 = 8765 */ 2917129254Scognet mov ip, ip, lsr #24 /* ip = ...9 */ 2918129254Scognet orr ip, ip, r1, lsl #8 /* ip = .BA9 */ 2919129254Scognet mov r1, r1, lsr #8 /* r1 = ...B */ 2920129254Scognet str r2, [r0, #0x01] 2921129254Scognet str r3, [r0, #0x05] 2922129254Scognet strh ip, [r0, #0x09] 2923129254Scognet strb r1, [r0, #0x0b] 2924129254Scognet#endif 2925137463Scognet RET 2926129254Scognet LMEMCPY_C_PAD 2927129254Scognet 2928129254Scognet/* 2929129254Scognet * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3). Same misalignment on both sides, so plain loads and stores suffice and no shifting is needed. 
2930129254Scognet */ 2931129254Scognet ldrb r2, [r1] 2932129254Scognet ldr r3, [r1, #0x01] 2933129254Scognet ldr ip, [r1, #0x05] 2934129254Scognet strb r2, [r0] 2935129254Scognet ldrh r2, [r1, #0x09] 2936129254Scognet ldrb r1, [r1, #0x0b] 2937129254Scognet str r3, [r0, #0x01] 2938129254Scognet str ip, [r0, #0x05] 2939129254Scognet strh r2, [r0, #0x09] 2940129254Scognet strb r1, [r0, #0x0b] 2941137463Scognet RET 2942248361SandrewEND(memcpy) 2943172614Scognet#endif /* _ARM_ARCH_5E */ 2944135654Scognet 2945135654Scognet#ifdef GPROF 2946135654Scognet /* Five ENTRY symbols, each followed by a single nop, assembled only when GPROF is defined. Presumably marker symbols for the profiler - TODO confirm. NOTE(review): memcpy above is closed with END(memcpy) but none of these has a matching END - confirm whether ENTRY needs one here. */ 2947135654ScognetENTRY(user) 2948135654Scognet nop 2949135654ScognetENTRY(btrap) 2950135654Scognet nop 2951135654ScognetENTRY(etrap) 2952135654Scognet nop 2953135654ScognetENTRY(bintr) 2954135654Scognet nop 2955135654ScognetENTRY(eintr) 2956135654Scognet nop 2957135654Scognet 
2958135654Scognet#endif /* GPROF */ 2959