/*	$NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $	*/

/*-
 * Copyright (c) 2001 Ben Harris.
 * Copyright (c) 1994 Mark Brinicombe.
 * Copyright (c) 1994 Brini.
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of the company nor the name of the author may be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * blockio.S
 *
 * optimised block read/write from/to IO routines.
 *
 * Created      : 08/10/94
 * Modified	: 22/01/99  -- R.Earnshaw
 *			       Faster, and small tweaks for StrongARM
 */

#include <machine/asm.h>

__FBSDID("$FreeBSD$");

/*
 * Read bytes from an I/O address into a block of memory
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length
 *
 * The I/O address in r0 is never advanced; only r1 moves.  Bytes are
 * copied singly until r1 is word aligned, then four byte reads are
 * packed (first read in the low byte) into one word store, and any
 * remaining 1..3 bytes are copied singly.
 * Uses r3 and r12 as scratch.
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(read_multi_1)
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lrm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lrm1_main		/* aligned destination */
	rsb	r12, r12, #4		/* r12 = bytes needed to align (1..3) */
	cmp	r12, #2			/* flags select the 2nd/3rd byte below */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]
	strgtb	r3, [r1], #1
	subs	r2, r2, r12
	blt	.Lrm1_l4
.Lrm1_main:
.Lrm1loop:
	ldrb	r3, [r0]
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #8
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #16
	ldrb	r12, [r0]
	orr	r3, r3, r12, lsl #24
	str	r3, [r1], #4
	subs	r2, r2, #4
	bge	.Lrm1loop
.Lrm1_l4:
	adds	r2, r2, #4		/* r2 = bytes still to copy */
	ldmeqdb	fp, {fp, sp, pc}	/* nothing left: unwind and return */
	cmp	r2, #2			/* flags select the 2nd/3rd byte below */
	ldrb	r3, [r0]
	strb	r3, [r1], #1
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
	ldrgtb	r3, [r0]
	strgtb	r3, [r1], #1
	ldmdb	fp, {fp, sp, pc}
END(read_multi_1)

/*
 * Write bytes to an I/O address from a block of memory
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length
 *
 * The I/O address in r0 is never advanced; only r1 moves.  Bytes are
 * copied singly until r1 is word aligned, then each loaded word is
 * written out as four byte stores (low byte first), and any remaining
 * 1..3 bytes are copied singly.
 * Uses r3 and r12 as scratch.
 */

/* This code will look very familiar if you've read _memcpy(). */
ENTRY(write_multi_1)
	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	subs	r2, r2, #4		/* r2 = length - 4 */
	blt	.Lwm1_l4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lwm1_main		/* aligned source */
	rsb	r12, r12, #4		/* r12 = bytes needed to align (1..3) */
	cmp	r12, #2			/* flags select the 2nd/3rd byte below */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0]
	subs	r2, r2, r12
	blt	.Lwm1_l4
.Lwm1_main:
.Lwm1loop:
	ldr	r3, [r1], #4
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	mov	r3, r3, lsr #8
	strb	r3, [r0]
	subs	r2, r2, #4
	bge	.Lwm1loop
.Lwm1_l4:
	adds	r2, r2, #4		/* r2 = bytes still to copy */
	ldmeqdb	fp, {fp, sp, pc}	/* nothing left: unwind and return */
	cmp	r2, #2			/* flags select the 2nd/3rd byte below */
	ldrb	r3, [r1], #1
	strb	r3, [r0]
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0]
	ldmdb	fp, {fp, sp, pc}
END(write_multi_1)

/*
 * Reads short ints (16 bits) from an I/O address into a block of memory
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (decremented once per 16-bit item stored)
 *
 * Uses r3 and ip as scratch.
 */

ENTRY(insw)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the length is even and the destination is word aligned, do it fast */

	tst	r2, #0x00000001
	tsteq	r1, #0x00000003
	beq	.Lfastinsw

/* Non aligned insw: store the low two bytes of each word read */

.Linswloop:
	ldr	r3, [r0]
	subs	r2, r2, #0x00000001	/* Loop test in load delay slot */
	strb	r3, [r1], #0x0001
	mov	r3, r3, lsr #8
	strb	r3, [r1], #0x0001
	bgt	.Linswloop

	RET

/* Word aligned insw: two reads are packed into one word store */

.Lfastinsw:

.Lfastinswloop:
	ldr	r3, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	ip, [r0]
	mov	r3, r3, lsr #16		/* Put the two shorts together */
	orr	r3, r3, ip, lsl #16
	str	r3, [r1], #0x0004	/* Store */
	subs	r2, r2, #0x00000002	/* Next */
	bgt	.Lfastinswloop

	RET
END(insw)

/*
 * Writes short ints (16 bits) from a block of memory to an I/O address
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (decremented once per 16-bit item written)
 *
 * Every word written to r0 carries the same 16-bit value in both halves.
 * Uses r3 and ip as scratch.
 */

ENTRY(outsw)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the length is even and the source is word aligned, do it fast */

	tst	r2, #0x00000001
	tsteq	r1, #0x00000003
	beq	.Lfastoutsw

/* Non aligned outsw: assemble each short from two byte loads */

.Loutswloop:
	ldrb	r3, [r1], #0x0001
	ldrb	ip, [r1], #0x0001
	subs	r2, r2, #0x00000001	/* Loop test in load delay slot */
	orr	r3, r3, ip, lsl #8
	orr	r3, r3, r3, lsl #16	/* duplicate into the upper half */
	str	r3, [r0]
	bgt	.Loutswloop

	RET

/* Word aligned outsw */

.Lfastoutsw:

.Lfastoutswloop:
	ldr	r3, [r1], #0x0004	/* r3 = (H)(L) */
	subs	r2, r2, #0x00000002	/* Loop test in load delay slot */

	eor	ip, r3, r3, lsr #16	/* ip = (H)(H^L) */
	eor	r3, r3, ip, lsl #16	/* r3 = (H^H^L)(L) = (L)(L) */
	eor	ip, ip, r3, lsr #16	/* ip = (H)(H^L^L) = (H)(H) */

	str	r3, [r0]
	str	ip, [r0]

/*	mov	ip, r3, lsl #16
 *	orr	ip, ip, ip, lsr #16
 *	str	ip, [r0]
 *
 *	mov	ip, r3, lsr #16
 *	orr	ip, ip, ip, lsl #16
 *	str	ip, [r0]
 */

	bgt	.Lfastoutswloop

	RET
END(outsw)

/*
 * reads short ints (16 bits) from an I/O address into a block of memory
 * with a length guaranteed to be a multiple of 16 bytes
 * with a word aligned destination address
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (in 16-bit items; 8 are consumed per loop iteration)
 *
 * If either precondition fails the call is handed to insw.
 * Uses r3, ip as scratch; r4, r5 and lr are saved on the stack.
 */

ENTRY(insw16)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the destination address is word aligned and the size suitably
   aligned, do it fast */

	tst	r2, #0x00000007
	tsteq	r1, #0x00000003

	bne	_C_LABEL(insw)

/* Word aligned insw */

	stmfd	sp!, {r4,r5,lr}

.Linsw16loop:
	ldr	r3, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r3, r3, lsr #16		/* Put the two shorts together */
	orr	r3, r3, lr, lsl #16

	ldr	r4, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r4, r4, lsr #16		/* Put the two shorts together */
	orr	r4, r4, lr, lsl #16

	ldr	r5, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	r5, r5, lsr #16		/* Put the two shorts together */
	orr	r5, r5, lr, lsl #16

	ldr	ip, [r0, #0x0002]	/* take advantage of nonaligned
					 * word accesses */
	ldr	lr, [r0]
	mov	ip, ip, lsr #16		/* Put the two shorts together */
	orr	ip, ip, lr, lsl #16

	stmia	r1!, {r3-r5,ip}
	subs	r2, r2, #0x00000008	/* Next */
	bgt	.Linsw16loop

	ldmfd	sp!, {r4,r5,pc}		/* Restore regs and go home */
END(insw16)

/*
 * Writes short ints (16 bits) from a block of memory to an I/O address
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (in 16-bit items; 8 are consumed per loop iteration)
 *
 * If r2 is not a multiple of 8 or r1 is not word aligned the call is
 * handed to outsw.
 * Uses r3, ip as scratch; r4, r5 and lr are saved on the stack.
 */

ENTRY(outsw16)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* If the source address is word aligned and the size suitably
   aligned, do it fast */

	tst	r2, #0x00000007
	tsteq	r1, #0x00000003

	bne	_C_LABEL(outsw)

/* Word aligned outsw */

	stmfd	sp!, {r4,r5,lr}

.Loutsw16loop:
	ldmia	r1!, {r4,r5,ip,lr}

	eor	r3, r4, r4, lsl #16	/* r3 = (A^B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r4, [r0]

/*	mov	r3, r4, lsl #16
 *	orr	r3, r3, r3, lsr #16
 *	str	r3, [r0]
 *
 *	mov	r3, r4, lsr #16
 *	orr	r3, r3, r3, lsl #16
 *	str	r3, [r0]
 */

	eor	r3, r5, r5, lsl #16	/* r3 = (A^B)(B) */
	eor	r5, r5, r3, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, r5, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	r5, [r0]

	eor	r3, ip, ip, lsl #16	/* r3 = (A^B)(B) */
	eor	ip, ip, r3, lsr #16	/* ip = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, ip, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	ip, [r0]

	eor	r3, lr, lr, lsl #16	/* r3 = (A^B)(B) */
	eor	lr, lr, r3, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */
	eor	r3, r3, lr, lsl #16	/* r3 = (A^B^A)(B) = (B)(B) */
	str	r3, [r0]
	str	lr, [r0]

	subs	r2, r2, #0x00000008
	bgt	.Loutsw16loop

	ldmfd	sp!, {r4,r5,pc}		/* and go home */
END(outsw16)

/*
 * reads short ints (16 bits) from an I/O address into a block of memory
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words.
 * The destination address should be word aligned.
 *
 * r0 = address to read from (IO)
 * r1 = address to write to (memory)
 * r2 = length (in 16-bit items)
 *
 * Items are taken 8, then 4, then 2, then 1 at a time.  r0 is never
 * advanced; the multi-word loads read r0, r0+4, ... directly.
 * Uses r3, ip as scratch; r4-r9 and lr are saved on the stack.
 */

ENTRY(inswm8)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* Only the destination alignment is checked; unaligned goes to insw */

	tst	r1, #0x00000003

	bne	_C_LABEL(insw)

/* Word aligned insw */

	stmfd	sp!, {r4-r9,lr}

	mov	lr, #0xff000000
	orr	lr, lr, #0x00ff0000	/* lr = 0xffff0000: upper-half mask */

.Linswm8_loop8:
	cmp	r2, #8
	bcc	.Linswm8_l8

	ldmia	r0, {r3-r9,ip}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16
	bic	r7, r7, lr
	orr	r5, r7, r8, lsl #16
	bic	r9, r9, lr
	orr	r6, r9, ip, lsl #16

	stmia	r1!, {r3-r6}

	subs	r2, r2, #0x00000008	/* Next */
	bne	.Linswm8_loop8
	beq	.Linswm8_l1

.Linswm8_l8:
	cmp	r2, #4
	bcc	.Linswm8_l4

	ldmia	r0, {r3-r6}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	bic	r5, r5, lr
	orr	r4, r5, r6, lsl #16

	stmia	r1!, {r3-r4}

	subs	r2, r2, #0x00000004
	beq	.Linswm8_l1

.Linswm8_l4:
	cmp	r2, #2
	bcc	.Linswm8_l2

	ldmia	r0, {r3-r4}

	bic	r3, r3, lr
	orr	r3, r3, r4, lsl #16
	str	r3, [r1], #0x0004

	subs	r2, r2, #0x00000002
	beq	.Linswm8_l1

.Linswm8_l2:
	cmp	r2, #1
	bcc	.Linswm8_l1

	ldr	r3, [r0]
	subs	r2, r2, #0x00000001	/* Test in load delay slot */
					/* XXX, why don't we use result?  */

	strb	r3, [r1], #0x0001
	mov	r3, r3, lsr #8
	strb	r3, [r1], #0x0001


.Linswm8_l1:
	ldmfd	sp!, {r4-r9,pc}		/* And go home */
END(inswm8)

/*
 * write short ints (16 bits) to an I/O address from a block of memory
 * The I/O address is assumed to be mapped multiple times in a block of
 * 8 words.
 * The source address should be word aligned.
 *
 * r0 = address to write to (IO)
 * r1 = address to read from (memory)
 * r2 = length (in 16-bit items)
 *
 * Items are written 8, then 4, then 2, then 1 at a time.  r0 is never
 * advanced; the multi-word stores write r0, r0+4, ... directly.  Every
 * word written carries the same 16-bit value in both halves.
 * Uses r3, ip as scratch; r4-r8 and lr are saved on the stack.
 */

ENTRY(outswm8)
/* Make sure that we have a positive length */
	cmp	r2, #0x00000000
	movle	pc, lr

/* Only the source alignment is checked; unaligned goes to outsw */

	tst	r1, #0x00000003

	bne	_C_LABEL(outsw)

/* Word aligned outsw */

	stmfd	sp!, {r4-r8,lr}

.Loutswm8_loop8:
	cmp	r2, #8
	bcc	.Loutswm8_l8

	ldmia	r1!, {r3,r5,r7,ip}

	eor	r4, r3, r3, lsr #16	/* r4 = (A)(A^B) */
	eor	r3, r3, r4, lsl #16	/* r3 = (A^A^B)(B) = (B)(B) */
	eor	r4, r4, r3, lsr #16	/* r4 = (A)(B^A^B) = (A)(A) */

	eor	r6, r5, r5, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r5, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r7, r7, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r7, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	eor	lr, ip, ip, lsr #16	/* lr = (A)(A^B) */
	eor	ip, ip, lr, lsl #16	/* ip = (A^A^B)(B) = (B)(B) */
	eor	lr, lr, ip, lsr #16	/* lr = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r3-r8,ip,lr}

	subs	r2, r2, #0x00000008	/* Next */
	bne	.Loutswm8_loop8
	beq	.Loutswm8_l1

.Loutswm8_l8:
	cmp	r2, #4
	bcc	.Loutswm8_l4

	ldmia	r1!, {r3-r4}

	eor	r6, r3, r3, lsr #16	/* r6 = (A)(A^B) */
	eor	r5, r3, r6, lsl #16	/* r5 = (A^A^B)(B) = (B)(B) */
	eor	r6, r6, r5, lsr #16	/* r6 = (A)(B^A^B) = (A)(A) */

	eor	r8, r4, r4, lsr #16	/* r8 = (A)(A^B) */
	eor	r7, r4, r8, lsl #16	/* r7 = (A^A^B)(B) = (B)(B) */
	eor	r8, r8, r7, lsr #16	/* r8 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r5-r8}

	subs	r2, r2, #0x00000004
	beq	.Loutswm8_l1

.Loutswm8_l4:
	cmp	r2, #2
	bcc	.Loutswm8_l2

	ldr	r3, [r1], #0x0004	/* r3 = (A)(B) */
	subs	r2, r2, #0x00000002	/* Done test in Load delay slot */

	eor	r5, r3, r3, lsr #16	/* r5 = (A)(A^B)*/
	eor	r4, r3, r5, lsl #16	/* r4 = (A^A^B)(B) = (B)(B) */
	eor	r5, r5, r4, lsr #16	/* r5 = (A)(B^A^B) = (A)(A) */

	stmia	r0, {r4, r5}

	beq	.Loutswm8_l1

.Loutswm8_l2:
	cmp	r2, #1
	bcc	.Loutswm8_l1

	ldrb	r3, [r1], #0x0001
	ldrb	r4, [r1], #0x0001
	subs	r2, r2, #0x00000001	/* Done test in load delay slot */
					/* XXX This test isn't used?  */
	orr	r3, r3, r4, lsl #8
	orr	r3, r3, r3, lsl #16
	str	r3, [r0]

.Loutswm8_l1:
	ldmfd	sp!, {r4-r8,pc}		/* And go home */
END(outswm8)