1/*- 2 * Copyright (c) 2001 Jake Burkholder. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <machine/asm.h> 28__FBSDID("$FreeBSD$"); 29 30#include <sys/errno.h> 31 32#include <machine/asi.h> 33#include <machine/asmacros.h> 34#include <machine/fsr.h> 35#include <machine/intr_machdep.h> 36#include <machine/pcb.h> 37#include <machine/pstate.h> 38#include <machine/wstate.h> 39 40#include "assym.s" 41 42 .register %g2, #ignore 43 .register %g3, #ignore 44 .register %g6, #ignore 45 46/* 47 * Common code for copy routines. 48 * 49 * We use large macros to generate functions for each of the copy routines. 
50 * This allows the load and store instructions to be generated for the right 51 * operation, asi or not. It is possible to write an asi independent function 52 * but this would require 2 expensive wrs in the main loop to switch %asi. 53 * It would also screw up profiling (if we ever get it), but may save some I$. 54 * We assume that either one of dasi and sasi is empty, or that they are both 55 * the same (empty or non-empty). It is up to the caller to set %asi. 56 */ 57 58/* 59 * ASI independent implementation of copystr(9). 60 * Used to implement copyinstr() and copystr(). 61 * 62 * Return value is in %g1. 63 */ 64#define _COPYSTR(src, dst, len, done, sa, sasi, da, dasi) \ 65 brz len, 4f ; \ 66 mov src, %g2 ; \ 671: deccc 1, len ; \ 68 bl,a,pn %xcc, 3f ; \ 69 nop ; \ 70 LD(ub, sa) [src] sasi, %g1 ; \ 71 ST(b, da) %g1, [dst] dasi ; \ 72 brz,pn %g1, 3f ; \ 73 inc src ; \ 74 ba %xcc, 1b ; \ 75 inc dst ; \ 762: mov ENAMETOOLONG, %g1 ; \ 773: sub src, %g2, %g2 ; \ 78 brnz,a done, 4f ; \ 79 stx %g2, [done] ; \ 804: 81 82/* 83 * ASI independent implementation of memset(3). 84 * Used to implement bzero(), memset() and aszero(). 85 * 86 * If the pattern is non-zero, duplicate it to fill 64 bits. 87 * Store bytes until dst is 8-byte aligned, then store 8 bytes. 88 * It has yet to be determined how much unrolling is beneficial. 89 * Could also read and compare before writing to minimize snoop traffic. 90 * 91 * XXX bzero() should be implemented as 92 * #define bzero(dst, len) (void)memset((dst), 0, (len)) 93 * if at all. 
 */
/*
 * Label map for _MEMSET:
 *   1 - byte stores until dst is 8-byte aligned
 *   2 - 32-byte (4 x 8) unrolled store loop
 *   3 - 8-byte store loop
 *   4 - trailing byte store loop
 *   5 - done
 * %g1 is scratch while replicating the fill byte across all 64 bits.
 */
#define _MEMSET(dst, pat, len, da, dasi) \
	brlez,pn len, 5f ; \
	 and	pat, 0xff, pat ; \
	brz,pt	pat, 1f ; \
	 sllx	pat, 8, %g1 ; \
	or	pat, %g1, pat ; \
	sllx	pat, 16, %g1 ; \
	or	pat, %g1, pat ; \
	sllx	pat, 32, %g1 ; \
	or	pat, %g1, pat ; \
	.align 16 ; \
1:	deccc	1, len ; \
	bl,pn	%xcc, 5f ; \
	 btst	7, dst ; \
	bz,a,pt	%xcc, 2f ; \
	 inc	1, len ; \
	ST(b, da) pat, [dst] dasi ; \
	ba	%xcc, 1b ; \
	 inc	dst ; \
	.align 16 ; \
2:	deccc	32, len ; \
	bl,a,pn	%xcc, 3f ; \
	 inc	32, len ; \
	ST(x, da) pat, [dst] dasi ; \
	ST(x, da) pat, [dst + 8] dasi ; \
	ST(x, da) pat, [dst + 16] dasi ; \
	ST(x, da) pat, [dst + 24] dasi ; \
	ba	%xcc, 2b ; \
	 inc	32, dst ; \
	.align 16 ; \
3:	deccc	8, len ; \
	bl,a,pn	%xcc, 4f ; \
	 inc	8, len ; \
	ST(x, da) pat, [dst] dasi ; \
	ba	%xcc, 3b ; \
	 inc	8, dst ; \
	.align 16 ; \
4:	deccc	1, len ; \
	bl,a,pn	%xcc, 5f ; \
	 nop ; \
	ST(b, da) pat, [dst] dasi ; \
	ba	%xcc, 4b ; \
	 inc	1, dst ; \
5:

/*
 * ASI independent implementation of memcpy(3).
 * Used to implement bcopy(), copyin(), copyout(), memcpy(), ascopy(),
 * ascopyfrom() and ascopyto().
 *
 * Transfer bytes until dst is 8-byte aligned.  If src is then also 8 byte
 * aligned, transfer 8 bytes, otherwise finish with bytes.  The unaligned
 * case could be optimized, but it is expected that this is the uncommon
 * case and of questionable value.  The code to do so is also rather large
 * and ugly.  It has yet to be determined how much unrolling is beneficial.
 *
 * XXX bcopy() must also check for overlap.  This is stupid.
 * XXX bcopy() should be implemented as
 * #define bcopy(src, dst, len) (void)memcpy((dst), (src), (len))
 * if at all.
 */
/*
 * Label map for _MEMCPY:
 *   1 - byte copies until dst is 8-byte aligned
 *   2 - dispatch on src alignment (aligned -> 3, unaligned -> 5)
 *   3 - 32-byte (4 x 8) unrolled copy loop
 *   4 - 8-byte copy loop
 *   5 - trailing / unaligned byte copy loop
 *   6 - done
 * %g1-%g4 are scratch.
 */
#define _MEMCPY(dst, src, len, da, dasi, sa, sasi) \
1:	deccc	1, len ; \
	bl,pn	%xcc, 6f ; \
	 btst	7, dst ; \
	bz,a,pt	%xcc, 2f ; \
	 inc	1, len ; \
	LD(ub, sa) [src] sasi, %g1 ; \
	ST(b, da) %g1, [dst] dasi ; \
	inc	1, src ; \
	ba	%xcc, 1b ; \
	 inc	1, dst ; \
	.align 16 ; \
2:	btst	7, src ; \
	bz,a,pt	%xcc, 3f ; \
	 nop ; \
	ba,a	%xcc, 5f ; \
	.align 16 ; \
3:	deccc	32, len ; \
	bl,a,pn	%xcc, 4f ; \
	 inc	32, len ; \
	LD(x, sa) [src] sasi, %g1 ; \
	LD(x, sa) [src + 8] sasi, %g2 ; \
	LD(x, sa) [src + 16] sasi, %g3 ; \
	LD(x, sa) [src + 24] sasi, %g4 ; \
	ST(x, da) %g1, [dst] dasi ; \
	ST(x, da) %g2, [dst + 8] dasi ; \
	ST(x, da) %g3, [dst + 16] dasi ; \
	ST(x, da) %g4, [dst + 24] dasi ; \
	inc	32, src ; \
	ba	%xcc, 3b ; \
	 inc	32, dst ; \
	.align 16 ; \
4:	deccc	8, len ; \
	bl,a,pn	%xcc, 5f ; \
	 inc	8, len ; \
	LD(x, sa) [src] sasi, %g1 ; \
	ST(x, da) %g1, [dst] dasi ; \
	inc	8, src ; \
	ba	%xcc, 4b ; \
	 inc	8, dst ; \
	.align 16 ; \
5:	deccc	1, len ; \
	bl,a,pn	%xcc, 6f ; \
	 nop ; \
	LD(ub, sa) [src] sasi, %g1 ; \
	ST(b, da) %g1, [dst] dasi ; \
	inc	src ; \
	ba	%xcc, 5b ; \
	 inc	dst ; \
6:

/*
 * void ascopy(u_long asi, vm_offset_t src, vm_offset_t dst, size_t len)
 *
 * Copy between two addresses within the same alternate address space.
 */
ENTRY(ascopy)
	wr	%o0, 0, %asi		! %asi = caller-supplied ASI
	_MEMCPY(%o2, %o1, %o3, a, %asi, a, %asi)
	retl
	 nop
END(ascopy)

/*
 * void ascopyfrom(u_long sasi, vm_offset_t src, caddr_t dst, size_t len)
 *
 * Copy from an alternate address space to normal kernel memory.
 */
ENTRY(ascopyfrom)
	wr	%o0, 0, %asi		! %asi applies to the loads only
	_MEMCPY(%o2, %o1, %o3, EMPTY, EMPTY, a, %asi)
	retl
	 nop
END(ascopyfrom)

/*
 * void ascopyto(caddr_t src, u_long dasi, vm_offset_t dst, size_t len)
 *
 * Copy from normal kernel memory to an alternate address space.
 */
ENTRY(ascopyto)
	wr	%o1, 0, %asi		! %asi applies to the stores only
	_MEMCPY(%o2, %o0, %o3, a, %asi, EMPTY, EMPTY)
	retl
	 nop
END(ascopyto)

/*
 * void aszero(u_long asi, vm_offset_t pa, size_t len)
 *
 * Zero a region in an alternate address space.
 */
ENTRY(aszero)
	wr	%o0, 0, %asi		! %asi = caller-supplied ASI
	_MEMSET(%o1, %g0, %o2, a, %asi)
	retl
	 nop
END(aszero)

/*
 * int bcmp(const void *b1, const void *b2, size_t len)
 *
 * Byte-wise compare; returns 0 if equal, otherwise the (non-zero) number
 * of bytes that were left uncompared when the first mismatch was found.
 */
ENTRY(bcmp)
	brz,pn	%o2, 2f			! len == 0 -> equal
	 clr	%o3			! %o3 = byte index
1:	ldub	[%o0 + %o3], %o4
	ldub	[%o1 + %o3], %o5
	cmp	%o4, %o5
	bne,pn	%xcc, 2f		! mismatch -> return remaining len
	 inc	%o3
	deccc	%o2
	bne,pt	%xcc, 1b
	 nop
2:	retl
	 mov	%o2, %o0
END(bcmp)

/*
 * void bcopy(const void *src, void *dst, size_t len)
 */
ENTRY(bcopy)
	/*
	 * Check for overlap, and copy backwards if so.  An unsigned
	 * compare of (dst - src) against len catches the forward-overlap
	 * case (dst inside [src, src + len)) in one test.
	 */
	sub	%o1, %o0, %g1
	cmp	%g1, %o2
	bgeu,a,pt %xcc, 3f
	 nop

	/*
	 * Copy backwards, a byte at a time, starting from the end.
	 */
	add	%o0, %o2, %o0
	add	%o1, %o2, %o1
1:	deccc	1, %o2
	bl,a,pn	%xcc, 2f
	 nop
	dec	1, %o0
	ldub	[%o0], %g1
	dec	1, %o1
	ba	%xcc, 1b
	 stb	%g1, [%o1]
2:	retl
	 nop

	/*
	 * Do the fast version.
	 */
3:	_MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
	retl
	 nop
END(bcopy)

/*
 * void bzero(void *b, size_t len)
 */
ENTRY(bzero)
	_MEMSET(%o0, %g0, %o1, EMPTY, EMPTY)
	retl
	 nop
END(bzero)

/*
 * int copystr(const void *src, void *dst, size_t len, size_t *done)
 */
ENTRY(copystr)
	_COPYSTR(%o0, %o1, %o2, %o3, EMPTY, EMPTY, EMPTY, EMPTY)
	retl
	 mov	%g1, %o0		! error code computed by _COPYSTR
END(copystr)

/*
 * void *memcpy(void *dst, const void *src, size_t len)
 */
ENTRY(memcpy)
	mov	%o0, %o3		! preserve dst for the return value
	_MEMCPY(%o3, %o1, %o2, EMPTY, EMPTY, EMPTY, EMPTY)
	retl
	 nop
END(memcpy)

/*
 * void *memset(void *b, int c, size_t len)
 */
ENTRY(memset)
	mov	%o0, %o3		! preserve b for the return value
	_MEMSET(%o3, %o1, %o2, EMPTY, EMPTY)
	retl
	 nop
END(memset)

	/*
	 * Start marker of the region in which user-access faults are
	 * tolerated.  NOTE(review): presumably the trap handlers redirect
	 * faults in [copy_nofault_begin, copy_nofault_end) to copy_fault;
	 * that logic is not visible in this file -- confirm in the trap code.
	 */
	.globl	copy_nofault_begin
copy_nofault_begin:
	nop

/*
 * int copyin(const void *uaddr, void *kaddr, size_t len)
 */
ENTRY(copyin)
	wr	%g0, ASI_AIUP, %asi	! loads from user primary address space
	_MEMCPY(%o1, %o0, %o2, EMPTY, EMPTY, a, %asi)
	retl
	 clr	%o0			! success
END(copyin)

/*
 * int copyinstr(const void *uaddr, void *kaddr, size_t len, size_t *done)
 */
ENTRY(copyinstr)
	wr	%g0, ASI_AIUP, %asi	! loads from user primary address space
	_COPYSTR(%o0, %o1, %o2, %o3, a, %asi, EMPTY, EMPTY)
	retl
	 mov	%g1, %o0
END(copyinstr)

/*
 * int copyout(const void *kaddr, void *uaddr, size_t len)
 */
ENTRY(copyout)
	wr	%g0, ASI_AIUP, %asi	! stores to user primary address space
	_MEMCPY(%o1, %o0, %o2, a, %asi, EMPTY, EMPTY)
	retl
	 clr	%o0			! success
END(copyout)

	.globl	copy_nofault_end
copy_nofault_end:
	nop

/*
 * Landing pad for faults taken inside the copy_nofault region; returns
 * EFAULT on behalf of the interrupted copy function.
 */
ENTRY(copy_fault)
	retl
	 mov	EFAULT, %o0
END(copy_fault)

	.globl	fs_nofault_begin
fs_nofault_begin:
	nop

/*
 * Chatty aliases for fetch, store functions.
 */
	.globl	fubyte, fusword, fuword, subyte, susword, suword
	.set	fubyte, fuword8
	.set	fusword, fuword16
	.set	fuword, fuword64
	.set	subyte, suword8
	.set	susword, suword16
	.set	suword, suword64

	.globl	casuword32, casuword, fuptr, suptr
	.set	casuword, casuword64
	.set	fuptr, fuword64
	.set	suptr, suword64

/*
 * int32_t casuword32(volatile int32_t *p, int32_t e, int32_t s)
 *
 * Atomic compare-and-swap on a user address: if *p == e, store s;
 * returns the previous value of *p either way.
 */
ENTRY(casuword32)
	casa	[%o0] ASI_AIUP, %o1, %o2
	retl
	 mov	%o2, %o0
END(casuword32)

/*
 * int64_t casuword64(volatile int64_t *p, int64_t e, int64_t s)
 */
ENTRY(casuword64)
	casxa	[%o0] ASI_AIUP, %o1, %o2
	retl
	 mov	%o2, %o0
END(casuword64)

/*
 * int fuword8(const void *base)
 */
ENTRY(fuword8)
	retl
	 lduba	[%o0] ASI_AIUP, %o0	! load executes in the delay slot
END(fuword8)

/*
 * int fuword16(const void *base)
 */
ENTRY(fuword16)
	retl
	 lduha	[%o0] ASI_AIUP, %o0
END(fuword16)

/*
 * int32_t fuword32(const void *base)
 */
ENTRY(fuword32)
	retl
	 lduwa	[%o0] ASI_AIUP, %o0
END(fuword32)

/*
 * int64_t fuword64(const void *base)
 */
ENTRY(fuword64)
	retl
	 ldxa	[%o0] ASI_AIUP, %o0
END(fuword64)

/*
 * int suword8(const void *base, int word)
 */
ENTRY(suword8)
	stba	%o1, [%o0] ASI_AIUP
	retl
	 clr	%o0			! success
END(suword8)

/*
 * int suword16(const void *base, int word)
 */
ENTRY(suword16)
	stha	%o1, [%o0] ASI_AIUP
	retl
	 clr	%o0
END(suword16)

/*
 * int suword32(const void *base, int32_t word)
 */
ENTRY(suword32)
	stwa	%o1, [%o0] ASI_AIUP
	retl
	 clr	%o0
END(suword32)

/*
 * int suword64(const void *base, int64_t word)
 */
ENTRY(suword64)
	stxa	%o1, [%o0] ASI_AIUP
	retl
	 clr	%o0
END(suword64)

	.globl	fs_nofault_intr_begin
fs_nofault_intr_begin:
	nop

/*
 * int fuswintr(const void *base)
 */
ENTRY(fuswintr)
	retl
	 lduha	[%o0] ASI_AIUP, %o0
END(fuswintr)

/*
 * int suswintr(const void *base, int word)
 */
ENTRY(suswintr)
	stha	%o1, [%o0] ASI_AIUP
	retl
	 clr	%o0
END(suswintr)

	.globl	fs_nofault_intr_end
fs_nofault_intr_end:
	nop

	.globl	fs_nofault_end
fs_nofault_end:
	nop

/*
 * Landing pad for faults taken inside the fs_nofault region; returns -1
 * as the error value of the interrupted fetch/store function.
 *
 * Fixed: this function was closed with END(fsfault) -- a misspelling of
 * the symbol opened by ENTRY(fs_fault) -- which set the ELF size of a
 * stray "fsfault" symbol instead of fs_fault.
 */
ENTRY(fs_fault)
	retl
	 mov	-1, %o0
END(fs_fault)

	.globl	fas_nofault_begin
fas_nofault_begin:

/*
 * int fasword8(u_long asi, uint64_t addr, uint8_t *val)
 */
ENTRY(fasword8)
	wr	%o0, 0, %asi
	membar	#Sync
	lduba	[%o1] %asi, %o3
	membar	#Sync
	stb	%o3, [%o2]
	retl
	 clr	%o0
END(fasword8)

/*
 * int fasword16(u_long asi, uint64_t addr, uint16_t *val)
 */
ENTRY(fasword16)
	wr	%o0, 0, %asi
	membar	#Sync
	lduha	[%o1] %asi, %o3
	membar	#Sync
	sth	%o3, [%o2]
	retl
	 clr	%o0
END(fasword16)

/*
 * int fasword32(u_long asi, uint64_t addr, uint32_t *val)
 */
ENTRY(fasword32)
	wr	%o0, 0, %asi
	membar	#Sync
	lduwa	[%o1] %asi, %o3
	membar	#Sync
	stw	%o3, [%o2]
	retl
	 clr	%o0
END(fasword32)

	.globl	fas_nofault_end
fas_nofault_end:
	nop

	.globl	fas_fault
ENTRY(fas_fault)
	retl
	 mov	-1, %o0
END(fas_fault)

	.globl	fpu_fault_begin
fpu_fault_begin:
	nop

/*
 * void spitfire_block_copy(void *src, void *dst, size_t len)
 *
 * Block copy using the FPU registers via ASI_BLK_S (64-byte block
 * load/store).  The loop only terminates when len reaches exactly 0,
 * so len is required to be a multiple of VIS_BLOCKSIZE.
 */
ENTRY(spitfire_block_copy)
	rdpr	%pstate, %o3
	wrpr	%g0, PSTATE_NORMAL, %pstate

	wr	%g0, ASI_BLK_S, %asi
	wr	%g0, FPRS_FEF, %fprs		! enable the FPU

	/*
	 * If the interrupted context had the FPU enabled, save its lower
	 * register file into the PCB and flag it for later restore.
	 */
	sub	PCB_REG, TF_SIZEOF, %o4
	ldx	[%o4 + TF_FPRS], %o5
	andcc	%o5, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f
	 nop
	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
	membar	#Sync

	andn	%o5, FPRS_FEF, %o5
	stx	%o5, [%o4 + TF_FPRS]
	ldx	[PCB_REG + PCB_FLAGS], %o4
	or	%o4, PCB_FEF, %o4
	stx	%o4, [PCB_REG + PCB_FLAGS]

1:	wrpr	%o3, 0, %pstate

	/* Software pipeline: keep one block load ahead of the stores. */
	ldda	[%o0] %asi, %f0
	add	%o0, VIS_BLOCKSIZE, %o0
	sub	%o2, VIS_BLOCKSIZE, %o2

2:	ldda	[%o0] %asi, %f16
	fsrc1	%f0, %f32
	fsrc1	%f2, %f34
	fsrc1	%f4, %f36
	fsrc1	%f6, %f38
	fsrc1	%f8, %f40
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [%o1] %asi
	add	%o0, VIS_BLOCKSIZE, %o0
	subcc	%o2, VIS_BLOCKSIZE, %o2
	bz,pn	%xcc, 3f
	 add	%o1, VIS_BLOCKSIZE, %o1
	ldda	[%o0] %asi, %f0
	fsrc1	%f16, %f32
	fsrc1	%f18, %f34
	fsrc1	%f20, %f36
	fsrc1	%f22, %f38
	fsrc1	%f24, %f40
	fsrc1	%f26, %f42
	fsrc1	%f28, %f44
	fsrc1	%f30, %f46
	stda	%f32, [%o1] %asi
	add	%o0, VIS_BLOCKSIZE, %o0
	sub	%o2, VIS_BLOCKSIZE, %o2
	ba,pt	%xcc, 2b
	 add	%o1, VIS_BLOCKSIZE, %o1

3:	membar	#Sync				! wait for the last block load

	stda	%f16, [%o1] %asi		! store the final block
	membar	#Sync

	retl
	 wr	%g0, 0, %fprs			! give up the FPU again
END(spitfire_block_copy)

/*
 * void zeus_block_copy(void *src, void *dst, size_t len)
 *
 * Same contract as spitfire_block_copy(), tuned with aggressive software
 * prefetching and 8-byte ldd loads instead of block loads.
 */
ENTRY(zeus_block_copy)
	prefetch [%o0 + (0 * VIS_BLOCKSIZE)], 0

	rdpr	%pstate, %o3
	wrpr	%g0, PSTATE_NORMAL, %pstate

	wr	%g0, ASI_BLK_S, %asi
	wr	%g0, FPRS_FEF, %fprs		! enable the FPU

	/* Save the user FPU state if it was live (see above). */
	sub	PCB_REG, TF_SIZEOF, %o4
	ldx	[%o4 + TF_FPRS], %o5
	andcc	%o5, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f
	 nop
	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
	membar	#Sync

	andn	%o5, FPRS_FEF, %o5
	stx	%o5, [%o4 + TF_FPRS]
	ldx	[PCB_REG + PCB_FLAGS], %o4
	or	%o4, PCB_FEF, %o4
	stx	%o4, [PCB_REG + PCB_FLAGS]

1:	wrpr	%o3, 0, %pstate

	/* Prime the pipeline: load the first block into %f0-%f14. */
	ldd	[%o0 + (0 * 8)], %f0
	prefetch [%o0 + (1 * VIS_BLOCKSIZE)], 0
	ldd	[%o0 + (1 * 8)], %f2
	prefetch [%o0 + (2 * VIS_BLOCKSIZE)], 0
	fmovd	%f0, %f32
	ldd	[%o0 + (2 * 8)], %f4
	prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
	fmovd	%f2, %f34
	ldd	[%o0 + (3 * 8)], %f6
	prefetch [%o0 + (4 * VIS_BLOCKSIZE)], 1
	fmovd	%f4, %f36
	ldd	[%o0 + (4 * 8)], %f8
	prefetch [%o0 + (8 * VIS_BLOCKSIZE)], 1
	fmovd	%f6, %f38
	ldd	[%o0 + (5 * 8)], %f10
	prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1
	fmovd	%f8, %f40
	ldd	[%o0 + (6 * 8)], %f12
	prefetch [%o0 + (16 * VIS_BLOCKSIZE)], 1
	fmovd	%f10, %f42
	ldd	[%o0 + (7 * 8)], %f14
	ldd	[%o0 + (8 * 8)], %f0
	sub	%o2, VIS_BLOCKSIZE, %o2
	add	%o0, VIS_BLOCKSIZE, %o0
	prefetch [%o0 + (19 * VIS_BLOCKSIZE)], 1
	ba,pt	%xcc, 2f
	 prefetch [%o0 + (23 * VIS_BLOCKSIZE)], 1
	.align	32

	/* Main loop: copy block n while loading block n + 1. */
2:	ldd	[%o0 + (1 * 8)], %f2
	fmovd	%f12, %f44
	ldd	[%o0 + (2 * 8)], %f4
	fmovd	%f14, %f46
	stda	%f32, [%o1] %asi
	ldd	[%o0 + (3 * 8)], %f6
	fmovd	%f0, %f32
	ldd	[%o0 + (4 * 8)], %f8
	fmovd	%f2, %f34
	ldd	[%o0 + (5 * 8)], %f10
	fmovd	%f4, %f36
	ldd	[%o0 + (6 * 8)], %f12
	fmovd	%f6, %f38
	ldd	[%o0 + (7 * 8)], %f14
	fmovd	%f8, %f40
	ldd	[%o0 + (8 * 8)], %f0
	fmovd	%f10, %f42
	sub	%o2, VIS_BLOCKSIZE, %o2
	prefetch [%o0 + (3 * VIS_BLOCKSIZE)], 0
	add	%o1, VIS_BLOCKSIZE, %o1
	prefetch [%o0 + (24 * VIS_BLOCKSIZE)], 1
	add	%o0, VIS_BLOCKSIZE, %o0
	cmp	%o2, VIS_BLOCKSIZE + 8
	bgu,pt	%xcc, 2b
	 prefetch [%o0 + (12 * VIS_BLOCKSIZE)], 1

	/* Epilogue: drain the last two pipelined blocks. */
	ldd	[%o0 + (1 * 8)], %f2
	fsrc1	%f12, %f44
	ldd	[%o0 + (2 * 8)], %f4
	fsrc1	%f14, %f46
	stda	%f32, [%o1] %asi
	ldd	[%o0 + (3 * 8)], %f6
	fsrc1	%f0, %f32
	ldd	[%o0 + (4 * 8)], %f8
	fsrc1	%f2, %f34
	ldd	[%o0 + (5 * 8)], %f10
	fsrc1	%f4, %f36
	ldd	[%o0 + (6 * 8)], %f12
	fsrc1	%f6, %f38
	ldd	[%o0 + (7 * 8)], %f14
	fsrc1	%f8, %f40
	add	%o1, VIS_BLOCKSIZE, %o1
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [%o1] %asi
	membar	#Sync

	retl
	 wr	%g0, 0, %fprs			! give up the FPU again
END(zeus_block_copy)

/*
 * void spitfire_block_zero(void *dst, size_t len)
 * void zeus_block_zero(void *dst, size_t len)
 *
 * Zero len bytes, 4 VIS blocks (4 * 64 bytes) at a time, using FPU
 * block stores.  len is decremented in 4 * VIS_BLOCKSIZE steps.
 */
ALTENTRY(zeus_block_zero)
ENTRY(spitfire_block_zero)
	rdpr	%pstate, %o3
	wrpr	%g0, PSTATE_NORMAL, %pstate

	wr	%g0, ASI_BLK_S, %asi
	wr	%g0, FPRS_FEF, %fprs		! enable the FPU

	/* Save the user FPU state if it was live (see above). */
	sub	PCB_REG, TF_SIZEOF, %o4
	ldx	[%o4 + TF_FPRS], %o5
	andcc	%o5, FPRS_FEF, %g0
	bz,a,pt	%xcc, 1f
	 nop
	stda	%f0, [PCB_REG + PCB_UFP + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f16, [PCB_REG + PCB_UFP + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f32, [PCB_REG + PCB_UFP + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f48, [PCB_REG + PCB_UFP + (3 * VIS_BLOCKSIZE)] %asi
	membar	#Sync

	andn	%o5, FPRS_FEF, %o5
	stx	%o5, [%o4 + TF_FPRS]
	ldx	[PCB_REG + PCB_FLAGS], %o4
	or	%o4, PCB_FEF, %o4
	stx	%o4, [PCB_REG + PCB_FLAGS]

1:	wrpr	%o3, 0, %pstate

	fzero	%f0
	fzero	%f2
	fzero	%f4
	fzero	%f6
	fzero	%f8
	fzero	%f10
	fzero	%f12
	fzero	%f14

1:	stda	%f0, [%o0 + (0 * VIS_BLOCKSIZE)] %asi
	stda	%f0, [%o0 + (1 * VIS_BLOCKSIZE)] %asi
	stda	%f0, [%o0 + (2 * VIS_BLOCKSIZE)] %asi
	stda	%f0, [%o0 + (3 * VIS_BLOCKSIZE)] %asi
	sub	%o1, (4 * VIS_BLOCKSIZE), %o1
brnz,pt %o1, 1b 806 add %o0, (4 * VIS_BLOCKSIZE), %o0 807 membar #Sync 808 809 retl 810 wr %g0, 0, %fprs 811END(spitfire_block_zero) 812 813 .globl fpu_fault_end 814fpu_fault_end: 815 nop 816 817 .globl fpu_fault_size 818 .set fpu_fault_size, fpu_fault_end - fpu_fault_begin 819 820ENTRY(longjmp) 821 set 1, %g3 822 movrz %o1, %o1, %g3 823 mov %o0, %g1 824 ldx [%g1 + _JB_FP], %g2 8251: cmp %fp, %g2 826 bl,a,pt %xcc, 1b 827 restore 828 bne,pn %xcc, 2f 829 ldx [%g1 + _JB_SP], %o2 830 cmp %o2, %sp 831 blt,pn %xcc, 2f 832 movge %xcc, %o2, %sp 833 ldx [%g1 + _JB_PC], %o7 834 retl 835 mov %g3, %o0 8362: PANIC("longjmp botch", %l1) 837END(longjmp) 838 839ENTRY(setjmp) 840 stx %sp, [%o0 + _JB_SP] 841 stx %o7, [%o0 + _JB_PC] 842 stx %fp, [%o0 + _JB_FP] 843 retl 844 clr %o0 845END(setjmp) 846 847/* 848 * void ofw_entry(cell_t args[]) 849 */ 850ENTRY(ofw_entry) 851 save %sp, -CCFSZ, %sp 852 SET(ofw_vec, %l7, %l6) 853 ldx [%l6], %l6 854 rdpr %pstate, %l7 855 andn %l7, PSTATE_AM | PSTATE_IE, %l5 856 wrpr %l5, 0, %pstate 857 SET(tba_taken_over, %l5, %l4) 858 brz,pn %l4, 1f 859 rdpr %wstate, %l5 860 andn %l5, WSTATE_PROM_MASK, %l3 861 wrpr %l3, WSTATE_PROM_KMIX, %wstate 8621: call %l6 863 mov %i0, %o0 864 brz,pn %l4, 1f 865 nop 866 wrpr %g0, %l5, %wstate 8671: wrpr %l7, 0, %pstate 868 ret 869 restore %o0, %g0, %o0 870END(ofw_entry) 871 872/* 873 * void ofw_exit(cell_t args[]) 874 */ 875ENTRY(ofw_exit) 876 save %sp, -CCFSZ, %sp 877 flushw 878 SET(ofw_tba, %l7, %l5) 879 ldx [%l5], %l5 880 rdpr %pstate, %l7 881 andn %l7, PSTATE_AM | PSTATE_IE, %l7 882 wrpr %l7, 0, %pstate 883 rdpr %wstate, %l7 884 andn %l7, WSTATE_PROM_MASK, %l7 885 wrpr %l7, WSTATE_PROM_KMIX, %wstate 886 wrpr %l5, 0, %tba ! restore the OFW trap table 887 SET(ofw_vec, %l7, %l6) 888 ldx [%l6], %l6 889 SET(kstack0 + KSTACK_PAGES * PAGE_SIZE - PCB_SIZEOF, %l7, %l0) 890 sub %l0, SPOFF, %fp ! setup a stack in a locked page 891 sub %l0, SPOFF + CCFSZ, %sp 892 mov AA_DMMU_PCXR, %l3 ! 
force primary DMMU context 0 893 sethi %hi(KERNBASE), %l5 894 stxa %g0, [%l3] ASI_DMMU 895 flush %l5 896 wrpr %g0, 0, %tl ! force trap level 0 897 call %l6 898 mov %i0, %o0 899 ! never to return 900END(ofw_exit) 901 902#ifdef GPROF 903 904ENTRY(user) 905 nop 906 907ENTRY(btrap) 908 nop 909 910ENTRY(etrap) 911 nop 912 913ENTRY(bintr) 914 nop 915 916ENTRY(eintr) 917 nop 918 919/* 920 * XXX including sys/gmon.h in genassym.c is not possible due to uintfptr_t 921 * badness. 922 */ 923#define GM_STATE 0x0 924#define GMON_PROF_OFF 3 925#define GMON_PROF_HIRES 4 926 927 .globl _mcount 928 .set _mcount, __cyg_profile_func_enter 929 930ENTRY(__cyg_profile_func_enter) 931 SET(_gmonparam, %o3, %o2) 932 lduw [%o2 + GM_STATE], %o3 933 cmp %o3, GMON_PROF_OFF 934 be,a,pn %icc, 1f 935 nop 936 SET(mcount, %o3, %o2) 937 jmpl %o2, %g0 938 nop 9391: retl 940 nop 941END(__cyg_profile_func_enter) 942 943#ifdef GUPROF 944 945ENTRY(__cyg_profile_func_exit) 946 SET(_gmonparam, %o3, %o2) 947 lduw [%o2 + GM_STATE], %o3 948 cmp %o3, GMON_PROF_HIRES 949 be,a,pn %icc, 1f 950 nop 951 SET(mexitcount, %o3, %o2) 952 jmpl %o2, %g0 953 nop 9541: retl 955 nop 956END(__cyg_profile_func_exit) 957 958#endif /* GUPROF */ 959 960#endif /* GPROF */ 961