;; libgcc routines for the Renesas H8/300 CPU.
;; Contributed by Steve Chamberlain <sac@cygnus.com>
;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>

/* Copyright (C) 1994-2015 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* Assembler register definitions.
*/

/* Short aliases used by every routine in this file:
   A0-A3 name r0-r3 and S0-S2 name r4-r6; an L or H suffix names the
   low or high byte of that 16-bit register.  */

#define A0 r0
#define A0L r0l
#define A0H r0h

#define A1 r1
#define A1L r1l
#define A1H r1h

#define A2 r2
#define A2L r2l
#define A2H r2h

#define A3 r3
#define A3L r3l
#define A3H r3h

#define S0 r4
#define S0L r4l
#define S0H r4h

#define S1 r5
#define S1L r5l
#define S1H r5h

#define S2 r6
#define S2L r6l
#define S2H r6h

/* The P suffix names the register form used with PUSHP/POPP:
   16-bit rN on the H8/300, 32-bit erN on the later CPUs.  */

#ifdef __H8300__
#define PUSHP push
#define POPP pop

#define A0P r0
#define A1P r1
#define A2P r2
#define A3P r3
#define S0P r4
#define S1P r5
#define S2P r6
#endif

#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
#define PUSHP push.l
#define POPP pop.l

#define A0P er0
#define A1P er1
#define A2P er2
#define A3P er3
#define S0P er4
#define S1P er5
#define S2P er6

/* The E suffix names the upper 16 bits (eN) of the 32-bit erN.  */
#define A0E e0
#define A1E e1
#define A2E e2
#define A3E e3
#endif

/* Select the assembler CPU mode that matches the compiler target.  */

#ifdef __H8300H__
#ifdef __NORMAL_MODE__
	.h8300hn
#else
	.h8300h
#endif
#endif

#ifdef __H8300S__
#ifdef __NORMAL_MODE__
	.h8300sn
#else
	.h8300s
#endif
#endif
#ifdef __H8300SX__
#ifdef __NORMAL_MODE__
	.h8300sxn
#else
	.h8300sx
#endif
#endif

#ifdef L_cmpsi2
#ifdef __H8300__
;; ___cmpsi2 -- signed 32-bit three-way compare (H8/300 only).
;;
;; In:	a = A0:A1 (A0 is the high word), b = A2:A3
;; Out:	A0 = 0 if a < b, 1 if a == b, 2 if a > b
;;
;; The high words are compared as signed values; when they are equal
;; the low words decide, compared as unsigned values.
	.section .text
	.align 2
	.global ___cmpsi2
___cmpsi2:
	cmp.w	A0,A2		; flags from b.hi - a.hi
	bne	.L2
	cmp.w	A1,A3		; high words equal: flags from b.lo - a.lo
	bne	.L4
	mov.w	#1,A0		; a == b
	rts
.L2:
	bgt	.L5		; b.hi > a.hi (signed) means a < b
.L3:
	mov.w	#2,A0		; a > b
	rts
.L4:
	bls	.L3		; b.lo below a.lo (unsigned) means a > b
.L5:
	sub.w	A0,A0		; a < b
	rts
	.end
#endif
#endif /* L_cmpsi2 */

#ifdef L_ucmpsi2
#ifdef __H8300__
;; ___ucmpsi2 -- unsigned 32-bit three-way compare (H8/300 only).
;;
;; In:	a = A0:A1 (A0 is the high word), b = A2:A3
;; Out:	A0 = 0 if a < b, 1 if a == b, 2 if a > b
;;
;; Same shape as ___cmpsi2, but the high words are compared unsigned.
	.section .text
	.align 2
	.global ___ucmpsi2
___ucmpsi2:
	cmp.w	A0,A2		; flags from b.hi - a.hi
	bne	.L2
	cmp.w	A1,A3		; high words equal: flags from b.lo - a.lo
	bne	.L4
	mov.w	#1,A0		; a == b
	rts
.L2:
	bhi	.L5		; b.hi above a.hi (unsigned) means a < b
.L3:
	mov.w	#2,A0		; a > b
	rts
.L4:
	bls	.L3		; b.lo below a.lo (unsigned) means a > b
.L5:
	sub.w	A0,A0		; a < b
	rts
	.end
#endif
#endif /* L_ucmpsi2 */

#ifdef L_divhi3

;; HImode divides for the H8/300.
;; We bunch all of this into one object file since there are several
;; "supporting routines".

; general purpose normalize routine
;
; dividend in A0
; divisor in A1
; turns both into +ve numbers, and leaves what the answer sign
; should be in bit 3 of A2L (A2L holds a copy of CCR, whose bit 3
; is tested later by negans)

#ifdef __H8300__
	.section .text
	.align 2
divnorm:
	or	A0H,A0H		; is dividend >= 0
	stc	ccr,A2L		; remember the flags: bit 3 set if A0 was -ve
	bge	_lab1
	not	A0H		; no - then make it +ve
	not	A0L
	adds	#1,A0
_lab1:	or	A1H,A1H		; look at divisor
	bge	_lab2
	not	A1H		; it is -ve, make it positive
	not	A1L
	adds	#1,A1
	xor	#0x8,A2L	; and toggle sign of result
_lab2:	rts
;; Basically the same, except that only the sign of the dividend (A0)
;; determines the sign of the result: a -ve divisor is made +ve
;; without toggling the saved sign bit.
modnorm:
	or	A0H,A0H		; is dividend >= 0
	stc	ccr,A2L		; remember the flags: bit 3 set if A0 was -ve
	bge	_lab7
	not	A0H		; no - then make it +ve
	not	A0L
	adds	#1,A0
_lab7:	or	A1H,A1H		; look at divisor
	bge	_lab8
	not	A1H		; it is -ve, make it positive
	not	A1L
	adds	#1,A1
_lab8:	rts

; A0=A0/A1 signed

	.global	___divhi3
___divhi3:
	bsr	divnorm
	bsr	___udivhi3
; shared tail: negate A0 when bit 3 of A2L says the answer is -ve
negans:	btst	#3,A2L		; should answer be negative ?
	beq	_lab4
	not	A0H		; yes, so make it so
	not	A0L
	adds	#1,A0
_lab4:	rts

; A0=A0%A1 signed

	.global	___modhi3
___modhi3:
	bsr	modnorm
	bsr	___udivhi3
	mov	A3,A0		; ___udivhi3 leaves the remainder in A3
	bra	negans

; A0=A0%A1 unsigned

	.global	___umodhi3
___umodhi3:
	bsr	___udivhi3
	mov	A3,A0		; ___udivhi3 leaves the remainder in A3
	rts

; A0=A0/A1 unsigned
; A3=A0%A1 unsigned
; A2H trashed
; D high 8 bits of denom
; d low  8 bits of denom
; N high 8 bits of num
; n low  8 bits of num
; M high 8 bits of mod
; m low  8 bits of mod
; Q high 8 bits of quot
; q low  8 bits of quot
; P preserve

; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
; see how to partition up the expression.
; ___udivhi3: A0 = A0 / A1 and A3 = A0 % A1, both unsigned; trashes A2H.
; The column diagrams in the comments below show the byte contents of
; A0 A1 A2 A3 (N/n numerator, D/d denominator, Q/q quotient,
; M/m remainder, P preserved, x trashed).

	.global	___udivhi3
___udivhi3:
				; A0 A1 A2 A3
				; Nn Dd       P
	sub.w	A3,A3		; Nn Dd xP 00
	or	A1H,A1H
	bne	divlongway	; D != 0: denominator does not fit divxu
	or	A0H,A0H
	beq	_lab6		; N == 0: a single 8-bit step is enough

; we know that D == 0 and N is != 0
	mov.b	A0H,A3L		; Nn Dd xP 0N
	divxu	A1L,A3		;          MQ
	mov.b	A3L,A0H		; Q
; dealt with N, do n
_lab6:	mov.b	A0L,A3L		;           n
	divxu	A1L,A3		;          mq
	mov.b	A3L,A0L		; Qq
	mov.b	A3H,A3L		;           m
	mov.b	#0x0,A3H	; Qq       0m
	rts

; D != 0 - which means the denominator is at least 256, so the
;          quotient fits in 8 bits.  Shift and subtract 8 times,
;          loop around to get the result.

divlongway:
	mov.b	A0H,A3L		; Nn Dd xP 0N
	mov.b	#0x0,A0H	; high byte of answer has to be zero
	mov.b	#0x8,A2H	;       8  (loop counter)
div8:	add.b	A0L,A0L		; n*=2, top bit of n goes to carry
	rotxl	A3L		; Make remainder bigger
	rotxl	A3H
	sub.w	A1,A3		; remainder -= denominator
	bhs	setbit		; set a bit ?
	add.w	A1,A3		;  no : too far , remainder += denominator

	dec	A2H
	bne	div8		; next bit
	rts

setbit:	inc	A0L		; do insert bit
	dec	A2H
	bne	div8		; next bit
	rts

#endif /* __H8300__ */
#endif /* L_divhi3 */

#ifdef L_divsi3

;; 4 byte integer divides for the H8/300.
;;
;; We have one routine which does all the work and lots of
;; little ones which prepare the args and massage the sign.
;; We bunch all of this into one object file since there are several
;; "supporting routines".

	.section .text
	.align 2

; Put abs SIs into r0/r1 and r2/r3, and leave the sign of the result
; in bit 3 of r6l.
; This function is here to keep branch displacements small.
#ifdef __H8300__

; divnorm (H8/300): numerator in A0:A1, denominator in A2:A3.
; Makes both +ve; bit 3 of S2L is left set when the quotient
; must be negated afterwards.
divnorm:
	mov.b	A0H,A0H		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	postive

	; negate arg
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L		; +1 rippled through all four bytes
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
postive:
	mov.b	A2H,A2H		; is the denominator -ve
	bge	postive2
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L		; +1 rippled through all four bytes
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
	xor.b	#0x08,S2L	; toggle the result sign
postive2:
	rts

;; Basically the same, except that only the sign of the numerator
;; determines the sign of the result: a -ve denominator is made +ve
;; without toggling the saved sign bit.
modnorm:
	mov.b	A0H,A0H		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	mpostive

	; negate arg
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L		; +1 rippled through all four bytes
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
mpostive:
	mov.b	A2H,A2H		; is the denominator -ve
	bge	mpostive2
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L		; +1 rippled through all four bytes
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
mpostive2:
	rts

#else /* __H8300H__ */

; divnorm (H8/300H and later): numerator in A0P, denominator in A1P.
; Makes both +ve; bit 3 of S2L is left set when the quotient
; must be negated afterwards.
divnorm:
	mov.l	A0P,A0P		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	postive

	neg.l	A0P		; negate arg

postive:
	mov.l	A1P,A1P		; is the denominator -ve
	bge	postive2

	neg.l	A1P		; negate arg
	xor.b	#0x08,S2L	; toggle the result sign

postive2:
	rts

;; Basically the same, except that only the sign of the numerator
;; determines the sign of the result.
modnorm:
	mov.l	A0P,A0P		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	mpostive

	neg.l	A0P		; negate arg

mpostive:
	mov.l	A1P,A1P		; is the denominator -ve
	bge	mpostive2

	neg.l	A1P		; negate arg

mpostive2:
	rts

#endif

; Signed remainder.
; numerator in A0/A1
; denominator in A2/A3
; (on the H8/300H and later: numerator in er0, denominator in er1)
	.global	___modsi3
___modsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	modnorm
	bsr	divmodsi4
	mov	S0,A0		; divmodsi4 leaves the remainder in S0/S1
	mov	S1,A1
	bra	exitdiv
#else
	PUSHP	S2P
	bsr	modnorm
	bsr	___udivsi3
	mov.l	er3,er0		; ___udivsi3 leaves the remainder in er3
	bra	exitdiv
#endif

	;; H8/300H and H8S version of ___udivsi3 is defined later in
	;; the file.
#ifdef __H8300__
	.global	___udivsi3
___udivsi3:
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4
	bra	reti		; quotient already in A0/A1, just restore
#endif

	.global	___umodsi3
___umodsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4
	mov	S0,A0		; divmodsi4 leaves the remainder in S0/S1
	mov	S1,A1
	bra	reti
#else
	bsr	___udivsi3
	mov.l	er3,er0		; ___udivsi3 leaves the remainder in er3
	rts
#endif

	.global	___divsi3
___divsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	jsr	divnorm
	jsr	divmodsi4
#else
	PUSHP	S2P
	jsr	divnorm
	bsr	___udivsi3
#endif

	; examine what the sign should be
	; (shared tail of ___divsi3 and ___modsi3; bit 3 of S2L was
	; set up by divnorm or modnorm)
exitdiv:
	btst	#3,S2L
	beq	reti

	; should be -ve
#ifdef __H8300__
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L		; +1 rippled through all four bytes
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
#else /* __H8300H__ */
	neg.l	A0P
#endif

	; common exit: pop what the entry points pushed, in reverse order
reti:
#ifdef __H8300__
	POPP	S1P
	POPP	S0P
#endif
	POPP	S2P
	rts

	; takes A0/A1 numerator (A0P for H8/300H)
	; A2/A3 denominator (A1P for H8/300H)
	; returns A0/A1 quotient (A0P for H8/300H)
	; S0/S1 remainder (S0P for H8/300H)
	; trashes S2H

#ifdef __H8300__

divmodsi4:
	sub.w	S0,S0		; zero play area
	mov.w	S0,S1
	mov.b	A2H,S2H		; are the top three denominator bytes all zero ?
	or	A2L,S2H
	or	A3H,S2H
	bne	DenHighNonZero
	; Denominator fits in 8 bits: divide one numerator byte at a time
	; with divxu, entering at the first byte that is not zero.
	; (Each NumByteNZero label is reached when byte N is NOT zero.)
	mov.b	A0H,A0H
	bne	NumByte0Zero
	mov.b	A0L,A0L
	bne	NumByte1Zero
	mov.b	A1H,A1H
	bne	NumByte2Zero
	bra	NumByte3Zero
NumByte0Zero:
	mov.b	A0H,S1L
	divxu	A3L,S1
	mov.b	S1L,A0H
NumByte1Zero:
	mov.b	A0L,S1L
	divxu	A3L,S1
	mov.b	S1L,A0L
NumByte2Zero:
	mov.b	A1H,S1L
	divxu	A3L,S1
	mov.b	S1L,A1H
NumByte3Zero:
	mov.b	A1L,S1L
	divxu	A3L,S1
	mov.b	S1L,A1L

	mov.b	S1H,S1L		; final remainder byte becomes S1 (S0 is 0)
	mov.b	#0x0,S1H
	rts

; have to do the divide by shift and test
DenHighNonZero:
	; Denominator is at least 256, so the quotient has at most 24 bits.
	; Pre-shift by one byte: the top numerator byte seeds the remainder.
	mov.b	A0H,S1L
	mov.b	A0L,A0H
	mov.b	A1H,A0L
	mov.b	A1L,A1H

	mov.b	#0,A1L
	mov.b	#24,S2H	; only do 24 iterations

nextbit:
	add.w	A1,A1	; double the answer guess
	rotxl	A0L
	rotxl	A0H

	rotxl	S1L	; double remainder
	rotxl	S1H
	rotxl	S0L
	rotxl	S0H
	sub.w	A3,S1	; does it all fit
	subx	A2L,S0L
	subx	A2H,S0H
	bhs	setone

	add.w	A3,S1	; no, restore mistake
	addx	A2L,S0L
	addx	A2H,S0H

	dec	S2H
	bne	nextbit
	rts

setone:
	inc	A1L
	dec	S2H
	bne	nextbit
	rts

#else /* __H8300H__ */

	;; This function also computes the remainder and stores it in er3.
	.global	___udivsi3
___udivsi3:
	mov.w	A1E,A1E		; denominator top word 0?
	bne	DenHighNonZero

	; do it the easy way, see page 107 in manual
	mov.w	A0E,A2		; divide the high word of the numerator first
	extu.l	A2P
	divxu.w	A1,A2P
	mov.w	A2E,A0E		; its remainder sits above the low word
	divxu.w	A1,A0P
	mov.w	A0E,A3		; final remainder
	mov.w	A2,A0E		; high word of the quotient
	extu.l	A3P
	rts

 	; er0 = er0 / er1
 	; er3 = er0 % er1
 	; trashes er1 er2
 	; expects er1 >= 2^16
DenHighNonZero:
	mov.l	er0,er3
	mov.l	er1,er2
#ifdef __H8300H__
divmod_L21:
	shlr.l	er0
	shlr.l	er2		; make divisor < 2^16
	mov.w	e2,e2
	bne	divmod_L21
#else
	shlr.l	#2,er2		; make divisor < 2^16
	mov.w	e2,e2
	beq	divmod_L22A
divmod_L21:
	shlr.l	#2,er0
divmod_L22:
	shlr.l	#2,er2		; make divisor < 2^16
	mov.w	e2,e2
	bne	divmod_L21
divmod_L22A:
	rotxl.w	r2
	bcs	divmod_L23
	shlr.l	er0
	bra	divmod_L24
divmod_L23:
	rotxr.w	r2
	shlr.l	#2,er0
divmod_L24:
#endif
	;; At this point,
	;;  er0 contains shifted dividend
	;;  er1 contains divisor
	;;  er2 contains shifted divisor
	;;  er3 contains dividend, later remainder
	divxu.w	r2,er0		; r0 now contains the approximate quotient (AQ)
	extu.l	er0
	beq	divmod_L25
	subs	#1,er0		; er0 = AQ - 1
	mov.w	e1,r2
	mulxu.w	r0,er2		; er2 = upper (AQ - 1) * divisor
	sub.w	r2,e3		; dividend - 65536 * er2
	mov.w	r1,r2
	mulxu.w	r0,er2		; compute er3 = remainder (tentative)
	sub.l	er2,er3		; er3 = dividend - (AQ - 1) * divisor
divmod_L25:
	cmp.l	er1,er3		; is remainder >= divisor ?
	blo	divmod_L26
	adds	#1,er0		; yes: quotient was one too small
	sub.l	er1,er3		; correct the remainder
divmod_L26:
	rts

#endif
#endif /* L_divsi3 */

#ifdef L_mulhi3

;; HImode multiply.
; The H8/300 only has an 8*8->16 multiply.
; The answer is the same as:
;
; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
; (we can ignore A1.h * A0.h because that will all fall off the top)
; A0 in
; A1 in
; A0 answer

#ifdef __H8300__
	.section .text
	.align 2
	.global	___mulhi3
___mulhi3:
	mov.b	A1L,A2L		; A2l gets srcb.l
	mulxu	A0L,A2		; A2 gets first sub product

	mov.b	A0H,A3L		; prepare for
	mulxu	A1L,A3		; second sub product

	add.b	A3L,A2H		; sum first two terms
				; (only the low byte of a cross product
				; can reach the 16-bit result)

	mov.b	A1H,A3L		; third sub product
	mulxu	A0L,A3

	add.b	A3L,A2H		; almost there
	mov.w	A2,A0		; that is
	rts

#endif
#endif /* L_mulhi3 */

#ifdef L_mulsi3

;; SImode multiply.
;;
;; I think that shift and add may be sufficient for this.  Using the
;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
;; the inner loop uses maybe 20 cycles + overhead, but terminates
;; quickly on small args.
;;
;; A0/A1 src_a
;; A2/A3 src_b
;;
;;  while (a)
;;    {
;;      if (a & 1)
;;        r += b;
;;      a >>= 1;
;;      b <<= 1;
;;    }

	.section .text
	.align 2

#ifdef __H8300__

	.global	___mulsi3
___mulsi3:
	PUSHP	S0P		; S0/S1 hold the running result r
	PUSHP	S1P

	sub.w	S0,S0		; r = 0
	sub.w	S1,S1

	; while (a)
_top:	mov.w	A0,A0
	bne	_more
	mov.w	A1,A1
	beq	_done
_more:	; if (a & 1)
	bld	#0,A1L
	bcc	_nobit
	; r += b
	add.w	A3,S1
	addx	A2L,S0L
	addx	A2H,S0H
_nobit:
	; a >>= 1
	shlr	A0H
	rotxr	A0L
	rotxr	A1H
	rotxr	A1L

	; b <<= 1
	add.w	A3,A3
	addx	A2L,A2L
	addx	A2H,A2H
	bra 	_top

_done:
	mov.w	S0,A0		; return r in A0/A1
	mov.w	S1,A1
	POPP	S1P
	POPP	S0P
	rts

#else /* __H8300H__ */

;
; mulsi3 for H8/300H - based on Renesas SH implementation
;
; by Toshiyasu Morita
;
; Old code:
;
;    16b * 16b = 372 states (worst case)
;    32b * 32b = 724 states (worst case)
;
; New code:
;
;    16b * 16b =  48 states
;    16b * 32b =  72 states
;    32b * 32b =  92 states
;
; Operands are er0 = (a:b) and er1 = (c:d), a and c being the high
; words.  The low 32 bits of the product are b*d + ((a*d + c*b) << 16);
; a cross product is skipped when its high-word factor is zero.
;

	.global	___mulsi3
___mulsi3:
	mov.w	r1,r2		; ( 2 states) b * d
	mulxu	r0,er2		; (22 states)

	mov.w	e0,r3		; ( 2 states) a * d
	beq	L_skip1		; ( 4 states)
	mulxu	r1,er3		; (22 states)
	add.w	r3,e2		; ( 2 states)

L_skip1:
	mov.w	e1,r3		; ( 2 states) c * b
	beq	L_skip2		; ( 4 states)
	mulxu	r0,er3		; (22 states)
	add.w	r3,e2		; ( 2 states)

L_skip2:
	mov.l	er2,er0		; ( 2 states)
	rts			; (10 states)

#endif
#endif /* L_mulsi3 */
#ifdef L_fixunssfsi_asm
/* For the h8300 we use asm to save some bytes, to
   allow more programs to fit into the tiny address
   space.  For the H8/300H and H8S, the C version is good enough.  */
#ifdef __H8300__
/* We still treat NANs different than libgcc2.c, but then, the
   behavior is undefined anyways.  */
/* The float argument arrives in A0:A1 with its sign and exponent in
   the top bytes.  A top byte below 0x4f (signed compare, so this
   includes every negative input) is handed to ___fixsfsi.  A top
   byte above 0x4f saturates to 0xffffffff.  When it is exactly 0x4f
   the last exponent bit (bit 7 of A0L) decides: if clear, it is
   replaced by the implicit leading one and the 24-bit mantissa is
   shifted up by one byte; if set, the value is too large and also
   saturates.  */
	.global	___fixunssfsi
___fixunssfsi:
	cmp.b #0x4f,r0h
	bge Large_num
	jmp     @___fixsfsi	; small or negative: the signed routine copes
Large_num:
	bhi L_huge_num		; top byte above 0x4f
	xor.b #0x80,A0L		; flip the last exponent bit
	bmi L_shift8		; it was clear: mantissa << 8 is the result
L_huge_num:
	mov.w #65535,A0		; saturate to 0xffffffff
	mov.w A0,A1
	rts
L_shift8:
	mov.b A0L,A0H
	mov.b A1H,A0L
	mov.b A1L,A1H
	mov.b #0,A1L
	rts
#endif
#endif /* L_fixunssfsi_asm */