/* IEEE-754 double-precision functions for Xtensa
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negdf2

        .align  4
        .global __negdf2
        .type   __negdf2, @function
__negdf2:
        leaf_entry sp, 16
        movi    a4, 0x80000000
        xor     xh, xh, a4
        leaf_return

#endif /* L_negdf2 */

#ifdef L_addsubdf3

        /* Addition */
__adddf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Ladd_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   yh, a6, 1f
        /* If x is a NaN, return it.  Otherwise, return y.  */
        slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, .Ladd_ynan_or_inf
1:      leaf_return

.Ladd_ynan_or_inf:
        /* Return y.  */
        mov     xh, yh
        mov     xl, yl
        leaf_return

.Ladd_opposite_signs:
        /* Operand signs differ.  Do a subtraction.  */
        slli    a7, a6, 11
        xor     yh, yh, a7
        j       .Lsub_same_sign

        .align  4
        .global __adddf3
        .type   __adddf3, @function
__adddf3:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000

        /* Check if the two operands have the same sign.  */
        xor     a7, xh, yh
        bltz    a7, .Ladd_opposite_signs

.Ladd_same_sign:
        /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
        ball    xh, a6, .Ladd_xnan_or_inf
        ball    yh, a6, .Ladd_ynan_or_inf

        /* Compare the exponents.  The smaller operand will be shifted
           right by the exponent difference and added to the larger
           one.  */
        extui   a7, xh, 20, 12
        extui   a8, yh, 20, 12
        bltu    a7, a8, .Ladd_shiftx

.Ladd_shifty:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   yh, a6, .Ladd_yexpzero

        /* Replace yh sign/exponent with 0x001.  */
        or      yh, yh, a6
        slli    yh, yh, 11
        srli    yh, yh, 11

.Ladd_yexpdiff:
        /* Compute the exponent difference.  Optimize for difference < 32.  */
        sub     a10, a7, a8
        bgeui   a10, 32, .Ladd_bigshifty

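/* Illustration (comment only; not assembled): a rough C model of the
   alignment shift performed just below, under the assumption
   0 < d < 32.  The hypothetical names hi/lo/round stand for yh/yl/a9;
   "round" collects the bits shifted out of the low word, which are
   consulted later when rounding the sum.

     void align_shift (uint32_t *hi, uint32_t *lo, uint32_t *round, int d)
     {
       *round = *lo << (32 - d);              // bits shifted out of lo
       *lo = (*hi << (32 - d)) | (*lo >> d);  // 64-bit funnel shift
       *hi >>= d;                             // high word shifted right
     }
*/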
        /* Shift yh/yl right by the exponent difference.  Any bits that are
           shifted out of yl are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, yl, a9
        src     yl, yh, yl
        srl     yh, yh

.Ladd_addy:
        /* Do the 64-bit addition.  */
        add     xl, xl, yl
        add     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, 1
1:
        /* Check if the add overflowed into the exponent.  */
        extui   a10, xh, 20, 12
        beq     a10, a7, .Ladd_round
        mov     a8, a7
        j       .Ladd_carry

.Ladd_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0", and increment the apparent exponent
           because subnormals behave as if they had the minimum (nonzero)
           exponent.  Test for the case when both exponents are zero.  */
        slli    yh, yh, 12
        srli    yh, yh, 12
        bnone   xh, a6, .Ladd_bothexpzero
        addi    a8, a8, 1
        j       .Ladd_yexpdiff

.Ladd_bothexpzero:
        /* Both exponents are zero.  Handle this as a special case.  There
           is no need to shift or round, and the normal code for handling
           a carry into the exponent field will not work because it
           assumes there is an implicit "1.0" that needs to be added.  */
        add     xl, xl, yl
        add     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, 1
1:      leaf_return

.Ladd_bigshifty:
        /* Exponent difference >= 64 -- just return the bigger value.  */
        bgeui   a10, 64, 1b

        /* Shift yh/yl right by the exponent difference.  Any bits that are
           shifted out are saved in a9 for rounding the result.  */
        ssr     a10
        sll     a11, yl         /* lost bits shifted out of yl */
        src     a9, yh, yl
        srl     yl, yh
        movi    yh, 0
        beqz    a11, .Ladd_addy
        or      a9, a9, a10     /* any positive, nonzero value will work */
        j       .Ladd_addy

.Ladd_xexpzero:
        /* Same as "yexpzero" except skip handling the case when both
           exponents are zero.  */
        slli    xh, xh, 12
        srli    xh, xh, 12
        addi    a7, a7, 1
        j       .Ladd_xexpdiff

.Ladd_shiftx:
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
           because the exponent difference is always nonzero in this version,
           the shift sequence can use SLL and skip loading a constant zero.  */
        bnone   xh, a6, .Ladd_xexpzero

        or      xh, xh, a6
        slli    xh, xh, 11
        srli    xh, xh, 11

.Ladd_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Ladd_bigshiftx

        ssr     a10
        sll     a9, xl
        src     xl, xh, xl
        srl     xh, xh

.Ladd_addx:
        add     xl, xl, yl
        add     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, 1
1:
        /* Check if the add overflowed into the exponent.  */
        extui   a10, xh, 20, 12
        bne     a10, a8, .Ladd_carry

.Ladd_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    xl, xl, 1
        beqz    xl, .Ladd_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_bigshiftx:
        /* Mostly the same thing as "bigshifty"....  */
        bgeui   a10, 64, .Ladd_returny

        ssr     a10
        sll     a11, xl
        src     a9, xh, xl
        srl     xl, xh
        movi    xh, 0
        beqz    a11, .Ladd_addx
        or      a9, a9, a10
        j       .Ladd_addx

.Ladd_returny:
        mov     xh, yh
        mov     xl, yl
        leaf_return

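/* Illustration (comment only; not assembled): a rough C model of the
   renormalization performed by .Ladd_carry below, using the identity
   derived in its comment.  The hypothetical names hi/lo stand for
   xh/xl, "lsb" for the saved low bit in a10, and "exp" for the
   original biased exponent held in a8.

     void add_carry (uint32_t *hi, uint32_t *lo, uint32_t *lsb,
                     uint32_t exp)
     {
       *lsb = *lo & 1;                  // saved for rounding
       *lo = (*hi << 31) | (*lo >> 1);  // shift mantissa right by one
       *hi = (*hi >> 1) + ((exp + 1) << 19);
     }
*/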
.Ladd_carry:
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized.  The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x100000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added).  The mantissa can then
           be shifted right by one bit.  The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions.  If x is the
           original exponent, the result is:
               shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
           or:
               shifted mantissa + ((x + 1) << 19)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field.  */

        /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
        mov     a10, xl
        ssai    1
        src     xl, xh, xl
        srl     xh, xh

        /* See explanation above.  The original exponent is in a8.  */
        addi    a8, a8, 1
        slli    a8, a8, 19
        add     xh, xh, a8

        /* Return an Infinity if the exponent overflowed.  */
        ball    xh, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
        bbci.l  a10, 0, 1f
        addi    xl, xl, 1
        beqz    xl, .Ladd_roundcarry
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_infinity:
        /* Clear the mantissa.  */
        movi    xl, 0
        srli    xh, xh, 20
        slli    xh, xh, 20

        /* The sign bit may have been lost in a carry-out.  Put it back.  */
        slli    a8, a8, 1
        or      xh, xh, a8
        leaf_return

.Ladd_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Ladd_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return


        /* Subtraction */
__subdf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Lsub_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   yh, a6, 1f
        /* Both x and y are either NaN or Inf, so the result is NaN.  */
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
1:      leaf_return

.Lsub_ynan_or_inf:
        /* Negate y and return it.  */
        slli    a7, a6, 11
        xor     xh, yh, a7
        mov     xl, yl
        leaf_return

.Lsub_opposite_signs:
        /* Operand signs differ.  Do an addition.  */
        slli    a7, a6, 11
        xor     yh, yh, a7
        j       .Ladd_same_sign

        .align  4
        .global __subdf3
        .type   __subdf3, @function
__subdf3:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000

        /* Check if the two operands have the same sign.  */
        xor     a7, xh, yh
        bltz    a7, .Lsub_opposite_signs

.Lsub_same_sign:
        /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
        ball    xh, a6, .Lsub_xnan_or_inf
        ball    yh, a6, .Lsub_ynan_or_inf

        /* Compare the operands.  In contrast to addition, the entire
           value matters here.  */
        extui   a7, xh, 20, 11
        extui   a8, yh, 20, 11
        bltu    xh, yh, .Lsub_xsmaller
        beq     xh, yh, .Lsub_compare_low

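/* Illustration (comment only; not assembled): for same-sign operands,
   deciding which magnitude is larger -- so that the difference below
   stays nonnegative -- reduces to an unsigned 64-bit comparison of the
   raw bit patterns, which is exactly what the two branches above do:

     x_is_smaller = (xh < yh) || (xh == yh && xl < yl);
*/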
.Lsub_ysmaller:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   yh, a6, .Lsub_yexpzero

        /* Replace yh sign/exponent with 0x001.  */
        or      yh, yh, a6
        slli    yh, yh, 11
        srli    yh, yh, 11

.Lsub_yexpdiff:
        /* Compute the exponent difference.  Optimize for difference < 32.  */
        sub     a10, a7, a8
        bgeui   a10, 32, .Lsub_bigshifty

        /* Shift yh/yl right by the exponent difference.  Any bits that are
           shifted out of yl are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, yl, a9
        src     yl, yh, yl
        srl     yh, yh

.Lsub_suby:
        /* Do the 64-bit subtraction.  */
        sub     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, -1
1:      sub     xl, xl, yl

        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from xh/xl.  */
        neg     a9, a9
        beqz    a9, 1f
        addi    a5, xh, -1
        moveqz  xh, a5, xl
        addi    xl, xl, -1
1:
        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, xh, 20, 11
        beq     a10, a7, .Lsub_round
        j       .Lsub_borrow

.Lsub_compare_low:
        /* The high words are equal.  Compare the low words.  */
        bltu    xl, yl, .Lsub_xsmaller
        bltu    yl, xl, .Lsub_ysmaller
        /* The operands are equal.  Return 0.0.  */
        movi    xh, 0
        movi    xl, 0
1:      leaf_return

.Lsub_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent.  */
        slli    yh, yh, 12
        srli    yh, yh, 12
        bnone   xh, a6, .Lsub_yexpdiff
        addi    a8, a8, 1
        j       .Lsub_yexpdiff

.Lsub_bigshifty:
        /* Exponent difference >= 64 -- just return the bigger value.  */
        bgeui   a10, 64, 1b

        /* Shift yh/yl right by the exponent difference.  Any bits that are
           shifted out are saved in a9 for rounding the result.  */
        ssr     a10
        sll     a11, yl         /* lost bits shifted out of yl */
        src     a9, yh, yl
        srl     yl, yh
        movi    yh, 0
        beqz    a11, .Lsub_suby
        or      a9, a9, a10     /* any positive, nonzero value will work */
        j       .Lsub_suby

.Lsub_xsmaller:
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with y negated.  */
        bnone   xh, a6, .Lsub_xexpzero

        or      xh, xh, a6
        slli    xh, xh, 11
        srli    xh, xh, 11

.Lsub_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Lsub_bigshiftx

        ssr     a10
        movi    a9, 0
        src     a9, xl, a9
        src     xl, xh, xl
        srl     xh, xh

        /* Negate y.  */
        slli    a11, a6, 11
        xor     yh, yh, a11

.Lsub_subx:
        sub     xl, yl, xl
        sub     xh, yh, xh
        bgeu    yl, xl, 1f
        addi    xh, xh, -1
1:
        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from xh/xl.  */
        neg     a9, a9
        beqz    a9, 1f
        addi    a5, xh, -1
        moveqz  xh, a5, xl
        addi    xl, xl, -1
1:
        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, xh, 20, 11
        bne     a10, a8, .Lsub_borrow

.Lsub_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    xl, xl, 1
        beqz    xl, .Lsub_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Lsub_exactlyhalf
1:      leaf_return

.Lsub_xexpzero:
        /* Same as "yexpzero".  */
        slli    xh, xh, 12
        srli    xh, xh, 12
        bnone   yh, a6, .Lsub_xexpdiff
        addi    a7, a7, 1
        j       .Lsub_xexpdiff

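/* Illustration (comment only; not assembled): a rough C model of the
   guard-bit borrow sequence used in .Lsub_suby and .Lsub_subx above.
   The real subtraction is effectively 96 bits wide, x.000 - y.guard;
   the guard bits (a9) borrow out of the 64-bit difference unless they
   are zero.  The hypothetical names hi/lo/guard stand for xh/xl/a9.

     void borrow_guard (uint32_t *hi, uint32_t *lo, uint32_t *guard)
     {
       *guard = -*guard;         // 0 - guard, modulo 2^32
       if (*guard != 0)          // nonzero guard borrows from hi:lo
         {
           if (*lo == 0)
             *hi -= 1;
           *lo -= 1;
         }
     }
*/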
.Lsub_bigshiftx:
        /* Mostly the same thing as "bigshifty", but with the sign bit of the
           shifted value set so that the subsequent subtraction flips the
           sign of y.  */
        bgeui   a10, 64, .Lsub_returny

        ssr     a10
        sll     a11, xl
        src     a9, xh, xl
        srl     xl, xh
        slli    xh, a6, 11      /* set sign bit of xh */
        beqz    a11, .Lsub_subx
        or      a9, a9, a10
        j       .Lsub_subx

.Lsub_returny:
        /* Negate and return y.  */
        slli    a7, a6, 11
        xor     xh, yh, a7
        mov     xl, yl
        leaf_return

.Lsub_borrow:
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized.  Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly.  If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value.  */

        slli    a8, xh, 12
        beqz    a8, .Lsub_xhzero
        do_nsau a6, a8, a7, a11
        srli    a8, a8, 12
        bge     a6, a10, .Lsub_subnormal
        addi    a6, a6, 1

.Lsub_shift_lt32:
        /* Shift the mantissa (a8/xl/a9) left by a6.  */
        ssl     a6
        src     a8, a8, xl
        src     xl, xl, a9
        sll     a9, a9

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6.  (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.)  */
        srli    xh, xh, 20
        sub     xh, xh, a6
        slli    xh, xh, 20
        add     xh, xh, a8
        j       .Lsub_round

.Lsub_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Lsub_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return

.Lsub_xhzero:
        /* When normalizing the result, all the mantissa bits in the high
           word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
        do_nsau a6, xl, a7, a11
        addi    a6, a6, 21
        blt     a10, a6, .Lsub_subnormal

.Lsub_normalize_shift:
        bltui   a6, 32, .Lsub_shift_lt32

        ssl     a6
        src     a8, xl, a9
        sll     xl, a9
        movi    a9, 0

        srli    xh, xh, 20
        sub     xh, xh, a6
        slli    xh, xh, 20
        add     xh, xh, a8
        j       .Lsub_round

.Lsub_subnormal:
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero.  Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1,
           not 0, so the shift amount must be 1 smaller than normal.  */
        mov     a6, a10
        j       .Lsub_normalize_shift

#endif /* L_addsubdf3 */

#ifdef L_muldf3

        /* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

__muldf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
        /* Clear the sign bit of x.  */
        slli    xh, xh, 1
        srli    xh, xh, 1

        /* If x is zero, return zero.  */
        or      a10, xh, xl
        beqz    a10, .Lmul_return_zero

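/* Illustration (comment only; not assembled): a rough C model of the
   normalization below for the case where the high word of the
   subnormal mantissa is nonzero.  The leading 1 is shifted up to bit
   20 of the high word, and the effective exponent (1 for subnormals)
   is reduced by the same amount.  Assumes 0 < k < 32; clz stands for
   the NSAU count produced by do_nsau.

     void norm_subnormal_hi (uint32_t *hi, uint32_t *lo, int *exp)
     {
       int k = clz (*hi) - 11;                // moves the msb to bit 20
       *hi = (*hi << k) | (*lo >> (32 - k));  // 64-bit left shift by k
       *lo <<= k;
       *exp = 1 - k;                          // kept in a8 (a9 for y)
     }
*/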
        /* Normalize x.  Adjust the exponent in a8.  */
        beqz    xh, .Lmul_xh_zero
        do_nsau a10, xh, a11, a12
        addi    a10, a10, -11
        ssl     a10
        src     xh, xh, xl
        sll     xl, xl
        movi    a8, 1
        sub     a8, a8, a10
        j       .Lmul_xnormalized
.Lmul_xh_zero:
        do_nsau a10, xl, a11, a12
        addi    a10, a10, -11
        movi    a8, -31
        sub     a8, a8, a10
        ssl     a10
        bltz    a10, .Lmul_xl_srl
        sll     xh, xl
        movi    xl, 0
        j       .Lmul_xnormalized
.Lmul_xl_srl:
        srl     xh, xl
        sll     xl, xl
        j       .Lmul_xnormalized

.Lmul_yexpzero:
        /* Clear the sign bit of y.  */
        slli    yh, yh, 1
        srli    yh, yh, 1

        /* If y is zero, return zero.  */
        or      a10, yh, yl
        beqz    a10, .Lmul_return_zero

        /* Normalize y.  Adjust the exponent in a9.  */
        beqz    yh, .Lmul_yh_zero
        do_nsau a10, yh, a11, a12
        addi    a10, a10, -11
        ssl     a10
        src     yh, yh, yl
        sll     yl, yl
        movi    a9, 1
        sub     a9, a9, a10
        j       .Lmul_ynormalized
.Lmul_yh_zero:
        do_nsau a10, yl, a11, a12
        addi    a10, a10, -11
        movi    a9, -31
        sub     a9, a9, a10
        ssl     a10
        bltz    a10, .Lmul_yl_srl
        sll     yh, yl
        movi    yl, 0
        j       .Lmul_ynormalized
.Lmul_yl_srl:
        srl     yh, yl
        sll     yl, yl
        j       .Lmul_ynormalized

.Lmul_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        j       .Lmul_done

.Lmul_xnan_or_inf:
        /* If y is zero, return NaN.  */
        bnez    yl, 1f
        slli    a8, yh, 1
        bnez    a8, 1f
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
        j       .Lmul_done
1:
        /* If y is NaN, return y.  */
        bnall   yh, a6, .Lmul_returnx
        slli    a8, yh, 12
        or      a8, a8, yl
        beqz    a8, .Lmul_returnx

.Lmul_returny:
        mov     xh, yh
        mov     xl, yl

.Lmul_returnx:
        /* Set the sign bit and return.  */
        extui   a7, a7, 31, 1
        slli    xh, xh, 1
        ssai    1
        src     xh, a7, xh
        j       .Lmul_done

.Lmul_ynan_or_inf:
        /* If x is zero, return NaN.  */
        bnez    xl, .Lmul_returny
        slli    a8, xh, 1
        bnez    a8, .Lmul_returny
        movi    a7, 0x80000     /* make it a quiet NaN */
        or      xh, yh, a7
        j       .Lmul_done

        .align  4
        .global __muldf3
        .type   __muldf3, @function
__muldf3:
#if __XTENSA_CALL0_ABI__
        leaf_entry sp, 32
        addi    sp, sp, -32
        s32i    a12, sp, 16
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#elif XCHAL_NO_MUL
        /* This is not really a leaf function; allocate enough stack space
           to allow CALL12s to a helper function.  */
        leaf_entry sp, 64
#else
        leaf_entry sp, 32
#endif
        movi    a6, 0x7ff00000

        /* Get the sign of the result.  */
        xor     a7, xh, yh

        /* Check for NaN and infinity.  */
        ball    xh, a6, .Lmul_xnan_or_inf
        ball    yh, a6, .Lmul_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, xh, 20, 11
        extui   a9, yh, 20, 11

        beqz    a8, .Lmul_xexpzero
.Lmul_xnormalized:
        beqz    a9, .Lmul_yexpzero
.Lmul_ynormalized:

        /* Add the exponents.  */
        add     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0x1fffff
        or      xh, xh, a6
        and     xh, xh, a10
        or      yh, yh, a6
        and     yh, yh, a10

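/* Illustration (comment only; not assembled): what the multiplication
   section below computes, written in C with GCC's 128-bit extension.
   The low 32 bits of the product only matter for rounding, so they
   are folded into the least significant ("sticky") bit of the third
   word.

     void mul64_sticky (uint64_t x, uint64_t y,
                        uint64_t *hi, uint32_t *mid)
     {
       unsigned __int128 p = (unsigned __int128) x * y;
       *hi = (uint64_t) (p >> 64);   // ends up in xh/xl
       *mid = (uint32_t) (p >> 32);  // ends up in a6
       if ((uint32_t) p != 0)
         *mid |= 1;                  // sticky bit for rounding
     }
*/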
        /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
           The least-significant word of the result is thrown away except
           that if it is nonzero, the lsb of a6 is set to 1.  */
#if XCHAL_HAVE_MUL32_HIGH

        /* Compute a6 with any carry-outs in a10.  */
        movi    a10, 0
        mull    a6, xl, yh
        mull    a11, xh, yl
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a10, a10, 1
1:
        muluh   a11, xl, yl
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a10, a10, 1
1:
        /* If the low word of the result is nonzero, set the lsb of a6.  */
        mull    a11, xl, yl
        beqz    a11, 1f
        movi    a9, 1
        or      a6, a6, a9
1:
        /* Compute xl with any carry-outs in a9.  */
        movi    a9, 0
        mull    a11, xh, yh
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a9, a9, 1
1:
        muluh   a11, xh, yl
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a9, a9, 1
1:
        muluh   xl, xl, yh
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        /* Compute xh.  */
        muluh   xh, xh, yh
        add     xh, xh, a9

#else /* ! XCHAL_HAVE_MUL32_HIGH */

        /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
           products.  These partial products are:

                0 xll * yll

                1 xll * ylh
                2 xlh * yll

                3 xll * yhl
                4 xlh * ylh
                5 xhl * yll

                6 xll * yhh
                7 xlh * yhl
                8 xhl * ylh
                9 xhh * yll

                10 xlh * yhh
                11 xhl * yhl
                12 xhh * ylh

                13 xhl * yhh
                14 xhh * yhl

                15 xhh * yhh

           where the input chunks are (hh, hl, lh, ll).  If using the Mul16
           or Mul32 multiplier options, these input chunks must be stored in
           separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
           that the inputs come from either half of the registers, so there
           is no need to shift them out ahead of time.  If there is no
           multiply hardware, the 16-bit chunks can be extracted when setting
           up the arguments to the separate multiply function.  */

        /* Save a7 since it is needed to hold a temporary value.  */
        s32i    a7, sp, 4
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now.  (The function
           uses a custom ABI so nothing else needs to be saved.)  */
        s32i    a0, sp, 0
        s32i    a8, sp, 8
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define xlh a12
#define ylh a13
#define xhh a14
#define yhh a15

        /* Get the high halves of the inputs into registers.  */
        srli    xlh, xl, 16
        srli    ylh, yl, 16
        srli    xhh, xh, 16
        srli    yhh, yh, 16

#define xll xl
#define yll yl
#define xhl xh
#define yhl yh

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */
        extui   xl, xl, 0, 16
        extui   xh, xh, 0, 16
        extui   yl, yl, 0, 16
        extui   yh, yh, 0, 16
#endif
#endif /* MUL16 || MUL32 */
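/* Illustration (comment only; not assembled): the decomposition behind
   the partial-product table above.  With chunk(v, i) denoting the i'th
   16-bit piece of a 64-bit input (i = 0 least significant), the full
   128-bit product is

     x * y = sum over i,j of chunk(x,i) * chunk(y,j) << (16*(i+j))

   and the groups pp0, pp1-2, pp3-5, pp6-9, pp10-12, pp13-14, pp15 in
   the table collect the terms of equal weight 16*(i+j), i.e., the
   diagonals of the 4x4 grid of chunk products.
*/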

#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a14, xreg); \
        set_arg_ ## yhalf (a15, yreg); \
        call12  .Lmul_mulsi3; \
        mov     dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

        /* Add pp1 and pp2 into a10 with carry-out in a9.  */
        do_mul(a10, xl, l, yl, h)       /* pp 1 */
        do_mul(a11, xl, h, yl, l)       /* pp 2 */
        movi    a9, 0
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a9, a9, 1
1:
        /* Initialize a6 with a9/a10 shifted into position.  Note that
           this value can be safely incremented without any carry-outs.  */
        ssai    16
        src     a6, a9, a10

        /* Compute the low word into a10.  */
        do_mul(a11, xl, l, yl, l)       /* pp 0 */
        sll     a10, a10
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a6, a6, 1
1:
        /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
           This is good enough to determine the low half of a6, so that any
           nonzero bits from the low word of the result can be collapsed
           into a6, freeing up a register.  */
        movi    a9, 0
        do_mul(a11, xl, l, yh, l)       /* pp 3 */
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        do_mul(a11, xl, h, yl, h)       /* pp 4 */
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        do_mul(a11, xh, l, yl, l)       /* pp 5 */
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Collapse any nonzero bits from the low word into a6.  */
        beqz    a10, 1f
        movi    a11, 1
        or      a6, a6, a11
1:
        /* Add pp6-9 into a11 with carry-outs in a10.  */
        do_mul(a7, xl, l, yh, h)        /* pp 6 */
        do_mul(a11, xh, h, yl, l)       /* pp 9 */
        movi    a10, 0
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        do_mul(a7, xl, h, yh, l)        /* pp 7 */
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        do_mul(a7, xh, l, yl, h)        /* pp 8 */
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        /* Shift a10/a11 into position, and add low half of a11 to a6.  */
        src     a10, a10, a11
        add     a10, a10, a9
        sll     a11, a11
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a10, a10, 1
1:
        /* Add pp10-12 into xl with carry-outs in a9.  */
        movi    a9, 0
        do_mul(xl, xl, h, yh, h)        /* pp 10 */
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        do_mul(a10, xh, l, yh, l)       /* pp 11 */
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        do_mul(a10, xh, h, yl, h)       /* pp 12 */
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        /* Add pp13-14 into a11 with carry-outs in a10.  */
        do_mul(a11, xh, l, yh, h)       /* pp 13 */
        do_mul(a7, xh, h, yh, l)        /* pp 14 */
        movi    a10, 0
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
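/* Illustration (comment only; not assembled): the add-with-carry idiom
   repeated throughout this sequence.  Xtensa has no carry flag, so a
   carry out of a 32-bit addition is detected with an unsigned compare
   against one of the addends:

     uint32_t add_with_carry (uint32_t *acc, uint32_t b)
     {
       *acc += b;
       return *acc < b;   // 1 iff the addition wrapped around
     }

   Each "bgeu ..., 1f / addi ..., 1" pair above is one instance of this.
*/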
        /* Shift a10/a11 into position, and add low half of a11 to a6.  */
        src     a10, a10, a11
        add     a10, a10, a9
        sll     a11, a11
        add     xl, xl, a11
        bgeu    xl, a11, 1f
        addi    a10, a10, 1
1:
        /* Compute xh.  */
        do_mul(xh, xh, h, yh, h)        /* pp 15 */
        add     xh, xh, a10

        /* Restore values saved on the stack during the multiplication.  */
        l32i    a7, sp, 4
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        l32i    a0, sp, 0
        l32i    a8, sp, 8
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

        /* Shift left by 12 bits, unless there was a carry-out from the
           multiply, in which case, shift by 11 bits and increment the
           exponent.  Note: It is convenient to use the constant 0x3ff
           instead of 0x400 when removing the extra exponent bias (so that
           it is easy to construct 0x7fe for the overflow check).  Reverse
           the logic here to decrement the exponent sum by one unless there
           was a carry-out.  */
        movi    a4, 11
        srli    a5, xh, 21 - 12
        bnez    a5, 1f
        addi    a4, a4, 1
        addi    a8, a8, -1
1:      ssl     a4
        src     xh, xh, xl
        src     xl, xl, a6
        sll     a6, a6

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result).  */
        movi    a4, 0x3ff
        sub     a8, a8, a4

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..7fd are OK here.  */
        slli    a4, a4, 1       /* 0x7fe */
        bgeu    a8, a4, .Lmul_overflow

.Lmul_round:
        /* Round.  */
        bgez    a6, .Lmul_rounded
        addi    xl, xl, 1
        beqz    xl, .Lmul_roundcarry
        slli    a6, a6, 1
        beqz    a6, .Lmul_exactlyhalf

.Lmul_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 20
        add     xh, xh, a8

.Lmul_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      xh, xh, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

.Lmul_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        j       .Lmul_rounded

.Lmul_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow is OK -- it will be added to the exponent.  */
        j       .Lmul_rounded

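/* Illustration (comment only; not assembled): a rough C model of the
   round-to-nearest-even step in .Lmul_round above.  The hypothetical
   names hi/lo stand for xh/xl, and "extra" for a6, whose msb is the
   round bit and whose remaining bits are the sticky fraction.

     void round_nearest_even (uint32_t *hi, uint32_t *lo, uint32_t extra)
     {
       if ((int32_t) extra < 0)         // leftover fraction >= 1/2
         {
           *lo += 1;
           if (*lo == 0)
             *hi += 1;                  // carry; lo is 0, already even
           else if ((extra << 1) == 0)  // exactly 1/2: break the tie
             *lo &= ~1u;                // round down to even
         }
     }
*/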
.Lmul_overflow:
        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity.  */
        addi    a8, a4, 1       /* 0x7ff */
        slli    xh, a8, 20
        movi    xl, 0
        j       .Lmul_addsign

.Lmul_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        mov     a9, a6
        ssr     a8
        bgeui   a8, 32, .Lmul_bigshift

        /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result.  */
        sll     a6, xl
        src     xl, xh, xl
        srl     xh, xh
        j       1f

.Lmul_bigshift:
        bgeui   a8, 64, .Lmul_flush_to_zero
        sll     a10, xl         /* lost bits shifted out of xl */
        src     a6, xh, xl
        srl     xl, xh
        movi    xh, 0
        or      a9, a9, a10

        /* Set the exponent to zero.  */
1:      movi    a8, 0

        /* Pack any nonzero bits shifted out into a6.  */
        beqz    a9, .Lmul_round
        movi    a9, 1
        or      a6, a6, a9
        j       .Lmul_round

.Lmul_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        j       .Lmul_done

#if XCHAL_NO_MUL

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  When using CALL0, this function
           uses a custom ABI: the inputs are passed in a13 and a14, the
           result is returned in a12, and a8 and a15 are clobbered.  */
        .align  4
.Lmul_mulsi3:
        leaf_entry sp, 16
        .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
        movi    \dst, 0
1:      add     \tmp1, \src2, \dst
        extui   \tmp2, \src1, 0, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx2 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 1, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx4 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 2, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx8 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 3, 1
        movnez  \dst, \tmp1, \tmp2

        srli    \src1, \src1, 4
        slli    \src2, \src2, 4
        bnez    \src1, 1b
        .endm
#if __XTENSA_CALL0_ABI__
        mul_mulsi3_body a12, a13, a14, a15, a8
#else
        /* The result will be written into a2, so save that argument in a4.  */
        mov     a4, a2
        mul_mulsi3_body a2, a4, a3, a5, a6
#endif
        leaf_return
#endif /* XCHAL_NO_MUL */
#endif /* L_muldf3 */

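/* Illustration (comment only; not assembled): a rough C model of the
   shift-and-add loop in .Lmul_mulsi3 above, which consumes four bits
   of the first operand per iteration using ADDX2/ADDX4/ADDX8.

     uint32_t mulsi3 (uint32_t a, uint32_t b)
     {
       uint32_t dst = 0;
       while (a != 0)
         {
           if (a & 1) dst += b;
           if (a & 2) dst += b << 1;
           if (a & 4) dst += b << 2;
           if (a & 8) dst += b << 3;
           a >>= 4;
           b <<= 4;
         }
       return dst;
     }
*/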
#ifdef L_divdf3

        /* Division */
__divdf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
        /* Clear the sign bit of y.  */
        slli    yh, yh, 1
        srli    yh, yh, 1

        /* Check for division by zero.  */
        or      a10, yh, yl
        beqz    a10, .Ldiv_yzero

        /* Normalize y.  Adjust the exponent in a9.  */
        beqz    yh, .Ldiv_yh_zero
        do_nsau a10, yh, a11, a9
        addi    a10, a10, -11
        ssl     a10
        src     yh, yh, yl
        sll     yl, yl
        movi    a9, 1
        sub     a9, a9, a10
        j       .Ldiv_ynormalized
.Ldiv_yh_zero:
        do_nsau a10, yl, a11, a9
        addi    a10, a10, -11
        movi    a9, -31
        sub     a9, a9, a10
        ssl     a10
        bltz    a10, .Ldiv_yl_srl
        sll     yh, yl
        movi    yl, 0
        j       .Ldiv_ynormalized
.Ldiv_yl_srl:
        srl     yh, yl
        sll     yl, yl
        j       .Ldiv_ynormalized

.Ldiv_yzero:
        /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
        slli    xh, xh, 1
        srli    xh, xh, 1
        or      xl, xl, xh
        srli    xh, a7, 31
        slli    xh, xh, 31
        or      xh, xh, a6
        bnez    xl, 1f
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
1:      movi    xl, 0
        leaf_return

.Ldiv_xexpzero:
        /* Clear the sign bit of x.  */
        slli    xh, xh, 1
        srli    xh, xh, 1

        /* If x is zero, return zero.  */
        or      a10, xh, xl
        beqz    a10, .Ldiv_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        beqz    xh, .Ldiv_xh_zero
        do_nsau a10, xh, a11, a8
        addi    a10, a10, -11
        ssl     a10
        src     xh, xh, xl
        sll     xl, xl
        movi    a8, 1
        sub     a8, a8, a10
        j       .Ldiv_xnormalized
.Ldiv_xh_zero:
        do_nsau a10, xl, a11, a8
        addi    a10, a10, -11
        movi    a8, -31
        sub     a8, a8, a10
        ssl     a10
        bltz    a10, .Ldiv_xl_srl
        sll     xh, xl
        movi    xl, 0
        j       .Ldiv_xnormalized
.Ldiv_xl_srl:
        srl     xh, xl
        sll     xl, xl
        j       .Ldiv_xnormalized

.Ldiv_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        leaf_return

.Ldiv_xnan_or_inf:
        /* Set the sign bit of the result.  */
        srli    a7, yh, 31
        slli    a7, a7, 31
        xor     xh, xh, a7
        /* If y is NaN or Inf, return NaN.  */
        bnall   yh, a6, 1f
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
1:      leaf_return

.Ldiv_ynan_or_inf:
        /* If y is Infinity, return zero.  */
        slli    a8, yh, 12
        or      a8, a8, yl
        beqz    a8, .Ldiv_return_zero
        /* y is NaN; return it.  */
        mov     xh, yh
        mov     xl, yl
        leaf_return

.Ldiv_highequal1:
        bltu    xl, yl, 2f
        j       3f

        .align  4
        .global __divdf3
        .type   __divdf3, @function
__divdf3:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000

        /* Get the sign of the result.  */
        xor     a7, xh, yh

        /* Check for NaN and infinity.  */
        ball    xh, a6, .Ldiv_xnan_or_inf
        ball    yh, a6, .Ldiv_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, xh, 20, 11
        extui   a9, yh, 20, 11

        beqz    a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
        beqz    a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

        /* Subtract the exponents.  */
        sub     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0x1fffff
        or      xh, xh, a6
        and     xh, xh, a10
        or      yh, yh, a6
        and     yh, yh, a10

        /* Set SAR for left shift by one.  */
        ssai    (32 - 1)

        /* The first digit of the mantissa division must be a one.
           Shift x (and adjust the exponent) as needed to make this true.  */
        bltu    yh, xh, 3f
        beq     yh, xh, .Ldiv_highequal1
2:      src     xh, xh, xl
        sll     xl, xl
        addi    a8, a8, -1
3:
        /* Do the first subtraction and shift.  */
        sub     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, -1
1:      sub     xl, xl, yl
        src     xh, xh, xl
        sll     xl, xl

        /* Put the quotient into a10/a11.  */
        movi    a10, 0
        movi    a11, 1

        /* Divide one bit at a time for 52 bits.  */
        movi    a9, 52
#if XCHAL_HAVE_LOOPS
        loop    a9, .Ldiv_loopend
#endif
.Ldiv_loop:
        /* Shift the quotient << 1.  */
        src     a10, a10, a11
        sll     a11, a11

        /* Is this digit a 0 or 1?  */
        bltu    xh, yh, 3f
        beq     xh, yh, .Ldiv_highequal2

        /* Output a 1 and subtract.  */
2:      addi    a11, a11, 1
        sub     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, -1
1:      sub     xl, xl, yl

        /* Shift the dividend << 1.  */
3:      src     xh, xh, xl
        sll     xl, xl

#if !XCHAL_HAVE_LOOPS
        addi    a9, a9, -1
        bnez    a9, .Ldiv_loop
#endif
.Ldiv_loopend:
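/* Illustration (comment only; not assembled): a rough C model of the
   quotient loop above -- classic restoring division, one quotient bit
   per iteration.  The assembly peels the first iteration (the leading
   quotient bit is known to be 1) and keeps the 64-bit values in
   register pairs; here uint64_t stands in for xh/xl and yh/yl.

     uint64_t divide_mantissa (uint64_t x, uint64_t y, uint64_t *rem)
     {
       uint64_t q = 0;
       for (int i = 0; i < 52; i++)
         {
           q <<= 1;
           if (x >= y)        // this quotient digit is a 1
             {
               q |= 1;
               x -= y;
             }
           x <<= 1;           // shift the dividend
         }
       *rem = x;              // remainder << 1, used for rounding
       return q;
     }
*/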

        /* Add the exponent bias (less one to account for the explicit "1.0"
           of the mantissa that will be added to the exponent in the final
           result).  */
        movi    a9, 0x3fe
        add     a8, a8, a9

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..7fd are OK here.  */
        addmi   a9, a9, 0x400   /* 0x7fe */
        bgeu    a8, a9, .Ldiv_overflow

.Ldiv_round:
        /* Round.  The remainder (<< 1) is in xh/xl.  */
        bltu    xh, yh, .Ldiv_rounded
        beq     xh, yh, .Ldiv_highequal3
.Ldiv_roundup:
        addi    a11, a11, 1
        beqz    a11, .Ldiv_roundcarry

.Ldiv_rounded:
        mov     xl, a11
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 20
        add     xh, a10, a8

.Ldiv_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      xh, xh, a7
        leaf_return

.Ldiv_highequal2:
        bgeu    xl, yl, 2b
        j       3b

.Ldiv_highequal3:
        bltu    xl, yl, .Ldiv_rounded
        bne     xl, yl, .Ldiv_roundup

        /* Remainder is exactly half the divisor.  Round even.  */
        addi    a11, a11, 1
        beqz    a11, .Ldiv_roundcarry
        srli    a11, a11, 1
        slli    a11, a11, 1
        j       .Ldiv_rounded

.Ldiv_overflow:
        bltz    a8, .Ldiv_underflow
        /* Return +/- Infinity.  */
        addi    a8, a9, 1       /* 0x7ff */
        slli    xh, a8, 20
        movi    xl, 0
        j       .Ldiv_addsign

.Ldiv_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        ssr     a8
        bgeui   a8, 32, .Ldiv_bigshift

        /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
           saved in a6 for rounding the result.  */
        sll     a6, a11
        src     a11, a10, a11
        srl     a10, a10
        j       1f

.Ldiv_bigshift:
        bgeui   a8, 64, .Ldiv_flush_to_zero
        sll     a9, a11         /* lost bits shifted out of a11 */
        src     a6, a10, a11
        srl     a11, a10
        movi    a10, 0
        or      xl, xl, a9

        /* Set the exponent to zero.  */
1:      movi    a8, 0

        /* Pack any nonzero remainder (in xh/xl) into a6.  */
        or      xh, xh, xl
        beqz    xh, 1f
        movi    a9, 1
        or      a6, a6, a9

        /* Round a10/a11 based on the bits shifted out into a6.  */
1:      bgez    a6, .Ldiv_rounded
        addi    a11, a11, 1
        beqz    a11, .Ldiv_roundcarry
        slli    a6, a6, 1
        bnez    a6, .Ldiv_rounded
        srli    a11, a11, 1
        slli    a11, a11, 1
        j       .Ldiv_rounded

.Ldiv_roundcarry:
        /* a11 is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    a10, a10, 1
        /* Overflow to the exponent field is OK.  */
        j       .Ldiv_rounded

.Ldiv_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        leaf_return

#endif /* L_divdf3 */
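/* Illustration (comment only; not assembled): the contracts of the
   comparison helpers defined in the L_cmpdf2 section below, following
   the usual libgcc soft-float conventions.  NaN arguments make every
   ordered comparison false, so each NaN check returns the value that
   the corresponding predicate treats as "false".

     int __eqdf2 (double x, double y);    // == 0 iff x == y
     int __nedf2 (double x, double y);    // same value as __eqdf2
     int __ltdf2 (double x, double y);    // <  0 iff x <  y
     int __ledf2 (double x, double y);    // <= 0 iff x <= y
     int __gtdf2 (double x, double y);    // >  0 iff x >  y
     int __gedf2 (double x, double y);    // >= 0 iff x >= y
     int __unorddf2 (double x, double y); // != 0 iff either is NaN
*/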
#ifdef L_cmpdf2

        /* Equal and Not Equal */

        .align  4
        .global __eqdf2
        .global __nedf2
        .set    __nedf2, __eqdf2
        .type   __eqdf2, @function
__eqdf2:
        leaf_entry sp, 16
        bne     xl, yl, 2f
        bne     xh, yh, 4f

        /* The values are equal but NaN != NaN.  Check the exponent.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, 3f

        /* Equal.  */
        movi    a2, 0
        leaf_return

        /* Not equal.  */
2:      movi    a2, 1
        leaf_return

        /* Check if the mantissas are nonzero.  */
3:      slli    a7, xh, 12
        or      a7, a7, xl
        j       5f

        /* Check if x and y are zero with different signs.  */
4:      or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl      /* xl == yl here */

        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
           of x when exponent(x) = 0x7ff and x == y.  */
5:      movi    a2, 0
        movi    a3, 1
        movnez  a2, a3, a7
        leaf_return


        /* Greater Than */

        .align  4
        .global __gtdf2
        .type   __gtdf2, @function
__gtdf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Lle_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 0
        leaf_return


        /* Less Than or Equal */

        .align  4
        .global __ledf2
        .type   __ledf2, @function
__ledf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Lle_cmp
        movi    a2, 1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

.Lle_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, xh, yh
        bltz    a7, .Lle_diff_signs

        /* Check if x is negative.  */
        bltz    xh, .Lle_xneg

        /* Check if x <= y.  */
        bltu    xh, yh, 4f
        bne     xh, yh, 5f
        bltu    yl, xl, 5f
4:      movi    a2, 0
        leaf_return

.Lle_xneg:
        /* Check if y <= x.  */
        bltu    yh, xh, 4b
        bne     yh, xh, 5f
        bgeu    xl, yl, 4b
5:      movi    a2, 1
        leaf_return

.Lle_diff_signs:
        bltz    xh, 4b

        /* Check if both x and y are zero.  */
        or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl
        or      a7, a7, yl
        movi    a2, 1
        movi    a3, 0
        moveqz  a2, a3, a7
        leaf_return


        /* Greater Than or Equal */

        .align  4
        .global __gedf2
        .type   __gedf2, @function
__gedf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Llt_cmp
        movi    a2, -1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, -1
        leaf_return


        /* Less Than */

        .align  4
        .global __ltdf2
        .type   __ltdf2, @function
__ltdf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Llt_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

.Llt_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, xh, yh
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative.  */
        bltz    xh, .Llt_xneg

        /* Check if x < y.  */
        bltu    xh, yh, 4f
        bne     xh, yh, 5f
        bgeu    xl, yl, 5f
4:      movi    a2, -1
        leaf_return

.Llt_xneg:
        /* Check if y < x.  */
        bltu    yh, xh, 4b
        bne     yh, xh, 5f
        bltu    yl, xl, 4b
5:      movi    a2, 0
        leaf_return

.Llt_diff_signs:
        bgez    xh, 5b

        /* x is negative and y is positive, so x < y unless both are zero.  */
        or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl
        or      a7, a7, yl
        movi    a2, 0
        movi    a3, -1
        movnez  a2, a3, a7
        leaf_return
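/* Illustration (comment only; not assembled): why the sign-split
   comparisons above work.  For nonnegative doubles, the IEEE ordering
   matches the unsigned ordering of the raw bit patterns; for two
   negative values it is reversed (sign-magnitude encoding); for mixed
   signs, x < y iff x is negative, except that +0 == -0.

     int less_than (uint64_t a, uint64_t b)  // bit patterns, no NaNs
     {
       if ((int64_t) (a ^ b) < 0)            // different signs
         return (int64_t) a < 0              // x negative...
                && ((a | b) << 1) != 0;      // ...and not both zeros
       if ((int64_t) a < 0)                  // both negative
         return b < a;                       // reversed order
       return a < b;                         // both positive
     }
*/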

        /* Unordered */

        .align  4
        .global __unorddf2
        .type   __unorddf2, @function
__unorddf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 3f
1:      ball    yh, a6, 4f
2:      movi    a2, 0
        leaf_return

3:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

4:      slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, 2b
        movi    a2, 1
        leaf_return

#endif /* L_cmpdf2 */

#ifdef L_fixdfsi

        .align  4
        .global __fixdfsi
        .type   __fixdfsi, @function
__fixdfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixdfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 19, 10  /* 0x3fe */
        sub     a4, a4, a5
        bgei    a4, 32, .Lfixdfsi_maxint
        blti    a4, 1, .Lfixdfsi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     a5, a7, xl

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixdfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixdfsi_maxint

        /* Translate NaN to +maxint.  */
        movi    xh, 0

.Lfixdfsi_maxint:
        slli    a4, a6, 11      /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
        movgez  a4, a5, xh
        mov     a2, a4
        leaf_return

.Lfixdfsi_zero:
        movi    a2, 0
        leaf_return

#endif /* L_fixdfsi */
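/* Illustration (comment only; not assembled): a rough C model of the
   main path of __fixdfsi above, valid when 0 < e < 32 for
   e = biased_exponent - 0x3fe.

     int32_t fixdfsi (uint32_t hi, uint32_t lo)
     {
       int e = (int) ((hi >> 20) & 0x7ff) - 0x3fe;
       uint32_t m = 0x80000000u              // explicit "1.0"
                    | ((hi & 0xfffff) << 11) // high mantissa bits
                    | (lo >> 21);            // top bits of the low word
       uint32_t v = m >> (32 - e);           // scale by the exponent
       return ((int32_t) hi < 0) ? -(int32_t) v : (int32_t) v;
     }
*/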

#ifdef L_fixdfdi

        .align  4
        .global __fixdfdi
        .type   __fixdfdi, @function
__fixdfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixdfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 19, 10  /* 0x3fe */
        sub     a4, a4, a5
        bgei    a4, 64, .Lfixdfdi_maxint
        blti    a4, 1, .Lfixdfdi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     xh, a7, xl
        sll     xl, xl

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixdfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixdfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixdfdi_smallshift:
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixdfdi_shifted

.Lfixdfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixdfdi_maxint

        /* Translate NaN to +maxint.  */
        movi    xh, 0

.Lfixdfdi_maxint:
        slli    a7, a6, 11      /* 0x80000000 */
        bgez    xh, 1f
        mov     xh, a7
        movi    xl, 0
        leaf_return

1:      addi    xh, a7, -1      /* 0x7fffffff */
        movi    xl, -1
        leaf_return

.Lfixdfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

#endif /* L_fixdfdi */

#ifdef L_fixunsdfsi

        .align  4
        .global __fixunsdfsi
        .type   __fixunsdfsi, @function
__fixunsdfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixunsdfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 20, 10  /* 0x3ff */
        sub     a4, a4, a5
        bgei    a4, 32, .Lfixunsdfsi_maxint
        bltz    a4, .Lfixunsdfsi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     a5, a7, xl

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 32, .Lfixunsdfsi_bigexp
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixunsdfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixunsdfsi_maxint

        /* Translate NaN to 0xffffffff.  */
        movi    a2, -1
        leaf_return

.Lfixunsdfsi_maxint:
        slli    a4, a6, 11      /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */
        movgez  a4, a5, xh
        mov     a2, a4
        leaf_return

.Lfixunsdfsi_zero:
        movi    a2, 0
        leaf_return

.Lfixunsdfsi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    xh, 1f
        mov     a2, a5          /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative.  */
1:      slli    a2, a6, 11
        leaf_return

#endif /* L_fixunsdfsi */
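/* Illustration (comment only; not assembled): a rough C model of the
   main path of __fixunsdfsi above, valid when 0 < e <= 32 for
   e = biased_exponent - 0x3ff + 1.  The e == 32 case is the "bigexp"
   path, where no shift is needed.  Sign handling is omitted here:
   negative inputs are out of range, and this model does not reproduce
   the assembly's treatment of them.

     uint32_t fixunsdfsi (uint32_t hi, uint32_t lo)
     {
       int e = (int) ((hi >> 20) & 0x7ff) - 0x3ff + 1;
       uint32_t m = 0x80000000u
                    | ((hi & 0xfffff) << 11)
                    | (lo >> 21);
       return (e == 32) ? m : m >> (32 - e);
     }
*/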

#ifdef L_fixunsdfdi

        .align  4
        .global __fixunsdfdi
        .type   __fixunsdfdi, @function
__fixunsdfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixunsdfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 20, 10  /* 0x3ff */
        sub     a4, a4, a5
        bgei    a4, 64, .Lfixunsdfdi_maxint
        bltz    a4, .Lfixunsdfdi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     xh, a7, xl
        sll     xl, xl

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 64, .Lfixunsdfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunsdfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixunsdfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixunsdfdi_smallshift:
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixunsdfdi_shifted

.Lfixunsdfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixunsdfdi_maxint

        /* Translate NaN to 0xffffffff....  */
1:      movi    xh, -1
        movi    xl, -1
        leaf_return

.Lfixunsdfdi_maxint:
        bgez    xh, 1b
2:      slli    xh, a6, 11      /* 0x80000000 */
        movi    xl, 0
        leaf_return

.Lfixunsdfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

.Lfixunsdfdi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a7, 2b
        leaf_return             /* no shift needed */

#endif /* L_fixunsdfdi */

#ifdef L_floatsidf

        .align  4
        .global __floatunsidf
        .type   __floatunsidf, @function
__floatunsidf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsidf_return_zero

        /* Set the sign to zero and jump to the floatsidf code.  */
        movi    a7, 0
        j       .Lfloatsidf_normalize

        .align  4
        .global __floatsidf
        .type   __floatsidf, @function
__floatsidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        beqz    a2, .Lfloatsidf_return_zero

        /* Save the sign.  */
        extui   a7, a2, 31, 1

        /* Get the absolute value.  */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsidf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position.  */
        srli    xh, a5, 11
        slli    xl, a5, (32 - 11)

        /* Set the exponent.  */
        movi    a5, 0x41d       /* 0x3fe + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 20
        add     xh, xh, a5

        /* Add the sign and return.  */
        slli    a7, a7, 31
        or      xh, xh, a7
        leaf_return

.Lfloatsidf_return_zero:
        /* The input (a2) is already zero; clear the other half of the
           result pair.  */
        movi    a3, 0
        leaf_return

#endif /* L_floatsidf */
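/* Illustration (comment only; not assembled): a rough outline of
   __floatsidf above.  Any nonzero 32-bit integer fits in a double's
   53-bit mantissa, so no rounding is needed.

     normalize:  k = clz (|v|); m = |v| << k;  // leading 1 in the msb
     mantissa:   xh:xl = m >> 11, so the leading 1 lands in bit 20 of xh
     exponent:   (0x3fe + 31 - k) << 20 is added to xh; merging with the
                 leading 1 in bit 20 yields the biased exponent
                 0x3ff + (31 - k)
     sign:       the saved sign is OR'ed into bit 31
*/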

#ifdef L_floatdidf

        .align  4
        .global __floatundidf
        .type   __floatundidf, @function
__floatundidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdidf code.  */
        movi    a7, 0
        j       .Lfloatdidf_normalize

        .align  4
        .global __floatdidf
        .type   __floatdidf, @function
__floatdidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign.  */
        extui   a7, xh, 31, 1

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdidf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdidf_normalize
        addi    xh, xh, -1

.Lfloatdidf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdidf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdidf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */
        ssai    11
        sll     a6, xl
        src     xl, xh, xl
        srl     xh, xh

        /* Set the exponent.  */
        movi    a5, 0x43d       /* 0x3fe + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 20
        add     xh, xh, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      xh, xh, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, 2f
        addi    xl, xl, 1
        beqz    xl, .Lfloatdidf_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdidf_exactlyhalf
2:      leaf_return

.Lfloatdidf_bigshift:
        /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Lfloatdidf_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return

#endif /* L_floatdidf */

#ifdef L_truncdfsf2

        .align  4
        .global __truncdfsf2
        .type   __truncdfsf2, @function
__truncdfsf2:
        leaf_entry sp, 16

        /* Adjust the exponent bias.  */
        movi    a4, (0x3ff - 0x7f) << 20
        sub     a5, xh, a4

        /* Check for underflow.  */
        xor     a6, xh, a5
        bltz    a6, .Ltrunc_underflow
        extui   a6, a5, 20, 11
        beqz    a6, .Ltrunc_underflow

        /* Check for overflow.  */
        movi    a4, 255
        bge     a6, a4, .Ltrunc_overflow

        /* Shift a5/xl << 3 into a5/a4.  */
        ssai    (32 - 3)
        src     a5, a5, xl
        sll     a4, xl

.Ltrunc_addsign:
        /* Add the sign bit.  */
        extui   a6, xh, 31, 1
        slli    a6, a6, 31
        or      a2, a6, a5

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a4, 1f
        addi    a2, a2, 1
        /* Overflow to the exponent is OK.  The answer will be correct.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a4, a4, 1
        beqz    a4, .Ltrunc_exactlyhalf
1:      leaf_return

.Ltrunc_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Ltrunc_overflow:
        /* Check if exponent == 0x7ff.  */
        movi    a4, 0x7ff00000
        bnall   xh, a4, 1f

        /* Check if mantissa is nonzero.  */
        slli    a5, xh, 12
        or      a5, a5, xl
        beqz    a5, 1f

        /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
        srli    a4, a4, 1

1:      slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
        /* Add the sign bit.  */
        extui   a6, xh, 31, 1
        ssai    1
        src     a2, a6, a4
        leaf_return

.Ltrunc_underflow:
        /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
        extui   a6, xh, 20, 11
        movi    a5, 0x3ff - 0x7f
        sub     a6, a5, a6
        addi    a6, a6, 1
        bgeui   a6, 32, 1f

        /* Replace the exponent with an explicit "1.0".  */
        slli    a5, a5, 13      /* 0x700000 */
        or      a5, a5, xh
        slli    a5, a5, 11
        srli    a5, a5, 11

        /* Shift the mantissa left by 3 bits (into a5/a4).  */
        ssai    (32 - 3)
        src     a5, a5, xl
        sll     a4, xl

        /* Shift right by a6.  */
        ssr     a6
        sll     a7, a4
        src     a4, a5, a4
        srl     a5, a5
        beqz    a7, .Ltrunc_addsign
        or      a4, a4, a6      /* any positive, nonzero value will work */
        j       .Ltrunc_addsign

        /* Return +/- zero.  */
1:      extui   a2, xh, 31, 1
        slli    a2, a2, 31
        leaf_return

#endif /* L_truncdfsf2 */
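/* Illustration (comment only; not assembled): the exponent re-biasing
   shared by __truncdfsf2 above and __extendsfdf2 below.  Doubles use
   bias 1023 (0x3ff) and floats use bias 127 (0x7f), so the conversion
   adjusts the exponent field by the bias difference:

     new_exp = old_exp - (0x3ff - 0x7f);   // double -> float
     new_exp = old_exp + (0x3ff - 0x7f);   // float -> double

   which is why the constant (0x3ff - 0x7f) << 20 appears in both
   routines.
*/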

#ifdef L_extendsfdf2

        .align  4
        .global __extendsfdf2
        .type   __extendsfdf2, @function
__extendsfdf2:
        leaf_entry sp, 16

        /* Save the sign bit and then shift it off.  */
        extui   a5, a2, 31, 1
        slli    a5, a5, 31
        slli    a4, a2, 1

        /* Extract and check the exponent.  */
        extui   a6, a2, 23, 8
        beqz    a6, .Lextend_expzero
        addi    a6, a6, 1
        beqi    a6, 256, .Lextend_nan_or_inf

        /* Shift >> 3 into a4/xl.  */
        srli    a4, a4, 4
        slli    xl, a2, (32 - 3)

        /* Adjust the exponent bias.  */
        movi    a6, (0x3ff - 0x7f) << 20
        add     a4, a4, a6

        /* Add the sign bit.  */
        or      xh, a4, a5
        leaf_return

.Lextend_nan_or_inf:
        movi    a4, 0x7ff00000

        /* Check for NaN.  */
        slli    a7, a2, 9
        beqz    a7, 1f

        slli    a6, a6, 11      /* 0x80000 */
        or      a4, a4, a6

        /* Add the sign and return.  */
1:      or      xh, a4, a5
        movi    xl, 0
        leaf_return

.Lextend_expzero:
        beqz    a4, 1b

        /* Normalize it to have 8 zero bits before the first 1 bit.  */
        do_nsau a7, a4, a2, a3
        addi    a7, a7, -8
        ssl     a7
        sll     a4, a4

        /* Shift >> 3 into a4/xl.  */
        slli    xl, a4, (32 - 3)
        srli    a4, a4, 3

        /* Set the exponent.  */
        movi    a6, 0x3fe - 0x7f
        sub     a6, a6, a7
        slli    a6, a6, 20
        add     a4, a4, a6

        /* Add the sign and return.  */
        or      xh, a4, a5
        leaf_return

#endif /* L_extendsfdf2 */
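/* Illustration (comment only; not assembled): the subnormal case of
   __extendsfdf2 above.  Every subnormal float becomes a normal double,
   since double's exponent range is far wider.  With k = nsau(frac << 1) - 8
   (the shift that leaves 8 zero bits above the leading 1), the biased
   result exponent is (0x3fe - 0x7f - k) plus one more from the leading
   mantissa bit merging into the exponent field.  For example, the
   smallest float subnormal (frac = 1, value 2^-149) gives k = 22 and a
   final exponent field of 0x380 - 22 = 0x36a, i.e., 2^(0x36a - 0x3ff)
   = 2^-149, as required.
*/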