/* x86-gf2m.S revision 299966 */
# $FreeBSD: stable/10/secure/lib/libcrypto/i386/x86-gf2m.S 299966 2016-05-16 19:30:27Z jkim $
# Do not modify.  This file is auto-generated from x86-gf2m.pl.

/*
 * Carry-less (GF(2)[x]) multiplication for 32-bit x86.
 * Syntax: GNU as, AT&T operand order.  ABI: i386 cdecl (stack arguments).
 *
 * The two helper routines are shared by the PIC and non-PIC builds; the
 * only build-dependent code is how bn_GF2m_mul_2x2 obtains the address
 * of OPENSSL_ia32cap_P.
 *
 * Added comments use the C form on purpose: this file is run through
 * the C preprocessor, which would try to interpret a line that starts
 * with a hash sign followed by a directive keyword.
 */
.file	"x86-gf2m.S"
.text

/*---------------------------------------------------------------------
 * _mul_1x1_mmx -- 32x32 -> 64 bit carry-less multiply, MMX flavour.
 * Private register-based calling convention (not cdecl):
 *   In:    eax = a, ebx = b
 *   Out:   mm0 = a * b over GF(2)[x] (64 bits)
 *   Clobb: ebx, ecx, edx, ebp, esi, edi, mm1-mm5, flags
 *   Stack: 36 bytes of scratch, 32 of them used as tab[0..7]
 *
 * tab[i] holds (a with bits 31 and 30 cleared) times the 3-bit
 * polynomial i.  b is consumed in eleven 3-bit windows (the last one
 * only has 2 bits left); window k contributes tab[window] << 3k.
 * Bits 31 and 30 of a are handled separately through mm5 and mm4.
 * Loads, shifts and xors of neighbouring windows are interleaved.
 *-------------------------------------------------------------------*/
.type	_mul_1x1_mmx,@function
.align	16
_mul_1x1_mmx:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx		/* edx = a<<1 */
	andl	$0x3fffffff,%ecx		/* ecx = a1 = a without bits 31,30 */
	leal	(%edx,%edx,1),%ebp		/* ebp = a4 = a<<2 */
	movl	$0,(%esp)			/* tab[0] = 0 */
	andl	$0x7fffffff,%edx		/* edx = a2 = a1<<1 */
	movd	%eax,%mm2			/* mm2 = a */
	movd	%ebx,%mm3			/* mm3 = b */
	movl	%ecx,4(%esp)			/* tab[1] = a1 */
	xorl	%edx,%ecx
	pxor	%mm5,%mm5
	pxor	%mm4,%mm4
	movl	%edx,8(%esp)			/* tab[2] = a2 */
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)			/* tab[3] = a1^a2 */
	pcmpgtd	%mm2,%mm5			/* low dword all-ones iff bit 31 of a set */
	paddd	%mm2,%mm2			/* mm2 = a<<1 */
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)			/* tab[4] = a4 */
	xorl	%edx,%ebp
	pand	%mm3,%mm5			/* mm5 = bit 31 of a ? b : 0 */
	pcmpgtd	%mm2,%mm4			/* low dword all-ones iff bit 30 of a set */
	movl	%ecx,20(%esp)			/* tab[5] = a1^a4 */
	xorl	%ecx,%ebp
	psllq	$31,%mm5			/* mm5 = contribution of a bit 31 */
	pand	%mm3,%mm4			/* mm4 = bit 30 of a ? b : 0 */
	movl	%edx,24(%esp)			/* tab[6] = a2^a4 */
	movl	$7,%esi
	movl	%ebp,28(%esp)			/* tab[7] = a1^a2^a4 */
	movl	%esi,%ebp			/* ebp = 7, the window mask from here on */
	andl	%ebx,%esi			/* esi = window 0 */
	shrl	$3,%ebx
	movl	%ebp,%edi
	psllq	$30,%mm4			/* mm4 = contribution of a bit 30 */
	andl	%ebx,%edi			/* edi = window 1 */
	shrl	$3,%ebx
	movd	(%esp,%esi,4),%mm0		/* accumulator = tab[window 0] */
	movl	%ebp,%esi
	andl	%ebx,%esi			/* esi = window 2 */
	shrl	$3,%ebx
	movd	(%esp,%edi,4),%mm2		/* window 1 */
	movl	%ebp,%edi
	psllq	$3,%mm2
	andl	%ebx,%edi			/* edi = window 3 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1		/* window 2 */
	movl	%ebp,%esi
	psllq	$6,%mm1
	andl	%ebx,%esi			/* esi = window 4 */
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2		/* window 3 */
	movl	%ebp,%edi
	psllq	$9,%mm2
	andl	%ebx,%edi			/* edi = window 5 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1		/* window 4 */
	movl	%ebp,%esi
	psllq	$12,%mm1
	andl	%ebx,%esi			/* esi = window 6 */
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2		/* window 5 */
	movl	%ebp,%edi
	psllq	$15,%mm2
	andl	%ebx,%edi			/* edi = window 7 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1		/* window 6 */
	movl	%ebp,%esi
	psllq	$18,%mm1
	andl	%ebx,%esi			/* esi = window 8 */
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2		/* window 7 */
	movl	%ebp,%edi
	psllq	$21,%mm2
	andl	%ebx,%edi			/* edi = window 9 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1		/* window 8 */
	movl	%ebp,%esi
	psllq	$24,%mm1
	andl	%ebx,%esi			/* esi = window 10 (top 2 bits of b) */
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2		/* window 9 */
	pxor	%mm4,%mm0			/* fold in a bit 30 term */
	psllq	$27,%mm2
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1		/* window 10 */
	pxor	%mm5,%mm0			/* fold in a bit 31 term */
	psllq	$30,%mm1
	addl	$36,%esp
	pxor	%mm1,%mm0
	ret
.size	_mul_1x1_mmx,.-_mul_1x1_mmx

/*---------------------------------------------------------------------
 * _mul_1x1_ialu -- 32x32 -> 64 bit carry-less multiply, integer only.
 * Private register-based calling convention (not cdecl):
 *   In:    eax = a, ebx = b
 *   Out:   edx:eax = a * b over GF(2)[x]
 *   Clobb: ebx, ecx, ebp, esi, edi, flags
 *   Stack: 36 bytes of scratch, 32 of them used as tab[0..7]
 *
 * Same table and 3-bit windowing as _mul_1x1_mmx.  Each table entry is
 * split into a low part (shifted left into eax) and a high part
 * (shifted right into edx).  Bits 31 and 30 of a are turned into
 * all-ones/zero masks with sarl and applied to b directly.
 *-------------------------------------------------------------------*/
.type	_mul_1x1_ialu,@function
.align	16
_mul_1x1_ialu:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx		/* edx = a<<1 */
	leal	(,%eax,4),%ebp			/* ebp = a4 = a<<2 */
	andl	$0x3fffffff,%ecx		/* ecx = a1 = a without bits 31,30 */
	leal	(%eax,%eax,1),%edi		/* edi = a<<1, bit 30 of a now on top */
	sarl	$31,%eax			/* eax = mask for bit 31 of a */
	movl	$0,(%esp)			/* tab[0] = 0 */
	andl	$0x7fffffff,%edx		/* edx = a2 = a1<<1 */
	movl	%ecx,4(%esp)			/* tab[1] = a1 */
	xorl	%edx,%ecx
	movl	%edx,8(%esp)			/* tab[2] = a2 */
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)			/* tab[3] = a1^a2 */
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)			/* tab[4] = a4 */
	xorl	%edx,%ebp
	movl	%ecx,20(%esp)			/* tab[5] = a1^a4 */
	xorl	%ecx,%ebp
	sarl	$31,%edi			/* edi = mask for bit 30 of a */
	andl	%ebx,%eax			/* eax = bit 31 of a ? b : 0 */
	movl	%edx,24(%esp)			/* tab[6] = a2^a4 */
	andl	%ebx,%edi			/* edi = bit 30 of a ? b : 0 */
	movl	%ebp,28(%esp)			/* tab[7] = a1^a2^a4 */
	movl	%eax,%edx
	shll	$31,%eax			/* low word of the bit 31 term */
	movl	%edi,%ecx
	shrl	$1,%edx				/* high word of the bit 31 term */
	movl	$7,%esi
	shll	$30,%edi			/* low word of the bit 30 term */
	andl	%ebx,%esi			/* esi = window 0 */
	shrl	$2,%ecx				/* high word of the bit 30 term */
	xorl	%edi,%eax
	shrl	$3,%ebx
	movl	$7,%edi
	andl	%ebx,%edi			/* edi = window 1 */
	shrl	$3,%ebx
	xorl	%ecx,%edx
	xorl	(%esp,%esi,4),%eax		/* window 0, no shift */
	movl	$7,%esi
	andl	%ebx,%esi			/* esi = window 2 */
	shrl	$3,%ebx
	movl	(%esp,%edi,4),%ebp		/* window 1 */
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$3,%ebp
	andl	%ebx,%edi			/* edi = window 3 */
	shrl	$29,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		/* window 2 */
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$6,%ecx
	andl	%ebx,%esi			/* esi = window 4 */
	shrl	$26,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		/* window 3 */
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$9,%ebp
	andl	%ebx,%edi			/* edi = window 5 */
	shrl	$23,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		/* window 4 */
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$12,%ecx
	andl	%ebx,%esi			/* esi = window 6 */
	shrl	$20,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		/* window 5 */
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$15,%ebp
	andl	%ebx,%edi			/* edi = window 7 */
	shrl	$17,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		/* window 6 */
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$18,%ecx
	andl	%ebx,%esi			/* esi = window 8 */
	shrl	$14,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		/* window 7 */
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$21,%ebp
	andl	%ebx,%edi			/* edi = window 9 */
	shrl	$11,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		/* window 8 */
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$24,%ecx
	andl	%ebx,%esi			/* esi = window 10 (top 2 bits of b) */
	shrl	$8,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		/* window 9 */
	movl	%ebp,%ecx
	shll	$27,%ebp
	movl	(%esp,%esi,4),%edi		/* window 10 */
	shrl	$5,%ecx
	movl	%edi,%esi
	xorl	%ebp,%eax
	shll	$30,%edi
	xorl	%ecx,%edx
	shrl	$2,%esi
	xorl	%edi,%eax
	xorl	%esi,%edx
	addl	$36,%esp
	ret
.size	_mul_1x1_ialu,.-_mul_1x1_ialu

/*---------------------------------------------------------------------
 * bn_GF2m_mul_2x2 -- 64x64 -> 128 bit carry-less multiply.
 * ABI:  i386 cdecl.  Stack arguments at entry:
 *          4(%esp) = r   pointer to four 32-bit result words
 *          8(%esp) = a1  12(%esp) = a0
 *         16(%esp) = b1  20(%esp) = b0
 * Out:  r[0..3] = (a1:a0) * (b1:b0) over GF(2)[x], least significant
 *       word first.  No register return value.
 * Clobb: eax, ecx, edx, flags; xmm0 on the PCLMULQDQ path; mm0-mm7 on
 *        the MMX path (left in the empty state by emms).
 *        ebx, ebp, esi, edi are saved and restored.
 *
 * Dispatch on the two words of OPENSSL_ia32cap_P (bit meanings as
 * documented in OPENSSL_ia32cap(3)):
 *   word 0 bit 23 clear                 -> integer path
 *   word 0 bit 24 or word 1 bit 1 clear -> MMX path
 *   otherwise                           -> single PCLMULQDQ
 * The MMX and integer paths use one level of Karatsuba: three 32x32
 * products a1*b1, a0*b0 and (a0^a1)*(b0^b1).
 *-------------------------------------------------------------------*/
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
#ifdef PIC
	/* No absolute addresses allowed: find the GOT through our own
	   return address, then fetch the capability vector address. */
	call	.LPIC_me_up
.LPIC_me_up:
	popl	%edx
	leal	_GLOBAL_OFFSET_TABLE_+[.-.LPIC_me_up](%edx),%edx
	movl	OPENSSL_ia32cap_P@GOT(%edx),%edx
#else
	leal	OPENSSL_ia32cap_P,%edx
#endif
	movl	(%edx),%eax			/* eax = capability word 0 */
	movl	4(%edx),%edx			/* edx = capability word 1 */
	testl	$0x00800000,%eax		/* word 0 bit 23 */
	jz	.Lialu
	testl	$0x01000000,%eax		/* word 0 bit 24 */
	jz	.Lmmx
	testl	$2,%edx				/* word 1 bit 1 */
	jz	.Lmmx

	/* PCLMULQDQ path: no frame, arguments still at their entry offsets. */
	movups	8(%esp),%xmm0			/* dwords: a1, a0, b1, b0 */
	shufps	$177,%xmm0,%xmm0		/* swap within pairs: a0, a1, b0, b1 */
.byte	102,15,58,68,192,1			/* pclmulqdq $1,%xmm0,%xmm0 */
	movl	4(%esp),%eax			/* eax = r */
	movups	%xmm0,(%eax)
	ret

.align	16
.Lmmx:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* Four pushes: r is at 20(%esp), a1 at 24, a0 at 28, b1 at 32, b0 at 36. */
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	call	_mul_1x1_mmx
	movq	%mm0,%mm7			/* mm7 = a1*b1 */
	movl	28(%esp),%eax
	movl	36(%esp),%ebx
	call	_mul_1x1_mmx
	movq	%mm0,%mm6			/* mm6 = a0*b0 */
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	xorl	28(%esp),%eax			/* eax = a1^a0 */
	xorl	36(%esp),%ebx			/* ebx = b1^b0 */
	call	_mul_1x1_mmx
	pxor	%mm7,%mm0
	movl	20(%esp),%eax			/* eax = r, read before the pops move esp */
	pxor	%mm6,%mm0			/* mm0 = middle term */
	movq	%mm0,%mm2
	psllq	$32,%mm0			/* low half of the middle term, in place */
	popl	%edi
	psrlq	$32,%mm2			/* high half of the middle term */
	popl	%esi
	pxor	%mm6,%mm0			/* result bits 0..63 */
	popl	%ebx
	pxor	%mm7,%mm2			/* result bits 64..127 */
	movq	%mm0,(%eax)
	popl	%ebp
	movq	%mm2,8(%eax)
	emms					/* leave the x87/MMX state empty */
	ret

.align	16
.Lialu:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$20,%esp
	/* Four pushes plus 20 bytes: r is at 40(%esp), a1 at 44, a0 at 48,
	   b1 at 52, b0 at 56.  0..15(%esp) hold the two saved products. */
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	call	_mul_1x1_ialu
	movl	%eax,8(%esp)			/* a1*b1, low word */
	movl	%edx,12(%esp)			/* a1*b1, high word */
	movl	48(%esp),%eax
	movl	56(%esp),%ebx
	call	_mul_1x1_ialu
	movl	%eax,(%esp)			/* a0*b0, low word */
	movl	%edx,4(%esp)			/* a0*b0, high word */
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	xorl	48(%esp),%eax			/* eax = a1^a0 */
	xorl	56(%esp),%ebx			/* ebx = b1^b0 */
	call	_mul_1x1_ialu			/* edx:eax = (a1^a0)*(b1^b0) */
	movl	40(%esp),%ebp			/* ebp = r */
	movl	(%esp),%ebx
	movl	4(%esp),%ecx
	movl	8(%esp),%edi
	movl	12(%esp),%esi
	xorl	%edx,%eax
	xorl	%ecx,%edx
	xorl	%ebx,%eax
	movl	%ebx,(%ebp)			/* r[0] = low word of a0*b0 */
	xorl	%edi,%edx
	movl	%esi,12(%ebp)			/* r[3] = high word of a1*b1 */
	xorl	%esi,%eax
	addl	$20,%esp
	xorl	%esi,%edx
	popl	%edi
	xorl	%edx,%eax
	popl	%esi
	movl	%edx,8(%ebp)			/* r[2] */
	popl	%ebx
	movl	%eax,4(%ebp)			/* r[1] */
	popl	%ebp
	ret
.size	bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin

/* NUL-terminated ASCII: "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
.byte	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte	62,0

/* Two 32-bit capability words, 4-byte aligned, as a common symbol. */
.comm	OPENSSL_ia32cap_P,8,4