/* x86-mont.S, revision 299966 */
# $FreeBSD: stable/10/secure/lib/libcrypto/i386/x86-mont.S 299966 2016-05-16 19:30:27Z jkim $
# Do not modify. This file is auto-generated from x86-mont.pl.
/*
 * NOTE(review): this copy was restored by hand from a line-numbered listing
 * (one statement per line again) and the PIC / non-PIC variants, which
 * differed only in how the address of OPENSSL_ia32cap_P is obtained, were
 * folded into one body.  Regenerating from x86-mont.pl will presumably
 * bring back the duplicated layout -- confirm before re-importing.
 *
 * Syntax: GNU as, AT&T operand order, i386.  The file is run through cpp.
 *
 * bn_mul_mont -- Montgomery multiplication (see the ident string at the
 * end of the file).  All six arguments are read from the stack.  The
 * names below are descriptive only; they are inferred from how each slot
 * is used in this file:
 *
 *   arg1  rp   result vector; the common tail writes num words to it
 *   arg2  ap   first operand vector
 *   arg3  bp   second operand vector (one word consumed per outer pass)
 *   arg4  np   modulus vector (multiplied by m and subtracted in the tail)
 *   arg5  n0   pointer; only the word at offset 0 is loaded
 *   arg6  num  signed word count
 *
 * Return value in %eax: 0 when num < 4 (nothing else is touched), 1 after
 * a completed multiplication.
 *
 * %ebp, %ebx, %esi and %edi are saved on entry and restored on exit.
 * The SSE2 path uses %mm0-%mm7 and executes emms before leaving.
 *
 * Private frame (relative to the re-aligned %esp):
 *    0(%esp)   num-1, stored by the squaring path only
 *    4(%esp)   rp
 *    8(%esp)   ap
 *   12(%esp)   bp; the integer multiply path advances it word by word,
 *              the squaring path keeps a word index here instead
 *   16(%esp)   np
 *   20(%esp)   the word loaded through arg5
 *   24(%esp)   caller %esp, reloaded right before the epilogue
 *   28(%esp)   integer multiply path: address one past the last bp word
 *   32(%esp)   temporary vector tp[], indexed by word
 *
 * Three code paths follow the common set-up:
 *   - SSE2 path when bit 26 of the first word of OPENSSL_ia32cap_P is set;
 *   - otherwise a squaring path when ap == bp and num is even;
 *   - otherwise the generic integer path.
 * All three join at .Lcommon_tail.
 */
.file	"x86-mont.S"
.text
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax			/* return value 0 for the early exit */
	movl	40(%esp),%edi			/* edi = num (arg6) */
	cmpl	$4,%edi
	jl	.Ljust_leave			/* signed compare: num < 4 is refused */
	leal	20(%esp),%esi			/* esi = address of arg1 */
	leal	24(%esp),%edx			/* edx = address of arg2 */
	movl	%esp,%ebp			/* ebp = caller stack pointer */
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%esp		/* esp -= 32 + 4*(num+2) */
	negl	%edi				/* edi = num+2 again */
	/*
	 * Place the frame relative to the argument block: make (esp - edx)
	 * a multiple of 2048, then move down another 2048 bytes if bit 11
	 * of esp still equals bit 11 of edx, and finally round esp down to
	 * a 64-byte boundary.
	 */
	movl	%esp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%esp
	xorl	%esp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%esp
	andl	$-64,%esp
	/*
	 * Read one word in every 4096-byte step between the old and the new
	 * stack pointer, starting nearest the old one and ending at (%esp).
	 * The loop ends when the subtraction borrows.
	 */
	movl	%ebp,%eax
	subl	%esp,%eax
	andl	$-4096,%eax
.Lpage_walk:
	movl	(%esp,%eax,1),%edx
	subl	$4096,%eax
.byte	46					/* 0x2e prefix byte emitted in front of the jnc */
	jnc	.Lpage_walk
	/* Copy the arguments into the private frame. */
	movl	(%esi),%eax			/* rp */
	movl	4(%esi),%ebx			/* ap */
	movl	8(%esi),%ecx			/* bp */
	movl	12(%esi),%edx			/* np */
	movl	16(%esi),%esi			/* arg5 pointer ... */
	movl	(%esi),%esi			/* ... and the word it points at */
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%edx,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx			/* ebx = num-1 */
	movl	%ebp,24(%esp)			/* remember the caller stack pointer */
	/* eax = address of OPENSSL_ia32cap_P */
#ifdef PIC
	call	.LPIC_me_up
.LPIC_me_up:
	popl	%eax				/* eax = address of .LPIC_me_up */
	leal	_GLOBAL_OFFSET_TABLE_+[.-.LPIC_me_up](%eax),%eax
	movl	OPENSSL_ia32cap_P@GOT(%eax),%eax
#else
	leal	OPENSSL_ia32cap_P,%eax
#endif
	btl	$26,(%eax)
	jnc	.Lnon_sse2

	/*
	 * SSE2 path.  esi = ap, edi = bp, ebp = np, edx = outer index,
	 * ecx = inner index, mm7 = 0x00000000ffffffff mask, mm4 = current
	 * bp word, mm5 = current multiplier for np, mm2 / mm3 = the two
	 * running 64-bit accumulators.
	 */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L1st:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L1st
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)		/* 64-bit store: top word and carry word */
	incl	%edx
.Louter:
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx				/* ebx counts the inner loop down to zero */
.Linner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx			/* lea keeps the ZF produced by decl */
	jnz	.Linner
	movl	%ecx,%ebx			/* ebx restored from the final index */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.Louter
	emms					/* leave MMX state before the integer tail */
	jmp	.Lcommon_tail

	/*
	 * Integer paths.  The squaring path is taken only when num is even
	 * and ap == bp: ebp = (ebx+1) & 1, edx = ap - bp, and the jz below
	 * tests the OR of the two (the intervening movl leaves flags alone).
	 */
.align	16
.Lnon_sse2:
	movl	8(%esp),%esi			/* esi = ap */
	leal	1(%ebx),%ebp
	movl	12(%esp),%edi			/* edi = bp */
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax		/* eax = bp + 4*num */
	orl	%edx,%ebp
	movl	(%edi),%edi			/* edi = first bp word */
	jz	.Lbn_sqr_mont
	movl	%eax,28(%esp)			/* end-of-bp sentinel for the outer loop */
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.Lmull:
	/* tp[] = ap[] * (first bp word); edx carries the high half forward */
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.Lmull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi			/* esi = np from here on */
	adcl	$0,%edx
	imull	32(%esp),%edi			/* edi = tp[0] * (word from arg5) */
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	addl	32(%esp),%eax			/* only the carry of this add is used */
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L2ndmadd
.align	16
.L1stmadd:
	/* tp[] += ap[] * (current bp word) */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L1stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L2ndmadd:
	/* tp[] = (tp[] + np[] * edi), stored one word lower than it was read */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L2ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx			/* ecx = current bp pointer */
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx			/* advance to the next bp word */
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx			/* reached the end-of-bp sentinel? */
	movl	%eax,36(%esp,%ebx,4)
	je	.Lcommon_tail
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L1stmadd

	/*
	 * Squaring path (ap == bp, num even).  0(%esp) keeps num-1 and
	 * 12(%esp) keeps the index of the ap word currently being squared.
	 */
.align	16
.Lbn_sqr_mont:
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)			/* ecx is 0 here: start at word 0 */
	movl	%edi,%eax
	mull	%edi
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx				/* ebx = bit carried into the doubling */
	incl	%ecx
.align	16
.Lsqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp		/* doubled product word plus carried bit */
	shrl	$31,%eax			/* bit shifted out by the doubling */
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.Lsqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi			/* esi = np from here on */
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L3rdmadd:
	/* Same reduction step as .L2ndmadd, unrolled to two words per pass. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L3rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx			/* ecx = index of the word just squared */
	xorl	%eax,%eax
	movl	8(%esp),%esi			/* esi = ap again */
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.Lcommon_tail			/* last word handled: finish */
	movl	4(%esi,%ecx,4),%edi		/* edi = next ap word */
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx			/* lea keeps the flags of the cmpl */
	je	.Lsqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.Lsqradd:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.Lsqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.Lsqrlast:
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L3rdmadd

	/*
	 * Common tail: rp[] = tp[] - np[], then, depending on the final
	 * borrow, copy either tp[] or rp[] itself into rp[] and overwrite
	 * the temporary vector on the way.
	 */
.align	16
.Lcommon_tail:
	movl	16(%esp),%ebp			/* ebp = np */
	movl	4(%esp),%edi			/* edi = rp */
	leal	32(%esp),%esi			/* esi = tp */
	movl	(%esi),%eax
	movl	%ebx,%ecx			/* ecx = loop counter, starts at ebx */
	xorl	%edx,%edx			/* index 0; also clears CF for the first sbbl */
.align	16
.Lsub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx				/* decl leaves CF (the running borrow) intact */
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.Lsub				/* tests the flags of decl; exits with ecx = -1 */
	sbbl	$0,%eax				/* word above the vector minus the final borrow */
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi			/* esi = (tp & eax) | (rp & ~eax): copy source */
.align	16
.Lcopy:
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)		/* overwrite tp word with the value left in ecx */
	decl	%ebx
	jge	.Lcopy
	movl	24(%esp),%esp			/* back to the caller stack pointer */
	movl	$1,%eax				/* return 1 */
.Ljust_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
.comm	OPENSSL_ia32cap_P,8,4