# $FreeBSD$
.text

#-----------------------------------------------------------------------
# bn_mul_mont -- multiply two num-word integers and reduce the running
# sum by one 64-bit word per outer iteration, then conditionally
# subtract the modulus.
#
# Syntax: GAS, AT&T operand order.
# In:    rdi = rp    result, num 64-bit words (written)
#        rsi = ap    first operand, num words (read)
#        rdx = bp    second operand, num words (read)
#        rcx = np    modulus, num words (read)
#        r8  = n0p   pointer; only the word at (%r8) is read
#        r9d = num   word count (zero-extended before use)
# Out:   rax = 1
# Saved: rbx, rbp, r12-r15 are pushed on entry and reloaded on exit.
# Clobb: rcx, rdx, rsi, r8-r11, flags.
# Stack: a scratch vector tp[] of num+2 qwords below a 1024-byte
#        aligned rsp: tp[0..num-1] running sum, tp[num] top carry,
#        tp[num+1] the caller's rsp.
#
# NOTE(review): the register roles match the System V AMD64 argument
# order; the names rp/ap/bp/np/n0 and the reading "Montgomery
# multiplication" are inferred from the symbol name -- confirm against
# the C prototype.
# NOTE(review): 8(%rsi) is read unconditionally and the word loops exit
# on equality with num, so num of at least 2 appears to be assumed.
#-----------------------------------------------------------------------
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
	# Dispatch: the generic path handles num not divisible by 4 and
	# num below 8; otherwise bp != ap goes to the 4x multiply and
	# bp == ap goes to the 4x squaring code.
	testl	$3,%r9d
	jnz	.Lmul_enter
	cmpl	$8,%r9d
	jb	.Lmul_enter
	cmpq	%rsi,%rdx
	jne	.Lmul4x_enter
	jmp	.Lsqr4x_enter

.align	16
.Lmul_enter:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movl	%r9d,%r9d		# zero-extend num to 64 bits
	leaq	2(%r9),%r10
	movq	%rsp,%r11		# r11 = rsp after the six pushes
	negq	%r10
	leaq	(%rsp,%r10,8),%rsp	# reserve (num+2) qwords
	andq	$-1024,%rsp		# round rsp down to a 1024-byte boundary

	movq	%r11,8(%rsp,%r9,8)	# tp[num+1] = saved rsp
.Lmul_body:
	movq	%rdx,%r12		# r12 = bp (rdx is consumed by mulq)
	movq	(%r8),%r8		# r8 = n0 word
	movq	(%r12),%rbx		# rbx = bp[0]
	movq	(%rsi),%rax		# rax = ap[0]

	xorq	%r14,%r14		# r14 = i (outer index)
	xorq	%r15,%r15		# r15 = j (inner index)

	movq	%r8,%rbp
	mulq	%rbx			# rdx:rax = ap[0]*bp[0]
	movq	%rax,%r10
	movq	(%rcx),%rax		# rax = np[0]

	imulq	%r10,%rbp		# rbp = m = low word * n0
	movq	%rdx,%r11

	mulq	%rbp			# rdx:rax = np[0]*m
	addq	%rax,%r10		# low word is not stored; only its carry is kept
	movq	8(%rsi),%rax		# rax = ap[1]
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.L1st_enter

	# First pass (i = 0): tp[] = (ap[]*bp[0] + np[]*m) shifted down one word.
.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx			# ap[j]*bp[0]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15		# lea keeps the flags intact
	movq	%rdx,%r10

	mulq	%rbp			# np[j]*m
	cmpq	%r9,%r15
	jne	.L1st

	addq	%rax,%r13
	movq	(%rsi),%rax		# reload ap[0] for the next outer pass
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)	# tp[num-1]
	movq	%rdx,(%rsp,%r9,8)	# tp[num] = top carry

	leaq	1(%r14),%r14
	jmp	.Louter
	# Remaining passes (i = 1 .. num-1): as above, but the previous
	# tp[] words are added into the chain as well.
.align	16
.Louter:
	movq	(%r12,%r14,8),%rbx	# rbx = bp[i]
	xorq	%r15,%r15
	movq	%r8,%rbp
	movq	(%rsp),%r10		# r10 = tp[0]
	mulq	%rbx			# ap[0]*bp[i]
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		# rbp = m = low word * n0
	movq	%rdx,%r11

	mulq	%rbp			# np[0]*m
	addq	%rax,%r10		# low word is not stored; only its carry is kept
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10		# r10 = tp[1]
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx			# ap[j]*bp[i]
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp			# np[j]*m
	cmpq	%r9,%r15
	jne	.Linner

	addq	%rax,%r13
	movq	(%rsi),%rax		# reload ap[0] for the next outer pass
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10	# r10 = tp[num] from the previous pass
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)	# tp[num-1]
	movq	%rdx,(%rsp,%r9,8)	# tp[num] = top carry

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jl	.Louter

	# rp[] = tp[] - np[] with borrow. The xor below clears CF and
	# nothing up to the first sbbq touches the flags; inside the loop
	# lea and dec leave CF alone, so the borrow chains across words.
	xorq	%r14,%r14
	movq	(%rsp),%rax		# rax = tp[0]
	leaq	(%rsp),%rsi		# rsi = tp
	movq	%r9,%r15
	jmp	.Lsub
.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)
	movq	8(%rsi,%r14,8),%rax	# next tp word (tp[num] on the last pass)
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	# rax = tp[num] - borrow, expected to be 0 or all-ones. Use it as
	# a mask to pick the copy source without branching: tp when the
	# subtraction borrowed, otherwise rp (the difference).
	sbbq	$0,%rax
	xorq	%r14,%r14
	andq	%rax,%rsi
	notq	%rax
	movq	%rdi,%rcx
	andq	%rax,%rcx
	movq	%r9,%r15
	orq	%rcx,%rsi
.align	16
.Lcopy:
	movq	(%rsi,%r14,8),%rax
	movq	%r14,(%rsp,%r14,8)	# overwrite tp[i] with the index value i
	movq	%rax,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi	# rsi = saved rsp (points at the pushed r15)
	movq	$1,%rax			# return value
	movq	(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp		# drop the six saved registers
.Lmul_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	bn_mul_mont,.-bn_mul_mont
.type	bn_mul4x_mont,@function
.align	16
bn_mul4x_mont:
#-----------------------------------------------------------------------
# bn_mul4x_mont / .Lmul4x_enter -- multiply-and-reduce on 64-bit words
# with the word loops unrolled four words per iteration.
#
# Entered by a jump from bn_mul_mont when num is a multiple of 4,
# num is at least 8 and rdx != rsi.
#
# In:    rdi = rp    result, num 64-bit words (written)
#        rsi = ap    first operand, num words (read)
#        rdx = bp    second operand, num words (read)
#        rcx = np    modulus, num words (read)
#        r8  = n0p   pointer; only the word at (%r8) is read
#        r9d = num   word count (zero-extended before use)
# Out:   rax = 1
# Saved: rbx, rbp, r12-r15 are pushed on entry and reloaded on exit.
# Clobb: rcx, rdx, rsi, r8-r11, xmm0-xmm2, flags. rdi is used as a
#        scratch register in the loops and reloaded with rp afterwards.
# Stack: a scratch vector tp[] of num+4 qwords below a 1024-byte
#        aligned rsp: tp[0..num-1] running sum, tp[num] top carry,
#        tp[num+1] the caller's rsp, tp[num+2] the saved rp.
#
# NOTE(review): the names rp/ap/bp/np/n0 are inferred from the symbol
# name and register order -- confirm against the C prototype.
#-----------------------------------------------------------------------
.Lmul4x_enter:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	movl	%r9d,%r9d		# zero-extend num to 64 bits
	leaq	4(%r9),%r10
	movq	%rsp,%r11		# r11 = rsp after the six pushes
	negq	%r10
	leaq	(%rsp,%r10,8),%rsp	# reserve (num+4) qwords
	andq	$-1024,%rsp		# round rsp down to a 1024-byte boundary

	movq	%r11,8(%rsp,%r9,8)	# tp[num+1] = saved rsp
.Lmul4x_body:
	movq	%rdi,16(%rsp,%r9,8)	# tp[num+2] = rp; rdi becomes scratch
	movq	%rdx,%r12		# r12 = bp (rdx is consumed by mulq)
	movq	(%r8),%r8		# r8 = n0 word
	movq	(%r12),%rbx		# rbx = bp[0]
	movq	(%rsi),%rax		# rax = ap[0]

	xorq	%r14,%r14		# r14 = i (outer index)
	xorq	%r15,%r15		# r15 = j (inner index, steps by 4)

	movq	%r8,%rbp
	mulq	%rbx			# rdx:rax = ap[0]*bp[0]
	movq	%rax,%r10
	movq	(%rcx),%rax		# rax = np[0]

	imulq	%r10,%rbp		# rbp = m = low word * n0
	movq	%rdx,%r11

	mulq	%rbp			# np[0]*m
	addq	%rax,%r10		# low word is not stored; only its carry is kept
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx			# ap[1]*bp[0]
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[1]*m
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)		# tp[0]
	movq	%rdx,%r13
	jmp	.L1st4x
	# First pass (i = 0), four words per iteration. Products with
	# rbx (bp[0]) carry through r10/r11, products with rbp (m) carry
	# through r13/rdi; each pair of products stores one tp word.
.align	16
.L1st4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	4(%r15),%r15		# lea keeps the flags intact
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jl	.L1st4x

	# Tail of the first pass: the last two words (r15 == num here).
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		# reload ap[0] for the next outer pass
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)	# tp[num-1]
	movq	%rdi,(%rsp,%r15,8)	# tp[num] = top carry

	leaq	1(%r14),%r14
	# Remaining passes (i = 1 .. num-1): same shape as the first
	# pass, with the previous tp[] words added into the chain.
.align	4
.Louter4x:
	movq	(%r12,%r14,8),%rbx	# rbx = bp[i]
	xorq	%r15,%r15
	movq	(%rsp),%r10		# r10 = tp[0]
	movq	%r8,%rbp
	mulq	%rbx			# ap[0]*bp[i]
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		# rbp = m = low word * n0
	movq	%rdx,%r11

	mulq	%rbp			# np[0]*m
	addq	%rax,%r10		# low word is not stored; only its carry is kept
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx			# ap[1]*bp[i]
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%rsp),%r11		# + tp[1]
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp			# np[1]*m
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)		# tp[0]
	movq	%rdx,%r13
	jmp	.Linner4x
.align	16
.Linner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jl	.Linner4x

	# Tail of the pass: the last two words (r15 == num here).
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	1(%r14),%r14		# i++
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax		# reload ap[0] for the next outer pass
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%rsp,%r9,8),%r13	# + tp[num] from the previous pass
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)	# tp[num-1]
	movq	%rdi,(%rsp,%r15,8)	# tp[num] = top carry

	cmpq	%r9,%r14
	jl	.Louter4x
	# rp[] = tp[] - np[] with borrow, four words per iteration.
	# Only sub/sbb modify CF between here and the final sbbq
	# (mov, lea, dec and jmp leave it alone), so the borrow chains.
	movq	16(%rsp,%r9,8),%rdi	# rdi = rp again
	movq	0(%rsp),%rax
	pxor	%xmm0,%xmm0		# xmm0 = 0, used to wipe tp[] below
	movq	8(%rsp),%rdx
	shrq	$2,%r9			# r9 = num/4
	leaq	(%rsp),%rsi		# rsi = tp
	xorq	%r14,%r14

	subq	0(%rcx),%rax
	movq	16(%rsi),%rbx
	movq	24(%rsi),%rbp
	sbbq	8(%rcx),%rdx
	leaq	-1(%r9),%r15		# num/4 - 1 loop iterations, then a tail
	jmp	.Lsub4x
.align	16
.Lsub4x:
	movq	%rax,0(%rdi,%r14,8)
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	32(%rsi,%r14,8),%rax
	movq	40(%rsi,%r14,8),%rdx
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)
	movq	%rbp,24(%rdi,%r14,8)
	sbbq	32(%rcx,%r14,8),%rax
	movq	48(%rsi,%r14,8),%rbx
	movq	56(%rsi,%r14,8),%rbp
	sbbq	40(%rcx,%r14,8),%rdx
	leaq	4(%r14),%r14
	decq	%r15
	jnz	.Lsub4x

	movq	%rax,0(%rdi,%r14,8)
	movq	32(%rsi,%r14,8),%rax	# rax = tp[num] (r14 == num-4 here)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)

	# rax = tp[num] - borrow, expected to be 0 or all-ones. Use it as
	# a mask to pick the copy source without branching: tp when the
	# subtraction borrowed, otherwise rp (the difference).
	sbbq	$0,%rax
	movq	%rbp,24(%rdi,%r14,8)
	xorq	%r14,%r14		# r14 is a byte offset from here on
	andq	%rax,%rsi
	notq	%rax
	movq	%rdi,%rcx
	andq	%rax,%rcx
	leaq	-1(%r9),%r15
	orq	%rcx,%rsi

	# Copy the selected vector to rp 16 bytes at a time and store
	# zeros over tp[0..num-1]. The movdqa stores go to rsp-relative
	# addresses at multiples of 16 from the 1024-byte aligned rsp.
	movdqu	(%rsi),%xmm1
	movdqa	%xmm0,(%rsp)
	movdqu	%xmm1,(%rdi)
	jmp	.Lcopy4x
.align	16
.Lcopy4x:
	movdqu	16(%rsi,%r14,1),%xmm2
	movdqu	32(%rsi,%r14,1),%xmm1
	movdqa	%xmm0,16(%rsp,%r14,1)
	movdqu	%xmm2,16(%rdi,%r14,1)
	movdqa	%xmm0,32(%rsp,%r14,1)
	movdqu	%xmm1,32(%rdi,%r14,1)
	leaq	32(%r14),%r14
	decq	%r15
	jnz	.Lcopy4x

	shlq	$2,%r9			# r9 = num again
	movdqu	16(%rsi,%r14,1),%xmm2
	movdqa	%xmm0,16(%rsp,%r14,1)
	movdqu	%xmm2,16(%rdi,%r14,1)
	movq	8(%rsp,%r9,8),%rsi	# rsi = saved rsp (points at the pushed r15)
	movq	$1,%rax			# return value
	movq	(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp		# drop the six saved registers
.Lmul4x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.size	bn_mul4x_mont,.-bn_mul4x_mont
609238405Sjkim.type bn_sqr4x_mont,@function 610238405Sjkim.align 16 611238405Sjkimbn_sqr4x_mont: 612238405Sjkim.Lsqr4x_enter: 613238405Sjkim pushq 
%rbx 614238405Sjkim pushq %rbp 615238405Sjkim pushq %r12 616238405Sjkim pushq %r13 617238405Sjkim pushq %r14 618238405Sjkim pushq %r15 619238405Sjkim 620238405Sjkim shll $3,%r9d 621238405Sjkim xorq %r10,%r10 622238405Sjkim movq %rsp,%r11 623238405Sjkim subq %r9,%r10 624238405Sjkim movq (%r8),%r8 625238405Sjkim leaq -72(%rsp,%r10,2),%rsp 626238405Sjkim andq $-1024,%rsp 627238405Sjkim 628238405Sjkim 629238405Sjkim 630238405Sjkim 631238405Sjkim 632238405Sjkim 633238405Sjkim 634238405Sjkim 635238405Sjkim 636238405Sjkim 637238405Sjkim 638238405Sjkim movq %rdi,32(%rsp) 639238405Sjkim movq %rcx,40(%rsp) 640238405Sjkim movq %r8,48(%rsp) 641238405Sjkim movq %r11,56(%rsp) 642238405Sjkim.Lsqr4x_body: 643238405Sjkim 644238405Sjkim 645238405Sjkim 646238405Sjkim 647238405Sjkim 648238405Sjkim 649238405Sjkim 650238405Sjkim leaq 32(%r10),%rbp 651238405Sjkim leaq (%rsi,%r9,1),%rsi 652238405Sjkim 653238405Sjkim movq %r9,%rcx 654238405Sjkim 655238405Sjkim 656238405Sjkim movq -32(%rsi,%rbp,1),%r14 657238405Sjkim leaq 64(%rsp,%r9,2),%rdi 658238405Sjkim movq -24(%rsi,%rbp,1),%rax 659238405Sjkim leaq -32(%rdi,%rbp,1),%rdi 660238405Sjkim movq -16(%rsi,%rbp,1),%rbx 661238405Sjkim movq %rax,%r15 662238405Sjkim 663238405Sjkim mulq %r14 664238405Sjkim movq %rax,%r10 665238405Sjkim movq %rbx,%rax 666238405Sjkim movq %rdx,%r11 667238405Sjkim movq %r10,-24(%rdi,%rbp,1) 668238405Sjkim 669238405Sjkim xorq %r10,%r10 670238405Sjkim mulq %r14 671238405Sjkim addq %rax,%r11 672238405Sjkim movq %rbx,%rax 673238405Sjkim adcq %rdx,%r10 674238405Sjkim movq %r11,-16(%rdi,%rbp,1) 675238405Sjkim 676238405Sjkim leaq -16(%rbp),%rcx 677238405Sjkim 678238405Sjkim 679238405Sjkim movq 8(%rsi,%rcx,1),%rbx 680238405Sjkim mulq %r15 681238405Sjkim movq %rax,%r12 682238405Sjkim movq %rbx,%rax 683238405Sjkim movq %rdx,%r13 684238405Sjkim 685238405Sjkim xorq %r11,%r11 686238405Sjkim addq %r12,%r10 687238405Sjkim leaq 16(%rcx),%rcx 688238405Sjkim adcq $0,%r11 689238405Sjkim mulq %r14 690238405Sjkim addq %rax,%r10 
691238405Sjkim movq %rbx,%rax 692238405Sjkim adcq %rdx,%r11 693238405Sjkim movq %r10,-8(%rdi,%rcx,1) 694238405Sjkim jmp .Lsqr4x_1st 695238405Sjkim 696238405Sjkim.align 16 697238405Sjkim.Lsqr4x_1st: 698238405Sjkim movq (%rsi,%rcx,1),%rbx 699238405Sjkim xorq %r12,%r12 700238405Sjkim mulq %r15 701238405Sjkim addq %rax,%r13 702238405Sjkim movq %rbx,%rax 703238405Sjkim adcq %rdx,%r12 704238405Sjkim 705238405Sjkim xorq %r10,%r10 706238405Sjkim addq %r13,%r11 707238405Sjkim adcq $0,%r10 708238405Sjkim mulq %r14 709238405Sjkim addq %rax,%r11 710238405Sjkim movq %rbx,%rax 711238405Sjkim adcq %rdx,%r10 712238405Sjkim movq %r11,(%rdi,%rcx,1) 713238405Sjkim 714238405Sjkim 715238405Sjkim movq 8(%rsi,%rcx,1),%rbx 716238405Sjkim xorq %r13,%r13 717238405Sjkim mulq %r15 718238405Sjkim addq %rax,%r12 719238405Sjkim movq %rbx,%rax 720238405Sjkim adcq %rdx,%r13 721238405Sjkim 722238405Sjkim xorq %r11,%r11 723238405Sjkim addq %r12,%r10 724238405Sjkim adcq $0,%r11 725238405Sjkim mulq %r14 726238405Sjkim addq %rax,%r10 727238405Sjkim movq %rbx,%rax 728238405Sjkim adcq %rdx,%r11 729238405Sjkim movq %r10,8(%rdi,%rcx,1) 730238405Sjkim 731238405Sjkim movq 16(%rsi,%rcx,1),%rbx 732238405Sjkim xorq %r12,%r12 733238405Sjkim mulq %r15 734238405Sjkim addq %rax,%r13 735238405Sjkim movq %rbx,%rax 736238405Sjkim adcq %rdx,%r12 737238405Sjkim 738238405Sjkim xorq %r10,%r10 739238405Sjkim addq %r13,%r11 740238405Sjkim adcq $0,%r10 741238405Sjkim mulq %r14 742238405Sjkim addq %rax,%r11 743238405Sjkim movq %rbx,%rax 744238405Sjkim adcq %rdx,%r10 745238405Sjkim movq %r11,16(%rdi,%rcx,1) 746238405Sjkim 747238405Sjkim 748238405Sjkim movq 24(%rsi,%rcx,1),%rbx 749238405Sjkim xorq %r13,%r13 750238405Sjkim mulq %r15 751238405Sjkim addq %rax,%r12 752238405Sjkim movq %rbx,%rax 753238405Sjkim adcq %rdx,%r13 754238405Sjkim 755238405Sjkim xorq %r11,%r11 756238405Sjkim addq %r12,%r10 757238405Sjkim leaq 32(%rcx),%rcx 758238405Sjkim adcq $0,%r11 759238405Sjkim mulq %r14 760238405Sjkim addq %rax,%r10 761238405Sjkim movq 
%rbx,%rax 762238405Sjkim adcq %rdx,%r11 763238405Sjkim movq %r10,-8(%rdi,%rcx,1) 764238405Sjkim 765238405Sjkim cmpq $0,%rcx 766238405Sjkim jne .Lsqr4x_1st 767238405Sjkim 768238405Sjkim xorq %r12,%r12 769238405Sjkim addq %r11,%r13 770238405Sjkim adcq $0,%r12 771238405Sjkim mulq %r15 772238405Sjkim addq %rax,%r13 773238405Sjkim adcq %rdx,%r12 774238405Sjkim 775238405Sjkim movq %r13,(%rdi) 776238405Sjkim leaq 16(%rbp),%rbp 777238405Sjkim movq %r12,8(%rdi) 778238405Sjkim jmp .Lsqr4x_outer 779238405Sjkim 780238405Sjkim.align 16 781238405Sjkim.Lsqr4x_outer: 782238405Sjkim movq -32(%rsi,%rbp,1),%r14 783238405Sjkim leaq 64(%rsp,%r9,2),%rdi 784238405Sjkim movq -24(%rsi,%rbp,1),%rax 785238405Sjkim leaq -32(%rdi,%rbp,1),%rdi 786238405Sjkim movq -16(%rsi,%rbp,1),%rbx 787238405Sjkim movq %rax,%r15 788238405Sjkim 789238405Sjkim movq -24(%rdi,%rbp,1),%r10 790238405Sjkim xorq %r11,%r11 791238405Sjkim mulq %r14 792238405Sjkim addq %rax,%r10 793238405Sjkim movq %rbx,%rax 794238405Sjkim adcq %rdx,%r11 795238405Sjkim movq %r10,-24(%rdi,%rbp,1) 796238405Sjkim 797238405Sjkim xorq %r10,%r10 798238405Sjkim addq -16(%rdi,%rbp,1),%r11 799238405Sjkim adcq $0,%r10 800238405Sjkim mulq %r14 801238405Sjkim addq %rax,%r11 802238405Sjkim movq %rbx,%rax 803238405Sjkim adcq %rdx,%r10 804238405Sjkim movq %r11,-16(%rdi,%rbp,1) 805238405Sjkim 806238405Sjkim leaq -16(%rbp),%rcx 807238405Sjkim xorq %r12,%r12 808238405Sjkim 809238405Sjkim 810238405Sjkim movq 8(%rsi,%rcx,1),%rbx 811238405Sjkim xorq %r13,%r13 812238405Sjkim addq 8(%rdi,%rcx,1),%r12 813238405Sjkim adcq $0,%r13 814238405Sjkim mulq %r15 815238405Sjkim addq %rax,%r12 816238405Sjkim movq %rbx,%rax 817238405Sjkim adcq %rdx,%r13 818238405Sjkim 819238405Sjkim xorq %r11,%r11 820238405Sjkim addq %r12,%r10 821238405Sjkim adcq $0,%r11 822238405Sjkim mulq %r14 823238405Sjkim addq %rax,%r10 824238405Sjkim movq %rbx,%rax 825238405Sjkim adcq %rdx,%r11 826238405Sjkim movq %r10,8(%rdi,%rcx,1) 827238405Sjkim 828238405Sjkim leaq 16(%rcx),%rcx 829238405Sjkim 
jmp .Lsqr4x_inner 830238405Sjkim 831238405Sjkim.align 16 832238405Sjkim.Lsqr4x_inner: 833238405Sjkim movq (%rsi,%rcx,1),%rbx 834238405Sjkim xorq %r12,%r12 835238405Sjkim addq (%rdi,%rcx,1),%r13 836238405Sjkim adcq $0,%r12 837238405Sjkim mulq %r15 838238405Sjkim addq %rax,%r13 839238405Sjkim movq %rbx,%rax 840238405Sjkim adcq %rdx,%r12 841238405Sjkim 842238405Sjkim xorq %r10,%r10 843238405Sjkim addq %r13,%r11 844238405Sjkim adcq $0,%r10 845238405Sjkim mulq %r14 846238405Sjkim addq %rax,%r11 847238405Sjkim movq %rbx,%rax 848238405Sjkim adcq %rdx,%r10 849238405Sjkim movq %r11,(%rdi,%rcx,1) 850238405Sjkim 851238405Sjkim movq 8(%rsi,%rcx,1),%rbx 852238405Sjkim xorq %r13,%r13 853238405Sjkim addq 8(%rdi,%rcx,1),%r12 854238405Sjkim adcq $0,%r13 855238405Sjkim mulq %r15 856238405Sjkim addq %rax,%r12 857238405Sjkim movq %rbx,%rax 858238405Sjkim adcq %rdx,%r13 859238405Sjkim 860238405Sjkim xorq %r11,%r11 861238405Sjkim addq %r12,%r10 862238405Sjkim leaq 16(%rcx),%rcx 863238405Sjkim adcq $0,%r11 864238405Sjkim mulq %r14 865238405Sjkim addq %rax,%r10 866238405Sjkim movq %rbx,%rax 867238405Sjkim adcq %rdx,%r11 868238405Sjkim movq %r10,-8(%rdi,%rcx,1) 869238405Sjkim 870238405Sjkim cmpq $0,%rcx 871238405Sjkim jne .Lsqr4x_inner 872238405Sjkim 873238405Sjkim xorq %r12,%r12 874238405Sjkim addq %r11,%r13 875238405Sjkim adcq $0,%r12 876238405Sjkim mulq %r15 877238405Sjkim addq %rax,%r13 878238405Sjkim adcq %rdx,%r12 879238405Sjkim 880238405Sjkim movq %r13,(%rdi) 881238405Sjkim movq %r12,8(%rdi) 882238405Sjkim 883238405Sjkim addq $16,%rbp 884238405Sjkim jnz .Lsqr4x_outer 885238405Sjkim 886238405Sjkim 887238405Sjkim movq -32(%rsi),%r14 888238405Sjkim leaq 64(%rsp,%r9,2),%rdi 889238405Sjkim movq -24(%rsi),%rax 890238405Sjkim leaq -32(%rdi,%rbp,1),%rdi 891238405Sjkim movq -16(%rsi),%rbx 892238405Sjkim movq %rax,%r15 893238405Sjkim 894238405Sjkim xorq %r11,%r11 895238405Sjkim mulq %r14 896238405Sjkim addq %rax,%r10 897238405Sjkim movq %rbx,%rax 898238405Sjkim adcq %rdx,%r11 899238405Sjkim 
movq %r10,-24(%rdi) 900238405Sjkim 901238405Sjkim xorq %r10,%r10 902238405Sjkim addq %r13,%r11 903238405Sjkim adcq $0,%r10 904238405Sjkim mulq %r14 905238405Sjkim addq %rax,%r11 906238405Sjkim movq %rbx,%rax 907238405Sjkim adcq %rdx,%r10 908238405Sjkim movq %r11,-16(%rdi) 909238405Sjkim 910238405Sjkim movq -8(%rsi),%rbx 911238405Sjkim mulq %r15 912238405Sjkim addq %rax,%r12 913238405Sjkim movq %rbx,%rax 914238405Sjkim adcq $0,%rdx 915238405Sjkim 916238405Sjkim xorq %r11,%r11 917238405Sjkim addq %r12,%r10 918238405Sjkim movq %rdx,%r13 919238405Sjkim adcq $0,%r11 920238405Sjkim mulq %r14 921238405Sjkim addq %rax,%r10 922238405Sjkim movq %rbx,%rax 923238405Sjkim adcq %rdx,%r11 924238405Sjkim movq %r10,-8(%rdi) 925238405Sjkim 926238405Sjkim xorq %r12,%r12 927238405Sjkim addq %r11,%r13 928238405Sjkim adcq $0,%r12 929238405Sjkim mulq %r15 930238405Sjkim addq %rax,%r13 931238405Sjkim movq -16(%rsi),%rax 932238405Sjkim adcq %rdx,%r12 933238405Sjkim 934238405Sjkim movq %r13,(%rdi) 935238405Sjkim movq %r12,8(%rdi) 936238405Sjkim 937238405Sjkim mulq %rbx 938238405Sjkim addq $16,%rbp 939238405Sjkim xorq %r14,%r14 940238405Sjkim subq %r9,%rbp 941238405Sjkim xorq %r15,%r15 942238405Sjkim 943238405Sjkim addq %r12,%rax 944238405Sjkim adcq $0,%rdx 945238405Sjkim movq %rax,8(%rdi) 946238405Sjkim movq %rdx,16(%rdi) 947238405Sjkim movq %r15,24(%rdi) 948238405Sjkim 949238405Sjkim movq -16(%rsi,%rbp,1),%rax 950238405Sjkim leaq 64(%rsp,%r9,2),%rdi 951238405Sjkim xorq %r10,%r10 952238405Sjkim movq -24(%rdi,%rbp,2),%r11 953238405Sjkim 954238405Sjkim leaq (%r14,%r10,2),%r12 955238405Sjkim shrq $63,%r10 956238405Sjkim leaq (%rcx,%r11,2),%r13 957238405Sjkim shrq $63,%r11 958238405Sjkim orq %r10,%r13 959238405Sjkim movq -16(%rdi,%rbp,2),%r10 960238405Sjkim movq %r11,%r14 961238405Sjkim mulq %rax 962238405Sjkim negq %r15 963238405Sjkim movq -8(%rdi,%rbp,2),%r11 964238405Sjkim adcq %rax,%r12 965238405Sjkim movq -8(%rsi,%rbp,1),%rax 966238405Sjkim movq %r12,-32(%rdi,%rbp,2) 967238405Sjkim adcq 
%rdx,%r13 968238405Sjkim 969238405Sjkim leaq (%r14,%r10,2),%rbx 970238405Sjkim movq %r13,-24(%rdi,%rbp,2) 971238405Sjkim sbbq %r15,%r15 972238405Sjkim shrq $63,%r10 973238405Sjkim leaq (%rcx,%r11,2),%r8 974238405Sjkim shrq $63,%r11 975238405Sjkim orq %r10,%r8 976238405Sjkim movq 0(%rdi,%rbp,2),%r10 977238405Sjkim movq %r11,%r14 978238405Sjkim mulq %rax 979238405Sjkim negq %r15 980238405Sjkim movq 8(%rdi,%rbp,2),%r11 981238405Sjkim adcq %rax,%rbx 982238405Sjkim movq 0(%rsi,%rbp,1),%rax 983238405Sjkim movq %rbx,-16(%rdi,%rbp,2) 984238405Sjkim adcq %rdx,%r8 985238405Sjkim leaq 16(%rbp),%rbp 986238405Sjkim movq %r8,-40(%rdi,%rbp,2) 987238405Sjkim sbbq %r15,%r15 988238405Sjkim jmp .Lsqr4x_shift_n_add 989238405Sjkim 990238405Sjkim.align 16 991238405Sjkim.Lsqr4x_shift_n_add: 992238405Sjkim leaq (%r14,%r10,2),%r12 993238405Sjkim shrq $63,%r10 994238405Sjkim leaq (%rcx,%r11,2),%r13 995238405Sjkim shrq $63,%r11 996238405Sjkim orq %r10,%r13 997238405Sjkim movq -16(%rdi,%rbp,2),%r10 998238405Sjkim movq %r11,%r14 999238405Sjkim mulq %rax 1000238405Sjkim negq %r15 1001238405Sjkim movq -8(%rdi,%rbp,2),%r11 1002238405Sjkim adcq %rax,%r12 1003238405Sjkim movq -8(%rsi,%rbp,1),%rax 1004238405Sjkim movq %r12,-32(%rdi,%rbp,2) 1005238405Sjkim adcq %rdx,%r13 1006238405Sjkim 1007238405Sjkim leaq (%r14,%r10,2),%rbx 1008238405Sjkim movq %r13,-24(%rdi,%rbp,2) 1009238405Sjkim sbbq %r15,%r15 1010238405Sjkim shrq $63,%r10 1011238405Sjkim leaq (%rcx,%r11,2),%r8 1012238405Sjkim shrq $63,%r11 1013238405Sjkim orq %r10,%r8 1014238405Sjkim movq 0(%rdi,%rbp,2),%r10 1015238405Sjkim movq %r11,%r14 1016238405Sjkim mulq %rax 1017238405Sjkim negq %r15 1018238405Sjkim movq 8(%rdi,%rbp,2),%r11 1019238405Sjkim adcq %rax,%rbx 1020238405Sjkim movq 0(%rsi,%rbp,1),%rax 1021238405Sjkim movq %rbx,-16(%rdi,%rbp,2) 1022238405Sjkim adcq %rdx,%r8 1023238405Sjkim 1024238405Sjkim leaq (%r14,%r10,2),%r12 1025238405Sjkim movq %r8,-8(%rdi,%rbp,2) 1026238405Sjkim sbbq %r15,%r15 1027238405Sjkim shrq $63,%r10 1028238405Sjkim 
leaq (%rcx,%r11,2),%r13 1029238405Sjkim shrq $63,%r11 1030238405Sjkim orq %r10,%r13 1031238405Sjkim movq 16(%rdi,%rbp,2),%r10 1032238405Sjkim movq %r11,%r14 1033238405Sjkim mulq %rax 1034238405Sjkim negq %r15 1035238405Sjkim movq 24(%rdi,%rbp,2),%r11 1036238405Sjkim adcq %rax,%r12 1037238405Sjkim movq 8(%rsi,%rbp,1),%rax 1038238405Sjkim movq %r12,0(%rdi,%rbp,2) 1039238405Sjkim adcq %rdx,%r13 1040238405Sjkim 1041238405Sjkim leaq (%r14,%r10,2),%rbx 1042238405Sjkim movq %r13,8(%rdi,%rbp,2) 1043238405Sjkim sbbq %r15,%r15 1044238405Sjkim shrq $63,%r10 1045238405Sjkim leaq (%rcx,%r11,2),%r8 1046238405Sjkim shrq $63,%r11 1047238405Sjkim orq %r10,%r8 1048238405Sjkim movq 32(%rdi,%rbp,2),%r10 1049238405Sjkim movq %r11,%r14 1050238405Sjkim mulq %rax 1051238405Sjkim negq %r15 1052238405Sjkim movq 40(%rdi,%rbp,2),%r11 1053238405Sjkim adcq %rax,%rbx 1054238405Sjkim movq 16(%rsi,%rbp,1),%rax 1055238405Sjkim movq %rbx,16(%rdi,%rbp,2) 1056238405Sjkim adcq %rdx,%r8 1057238405Sjkim movq %r8,24(%rdi,%rbp,2) 1058238405Sjkim sbbq %r15,%r15 1059238405Sjkim addq $32,%rbp 1060238405Sjkim jnz .Lsqr4x_shift_n_add 1061238405Sjkim 1062238405Sjkim leaq (%r14,%r10,2),%r12 1063238405Sjkim shrq $63,%r10 1064238405Sjkim leaq (%rcx,%r11,2),%r13 1065238405Sjkim shrq $63,%r11 1066238405Sjkim orq %r10,%r13 1067238405Sjkim movq -16(%rdi),%r10 1068238405Sjkim movq %r11,%r14 1069238405Sjkim mulq %rax 1070238405Sjkim negq %r15 1071238405Sjkim movq -8(%rdi),%r11 1072238405Sjkim adcq %rax,%r12 1073238405Sjkim movq -8(%rsi),%rax 1074238405Sjkim movq %r12,-32(%rdi) 1075238405Sjkim adcq %rdx,%r13 1076238405Sjkim 1077238405Sjkim leaq (%r14,%r10,2),%rbx 1078238405Sjkim movq %r13,-24(%rdi) 1079238405Sjkim sbbq %r15,%r15 1080238405Sjkim shrq $63,%r10 1081238405Sjkim leaq (%rcx,%r11,2),%r8 1082238405Sjkim shrq $63,%r11 1083238405Sjkim orq %r10,%r8 1084238405Sjkim mulq %rax 1085238405Sjkim negq %r15 1086238405Sjkim adcq %rax,%rbx 1087238405Sjkim adcq %rdx,%r8 1088238405Sjkim movq %rbx,-16(%rdi) 1089238405Sjkim movq 
%r8,-8(%rdi) 1090238405Sjkim movq 40(%rsp),%rsi 1091238405Sjkim movq 48(%rsp),%r8 1092238405Sjkim xorq %rcx,%rcx 1093238405Sjkim movq %r9,0(%rsp) 1094238405Sjkim subq %r9,%rcx 1095238405Sjkim movq 64(%rsp),%r10 1096238405Sjkim movq %r8,%r14 1097238405Sjkim leaq 64(%rsp,%r9,2),%rax 1098238405Sjkim leaq 64(%rsp,%r9,1),%rdi 1099238405Sjkim movq %rax,8(%rsp) 1100238405Sjkim leaq (%rsi,%r9,1),%rsi 1101238405Sjkim xorq %rbp,%rbp 1102238405Sjkim 1103238405Sjkim movq 0(%rsi,%rcx,1),%rax 1104238405Sjkim movq 8(%rsi,%rcx,1),%r9 1105238405Sjkim imulq %r10,%r14 1106238405Sjkim movq %rax,%rbx 1107238405Sjkim jmp .Lsqr4x_mont_outer 1108238405Sjkim 1109238405Sjkim.align 16 1110238405Sjkim.Lsqr4x_mont_outer: 1111238405Sjkim xorq %r11,%r11 1112238405Sjkim mulq %r14 1113238405Sjkim addq %rax,%r10 1114238405Sjkim movq %r9,%rax 1115238405Sjkim adcq %rdx,%r11 1116238405Sjkim movq %r8,%r15 1117238405Sjkim 1118238405Sjkim xorq %r10,%r10 1119238405Sjkim addq 8(%rdi,%rcx,1),%r11 1120238405Sjkim adcq $0,%r10 1121238405Sjkim mulq %r14 1122238405Sjkim addq %rax,%r11 1123238405Sjkim movq %rbx,%rax 1124238405Sjkim adcq %rdx,%r10 1125238405Sjkim 1126238405Sjkim imulq %r11,%r15 1127238405Sjkim 1128238405Sjkim movq 16(%rsi,%rcx,1),%rbx 1129238405Sjkim xorq %r13,%r13 1130238405Sjkim addq %r11,%r12 1131238405Sjkim adcq $0,%r13 1132238405Sjkim mulq %r15 1133238405Sjkim addq %rax,%r12 1134238405Sjkim movq %rbx,%rax 1135238405Sjkim adcq %rdx,%r13 1136238405Sjkim movq %r12,8(%rdi,%rcx,1) 1137238405Sjkim 1138238405Sjkim xorq %r11,%r11 1139238405Sjkim addq 16(%rdi,%rcx,1),%r10 1140238405Sjkim adcq $0,%r11 1141238405Sjkim mulq %r14 1142238405Sjkim addq %rax,%r10 1143238405Sjkim movq %r9,%rax 1144238405Sjkim adcq %rdx,%r11 1145238405Sjkim 1146238405Sjkim movq 24(%rsi,%rcx,1),%r9 1147238405Sjkim xorq %r12,%r12 1148238405Sjkim addq %r10,%r13 1149238405Sjkim adcq $0,%r12 1150238405Sjkim mulq %r15 1151238405Sjkim addq %rax,%r13 1152238405Sjkim movq %r9,%rax 1153238405Sjkim adcq %rdx,%r12 1154238405Sjkim movq 
%r13,16(%rdi,%rcx,1) 1155238405Sjkim 1156238405Sjkim xorq %r10,%r10 1157238405Sjkim addq 24(%rdi,%rcx,1),%r11 1158238405Sjkim leaq 32(%rcx),%rcx 1159238405Sjkim adcq $0,%r10 1160238405Sjkim mulq %r14 1161238405Sjkim addq %rax,%r11 1162238405Sjkim movq %rbx,%rax 1163238405Sjkim adcq %rdx,%r10 1164238405Sjkim jmp .Lsqr4x_mont_inner 1165238405Sjkim 1166238405Sjkim.align 16 1167238405Sjkim.Lsqr4x_mont_inner: 1168238405Sjkim movq (%rsi,%rcx,1),%rbx 1169238405Sjkim xorq %r13,%r13 1170238405Sjkim addq %r11,%r12 1171238405Sjkim adcq $0,%r13 1172238405Sjkim mulq %r15 1173238405Sjkim addq %rax,%r12 1174238405Sjkim movq %rbx,%rax 1175238405Sjkim adcq %rdx,%r13 1176238405Sjkim movq %r12,-8(%rdi,%rcx,1) 1177238405Sjkim 1178238405Sjkim xorq %r11,%r11 1179238405Sjkim addq (%rdi,%rcx,1),%r10 1180238405Sjkim adcq $0,%r11 1181238405Sjkim mulq %r14 1182238405Sjkim addq %rax,%r10 1183238405Sjkim movq %r9,%rax 1184238405Sjkim adcq %rdx,%r11 1185238405Sjkim 1186238405Sjkim movq 8(%rsi,%rcx,1),%r9 1187238405Sjkim xorq %r12,%r12 1188238405Sjkim addq %r10,%r13 1189238405Sjkim adcq $0,%r12 1190238405Sjkim mulq %r15 1191238405Sjkim addq %rax,%r13 1192238405Sjkim movq %r9,%rax 1193238405Sjkim adcq %rdx,%r12 1194238405Sjkim movq %r13,(%rdi,%rcx,1) 1195238405Sjkim 1196238405Sjkim xorq %r10,%r10 1197238405Sjkim addq 8(%rdi,%rcx,1),%r11 1198238405Sjkim adcq $0,%r10 1199238405Sjkim mulq %r14 1200238405Sjkim addq %rax,%r11 1201238405Sjkim movq %rbx,%rax 1202238405Sjkim adcq %rdx,%r10 1203238405Sjkim 1204238405Sjkim 1205238405Sjkim movq 16(%rsi,%rcx,1),%rbx 1206238405Sjkim xorq %r13,%r13 1207238405Sjkim addq %r11,%r12 1208238405Sjkim adcq $0,%r13 1209238405Sjkim mulq %r15 1210238405Sjkim addq %rax,%r12 1211238405Sjkim movq %rbx,%rax 1212238405Sjkim adcq %rdx,%r13 1213238405Sjkim movq %r12,8(%rdi,%rcx,1) 1214238405Sjkim 1215238405Sjkim xorq %r11,%r11 1216238405Sjkim addq 16(%rdi,%rcx,1),%r10 1217238405Sjkim adcq $0,%r11 1218238405Sjkim mulq %r14 1219238405Sjkim addq %rax,%r10 1220238405Sjkim movq 
%r9,%rax 1221238405Sjkim adcq %rdx,%r11 1222238405Sjkim 1223238405Sjkim movq 24(%rsi,%rcx,1),%r9 1224238405Sjkim xorq %r12,%r12 1225238405Sjkim addq %r10,%r13 1226238405Sjkim adcq $0,%r12 1227238405Sjkim mulq %r15 1228238405Sjkim addq %rax,%r13 1229238405Sjkim movq %r9,%rax 1230238405Sjkim adcq %rdx,%r12 1231238405Sjkim movq %r13,16(%rdi,%rcx,1) 1232238405Sjkim 1233238405Sjkim xorq %r10,%r10 1234238405Sjkim addq 24(%rdi,%rcx,1),%r11 1235238405Sjkim leaq 32(%rcx),%rcx 1236238405Sjkim adcq $0,%r10 1237238405Sjkim mulq %r14 1238238405Sjkim addq %rax,%r11 1239238405Sjkim movq %rbx,%rax 1240238405Sjkim adcq %rdx,%r10 1241238405Sjkim cmpq $0,%rcx 1242238405Sjkim jne .Lsqr4x_mont_inner 1243238405Sjkim 1244238405Sjkim subq 0(%rsp),%rcx 1245238405Sjkim movq %r8,%r14 1246238405Sjkim 1247238405Sjkim xorq %r13,%r13 1248238405Sjkim addq %r11,%r12 1249238405Sjkim adcq $0,%r13 1250238405Sjkim mulq %r15 1251238405Sjkim addq %rax,%r12 1252238405Sjkim movq %r9,%rax 1253238405Sjkim adcq %rdx,%r13 1254238405Sjkim movq %r12,-8(%rdi) 1255238405Sjkim 1256238405Sjkim xorq %r11,%r11 1257238405Sjkim addq (%rdi),%r10 1258238405Sjkim adcq $0,%r11 1259238405Sjkim movq 0(%rsi,%rcx,1),%rbx 1260238405Sjkim addq %rbp,%r10 1261238405Sjkim adcq $0,%r11 1262238405Sjkim 1263238405Sjkim imulq 16(%rdi,%rcx,1),%r14 1264238405Sjkim xorq %r12,%r12 1265238405Sjkim movq 8(%rsi,%rcx,1),%r9 1266238405Sjkim addq %r10,%r13 1267238405Sjkim movq 16(%rdi,%rcx,1),%r10 1268238405Sjkim adcq $0,%r12 1269238405Sjkim mulq %r15 1270238405Sjkim addq %rax,%r13 1271238405Sjkim movq %rbx,%rax 1272238405Sjkim adcq %rdx,%r12 1273238405Sjkim movq %r13,(%rdi) 1274238405Sjkim 1275238405Sjkim xorq %rbp,%rbp 1276238405Sjkim addq 8(%rdi),%r12 1277238405Sjkim adcq %rbp,%rbp 1278238405Sjkim addq %r11,%r12 1279238405Sjkim leaq 16(%rdi),%rdi 1280238405Sjkim adcq $0,%rbp 1281238405Sjkim movq %r12,-8(%rdi) 1282238405Sjkim cmpq 8(%rsp),%rdi 1283238405Sjkim jb .Lsqr4x_mont_outer 1284238405Sjkim 1285238405Sjkim movq 0(%rsp),%r9 
1286238405Sjkim movq %rbp,(%rdi) 1287238405Sjkim movq 64(%rsp,%r9,1),%rax 1288238405Sjkim leaq 64(%rsp,%r9,1),%rbx 1289238405Sjkim movq 40(%rsp),%rsi 1290238405Sjkim shrq $5,%r9 1291238405Sjkim movq 8(%rbx),%rdx 1292238405Sjkim xorq %rbp,%rbp 1293238405Sjkim 1294238405Sjkim movq 32(%rsp),%rdi 1295238405Sjkim subq 0(%rsi),%rax 1296238405Sjkim movq 16(%rbx),%r10 1297238405Sjkim movq 24(%rbx),%r11 1298238405Sjkim sbbq 8(%rsi),%rdx 1299238405Sjkim leaq -1(%r9),%rcx 1300238405Sjkim jmp .Lsqr4x_sub 1301238405Sjkim.align 16 1302238405Sjkim.Lsqr4x_sub: 1303238405Sjkim movq %rax,0(%rdi,%rbp,8) 1304238405Sjkim movq %rdx,8(%rdi,%rbp,8) 1305238405Sjkim sbbq 16(%rsi,%rbp,8),%r10 1306238405Sjkim movq 32(%rbx,%rbp,8),%rax 1307238405Sjkim movq 40(%rbx,%rbp,8),%rdx 1308238405Sjkim sbbq 24(%rsi,%rbp,8),%r11 1309238405Sjkim movq %r10,16(%rdi,%rbp,8) 1310238405Sjkim movq %r11,24(%rdi,%rbp,8) 1311238405Sjkim sbbq 32(%rsi,%rbp,8),%rax 1312238405Sjkim movq 48(%rbx,%rbp,8),%r10 1313238405Sjkim movq 56(%rbx,%rbp,8),%r11 1314238405Sjkim sbbq 40(%rsi,%rbp,8),%rdx 1315238405Sjkim leaq 4(%rbp),%rbp 1316238405Sjkim decq %rcx 1317238405Sjkim jnz .Lsqr4x_sub 1318238405Sjkim 1319238405Sjkim movq %rax,0(%rdi,%rbp,8) 1320238405Sjkim movq 32(%rbx,%rbp,8),%rax 1321238405Sjkim sbbq 16(%rsi,%rbp,8),%r10 1322238405Sjkim movq %rdx,8(%rdi,%rbp,8) 1323238405Sjkim sbbq 24(%rsi,%rbp,8),%r11 1324238405Sjkim movq %r10,16(%rdi,%rbp,8) 1325238405Sjkim 1326238405Sjkim sbbq $0,%rax 1327238405Sjkim movq %r11,24(%rdi,%rbp,8) 1328238405Sjkim xorq %rbp,%rbp 1329238405Sjkim andq %rax,%rbx 1330238405Sjkim notq %rax 1331238405Sjkim movq %rdi,%rsi 1332238405Sjkim andq %rax,%rsi 1333238405Sjkim leaq -1(%r9),%rcx 1334238405Sjkim orq %rsi,%rbx 1335238405Sjkim 1336238405Sjkim pxor %xmm0,%xmm0 1337238405Sjkim leaq 64(%rsp,%r9,8),%rsi 1338238405Sjkim movdqu (%rbx),%xmm1 1339238405Sjkim leaq (%rsi,%r9,8),%rsi 1340238405Sjkim movdqa %xmm0,64(%rsp) 1341238405Sjkim movdqa %xmm0,(%rsi) 1342238405Sjkim movdqu %xmm1,(%rdi) 
1343238405Sjkim jmp .Lsqr4x_copy 1344238405Sjkim.align 16 1345238405Sjkim.Lsqr4x_copy: 1346238405Sjkim movdqu 16(%rbx,%rbp,1),%xmm2 1347238405Sjkim movdqu 32(%rbx,%rbp,1),%xmm1 1348238405Sjkim movdqa %xmm0,80(%rsp,%rbp,1) 1349238405Sjkim movdqa %xmm0,96(%rsp,%rbp,1) 1350238405Sjkim movdqa %xmm0,16(%rsi,%rbp,1) 1351238405Sjkim movdqa %xmm0,32(%rsi,%rbp,1) 1352238405Sjkim movdqu %xmm2,16(%rdi,%rbp,1) 1353238405Sjkim movdqu %xmm1,32(%rdi,%rbp,1) 1354238405Sjkim leaq 32(%rbp),%rbp 1355238405Sjkim decq %rcx 1356238405Sjkim jnz .Lsqr4x_copy 1357238405Sjkim 1358238405Sjkim movdqu 16(%rbx,%rbp,1),%xmm2 1359238405Sjkim movdqa %xmm0,80(%rsp,%rbp,1) 1360238405Sjkim movdqa %xmm0,16(%rsi,%rbp,1) 1361238405Sjkim movdqu %xmm2,16(%rdi,%rbp,1) 1362238405Sjkim movq 56(%rsp),%rsi 1363238405Sjkim movq $1,%rax 1364238405Sjkim movq 0(%rsi),%r15 1365238405Sjkim movq 8(%rsi),%r14 1366238405Sjkim movq 16(%rsi),%r13 1367238405Sjkim movq 24(%rsi),%r12 1368238405Sjkim movq 32(%rsi),%rbp 1369238405Sjkim movq 40(%rsi),%rbx 1370238405Sjkim leaq 48(%rsi),%rsp 1371238405Sjkim.Lsqr4x_epilogue: 1372238405Sjkim .byte 0xf3,0xc3 1373238405Sjkim.size bn_sqr4x_mont,.-bn_sqr4x_mont 1374238405Sjkim.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 1375238405Sjkim.align 16 1376