# ghash-x86_64.S revision 299966
# $FreeBSD: stable/10/secure/lib/libcrypto/amd64/ghash-x86_64.S 299966 2016-05-16 19:30:27Z jkim $
# Do not modify. This file is auto-generated from ghash-x86_64.pl.
#
# NOTE(review): this copy had been flattened (newlines lost, original line
# numbers fused into the token stream). Reconstructed below as valid GAS
# (AT&T syntax, SysV AMD64 ABI) with every instruction, directive and data
# value unchanged. Comments are review annotations only; they will be lost
# if the file is regenerated from ghash-x86_64.pl.
#
# The ".byte 102,15,58,68,..." sequences are hand-encoded SSE instructions
# for old assemblers: 0x66 0x0F 0x3A 0x44 = pclmulqdq, and
# 0x66 0x0F 0x38 0x00 = pshufb.
.text

#-----------------------------------------------------------------------
# void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
# In:   %rdi = Xi (16-byte hash value, updated in place)
#       %rsi = Htable (4-bit multiplication table)
# Uses the .Lrem_4bit reduction table; clobbers rax,rbx,rcx,rdx,r8-r12.
# Saved regs are reclaimed via 16(%rsp)/lea — only %rbx is reloaded
# (perlasm-generated epilogue).
#-----------------------------------------------------------------------
.globl	gcm_gmult_4bit
.type	gcm_gmult_4bit,@function
.align	16
gcm_gmult_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
.Lgmult_prologue:

	movzbq	15(%rdi),%r8
	leaq	.Lrem_4bit(%rip),%r11
	xorq	%rax,%rax
	xorq	%rbx,%rbx
	movb	%r8b,%al
	movb	%r8b,%bl
	shlb	$4,%al
	movq	$14,%rcx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	movq	%r8,%rdx
	jmp	.Loop1

.align	16
.Loop1:
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	movb	(%rdi,%rcx,1),%al
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	movb	%al,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	shlb	$4,%al
	xorq	%r10,%r8
	decq	%rcx
	js	.Lbreak1

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8
	jmp	.Loop1

.align	16
.Lbreak1:
	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$240,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8

	shrq	$4,%r8
	andq	$15,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	xorq	%r10,%r8
	xorq	(%r11,%rdx,8),%r9

	bswapq	%r8
	bswapq	%r9
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	movq	16(%rsp),%rbx
	leaq	24(%rsp),%rsp
.Lgmult_epilogue:
	.byte	0xf3,0xc3
.size	gcm_gmult_4bit,.-gcm_gmult_4bit

#-----------------------------------------------------------------------
# void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
#                     const u8 *inp, size_t len)
# In:   %rdi = Xi, %rsi = Htable, %rdx = inp, %rcx = len
# 8-bit-at-a-time variant: first builds a 256-byte nibble table on the
# stack (0..15(%rsp)) and a shifted copy of Htable around %rbp, then
# processes the input 16 bytes per .Louter_loop iteration using the
# .Lrem_8bit reduction table.
#-----------------------------------------------------------------------
.globl	gcm_ghash_4bit
.type	gcm_ghash_4bit,@function
.align	16
gcm_ghash_4bit:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	subq	$280,%rsp
.Lghash_prologue:
	movq	%rdx,%r14
	movq	%rcx,%r15
	subq	$-128,%rsi
	leaq	16+128(%rsp),%rbp
	xorl	%edx,%edx
	# --- unrolled table setup: one iteration per Htable entry ---
	movq	0+0-128(%rsi),%r8
	movq	0+8-128(%rsi),%rax
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	16+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	16+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,0(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,0(%rbp)
	movq	32+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,0-128(%rbp)
	movq	32+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,1(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,8(%rbp)
	movq	48+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,8-128(%rbp)
	movq	48+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,2(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,16(%rbp)
	movq	64+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,16-128(%rbp)
	movq	64+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,3(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,24(%rbp)
	movq	80+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,24-128(%rbp)
	movq	80+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,4(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,32(%rbp)
	movq	96+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,32-128(%rbp)
	movq	96+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,5(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,40(%rbp)
	movq	112+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,40-128(%rbp)
	movq	112+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,6(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,48(%rbp)
	movq	128+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,48-128(%rbp)
	movq	128+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,7(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,56(%rbp)
	movq	144+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,56-128(%rbp)
	movq	144+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,8(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,64(%rbp)
	movq	160+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,64-128(%rbp)
	movq	160+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,9(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,72(%rbp)
	movq	176+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,72-128(%rbp)
	movq	176+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,10(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,80(%rbp)
	movq	192+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,80-128(%rbp)
	movq	192+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,11(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,88(%rbp)
	movq	208+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,88-128(%rbp)
	movq	208+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,12(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,96(%rbp)
	movq	224+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,96-128(%rbp)
	movq	224+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,13(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,104(%rbp)
	movq	240+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,104-128(%rbp)
	movq	240+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,14(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,112(%rbp)
	shlb	$4,%dl
	movq	%rax,112-128(%rbp)
	shlq	$60,%r10
	movb	%dl,15(%rsp)
	orq	%r10,%rbx
	movq	%r9,120(%rbp)
	movq	%rbx,120-128(%rbp)
	addq	$-128,%rsi
	movq	8(%rdi),%r8
	movq	0(%rdi),%r9
	addq	%r14,%r15		# r15 = inp + len (end pointer)
	leaq	.Lrem_8bit(%rip),%r11
	jmp	.Louter_loop
.align	16
.Louter_loop:
	xorq	(%r14),%r9		# Xi ^= next 16 input bytes
	movq	8(%r14),%rdx
	leaq	16(%r14),%r14
	xorq	%r8,%rdx
	movq	%r9,(%rdi)
	movq	%rdx,8(%rdi)
	shrq	$32,%rdx
	xorq	%rax,%rax
	roll	$8,%edx
	movb	%dl,%al
	movzbl	%dl,%ebx
	shlb	$4,%al
	shrl	$4,%ebx
	roll	$8,%edx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	xorq	%r8,%r12
	movq	%r9,%r10
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	8(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	0(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	andl	$240,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	-4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	movzwq	(%r11,%r12,2),%r12
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	shlq	$48,%r12
	xorq	%r10,%r8
	xorq	%r12,%r9
	movzbq	%r8b,%r13
	shrq	$4,%r8
	movq	%r9,%r10
	shlb	$4,%r13b
	shrq	$4,%r9
	xorq	8(%rsi,%rcx,1),%r8
	movzwq	(%r11,%r13,2),%r13
	shlq	$60,%r10
	xorq	(%rsi,%rcx,1),%r9
	xorq	%r10,%r8
	shlq	$48,%r13
	bswapq	%r8
	xorq	%r13,%r9
	bswapq	%r9
	cmpq	%r15,%r14
	jb	.Louter_loop
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	leaq	280(%rsp),%rsi
	movq	0(%rsi),%r15
	movq	8(%rsi),%r14
	movq	16(%rsi),%r13
	movq	24(%rsi),%r12
	movq	32(%rsi),%rbp
	movq	40(%rsi),%rbx
	leaq	48(%rsi),%rsp
.Lghash_epilogue:
	.byte	0xf3,0xc3
.size	gcm_ghash_4bit,.-gcm_ghash_4bit

#-----------------------------------------------------------------------
# void gcm_init_clmul(u128 Htable[16], const u64 Xi[2])
# In:   %rdi = Htable (output), %rsi = H (hash subkey)
# PCLMULQDQ path: stores H<<1 mod P and H^2 at (%rdi)/16(%rdi).
#-----------------------------------------------------------------------
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2


	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2


	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2


	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	movdqu	%xmm2,(%rdi)
	movdqu	%xmm0,16(%rdi)
	.byte	0xf3,0xc3
.size	gcm_init_clmul,.-gcm_init_clmul

#-----------------------------------------------------------------------
# void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16])
# In:   %rdi = Xi (updated in place), %rsi = Htable
# Single-block GHASH multiply using pclmulqdq + pshufb (.byte encoded).
#-----------------------------------------------------------------------
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5
	movdqu	(%rsi),%xmm2
.byte	102,15,56,0,197
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.size	gcm_gmult_clmul,.-gcm_gmult_clmul

#-----------------------------------------------------------------------
# void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16],
#                      const u8 *inp, size_t len)
# In:   %rdi = Xi, %rsi = Htable, %rdx = inp, %rcx = len
# Processes two blocks per .Lmod_loop iteration (H and H^2 from
# 16(%rsi)), with .Leven_tail / .Lodd_tail handling the remainder.
#-----------------------------------------------------------------------
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm5

	movdqu	(%rdi),%xmm0
	movdqu	(%rsi),%xmm2
.byte	102,15,56,0,197

	subq	$16,%rcx
	jz	.Lodd_tail

	movdqu	16(%rsi),%xmm8





	movdqu	(%rdx),%xmm3
	movdqu	16(%rdx),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245
	pxor	%xmm3,%xmm0
	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm6,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,242,0
.byte	102,15,58,68,250,17
.byte	102,15,58,68,220,0
	pxor	%xmm6,%xmm3
	pxor	%xmm7,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm7
	pxor	%xmm4,%xmm6
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	leaq	32(%rdx),%rdx
	subq	$32,%rcx
	jbe	.Leven_tail

.Lmod_loop:
.byte	102,65,15,58,68,192,0
.byte	102,65,15,58,68,200,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqu	(%rdx),%xmm3
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1

	movdqu	16(%rdx),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245

	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm9
	pshufd	$78,%xmm2,%xmm10
	pxor	%xmm6,%xmm9
	pxor	%xmm2,%xmm10
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1

.byte	102,15,58,68,250,17
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0

.byte	102,69,15,58,68,202,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm8,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm8,%xmm4

	pxor	%xmm6,%xmm9
	pxor	%xmm7,%xmm9
	movdqa	%xmm9,%xmm10
	psrldq	$8,%xmm9
	pslldq	$8,%xmm10
	pxor	%xmm9,%xmm7
	pxor	%xmm10,%xmm6

	leaq	32(%rdx),%rdx
	subq	$32,%rcx
	ja	.Lmod_loop

.Leven_tail:
.byte	102,65,15,58,68,192,0
.byte	102,65,15,58,68,200,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:
	movdqu	(%rdx),%xmm3
.byte	102,15,56,0,221
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1


	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.Ldone:
.byte	102,15,56,0,197
	movdqu	%xmm0,(%rdi)
	.byte	0xf3,0xc3
.LSEH_end_gcm_ghash_clmul:
.size	gcm_ghash_clmul,.-gcm_ghash_clmul

# --- constant data: byte-swap mask, reduction polynomial, and the
# --- 4-bit/8-bit reduction tables used by the table-driven paths ---
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.align	64
.type	.Lrem_4bit,@object
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type	.Lrem_8bit,@object
.Lrem_8bit:
.value	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64