# aesni-x86_64.S revision 299966
# $FreeBSD: stable/10/secure/lib/libcrypto/amd64/aesni-x86_64.S 299966 2016-05-16 19:30:27Z jkim $
# Do not modify. This file is auto-generated from aesni-x86_64.pl.
#
# Syntax: AT&T (GAS), x86-64.  Argument registers are used in the order
# %rdi, %rsi, %rdx, %rcx, %r8, %r9.
#
# AES-NI / SSSE3 / SSE4.1 instructions are emitted as raw .byte
# sequences.  The encodings used in this file decode as follows:
#   102,15,56,220,M		aesenc      src,dst
#   102,15,56,221,M		aesenclast  src,dst
#   102,15,56,222,M		aesdec      src,dst
#   102,15,56,223,M		aesdeclast  src,dst
#   102,[R,]15,56,0,M		pshufb      src,dst
#   102,R,15,58,22,M,i		pextrd      $i,xmm,r32
#   102,R,15,58,34,M,i		pinsrd      $i,r32,xmm
#   0xf3,0xc3			rep ret
# ModRM byte M, without a REX prefix:
#   209,217,225,233,241,249	src=%xmm1, dst=%xmm2..%xmm7
#   208,216,224,232,240,248	src=%xmm0, dst=%xmm2..%xmm7
# A 68/69 byte after 102 is a REX prefix that selects the high registers
# (e.g. 102,68,15,56,220,193 = aesenc %xmm1,%xmm8).
#
# Key schedule layout as used below: 16-byte round keys starting at
# offset 0, and a 32-bit round counter at offset 240.  The one-block loops
# run aesenc/aesdec that many times and then one aesenclast/aesdeclast.
.text

#-----------------------------------------------------------------------
# aesni_encrypt
# In:    %rdi = 16-byte input block, %rsi = 16-byte output block,
#        %rdx = key schedule
# Clobb: %eax, %rdx, %xmm0-%xmm2, flags
#-----------------------------------------------------------------------
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
.Loop_enc1_1:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209
	movups	%xmm2,(%rsi)
	.byte	0xf3,0xc3
.size	aesni_encrypt,.-aesni_encrypt

#-----------------------------------------------------------------------
# aesni_decrypt
# Same register usage as aesni_encrypt, using aesdec/aesdeclast.
#-----------------------------------------------------------------------
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
.Loop_dec1_2:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209
	movups	%xmm2,(%rsi)
	.byte	0xf3,0xc3
.size	aesni_decrypt, .-aesni_decrypt

#-----------------------------------------------------------------------
# _aesni_{en,de}crypt{3,4,6,8}: file-local multi-block helpers.
# In:    %rcx = key schedule, %eax = round counter (halved on entry;
#        each loop pass applies two round keys),
#        blocks in %xmm2.. (%xmm2-%xmm4 / -%xmm5 / -%xmm7 / -%xmm9)
# Out:   the same registers, transformed in place
# Clobb: %eax, %rcx, %xmm0, %xmm1, flags
#-----------------------------------------------------------------------
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
	movups	(%rcx),%xmm0
	shrl	$1,%eax
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	(%rcx),%xmm0

.Lenc_loop3:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	decl	%eax
.byte	102,15,56,220,225
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224
	movups	(%rcx),%xmm0
	jnz	.Lenc_loop3

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	.byte	0xf3,0xc3
.size	_aesni_encrypt3,.-_aesni_encrypt3
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
	movups	(%rcx),%xmm0
	shrl	$1,%eax
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	(%rcx),%xmm0

.Ldec_loop3:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
	decl	%eax
.byte	102,15,56,222,225
	movups	16(%rcx),%xmm1
.byte	102,15,56,222,208
.byte	102,15,56,222,216
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224
	movups	(%rcx),%xmm0
	jnz	.Ldec_loop3

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
	.byte	0xf3,0xc3
.size	_aesni_decrypt3,.-_aesni_decrypt3
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
	movups	(%rcx),%xmm0
	shrl	$1,%eax
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	(%rcx),%xmm0

.Lenc_loop4:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	decl	%eax
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	(%rcx),%xmm0
	jnz	.Lenc_loop4

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	.byte	0xf3,0xc3
.size	_aesni_encrypt4,.-_aesni_encrypt4
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
	movups	(%rcx),%xmm0
	shrl	$1,%eax
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	(%rcx),%xmm0

.Ldec_loop4:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
	decl	%eax
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	16(%rcx),%xmm1
.byte	102,15,56,222,208
.byte	102,15,56,222,216
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	(%rcx),%xmm0
	jnz	.Ldec_loop4

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
	.byte	0xf3,0xc3
.size	_aesni_decrypt4,.-_aesni_decrypt4
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
	movups	(%rcx),%xmm0
	shrl	$1,%eax
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
.byte	102,15,56,220,209
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233
	pxor	%xmm0,%xmm7
	decl	%eax
.byte	102,15,56,220,241
	movups	(%rcx),%xmm0
.byte	102,15,56,220,249
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	decl	%eax
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.Lenc_loop6_enter:
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	(%rcx),%xmm0
	jnz	.Lenc_loop6

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	.byte	0xf3,0xc3
.size	_aesni_encrypt6,.-_aesni_encrypt6
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
	movups	(%rcx),%xmm0
	shrl	$1,%eax
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
.byte	102,15,56,222,209
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm5
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm7
	decl	%eax
.byte	102,15,56,222,241
	movups	(%rcx),%xmm0
.byte	102,15,56,222,249
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
	decl	%eax
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.Ldec_loop6_enter:
	movups	16(%rcx),%xmm1
.byte	102,15,56,222,208
.byte	102,15,56,222,216
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	(%rcx),%xmm0
	jnz	.Ldec_loop6

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
	.byte	0xf3,0xc3
.size	_aesni_decrypt6,.-_aesni_decrypt6
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
	movups	(%rcx),%xmm0
	shrl	$1,%eax
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
.byte	102,15,56,220,209
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233
	pxor	%xmm0,%xmm7
	decl	%eax
.byte	102,15,56,220,241
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,249
	pxor	%xmm0,%xmm9
	movups	(%rcx),%xmm0
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	16(%rcx),%xmm1
	jmp	.Lenc_loop8_enter
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	decl	%eax
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
	movups	16(%rcx),%xmm1
.Lenc_loop8_enter:
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	(%rcx),%xmm0
	jnz	.Lenc_loop8

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	102,68,15,56,221,192
.byte	102,68,15,56,221,200
	.byte	0xf3,0xc3
.size	_aesni_encrypt8,.-_aesni_encrypt8
.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
	movups	(%rcx),%xmm0
	shrl	$1,%eax
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
.byte	102,15,56,222,209
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm5
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm7
	decl	%eax
.byte	102,15,56,222,241
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,249
	pxor	%xmm0,%xmm9
	movups	(%rcx),%xmm0
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	16(%rcx),%xmm1
	jmp	.Ldec_loop8_enter
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
	decl	%eax
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	16(%rcx),%xmm1
.Ldec_loop8_enter:
.byte	102,15,56,222,208
.byte	102,15,56,222,216
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	(%rcx),%xmm0
	jnz	.Ldec_loop8

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
.byte	102,68,15,56,223,192
.byte	102,68,15,56,223,200
	.byte	0xf3,0xc3
.size	_aesni_decrypt8,.-_aesni_decrypt8

#-----------------------------------------------------------------------
# aesni_ecb_encrypt
# In:    %rdi = input, %rsi = output, %rdx = length in bytes (rounded
#        down to a multiple of 16; zero returns immediately),
#        %rcx = key schedule, %r8d = nonzero to encrypt, zero to decrypt
# Keeps: %r11 = key schedule, %r10d = round counter, reloaded into
#        %rcx/%eax because the helpers clobber them.
# Flow:  8 blocks per pass while at least 128 bytes remain, then a tail
#        of 1..7 blocks dispatched on the remaining byte count.
#-----------------------------------------------------------------------
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
	andq	$-16,%rdx
	jz	.Lecb_ret

	movl	240(%rcx),%eax
	movups	(%rcx),%xmm0
	movq	%rcx,%r11
	movl	%eax,%r10d
	testl	%r8d,%r8d
	jz	.Lecb_decrypt

	cmpq	$128,%rdx
	jb	.Lecb_enc_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$128,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$128,%rdx
	jnc	.Lecb_enc_loop8

	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$128,%rdx
	jz	.Lecb_ret

.Lecb_enc_tail:
	movups	(%rdi),%xmm2
	cmpq	$32,%rdx
	jb	.Lecb_enc_one
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$64,%rdx
	jb	.Lecb_enc_three
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$96,%rdx
	jb	.Lecb_enc_five
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six
	movdqu	96(%rdi),%xmm8
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_3:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	xorps	%xmm4,%xmm4
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:
	xorps	%xmm7,%xmm7
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:
	cmpq	$128,%rdx
	jb	.Lecb_dec_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$128,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0
	subq	$128,%rdx
	jnc	.Lecb_dec_loop8

	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$128,%rdx
	jz	.Lecb_ret

.Lecb_dec_tail:
	movups	(%rdi),%xmm2
	cmpq	$32,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$64,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$96,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8
	movups	(%rcx),%xmm0
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_4:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	xorps	%xmm4,%xmm4
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)

.Lecb_ret:
	.byte	0xf3,0xc3
.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt

#-----------------------------------------------------------------------
# aesni_ccm64_encrypt_blocks
# In:    %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
#        %rcx = key schedule, %r8 = 16-byte counter block,
#        %r9 = 16-byte running value, read on entry and written back
#        on exit (presumably the CBC-MAC state -- confirm with caller)
# Regs:  %xmm9 = byte-swapped counter, bumped by .Lincrement64 per block;
#        %xmm2 = counter block being encrypted; %xmm3 = running value;
#        %xmm7 = .Lbswap_mask; %r11/%r10d = saved key / halved rounds.
# The loop body runs before %rdx is tested, so at least one block is
# always processed.
#-----------------------------------------------------------------------
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
aesni_ccm64_encrypt_blocks:
	movl	240(%rcx),%eax
	movdqu	(%r8),%xmm9
	movdqa	.Lincrement64(%rip),%xmm6
	movdqa	.Lbswap_mask(%rip),%xmm7

	shrl	$1,%eax
	leaq	0(%rcx),%r11
	movdqu	(%r9),%xmm3
	movdqa	%xmm9,%xmm2
	movl	%eax,%r10d
.byte	102,68,15,56,0,207
	jmp	.Lccm64_enc_outer
.align	16
.Lccm64_enc_outer:
	movups	(%r11),%xmm0
	movl	%r10d,%eax
	movups	(%rdi),%xmm8

	xorps	%xmm0,%xmm2
	movups	16(%r11),%xmm1
	xorps	%xmm8,%xmm0
	leaq	32(%r11),%rcx
	xorps	%xmm0,%xmm3
	movups	(%rcx),%xmm0

.Lccm64_enc2_loop:
.byte	102,15,56,220,209
	decl	%eax
.byte	102,15,56,220,217
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,216
	movups	0(%rcx),%xmm0
	jnz	.Lccm64_enc2_loop
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	paddq	%xmm6,%xmm9
.byte	102,15,56,221,208
.byte	102,15,56,221,216

	decq	%rdx
	leaq	16(%rdi),%rdi
	xorps	%xmm2,%xmm8
	movdqa	%xmm9,%xmm2
	movups	%xmm8,(%rsi)
	leaq	16(%rsi),%rsi
.byte	102,15,56,0,215
	jnz	.Lccm64_enc_outer

	movups	%xmm3,(%r9)
	.byte	0xf3,0xc3
.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks

#-----------------------------------------------------------------------
# aesni_ccm64_decrypt_blocks
# Same arguments as aesni_ccm64_encrypt_blocks.  The first counter block
# is encrypted on its own; each later pass encrypts the next counter
# block (%xmm2) together with the running value (%xmm3), which has the
# just-produced output block folded in.  .Lccm64_dec_break finishes the
# running value for the last block and stores it to (%r9).
#-----------------------------------------------------------------------
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,@function
.align	16
aesni_ccm64_decrypt_blocks:
	movl	240(%rcx),%eax
	movups	(%r8),%xmm9
	movdqu	(%r9),%xmm3
	movdqa	.Lincrement64(%rip),%xmm6
	movdqa	.Lbswap_mask(%rip),%xmm7

	movaps	%xmm9,%xmm2
	movl	%eax,%r10d
	movq	%rcx,%r11
.byte	102,68,15,56,0,207
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_5:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_5
.byte	102,15,56,221,209
	movups	(%rdi),%xmm8
	paddq	%xmm6,%xmm9
	leaq	16(%rdi),%rdi
	jmp	.Lccm64_dec_outer
.align	16
.Lccm64_dec_outer:
	xorps	%xmm2,%xmm8
	movdqa	%xmm9,%xmm2
	movl	%r10d,%eax
	movups	%xmm8,(%rsi)
	leaq	16(%rsi),%rsi
.byte	102,15,56,0,215

	subq	$1,%rdx
	jz	.Lccm64_dec_break

	movups	(%r11),%xmm0
	shrl	$1,%eax
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8
	leaq	32(%r11),%rcx
	xorps	%xmm0,%xmm2
	xorps	%xmm8,%xmm3
	movups	(%rcx),%xmm0

.Lccm64_dec2_loop:
.byte	102,15,56,220,209
	decl	%eax
.byte	102,15,56,220,217
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,216
	movups	0(%rcx),%xmm0
	jnz	.Lccm64_dec2_loop
	movups	(%rdi),%xmm8
	paddq	%xmm6,%xmm9
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	leaq	16(%rdi),%rdi
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	jmp	.Lccm64_dec_outer

.align	16
.Lccm64_dec_break:

	movups	(%r11),%xmm0
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8
	leaq	32(%r11),%r11
	xorps	%xmm8,%xmm3
.Loop_enc1_6:
.byte	102,15,56,220,217
	decl	%eax
	movups	(%r11),%xmm1
	leaq	16(%r11),%r11
	jnz	.Loop_enc1_6
.byte	102,15,56,221,217
	movups	%xmm3,(%r9)
	.byte	0xf3,0xc3
.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks

#-----------------------------------------------------------------------
# aesni_ctr32_encrypt_blocks
# In:    %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
#        %rcx = key schedule, %r8 = 16-byte counter block
# Setup: dword 3 of the counter block is extracted into %r10d, zeroed in
#        %xmm14 and byte-swapped; counter values n..n+2 go to %xmm12 and
#        n+3..n+5 to %xmm13.  Un-swapped copies are kept at -40(%rsp)
#        and -24(%rsp), i.e. below the stack pointer without adjusting
#        it.
# Flow:  6 blocks per pass in .Lctr32_loop6, then a 1..5 block tail.
#        A count of exactly 1 takes .Lctr32_one_shortcut.  A count of 0
#        is not rejected: it reaches .Lctr32_one and processes one block.
#-----------------------------------------------------------------------
.globl	aesni_ctr32_encrypt_blocks
.type	aesni_ctr32_encrypt_blocks,@function
.align	16
aesni_ctr32_encrypt_blocks:
	cmpq	$1,%rdx
	je	.Lctr32_one_shortcut

	movdqu	(%r8),%xmm14
	movdqa	.Lbswap_mask(%rip),%xmm15
	xorl	%eax,%eax
.byte	102,69,15,58,22,242,3
.byte	102,68,15,58,34,240,3

	movl	240(%rcx),%eax
	bswapl	%r10d
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
.byte	102,69,15,58,34,226,0
	leaq	3(%r10),%r11
.byte	102,69,15,58,34,235,0
	incl	%r10d
.byte	102,69,15,58,34,226,1
	incq	%r11
.byte	102,69,15,58,34,235,1
	incl	%r10d
.byte	102,69,15,58,34,226,2
	incq	%r11
.byte	102,69,15,58,34,235,2
	movdqa	%xmm12,-40(%rsp)
.byte	102,69,15,56,0,231
	movdqa	%xmm13,-24(%rsp)
.byte	102,69,15,56,0,239

	pshufd	$192,%xmm12,%xmm2
	pshufd	$128,%xmm12,%xmm3
	pshufd	$64,%xmm12,%xmm4
	cmpq	$6,%rdx
	jb	.Lctr32_tail
	shrl	$1,%eax
	movq	%rcx,%r11
	movl	%eax,%r10d
	subq	$6,%rdx
	jmp	.Lctr32_loop6

.align	16
.Lctr32_loop6:
	pshufd	$192,%xmm13,%xmm5
	por	%xmm14,%xmm2
	movups	(%r11),%xmm0
	pshufd	$128,%xmm13,%xmm6
	por	%xmm14,%xmm3
	movups	16(%r11),%xmm1
	pshufd	$64,%xmm13,%xmm7
	por	%xmm14,%xmm4
	por	%xmm14,%xmm5
	xorps	%xmm0,%xmm2
	por	%xmm14,%xmm6
	por	%xmm14,%xmm7

	pxor	%xmm0,%xmm3
.byte	102,15,56,220,209
	leaq	32(%r11),%rcx
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217
	movdqa	.Lincrement32(%rip),%xmm13
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,225
	movdqa	-40(%rsp),%xmm12
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233
	pxor	%xmm0,%xmm7
	movups	(%rcx),%xmm0
	decl	%eax
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	jmp	.Lctr32_enc_loop6_enter
.align	16
.Lctr32_enc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	decl	%eax
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.Lctr32_enc_loop6_enter:
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	(%rcx),%xmm0
	jnz	.Lctr32_enc_loop6

.byte	102,15,56,220,209
	paddd	%xmm13,%xmm12
.byte	102,15,56,220,217
	paddd	-24(%rsp),%xmm13
.byte	102,15,56,220,225
	movdqa	%xmm12,-40(%rsp)
.byte	102,15,56,220,233
	movdqa	%xmm13,-24(%rsp)
.byte	102,15,56,220,241
.byte	102,69,15,56,0,231
.byte	102,15,56,220,249
.byte	102,69,15,56,0,239

.byte	102,15,56,221,208
	movups	(%rdi),%xmm8
.byte	102,15,56,221,216
	movups	16(%rdi),%xmm9
.byte	102,15,56,221,224
	movups	32(%rdi),%xmm10
.byte	102,15,56,221,232
	movups	48(%rdi),%xmm11
.byte	102,15,56,221,240
	movups	64(%rdi),%xmm1
.byte	102,15,56,221,248
	movups	80(%rdi),%xmm0
	leaq	96(%rdi),%rdi

	xorps	%xmm2,%xmm8
	pshufd	$192,%xmm12,%xmm2
	xorps	%xmm3,%xmm9
	pshufd	$128,%xmm12,%xmm3
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	pshufd	$64,%xmm12,%xmm4
	movups	%xmm9,16(%rsi)
	xorps	%xmm5,%xmm11
	movups	%xmm10,32(%rsi)
	xorps	%xmm6,%xmm1
	movups	%xmm11,48(%rsi)
	xorps	%xmm7,%xmm0
	movups	%xmm1,64(%rsi)
	movups	%xmm0,80(%rsi)
	leaq	96(%rsi),%rsi
	movl	%r10d,%eax
	subq	$6,%rdx
	jnc	.Lctr32_loop6

	addq	$6,%rdx
	jz	.Lctr32_done
	movq	%r11,%rcx
	leal	1(%rax,%rax,1),%eax

.Lctr32_tail:
	por	%xmm14,%xmm2
	movups	(%rdi),%xmm8
	cmpq	$2,%rdx
	jb	.Lctr32_one

	por	%xmm14,%xmm3
	movups	16(%rdi),%xmm9
	je	.Lctr32_two

	pshufd	$192,%xmm13,%xmm5
	por	%xmm14,%xmm4
	movups	32(%rdi),%xmm10
	cmpq	$4,%rdx
	jb	.Lctr32_three

	pshufd	$128,%xmm13,%xmm6
	por	%xmm14,%xmm5
	movups	48(%rdi),%xmm11
	je	.Lctr32_four

	por	%xmm14,%xmm6
	xorps	%xmm7,%xmm7

	call	_aesni_encrypt6

	movups	64(%rdi),%xmm1
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	movups	%xmm9,16(%rsi)
	xorps	%xmm5,%xmm11
	movups	%xmm10,32(%rsi)
	xorps	%xmm6,%xmm1
	movups	%xmm11,48(%rsi)
	movups	%xmm1,64(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_one_shortcut:
	movups	(%r8),%xmm2
	movups	(%rdi),%xmm8
	movl	240(%rcx),%eax
.Lctr32_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_7:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_7
.byte	102,15,56,221,209
	xorps	%xmm2,%xmm8
	movups	%xmm8,(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_two:
	xorps	%xmm4,%xmm4
	call	_aesni_encrypt3
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	movups	%xmm9,16(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_three:
	call	_aesni_encrypt3
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	movups	%xmm9,16(%rsi)
	movups	%xmm10,32(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_four:
	call	_aesni_encrypt4
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	movups	%xmm9,16(%rsi)
	xorps	%xmm5,%xmm11
	movups	%xmm10,32(%rsi)
	movups	%xmm11,48(%rsi)

.Lctr32_done:
	.byte	0xf3,0xc3
.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks

#-----------------------------------------------------------------------
# aesni_xts_encrypt
# In:    %rdi = input, %rsi = output, %rdx = length in bytes,
#        %rcx = key schedule used for the data blocks,
#        %r8  = key schedule used once, to encrypt the block at (%r9),
#        %r9  = 16-byte block that becomes the initial tweak
# Stack: 104 bytes reserved; 0..80(%rsp) hold the six tweaks of the
#        current pass.
# Regs:  %xmm15 = next tweak, %xmm10-%xmm14 = queued tweaks,
#        %xmm8 = .Lxts_magic, %r11/%r10d = saved key / round counter,
#        %r9 = original length (its low 4 bits select the
#        .Lxts_enc_steal tail for a trailing partial block).
# Flow:  6 blocks per pass in .Lxts_enc_grandloop, a 1..5 block tail,
#        then the partial-block tail if length is not a multiple of 16.
#-----------------------------------------------------------------------
.globl	aesni_xts_encrypt
.type	aesni_xts_encrypt,@function
.align	16
aesni_xts_encrypt:
	leaq	-104(%rsp),%rsp
	movups	(%r9),%xmm15
	movl	240(%r8),%eax
	movl	240(%rcx),%r10d
	movups	(%r8),%xmm0
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm15
.Loop_enc1_8:
.byte	102,68,15,56,220,249
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	.Loop_enc1_8
.byte	102,68,15,56,221,249
	movq	%rcx,%r11
	movl	%r10d,%eax
	movq	%rdx,%r9
	andq	$-16,%rdx

	movdqa	.Lxts_magic(%rip),%xmm8
	pxor	%xmm14,%xmm14
	pcmpgtd	%xmm15,%xmm14
	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm10
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pcmpgtd	%xmm15,%xmm14
	pxor	%xmm9,%xmm15
	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm11
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pcmpgtd	%xmm15,%xmm14
	pxor	%xmm9,%xmm15
	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm12
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pcmpgtd	%xmm15,%xmm14
	pxor	%xmm9,%xmm15
	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm13
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pcmpgtd	%xmm15,%xmm14
	pxor	%xmm9,%xmm15
	subq	$96,%rdx
	jc	.Lxts_enc_short

	shrl	$1,%eax
	subl	$1,%eax
	movl	%eax,%r10d
	jmp	.Lxts_enc_grandloop

.align	16
.Lxts_enc_grandloop:
	pshufd	$19,%xmm14,%xmm9
	movdqa	%xmm15,%xmm14
	paddq	%xmm15,%xmm15
	movdqu	0(%rdi),%xmm2
	pand	%xmm8,%xmm9
	movdqu	16(%rdi),%xmm3
	pxor	%xmm9,%xmm15

	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	pxor	%xmm12,%xmm4
	movdqu	80(%rdi),%xmm7
	leaq	96(%rdi),%rdi
	pxor	%xmm13,%xmm5
	movups	(%r11),%xmm0
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7

	movups	16(%r11),%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,220,209
	leaq	32(%r11),%rcx
	pxor	%xmm0,%xmm4
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm5
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm6
	movdqa	%xmm13,48(%rsp)
.byte	102,15,56,220,233
	pxor	%xmm0,%xmm7
	movups	(%rcx),%xmm0
	decl	%eax
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,220,241
	movdqa	%xmm15,80(%rsp)
.byte	102,15,56,220,249
	pxor	%xmm14,%xmm14
	pcmpgtd	%xmm15,%xmm14
	jmp	.Lxts_enc_loop6_enter

.align	16
.Lxts_enc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	decl	%eax
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.Lxts_enc_loop6_enter:
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	(%rcx),%xmm0
	jnz	.Lxts_enc_loop6

	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	paddq	%xmm15,%xmm15
.byte	102,15,56,220,209
	pand	%xmm8,%xmm9
.byte	102,15,56,220,217
	pcmpgtd	%xmm15,%xmm14
.byte	102,15,56,220,225
	pxor	%xmm9,%xmm15
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	16(%rcx),%xmm1

	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm10
	paddq	%xmm15,%xmm15
.byte	102,15,56,220,208
	pand	%xmm8,%xmm9
.byte	102,15,56,220,216
	pcmpgtd	%xmm15,%xmm14
.byte	102,15,56,220,224
	pxor	%xmm9,%xmm15
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	32(%rcx),%xmm0

	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm11
	paddq	%xmm15,%xmm15
.byte	102,15,56,220,209
	pand	%xmm8,%xmm9
.byte	102,15,56,220,217
	pcmpgtd	%xmm15,%xmm14
.byte	102,15,56,220,225
	pxor	%xmm9,%xmm15
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249

	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm12
	paddq	%xmm15,%xmm15
.byte	102,15,56,221,208
	pand	%xmm8,%xmm9
.byte	102,15,56,221,216
	pcmpgtd	%xmm15,%xmm14
.byte	102,15,56,221,224
	pxor	%xmm9,%xmm15
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248

	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm13
	paddq	%xmm15,%xmm15
	xorps	0(%rsp),%xmm2
	pand	%xmm8,%xmm9
	xorps	16(%rsp),%xmm3
	pcmpgtd	%xmm15,%xmm14
	pxor	%xmm9,%xmm15

	xorps	32(%rsp),%xmm4
	movups	%xmm2,0(%rsi)
	xorps	48(%rsp),%xmm5
	movups	%xmm3,16(%rsi)
	xorps	64(%rsp),%xmm6
	movups	%xmm4,32(%rsi)
	xorps	80(%rsp),%xmm7
	movups	%xmm5,48(%rsi)
	movl	%r10d,%eax
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	leaq	96(%rsi),%rsi
	subq	$96,%rdx
	jnc	.Lxts_enc_grandloop

	leal	3(%rax,%rax,1),%eax
	movq	%r11,%rcx
	movl	%eax,%r10d

.Lxts_enc_short:
	addq	$96,%rdx
	jz	.Lxts_enc_done

	cmpq	$32,%rdx
	jb	.Lxts_enc_one
	je	.Lxts_enc_two

	cmpq	$64,%rdx
	jb	.Lxts_enc_three
	je	.Lxts_enc_four

	pshufd	$19,%xmm14,%xmm9
	movdqa	%xmm15,%xmm14
	paddq	%xmm15,%xmm15
	movdqu	(%rdi),%xmm2
	pand	%xmm8,%xmm9
	movdqu	16(%rdi),%xmm3
	pxor	%xmm9,%xmm15

	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	leaq	80(%rdi),%rdi
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6

	call	_aesni_encrypt6

	xorps	%xmm10,%xmm2
	movdqa	%xmm15,%xmm10
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	xorps	%xmm14,%xmm6
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_one:
	movups	(%rdi),%xmm2
	leaq	16(%rdi),%rdi
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_9:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_9
.byte	102,15,56,221,209
	xorps	%xmm10,%xmm2
	movdqa	%xmm11,%xmm10
	movups	%xmm2,(%rsi)
	leaq	16(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_two:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	leaq	32(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3

	call	_aesni_encrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm12,%xmm10
	xorps	%xmm11,%xmm3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	leaq	32(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_three:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	leaq	48(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4

	call	_aesni_encrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm13,%xmm10
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	leaq	48(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_four:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	xorps	%xmm10,%xmm2
	movups	48(%rdi),%xmm5
	leaq	64(%rdi),%rdi
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	xorps	%xmm13,%xmm5

	call	_aesni_encrypt4

	xorps	%xmm10,%xmm2
	movdqa	%xmm15,%xmm10
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	leaq	64(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
.Lxts_enc_done:
	andq	$15,%r9
	jz	.Lxts_enc_ret
	movq	%r9,%rdx

# Partial final block: swap the trailing %r9 input bytes with the head of
# the last output block, then re-encrypt that block with tweak %xmm10.
.Lxts_enc_steal:
	movzbl	(%rdi),%eax
	movzbl	-16(%rsi),%ecx
	leaq	1(%rdi),%rdi
	movb	%al,-16(%rsi)
	movb	%cl,0(%rsi)
	leaq	1(%rsi),%rsi
	subq	$1,%rdx
	jnz	.Lxts_enc_steal

	subq	%r9,%rsi
	movq	%r11,%rcx
	movl	%r10d,%eax

	movups	-16(%rsi),%xmm2
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_10:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_10
.byte	102,15,56,221,209
	xorps	%xmm10,%xmm2
	movups	%xmm2,-16(%rsi)

.Lxts_enc_ret:
	leaq	104(%rsp),%rsp
.Lxts_enc_epilogue:
	.byte	0xf3,0xc3
.size	aesni_xts_encrypt,.-aesni_xts_encrypt

#-----------------------------------------------------------------------
# aesni_xts_decrypt
# NOTE(review): only the first part of this routine is visible in the
# reviewed text, so just the visible setup is described.  Arguments are
# read from the same registers as in aesni_xts_encrypt.  The tweak is
# produced with aesenc/aesenclast under the %r8 key; data rounds use
# aesdec.  When the length is not a multiple of 16, 16 is subtracted
# from %rdx before the block loop.
#-----------------------------------------------------------------------
.globl	aesni_xts_decrypt
.type	aesni_xts_decrypt,@function
.align	16
aesni_xts_decrypt:
	leaq	-104(%rsp),%rsp
	movups	(%r9),%xmm15
	movl	240(%r8),%eax
	movl	240(%rcx),%r10d
	movups	(%r8),%xmm0
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm15
.Loop_enc1_11:
.byte	102,68,15,56,220,249
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	.Loop_enc1_11
.byte	102,68,15,56,221,249
	xorl	%eax,%eax
	testq	$15,%rdx
	setnz	%al
	shlq	$4,%rax
	subq	%rax,%rdx

	movq	%rcx,%r11
	movl	%r10d,%eax
	movq	%rdx,%r9
	andq	$-16,%rdx

	movdqa	.Lxts_magic(%rip),%xmm8
	pxor	%xmm14,%xmm14
	pcmpgtd	%xmm15,%xmm14
	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm10
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pcmpgtd	%xmm15,%xmm14
	pxor	%xmm9,%xmm15
	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm11
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pcmpgtd	%xmm15,%xmm14
	pxor	%xmm9,%xmm15
	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm12
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pcmpgtd	%xmm15,%xmm14
	pxor	%xmm9,%xmm15
	pshufd	$19,%xmm14,%xmm9
	pxor	%xmm14,%xmm14
	movdqa	%xmm15,%xmm13
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pcmpgtd	%xmm15,%xmm14
	pxor	%xmm9,%xmm15
	subq	$96,%rdx
	jc	.Lxts_dec_short

	shrl	$1,%eax
	subl	$1,%eax
	movl	%eax,%r10d
	jmp	.Lxts_dec_grandloop

.align	16
.Lxts_dec_grandloop:
	pshufd	$19,%xmm14,%xmm9
	movdqa	%xmm15,%xmm14
	paddq	%xmm15,%xmm15
	movdqu	0(%rdi),%xmm2
	pand	%xmm8,%xmm9
	movdqu	16(%rdi),%xmm3
	pxor	%xmm9,%xmm15

	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	pxor	%xmm12,%xmm4
	movdqu	80(%rdi),%xmm7
	leaq	96(%rdi),%rdi
	pxor	%xmm13,%xmm5
	movups	(%r11),%xmm0
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7

	movups	16(%r11),%xmm1
	pxor	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,222,209
	leaq	32(%r11),%rcx
	pxor	%xmm0,%xmm4
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm5
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm6
	movdqa	%xmm13,48(%rsp)
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm7
	movups	(%rcx),%xmm0
	decl	%eax
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,222,241
# NOTE(review): the reviewed text stops in the middle of the next
# instruction; its operands are on the following source line.
	movdqa
%xmm15,80(%rsp) 1646.byte 102,15,56,222,249 1647 pxor %xmm14,%xmm14 1648 pcmpgtd %xmm15,%xmm14 1649 jmp .Lxts_dec_loop6_enter 1650 1651.align 16 1652.Lxts_dec_loop6: 1653.byte 102,15,56,222,209 1654.byte 102,15,56,222,217 1655 decl %eax 1656.byte 102,15,56,222,225 1657.byte 102,15,56,222,233 1658.byte 102,15,56,222,241 1659.byte 102,15,56,222,249 1660.Lxts_dec_loop6_enter: 1661 movups 16(%rcx),%xmm1 1662.byte 102,15,56,222,208 1663.byte 102,15,56,222,216 1664 leaq 32(%rcx),%rcx 1665.byte 102,15,56,222,224 1666.byte 102,15,56,222,232 1667.byte 102,15,56,222,240 1668.byte 102,15,56,222,248 1669 movups (%rcx),%xmm0 1670 jnz .Lxts_dec_loop6 1671 1672 pshufd $19,%xmm14,%xmm9 1673 pxor %xmm14,%xmm14 1674 paddq %xmm15,%xmm15 1675.byte 102,15,56,222,209 1676 pand %xmm8,%xmm9 1677.byte 102,15,56,222,217 1678 pcmpgtd %xmm15,%xmm14 1679.byte 102,15,56,222,225 1680 pxor %xmm9,%xmm15 1681.byte 102,15,56,222,233 1682.byte 102,15,56,222,241 1683.byte 102,15,56,222,249 1684 movups 16(%rcx),%xmm1 1685 1686 pshufd $19,%xmm14,%xmm9 1687 pxor %xmm14,%xmm14 1688 movdqa %xmm15,%xmm10 1689 paddq %xmm15,%xmm15 1690.byte 102,15,56,222,208 1691 pand %xmm8,%xmm9 1692.byte 102,15,56,222,216 1693 pcmpgtd %xmm15,%xmm14 1694.byte 102,15,56,222,224 1695 pxor %xmm9,%xmm15 1696.byte 102,15,56,222,232 1697.byte 102,15,56,222,240 1698.byte 102,15,56,222,248 1699 movups 32(%rcx),%xmm0 1700 1701 pshufd $19,%xmm14,%xmm9 1702 pxor %xmm14,%xmm14 1703 movdqa %xmm15,%xmm11 1704 paddq %xmm15,%xmm15 1705.byte 102,15,56,222,209 1706 pand %xmm8,%xmm9 1707.byte 102,15,56,222,217 1708 pcmpgtd %xmm15,%xmm14 1709.byte 102,15,56,222,225 1710 pxor %xmm9,%xmm15 1711.byte 102,15,56,222,233 1712.byte 102,15,56,222,241 1713.byte 102,15,56,222,249 1714 1715 pshufd $19,%xmm14,%xmm9 1716 pxor %xmm14,%xmm14 1717 movdqa %xmm15,%xmm12 1718 paddq %xmm15,%xmm15 1719.byte 102,15,56,223,208 1720 pand %xmm8,%xmm9 1721.byte 102,15,56,223,216 1722 pcmpgtd %xmm15,%xmm14 1723.byte 102,15,56,223,224 1724 pxor %xmm9,%xmm15 1725.byte 
102,15,56,223,232 1726.byte 102,15,56,223,240 1727.byte 102,15,56,223,248 1728 1729 pshufd $19,%xmm14,%xmm9 1730 pxor %xmm14,%xmm14 1731 movdqa %xmm15,%xmm13 1732 paddq %xmm15,%xmm15 1733 xorps 0(%rsp),%xmm2 1734 pand %xmm8,%xmm9 1735 xorps 16(%rsp),%xmm3 1736 pcmpgtd %xmm15,%xmm14 1737 pxor %xmm9,%xmm15 1738 1739 xorps 32(%rsp),%xmm4 1740 movups %xmm2,0(%rsi) 1741 xorps 48(%rsp),%xmm5 1742 movups %xmm3,16(%rsi) 1743 xorps 64(%rsp),%xmm6 1744 movups %xmm4,32(%rsi) 1745 xorps 80(%rsp),%xmm7 1746 movups %xmm5,48(%rsi) 1747 movl %r10d,%eax 1748 movups %xmm6,64(%rsi) 1749 movups %xmm7,80(%rsi) 1750 leaq 96(%rsi),%rsi 1751 subq $96,%rdx 1752 jnc .Lxts_dec_grandloop 1753 1754 leal 3(%rax,%rax,1),%eax 1755 movq %r11,%rcx 1756 movl %eax,%r10d 1757 1758.Lxts_dec_short: 1759 addq $96,%rdx 1760 jz .Lxts_dec_done 1761 1762 cmpq $32,%rdx 1763 jb .Lxts_dec_one 1764 je .Lxts_dec_two 1765 1766 cmpq $64,%rdx 1767 jb .Lxts_dec_three 1768 je .Lxts_dec_four 1769 1770 pshufd $19,%xmm14,%xmm9 1771 movdqa %xmm15,%xmm14 1772 paddq %xmm15,%xmm15 1773 movdqu (%rdi),%xmm2 1774 pand %xmm8,%xmm9 1775 movdqu 16(%rdi),%xmm3 1776 pxor %xmm9,%xmm15 1777 1778 movdqu 32(%rdi),%xmm4 1779 pxor %xmm10,%xmm2 1780 movdqu 48(%rdi),%xmm5 1781 pxor %xmm11,%xmm3 1782 movdqu 64(%rdi),%xmm6 1783 leaq 80(%rdi),%rdi 1784 pxor %xmm12,%xmm4 1785 pxor %xmm13,%xmm5 1786 pxor %xmm14,%xmm6 1787 1788 call _aesni_decrypt6 1789 1790 xorps %xmm10,%xmm2 1791 xorps %xmm11,%xmm3 1792 xorps %xmm12,%xmm4 1793 movdqu %xmm2,(%rsi) 1794 xorps %xmm13,%xmm5 1795 movdqu %xmm3,16(%rsi) 1796 xorps %xmm14,%xmm6 1797 movdqu %xmm4,32(%rsi) 1798 pxor %xmm14,%xmm14 1799 movdqu %xmm5,48(%rsi) 1800 pcmpgtd %xmm15,%xmm14 1801 movdqu %xmm6,64(%rsi) 1802 leaq 80(%rsi),%rsi 1803 pshufd $19,%xmm14,%xmm11 1804 andq $15,%r9 1805 jz .Lxts_dec_ret 1806 1807 movdqa %xmm15,%xmm10 1808 paddq %xmm15,%xmm15 1809 pand %xmm8,%xmm11 1810 pxor %xmm15,%xmm11 1811 jmp .Lxts_dec_done2 1812 1813.align 16 1814.Lxts_dec_one: 1815 movups (%rdi),%xmm2 1816 leaq 
16(%rdi),%rdi 1817 xorps %xmm10,%xmm2 1818 movups (%rcx),%xmm0 1819 movups 16(%rcx),%xmm1 1820 leaq 32(%rcx),%rcx 1821 xorps %xmm0,%xmm2 1822.Loop_dec1_12: 1823.byte 102,15,56,222,209 1824 decl %eax 1825 movups (%rcx),%xmm1 1826 leaq 16(%rcx),%rcx 1827 jnz .Loop_dec1_12 1828.byte 102,15,56,223,209 1829 xorps %xmm10,%xmm2 1830 movdqa %xmm11,%xmm10 1831 movups %xmm2,(%rsi) 1832 movdqa %xmm12,%xmm11 1833 leaq 16(%rsi),%rsi 1834 jmp .Lxts_dec_done 1835 1836.align 16 1837.Lxts_dec_two: 1838 movups (%rdi),%xmm2 1839 movups 16(%rdi),%xmm3 1840 leaq 32(%rdi),%rdi 1841 xorps %xmm10,%xmm2 1842 xorps %xmm11,%xmm3 1843 1844 call _aesni_decrypt3 1845 1846 xorps %xmm10,%xmm2 1847 movdqa %xmm12,%xmm10 1848 xorps %xmm11,%xmm3 1849 movdqa %xmm13,%xmm11 1850 movups %xmm2,(%rsi) 1851 movups %xmm3,16(%rsi) 1852 leaq 32(%rsi),%rsi 1853 jmp .Lxts_dec_done 1854 1855.align 16 1856.Lxts_dec_three: 1857 movups (%rdi),%xmm2 1858 movups 16(%rdi),%xmm3 1859 movups 32(%rdi),%xmm4 1860 leaq 48(%rdi),%rdi 1861 xorps %xmm10,%xmm2 1862 xorps %xmm11,%xmm3 1863 xorps %xmm12,%xmm4 1864 1865 call _aesni_decrypt3 1866 1867 xorps %xmm10,%xmm2 1868 movdqa %xmm13,%xmm10 1869 xorps %xmm11,%xmm3 1870 movdqa %xmm15,%xmm11 1871 xorps %xmm12,%xmm4 1872 movups %xmm2,(%rsi) 1873 movups %xmm3,16(%rsi) 1874 movups %xmm4,32(%rsi) 1875 leaq 48(%rsi),%rsi 1876 jmp .Lxts_dec_done 1877 1878.align 16 1879.Lxts_dec_four: 1880 pshufd $19,%xmm14,%xmm9 1881 movdqa %xmm15,%xmm14 1882 paddq %xmm15,%xmm15 1883 movups (%rdi),%xmm2 1884 pand %xmm8,%xmm9 1885 movups 16(%rdi),%xmm3 1886 pxor %xmm9,%xmm15 1887 1888 movups 32(%rdi),%xmm4 1889 xorps %xmm10,%xmm2 1890 movups 48(%rdi),%xmm5 1891 leaq 64(%rdi),%rdi 1892 xorps %xmm11,%xmm3 1893 xorps %xmm12,%xmm4 1894 xorps %xmm13,%xmm5 1895 1896 call _aesni_decrypt4 1897 1898 xorps %xmm10,%xmm2 1899 movdqa %xmm14,%xmm10 1900 xorps %xmm11,%xmm3 1901 movdqa %xmm15,%xmm11 1902 xorps %xmm12,%xmm4 1903 movups %xmm2,(%rsi) 1904 xorps %xmm13,%xmm5 1905 movups %xmm3,16(%rsi) 1906 movups 
%xmm4,32(%rsi) 1907 movups %xmm5,48(%rsi) 1908 leaq 64(%rsi),%rsi 1909 jmp .Lxts_dec_done 1910 1911.align 16 1912.Lxts_dec_done: 1913 andq $15,%r9 1914 jz .Lxts_dec_ret 1915.Lxts_dec_done2: 1916 movq %r9,%rdx 1917 movq %r11,%rcx 1918 movl %r10d,%eax 1919 1920 movups (%rdi),%xmm2 1921 xorps %xmm11,%xmm2 1922 movups (%rcx),%xmm0 1923 movups 16(%rcx),%xmm1 1924 leaq 32(%rcx),%rcx 1925 xorps %xmm0,%xmm2 1926.Loop_dec1_13: 1927.byte 102,15,56,222,209 1928 decl %eax 1929 movups (%rcx),%xmm1 1930 leaq 16(%rcx),%rcx 1931 jnz .Loop_dec1_13 1932.byte 102,15,56,223,209 1933 xorps %xmm11,%xmm2 1934 movups %xmm2,(%rsi) 1935 1936.Lxts_dec_steal: 1937 movzbl 16(%rdi),%eax 1938 movzbl (%rsi),%ecx 1939 leaq 1(%rdi),%rdi 1940 movb %al,(%rsi) 1941 movb %cl,16(%rsi) 1942 leaq 1(%rsi),%rsi 1943 subq $1,%rdx 1944 jnz .Lxts_dec_steal 1945 1946 subq %r9,%rsi 1947 movq %r11,%rcx 1948 movl %r10d,%eax 1949 1950 movups (%rsi),%xmm2 1951 xorps %xmm10,%xmm2 1952 movups (%rcx),%xmm0 1953 movups 16(%rcx),%xmm1 1954 leaq 32(%rcx),%rcx 1955 xorps %xmm0,%xmm2 1956.Loop_dec1_14: 1957.byte 102,15,56,222,209 1958 decl %eax 1959 movups (%rcx),%xmm1 1960 leaq 16(%rcx),%rcx 1961 jnz .Loop_dec1_14 1962.byte 102,15,56,223,209 1963 xorps %xmm10,%xmm2 1964 movups %xmm2,(%rsi) 1965 1966.Lxts_dec_ret: 1967 leaq 104(%rsp),%rsp 1968.Lxts_dec_epilogue: 1969 .byte 0xf3,0xc3 1970.size aesni_xts_decrypt,.-aesni_xts_decrypt 1971.globl aesni_cbc_encrypt 1972.type aesni_cbc_encrypt,@function 1973.align 16 1974aesni_cbc_encrypt: 1975 testq %rdx,%rdx 1976 jz .Lcbc_ret 1977 1978 movl 240(%rcx),%r10d 1979 movq %rcx,%r11 1980 testl %r9d,%r9d 1981 jz .Lcbc_decrypt 1982 1983 movups (%r8),%xmm2 1984 movl %r10d,%eax 1985 cmpq $16,%rdx 1986 jb .Lcbc_enc_tail 1987 subq $16,%rdx 1988 jmp .Lcbc_enc_loop 1989.align 16 1990.Lcbc_enc_loop: 1991 movups (%rdi),%xmm3 1992 leaq 16(%rdi),%rdi 1993 1994 movups (%rcx),%xmm0 1995 movups 16(%rcx),%xmm1 1996 xorps %xmm0,%xmm3 1997 leaq 32(%rcx),%rcx 1998 xorps %xmm3,%xmm2 1999.Loop_enc1_15: 
2000.byte 102,15,56,220,209 2001 decl %eax 2002 movups (%rcx),%xmm1 2003 leaq 16(%rcx),%rcx 2004 jnz .Loop_enc1_15 2005.byte 102,15,56,221,209 2006 movl %r10d,%eax 2007 movq %r11,%rcx 2008 movups %xmm2,0(%rsi) 2009 leaq 16(%rsi),%rsi 2010 subq $16,%rdx 2011 jnc .Lcbc_enc_loop 2012 addq $16,%rdx 2013 jnz .Lcbc_enc_tail 2014 movups %xmm2,(%r8) 2015 jmp .Lcbc_ret 2016 2017.Lcbc_enc_tail: 2018 movq %rdx,%rcx 2019 xchgq %rdi,%rsi 2020.long 0x9066A4F3 2021 movl $16,%ecx 2022 subq %rdx,%rcx 2023 xorl %eax,%eax 2024.long 0x9066AAF3 2025 leaq -16(%rdi),%rdi 2026 movl %r10d,%eax 2027 movq %rdi,%rsi 2028 movq %r11,%rcx 2029 xorq %rdx,%rdx 2030 jmp .Lcbc_enc_loop 2031 2032.align 16 2033.Lcbc_decrypt: 2034 movups (%r8),%xmm9 2035 movl %r10d,%eax 2036 cmpq $112,%rdx 2037 jbe .Lcbc_dec_tail 2038 shrl $1,%r10d 2039 subq $112,%rdx 2040 movl %r10d,%eax 2041 movaps %xmm9,-24(%rsp) 2042 jmp .Lcbc_dec_loop8_enter 2043.align 16 2044.Lcbc_dec_loop8: 2045 movaps %xmm0,-24(%rsp) 2046 movups %xmm9,(%rsi) 2047 leaq 16(%rsi),%rsi 2048.Lcbc_dec_loop8_enter: 2049 movups (%rcx),%xmm0 2050 movups (%rdi),%xmm2 2051 movups 16(%rdi),%xmm3 2052 movups 16(%rcx),%xmm1 2053 2054 leaq 32(%rcx),%rcx 2055 movdqu 32(%rdi),%xmm4 2056 xorps %xmm0,%xmm2 2057 movdqu 48(%rdi),%xmm5 2058 xorps %xmm0,%xmm3 2059 movdqu 64(%rdi),%xmm6 2060.byte 102,15,56,222,209 2061 pxor %xmm0,%xmm4 2062 movdqu 80(%rdi),%xmm7 2063.byte 102,15,56,222,217 2064 pxor %xmm0,%xmm5 2065 movdqu 96(%rdi),%xmm8 2066.byte 102,15,56,222,225 2067 pxor %xmm0,%xmm6 2068 movdqu 112(%rdi),%xmm9 2069.byte 102,15,56,222,233 2070 pxor %xmm0,%xmm7 2071 decl %eax 2072.byte 102,15,56,222,241 2073 pxor %xmm0,%xmm8 2074.byte 102,15,56,222,249 2075 pxor %xmm0,%xmm9 2076 movups (%rcx),%xmm0 2077.byte 102,68,15,56,222,193 2078.byte 102,68,15,56,222,201 2079 movups 16(%rcx),%xmm1 2080 2081 call .Ldec_loop8_enter 2082 2083 movups (%rdi),%xmm1 2084 movups 16(%rdi),%xmm0 2085 xorps -24(%rsp),%xmm2 2086 xorps %xmm1,%xmm3 2087 movups 32(%rdi),%xmm1 2088 xorps 
%xmm0,%xmm4 2089 movups 48(%rdi),%xmm0 2090 xorps %xmm1,%xmm5 2091 movups 64(%rdi),%xmm1 2092 xorps %xmm0,%xmm6 2093 movups 80(%rdi),%xmm0 2094 xorps %xmm1,%xmm7 2095 movups 96(%rdi),%xmm1 2096 xorps %xmm0,%xmm8 2097 movups 112(%rdi),%xmm0 2098 xorps %xmm1,%xmm9 2099 movups %xmm2,(%rsi) 2100 movups %xmm3,16(%rsi) 2101 movups %xmm4,32(%rsi) 2102 movups %xmm5,48(%rsi) 2103 movl %r10d,%eax 2104 movups %xmm6,64(%rsi) 2105 movq %r11,%rcx 2106 movups %xmm7,80(%rsi) 2107 leaq 128(%rdi),%rdi 2108 movups %xmm8,96(%rsi) 2109 leaq 112(%rsi),%rsi 2110 subq $128,%rdx 2111 ja .Lcbc_dec_loop8 2112 2113 movaps %xmm9,%xmm2 2114 movaps %xmm0,%xmm9 2115 addq $112,%rdx 2116 jle .Lcbc_dec_tail_collected 2117 movups %xmm2,(%rsi) 2118 leal 1(%r10,%r10,1),%eax 2119 leaq 16(%rsi),%rsi 2120.Lcbc_dec_tail: 2121 movups (%rdi),%xmm2 2122 movaps %xmm2,%xmm8 2123 cmpq $16,%rdx 2124 jbe .Lcbc_dec_one 2125 2126 movups 16(%rdi),%xmm3 2127 movaps %xmm3,%xmm7 2128 cmpq $32,%rdx 2129 jbe .Lcbc_dec_two 2130 2131 movups 32(%rdi),%xmm4 2132 movaps %xmm4,%xmm6 2133 cmpq $48,%rdx 2134 jbe .Lcbc_dec_three 2135 2136 movups 48(%rdi),%xmm5 2137 cmpq $64,%rdx 2138 jbe .Lcbc_dec_four 2139 2140 movups 64(%rdi),%xmm6 2141 cmpq $80,%rdx 2142 jbe .Lcbc_dec_five 2143 2144 movups 80(%rdi),%xmm7 2145 cmpq $96,%rdx 2146 jbe .Lcbc_dec_six 2147 2148 movups 96(%rdi),%xmm8 2149 movaps %xmm9,-24(%rsp) 2150 call _aesni_decrypt8 2151 movups (%rdi),%xmm1 2152 movups 16(%rdi),%xmm0 2153 xorps -24(%rsp),%xmm2 2154 xorps %xmm1,%xmm3 2155 movups 32(%rdi),%xmm1 2156 xorps %xmm0,%xmm4 2157 movups 48(%rdi),%xmm0 2158 xorps %xmm1,%xmm5 2159 movups 64(%rdi),%xmm1 2160 xorps %xmm0,%xmm6 2161 movups 80(%rdi),%xmm0 2162 xorps %xmm1,%xmm7 2163 movups 96(%rdi),%xmm9 2164 xorps %xmm0,%xmm8 2165 movups %xmm2,(%rsi) 2166 movups %xmm3,16(%rsi) 2167 movups %xmm4,32(%rsi) 2168 movups %xmm5,48(%rsi) 2169 movups %xmm6,64(%rsi) 2170 movups %xmm7,80(%rsi) 2171 leaq 96(%rsi),%rsi 2172 movaps %xmm8,%xmm2 2173 subq $112,%rdx 2174 jmp 
.Lcbc_dec_tail_collected 2175.align 16 2176.Lcbc_dec_one: 2177 movups (%rcx),%xmm0 2178 movups 16(%rcx),%xmm1 2179 leaq 32(%rcx),%rcx 2180 xorps %xmm0,%xmm2 2181.Loop_dec1_16: 2182.byte 102,15,56,222,209 2183 decl %eax 2184 movups (%rcx),%xmm1 2185 leaq 16(%rcx),%rcx 2186 jnz .Loop_dec1_16 2187.byte 102,15,56,223,209 2188 xorps %xmm9,%xmm2 2189 movaps %xmm8,%xmm9 2190 subq $16,%rdx 2191 jmp .Lcbc_dec_tail_collected 2192.align 16 2193.Lcbc_dec_two: 2194 xorps %xmm4,%xmm4 2195 call _aesni_decrypt3 2196 xorps %xmm9,%xmm2 2197 xorps %xmm8,%xmm3 2198 movups %xmm2,(%rsi) 2199 movaps %xmm7,%xmm9 2200 movaps %xmm3,%xmm2 2201 leaq 16(%rsi),%rsi 2202 subq $32,%rdx 2203 jmp .Lcbc_dec_tail_collected 2204.align 16 2205.Lcbc_dec_three: 2206 call _aesni_decrypt3 2207 xorps %xmm9,%xmm2 2208 xorps %xmm8,%xmm3 2209 movups %xmm2,(%rsi) 2210 xorps %xmm7,%xmm4 2211 movups %xmm3,16(%rsi) 2212 movaps %xmm6,%xmm9 2213 movaps %xmm4,%xmm2 2214 leaq 32(%rsi),%rsi 2215 subq $48,%rdx 2216 jmp .Lcbc_dec_tail_collected 2217.align 16 2218.Lcbc_dec_four: 2219 call _aesni_decrypt4 2220 xorps %xmm9,%xmm2 2221 movups 48(%rdi),%xmm9 2222 xorps %xmm8,%xmm3 2223 movups %xmm2,(%rsi) 2224 xorps %xmm7,%xmm4 2225 movups %xmm3,16(%rsi) 2226 xorps %xmm6,%xmm5 2227 movups %xmm4,32(%rsi) 2228 movaps %xmm5,%xmm2 2229 leaq 48(%rsi),%rsi 2230 subq $64,%rdx 2231 jmp .Lcbc_dec_tail_collected 2232.align 16 2233.Lcbc_dec_five: 2234 xorps %xmm7,%xmm7 2235 call _aesni_decrypt6 2236 movups 16(%rdi),%xmm1 2237 movups 32(%rdi),%xmm0 2238 xorps %xmm9,%xmm2 2239 xorps %xmm8,%xmm3 2240 xorps %xmm1,%xmm4 2241 movups 48(%rdi),%xmm1 2242 xorps %xmm0,%xmm5 2243 movups 64(%rdi),%xmm9 2244 xorps %xmm1,%xmm6 2245 movups %xmm2,(%rsi) 2246 movups %xmm3,16(%rsi) 2247 movups %xmm4,32(%rsi) 2248 movups %xmm5,48(%rsi) 2249 leaq 64(%rsi),%rsi 2250 movaps %xmm6,%xmm2 2251 subq $80,%rdx 2252 jmp .Lcbc_dec_tail_collected 2253.align 16 2254.Lcbc_dec_six: 2255 call _aesni_decrypt6 2256 movups 16(%rdi),%xmm1 2257 movups 32(%rdi),%xmm0 2258 xorps 
%xmm9,%xmm2 2259 xorps %xmm8,%xmm3 2260 xorps %xmm1,%xmm4 2261 movups 48(%rdi),%xmm1 2262 xorps %xmm0,%xmm5 2263 movups 64(%rdi),%xmm0 2264 xorps %xmm1,%xmm6 2265 movups 80(%rdi),%xmm9 2266 xorps %xmm0,%xmm7 2267 movups %xmm2,(%rsi) 2268 movups %xmm3,16(%rsi) 2269 movups %xmm4,32(%rsi) 2270 movups %xmm5,48(%rsi) 2271 movups %xmm6,64(%rsi) 2272 leaq 80(%rsi),%rsi 2273 movaps %xmm7,%xmm2 2274 subq $96,%rdx 2275 jmp .Lcbc_dec_tail_collected 2276.align 16 2277.Lcbc_dec_tail_collected: 2278 andq $15,%rdx 2279 movups %xmm9,(%r8) 2280 jnz .Lcbc_dec_tail_partial 2281 movups %xmm2,(%rsi) 2282 jmp .Lcbc_dec_ret 2283.align 16 2284.Lcbc_dec_tail_partial: 2285 movaps %xmm2,-24(%rsp) 2286 movq $16,%rcx 2287 movq %rsi,%rdi 2288 subq %rdx,%rcx 2289 leaq -24(%rsp),%rsi 2290.long 0x9066A4F3 2291 2292.Lcbc_dec_ret: 2293.Lcbc_ret: 2294 .byte 0xf3,0xc3 2295.size aesni_cbc_encrypt,.-aesni_cbc_encrypt 2296.globl aesni_set_decrypt_key 2297.type aesni_set_decrypt_key,@function 2298.align 16 2299aesni_set_decrypt_key: 2300.byte 0x48,0x83,0xEC,0x08 2301 call __aesni_set_encrypt_key 2302 shll $4,%esi 2303 testl %eax,%eax 2304 jnz .Ldec_key_ret 2305 leaq 16(%rdx,%rsi,1),%rdi 2306 2307 movups (%rdx),%xmm0 2308 movups (%rdi),%xmm1 2309 movups %xmm0,(%rdi) 2310 movups %xmm1,(%rdx) 2311 leaq 16(%rdx),%rdx 2312 leaq -16(%rdi),%rdi 2313 2314.Ldec_key_inverse: 2315 movups (%rdx),%xmm0 2316 movups (%rdi),%xmm1 2317.byte 102,15,56,219,192 2318.byte 102,15,56,219,201 2319 leaq 16(%rdx),%rdx 2320 leaq -16(%rdi),%rdi 2321 movups %xmm0,16(%rdi) 2322 movups %xmm1,-16(%rdx) 2323 cmpq %rdx,%rdi 2324 ja .Ldec_key_inverse 2325 2326 movups (%rdx),%xmm0 2327.byte 102,15,56,219,192 2328 movups %xmm0,(%rdi) 2329.Ldec_key_ret: 2330 addq $8,%rsp 2331 .byte 0xf3,0xc3 2332.LSEH_end_set_decrypt_key: 2333.size aesni_set_decrypt_key,.-aesni_set_decrypt_key 2334.globl aesni_set_encrypt_key 2335.type aesni_set_encrypt_key,@function 2336.align 16 2337aesni_set_encrypt_key: 2338__aesni_set_encrypt_key: 2339.byte 
0x48,0x83,0xEC,0x08 2340 movq $-1,%rax 2341 testq %rdi,%rdi 2342 jz .Lenc_key_ret 2343 testq %rdx,%rdx 2344 jz .Lenc_key_ret 2345 2346 movups (%rdi),%xmm0 2347 xorps %xmm4,%xmm4 2348 leaq 16(%rdx),%rax 2349 cmpl $256,%esi 2350 je .L14rounds 2351 cmpl $192,%esi 2352 je .L12rounds 2353 cmpl $128,%esi 2354 jne .Lbad_keybits 2355 2356.L10rounds: 2357 movl $9,%esi 2358 movups %xmm0,(%rdx) 2359.byte 102,15,58,223,200,1 2360 call .Lkey_expansion_128_cold 2361.byte 102,15,58,223,200,2 2362 call .Lkey_expansion_128 2363.byte 102,15,58,223,200,4 2364 call .Lkey_expansion_128 2365.byte 102,15,58,223,200,8 2366 call .Lkey_expansion_128 2367.byte 102,15,58,223,200,16 2368 call .Lkey_expansion_128 2369.byte 102,15,58,223,200,32 2370 call .Lkey_expansion_128 2371.byte 102,15,58,223,200,64 2372 call .Lkey_expansion_128 2373.byte 102,15,58,223,200,128 2374 call .Lkey_expansion_128 2375.byte 102,15,58,223,200,27 2376 call .Lkey_expansion_128 2377.byte 102,15,58,223,200,54 2378 call .Lkey_expansion_128 2379 movups %xmm0,(%rax) 2380 movl %esi,80(%rax) 2381 xorl %eax,%eax 2382 jmp .Lenc_key_ret 2383 2384.align 16 2385.L12rounds: 2386 movq 16(%rdi),%xmm2 2387 movl $11,%esi 2388 movups %xmm0,(%rdx) 2389.byte 102,15,58,223,202,1 2390 call .Lkey_expansion_192a_cold 2391.byte 102,15,58,223,202,2 2392 call .Lkey_expansion_192b 2393.byte 102,15,58,223,202,4 2394 call .Lkey_expansion_192a 2395.byte 102,15,58,223,202,8 2396 call .Lkey_expansion_192b 2397.byte 102,15,58,223,202,16 2398 call .Lkey_expansion_192a 2399.byte 102,15,58,223,202,32 2400 call .Lkey_expansion_192b 2401.byte 102,15,58,223,202,64 2402 call .Lkey_expansion_192a 2403.byte 102,15,58,223,202,128 2404 call .Lkey_expansion_192b 2405 movups %xmm0,(%rax) 2406 movl %esi,48(%rax) 2407 xorq %rax,%rax 2408 jmp .Lenc_key_ret 2409 2410.align 16 2411.L14rounds: 2412 movups 16(%rdi),%xmm2 2413 movl $13,%esi 2414 leaq 16(%rax),%rax 2415 movups %xmm0,(%rdx) 2416 movups %xmm2,16(%rdx) 2417.byte 102,15,58,223,202,1 2418 call 
.Lkey_expansion_256a_cold 2419.byte 102,15,58,223,200,1 2420 call .Lkey_expansion_256b 2421.byte 102,15,58,223,202,2 2422 call .Lkey_expansion_256a 2423.byte 102,15,58,223,200,2 2424 call .Lkey_expansion_256b 2425.byte 102,15,58,223,202,4 2426 call .Lkey_expansion_256a 2427.byte 102,15,58,223,200,4 2428 call .Lkey_expansion_256b 2429.byte 102,15,58,223,202,8 2430 call .Lkey_expansion_256a 2431.byte 102,15,58,223,200,8 2432 call .Lkey_expansion_256b 2433.byte 102,15,58,223,202,16 2434 call .Lkey_expansion_256a 2435.byte 102,15,58,223,200,16 2436 call .Lkey_expansion_256b 2437.byte 102,15,58,223,202,32 2438 call .Lkey_expansion_256a 2439.byte 102,15,58,223,200,32 2440 call .Lkey_expansion_256b 2441.byte 102,15,58,223,202,64 2442 call .Lkey_expansion_256a 2443 movups %xmm0,(%rax) 2444 movl %esi,16(%rax) 2445 xorq %rax,%rax 2446 jmp .Lenc_key_ret 2447 2448.align 16 2449.Lbad_keybits: 2450 movq $-2,%rax 2451.Lenc_key_ret: 2452 addq $8,%rsp 2453 .byte 0xf3,0xc3 2454.LSEH_end_set_encrypt_key: 2455 2456.align 16 2457.Lkey_expansion_128: 2458 movups %xmm0,(%rax) 2459 leaq 16(%rax),%rax 2460.Lkey_expansion_128_cold: 2461 shufps $16,%xmm0,%xmm4 2462 xorps %xmm4,%xmm0 2463 shufps $140,%xmm0,%xmm4 2464 xorps %xmm4,%xmm0 2465 shufps $255,%xmm1,%xmm1 2466 xorps %xmm1,%xmm0 2467 .byte 0xf3,0xc3 2468 2469.align 16 2470.Lkey_expansion_192a: 2471 movups %xmm0,(%rax) 2472 leaq 16(%rax),%rax 2473.Lkey_expansion_192a_cold: 2474 movaps %xmm2,%xmm5 2475.Lkey_expansion_192b_warm: 2476 shufps $16,%xmm0,%xmm4 2477 movdqa %xmm2,%xmm3 2478 xorps %xmm4,%xmm0 2479 shufps $140,%xmm0,%xmm4 2480 pslldq $4,%xmm3 2481 xorps %xmm4,%xmm0 2482 pshufd $85,%xmm1,%xmm1 2483 pxor %xmm3,%xmm2 2484 pxor %xmm1,%xmm0 2485 pshufd $255,%xmm0,%xmm3 2486 pxor %xmm3,%xmm2 2487 .byte 0xf3,0xc3 2488 2489.align 16 2490.Lkey_expansion_192b: 2491 movaps %xmm0,%xmm3 2492 shufps $68,%xmm0,%xmm5 2493 movups %xmm5,(%rax) 2494 shufps $78,%xmm2,%xmm3 2495 movups %xmm3,16(%rax) 2496 leaq 32(%rax),%rax 2497 jmp 
.Lkey_expansion_192b_warm 2498 2499.align 16 2500.Lkey_expansion_256a: 2501 movups %xmm2,(%rax) 2502 leaq 16(%rax),%rax 2503.Lkey_expansion_256a_cold: 2504 shufps $16,%xmm0,%xmm4 2505 xorps %xmm4,%xmm0 2506 shufps $140,%xmm0,%xmm4 2507 xorps %xmm4,%xmm0 2508 shufps $255,%xmm1,%xmm1 2509 xorps %xmm1,%xmm0 2510 .byte 0xf3,0xc3 2511 2512.align 16 2513.Lkey_expansion_256b: 2514 movups %xmm0,(%rax) 2515 leaq 16(%rax),%rax 2516 2517 shufps $16,%xmm2,%xmm4 2518 xorps %xmm4,%xmm2 2519 shufps $140,%xmm2,%xmm4 2520 xorps %xmm4,%xmm2 2521 shufps $170,%xmm1,%xmm1 2522 xorps %xmm1,%xmm2 2523 .byte 0xf3,0xc3 2524.size aesni_set_encrypt_key,.-aesni_set_encrypt_key 2525.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key 2526.align 64 2527.Lbswap_mask: 2528.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 2529.Lincrement32: 2530.long 6,6,6,0 2531.Lincrement64: 2532.long 1,0,0,0 2533.Lxts_magic: 2534.long 0x87,0,1,0 2535 2536.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 2537.align 64 2538