/*
 * Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <openssl/opensslconf.h>

#include <stdio.h>
#include <string.h>

#include <openssl/evp.h>
#include <openssl/objects.h>
#include <openssl/aes.h>
#include <openssl/sha.h>
#include <openssl/rand.h>
#include "modes_local.h"
#include "internal/constant_time.h"
#include "crypto/evp.h"

typedef struct {
    AES_KEY ks;
    SHA256_CTX head, tail, md;
    size_t payload_length;      /* AAD length in decrypt case */
    union {
        unsigned int tls_ver;
        unsigned char tls_aad[16]; /* 13 used */
    } aux;
} EVP_AES_HMAC_SHA256;

#define NO_PAYLOAD_LENGTH ((size_t)-1)

#if defined(AESNI_ASM) && ( \
        defined(__x86_64) || defined(__x86_64__) || \
        defined(_M_AMD64) || defined(_M_X64) )

extern unsigned int OPENSSL_ia32cap_P[];
# define AESNI_CAPABLE (1<<(57-32))

int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
                          AES_KEY *key);
int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
                          AES_KEY *key);

void aesni_cbc_encrypt(const unsigned char *in,
                       unsigned char *out,
                       size_t length,
                       const AES_KEY *key, unsigned char *ivec, int enc);

int aesni_cbc_sha256_enc(const void *inp, void *out, size_t blocks,
                         const AES_KEY *key, unsigned char iv[16],
                         SHA256_CTX *ctx, const void *in0);

# define data(ctx) ((EVP_AES_HMAC_SHA256 *)EVP_CIPHER_CTX_get_cipher_data(ctx))

static int aesni_cbc_hmac_sha256_init_key(EVP_CIPHER_CTX *ctx,
                                          const unsigned char *inkey,
                                          const unsigned char *iv, int enc)
{
    EVP_AES_HMAC_SHA256 *key = data(ctx);
    int ret;

    if (enc)
        ret = aesni_set_encrypt_key(inkey,
                                    EVP_CIPHER_CTX_key_length(ctx) * 8,
                                    &key->ks);
    else
        ret = aesni_set_decrypt_key(inkey,
                                    EVP_CIPHER_CTX_key_length(ctx) * 8,
                                    &key->ks);

    SHA256_Init(&key->head);    /* handy when benchmarking */
    key->tail = key->head;
    key->md = key->head;

    key->payload_length = NO_PAYLOAD_LENGTH;

    return ret < 0 ? 0 : 1;
}
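/*
 * STITCHED_CALL selects the "stitched" code path, in which encryption
 * and hashing of the bulk of the payload are interleaved in a single
 * assembly routine (aesni_cbc_sha256_enc) for better instruction-level
 * parallelism.  Undefining it falls back to sequential AES-CBC followed
 * by SHA-256.
 */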
# define STITCHED_CALL

# if !defined(STITCHED_CALL)
#  define aes_off 0
# endif

void sha256_block_data_order(void *c, const void *p, size_t len);

static void sha256_update(SHA256_CTX *c, const void *data, size_t len)
{
    const unsigned char *ptr = data;
    size_t res;

    if ((res = c->num)) {
        res = SHA256_CBLOCK - res;
        if (len < res)
            res = len;
        SHA256_Update(c, ptr, res);
        ptr += res;
        len -= res;
    }

    res = len % SHA256_CBLOCK;
    len -= res;

    if (len) {
        sha256_block_data_order(c, ptr, len / SHA256_CBLOCK);

        ptr += len;
        c->Nh += len >> 29;
        c->Nl += len <<= 3;
        if (c->Nl < (unsigned int)len)
            c->Nh++;
    }

    if (res)
        SHA256_Update(c, ptr, res);
}

# ifdef SHA256_Update
#  undef SHA256_Update
# endif
# define SHA256_Update sha256_update

# if !defined(OPENSSL_NO_MULTIBLOCK)

typedef struct {
    unsigned int A[8], B[8], C[8], D[8], E[8], F[8], G[8], H[8];
} SHA256_MB_CTX;
typedef struct {
    const unsigned char *ptr;
    int blocks;
} HASH_DESC;

void sha256_multi_block(SHA256_MB_CTX *, const HASH_DESC *, int);

typedef struct {
    const unsigned char *inp;
    unsigned char *out;
    int blocks;
    u64 iv[2];
} CIPH_DESC;

void aesni_multi_cbc_encrypt(CIPH_DESC *, void *, int);
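/*
 * Encrypt up to 4 (AVX) or 8 (AVX2) TLS 1.1+ records in parallel.  The
 * input is split into x4 = 4*n4x fragments, each of which becomes its
 * own record: 5-byte header | 16-byte explicit IV | payload | HMAC | pad.
 * Hashing and encryption of all fragments proceed in lock-step through
 * the multi-buffer SHA-256 and AES-CBC assembly routines.
 */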
static size_t tls1_1_multi_block_encrypt(EVP_AES_HMAC_SHA256 *key,
                                         unsigned char *out,
                                         const unsigned char *inp,
                                         size_t inp_len, int n4x)
{                               /* n4x is 1 or 2 */
    HASH_DESC hash_d[8], edges[8];
    CIPH_DESC ciph_d[8];
    unsigned char storage[sizeof(SHA256_MB_CTX) + 32];
    union {
        u64 q[16];
        u32 d[32];
        u8 c[128];
    } blocks[8];
    SHA256_MB_CTX *ctx;
    unsigned int frag, last, packlen, i, x4 = 4 * n4x, minblocks, processed =
        0;
    size_t ret = 0;
    u8 *IVs;
#  if defined(BSWAP8)
    u64 seqnum;
#  endif

    /* ask for IVs in bulk */
    if (RAND_bytes((IVs = blocks[0].c), 16 * x4) <= 0)
        return 0;

    /* align */
    ctx = (SHA256_MB_CTX *) (storage + 32 - ((size_t)storage % 32));

    frag = (unsigned int)inp_len >> (1 + n4x);
    last = (unsigned int)inp_len + frag - (frag << (1 + n4x));
    if (last > frag && ((last + 13 + 9) % 64) < (x4 - 1)) {
        frag++;
        last -= x4 - 1;
    }

    packlen = 5 + 16 + ((frag + 32 + 16) & -16);

    /* populate descriptors with pointers and IVs */
    hash_d[0].ptr = inp;
    ciph_d[0].inp = inp;
    /* 5+16 is place for header and explicit IV */
    ciph_d[0].out = out + 5 + 16;
    memcpy(ciph_d[0].out - 16, IVs, 16);
    memcpy(ciph_d[0].iv, IVs, 16);
    IVs += 16;

    for (i = 1; i < x4; i++) {
        ciph_d[i].inp = hash_d[i].ptr = hash_d[i - 1].ptr + frag;
        ciph_d[i].out = ciph_d[i - 1].out + packlen;
        memcpy(ciph_d[i].out - 16, IVs, 16);
        memcpy(ciph_d[i].iv, IVs, 16);
        IVs += 16;
    }
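    /*
     * Each fragment is authenticated as an independent record, so every
     * lane gets its own 13-byte TLS header: the 8-byte sequence number
     * is incremented per fragment (with a portable carry loop when a
     * 64-bit byte swap is not available), followed by the original
     * type and version bytes and the fragment length.
     */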
#  if defined(BSWAP8)
    memcpy(blocks[0].c, key->md.data, 8);
    seqnum = BSWAP8(blocks[0].q[0]);
#  endif
    for (i = 0; i < x4; i++) {
        unsigned int len = (i == (x4 - 1) ? last : frag);
#  if !defined(BSWAP8)
        unsigned int carry, j;
#  endif

        ctx->A[i] = key->md.h[0];
        ctx->B[i] = key->md.h[1];
        ctx->C[i] = key->md.h[2];
        ctx->D[i] = key->md.h[3];
        ctx->E[i] = key->md.h[4];
        ctx->F[i] = key->md.h[5];
        ctx->G[i] = key->md.h[6];
        ctx->H[i] = key->md.h[7];

        /* fix seqnum */
#  if defined(BSWAP8)
        blocks[i].q[0] = BSWAP8(seqnum + i);
#  else
        for (carry = i, j = 8; j--;) {
            blocks[i].c[j] = ((u8 *)key->md.data)[j] + carry;
            carry = (blocks[i].c[j] - carry) >> (sizeof(carry) * 8 - 1);
        }
#  endif
        blocks[i].c[8] = ((u8 *)key->md.data)[8];
        blocks[i].c[9] = ((u8 *)key->md.data)[9];
        blocks[i].c[10] = ((u8 *)key->md.data)[10];
        /* fix length */
        blocks[i].c[11] = (u8)(len >> 8);
        blocks[i].c[12] = (u8)(len);

        memcpy(blocks[i].c + 13, hash_d[i].ptr, 64 - 13);
        hash_d[i].ptr += 64 - 13;
        hash_d[i].blocks = (len - (64 - 13)) / 64;

        edges[i].ptr = blocks[i].c;
        edges[i].blocks = 1;
    }

    /* hash 13-byte headers and first 64-13 bytes of inputs */
    sha256_multi_block(ctx, edges, n4x);
    /* hash bulk inputs */
#  define MAXCHUNKSIZE 2048
#  if MAXCHUNKSIZE%64
#   error "MAXCHUNKSIZE is not divisible by 64"
#  elif MAXCHUNKSIZE
    /*
     * goal is to minimize pressure on L1 cache by moving in shorter steps,
     * so that hashed data is still in the cache by the time we encrypt it
     */
    minblocks = ((frag <= last ? frag : last) - (64 - 13)) / 64;
    if (minblocks > MAXCHUNKSIZE / 64) {
        for (i = 0; i < x4; i++) {
            edges[i].ptr = hash_d[i].ptr;
            edges[i].blocks = MAXCHUNKSIZE / 64;
            ciph_d[i].blocks = MAXCHUNKSIZE / 16;
        }
        do {
            sha256_multi_block(ctx, edges, n4x);
            aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x);

            for (i = 0; i < x4; i++) {
                edges[i].ptr = hash_d[i].ptr += MAXCHUNKSIZE;
                hash_d[i].blocks -= MAXCHUNKSIZE / 64;
                edges[i].blocks = MAXCHUNKSIZE / 64;
                ciph_d[i].inp += MAXCHUNKSIZE;
                ciph_d[i].out += MAXCHUNKSIZE;
                ciph_d[i].blocks = MAXCHUNKSIZE / 16;
                memcpy(ciph_d[i].iv, ciph_d[i].out - 16, 16);
            }
            processed += MAXCHUNKSIZE;
            minblocks -= MAXCHUNKSIZE / 64;
        } while (minblocks > MAXCHUNKSIZE / 64);
    }
#  endif
#  undef MAXCHUNKSIZE
    sha256_multi_block(ctx, hash_d, n4x);

    memset(blocks, 0, sizeof(blocks));
    for (i = 0; i < x4; i++) {
        unsigned int len = (i == (x4 - 1) ? last : frag),
            off = hash_d[i].blocks * 64;
        const unsigned char *ptr = hash_d[i].ptr + off;

        off = (len - processed) - (64 - 13) - off; /* remainder actually */
        memcpy(blocks[i].c, ptr, off);
        blocks[i].c[off] = 0x80;
        len += 64 + 13;         /* 64 is HMAC header */
        len *= 8;               /* convert to bits */
        if (off < (64 - 8)) {
#  ifdef BSWAP4
            blocks[i].d[15] = BSWAP4(len);
#  else
            PUTU32(blocks[i].c + 60, len);
#  endif
            edges[i].blocks = 1;
        } else {
#  ifdef BSWAP4
            blocks[i].d[31] = BSWAP4(len);
#  else
            PUTU32(blocks[i].c + 124, len);
#  endif
            edges[i].blocks = 2;
        }
        edges[i].ptr = blocks[i].c;
    }

    /* hash input tails and finalize */
    sha256_multi_block(ctx, edges, n4x);

    memset(blocks, 0, sizeof(blocks));
    for (i = 0; i < x4; i++) {
#  ifdef BSWAP4
        blocks[i].d[0] = BSWAP4(ctx->A[i]);
        ctx->A[i] = key->tail.h[0];
        blocks[i].d[1] = BSWAP4(ctx->B[i]);
        ctx->B[i] = key->tail.h[1];
        blocks[i].d[2] = BSWAP4(ctx->C[i]);
        ctx->C[i] = key->tail.h[2];
        blocks[i].d[3] = BSWAP4(ctx->D[i]);
        ctx->D[i] = key->tail.h[3];
        blocks[i].d[4] = BSWAP4(ctx->E[i]);
        ctx->E[i] = key->tail.h[4];
        blocks[i].d[5] = BSWAP4(ctx->F[i]);
        ctx->F[i] = key->tail.h[5];
        blocks[i].d[6] = BSWAP4(ctx->G[i]);
        ctx->G[i] = key->tail.h[6];
        blocks[i].d[7] = BSWAP4(ctx->H[i]);
        ctx->H[i] = key->tail.h[7];
        blocks[i].c[32] = 0x80;
        blocks[i].d[15] = BSWAP4((64 + 32) * 8);
#  else
        PUTU32(blocks[i].c + 0, ctx->A[i]);
        ctx->A[i] = key->tail.h[0];
        PUTU32(blocks[i].c + 4, ctx->B[i]);
        ctx->B[i] = key->tail.h[1];
        PUTU32(blocks[i].c + 8, ctx->C[i]);
        ctx->C[i] = key->tail.h[2];
        PUTU32(blocks[i].c + 12, ctx->D[i]);
        ctx->D[i] = key->tail.h[3];
        PUTU32(blocks[i].c + 16, ctx->E[i]);
        ctx->E[i] = key->tail.h[4];
        PUTU32(blocks[i].c + 20, ctx->F[i]);
        ctx->F[i] = key->tail.h[5];
        PUTU32(blocks[i].c + 24, ctx->G[i]);
        ctx->G[i] = key->tail.h[6];
        PUTU32(blocks[i].c + 28, ctx->H[i]);
        ctx->H[i] = key->tail.h[7];
        blocks[i].c[32] = 0x80;
        PUTU32(blocks[i].c + 60, (64 + 32) * 8);
#  endif
        edges[i].ptr = blocks[i].c;
        edges[i].blocks = 1;
    }

    /* finalize MACs */
    sha256_multi_block(ctx, edges, n4x);
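    /*
     * The loop above captured each lane's inner hash into a fresh block
     * and reloaded the lanes with the precomputed opad state, so the
     * call just made computed the outer HMAC digests for all fragments
     * at once.  Now scatter the fragments into their final record
     * positions: copy any payload bytes not yet encrypted in place,
     * append the MAC, add CBC padding and write each 5-byte header.
     */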
    for (i = 0; i < x4; i++) {
        unsigned int len = (i == (x4 - 1) ? last : frag), pad, j;
        unsigned char *out0 = out;

        memcpy(ciph_d[i].out, ciph_d[i].inp, len - processed);
        ciph_d[i].inp = ciph_d[i].out;

        out += 5 + 16 + len;

        /* write MAC */
        PUTU32(out + 0, ctx->A[i]);
        PUTU32(out + 4, ctx->B[i]);
        PUTU32(out + 8, ctx->C[i]);
        PUTU32(out + 12, ctx->D[i]);
        PUTU32(out + 16, ctx->E[i]);
        PUTU32(out + 20, ctx->F[i]);
        PUTU32(out + 24, ctx->G[i]);
        PUTU32(out + 28, ctx->H[i]);
        out += 32;
        len += 32;

        /* pad */
        pad = 15 - len % 16;
        for (j = 0; j <= pad; j++)
            *(out++) = pad;
        len += pad + 1;

        ciph_d[i].blocks = (len - processed) / 16;
        len += 16;              /* account for explicit iv */

        /* arrange header */
        out0[0] = ((u8 *)key->md.data)[8];
        out0[1] = ((u8 *)key->md.data)[9];
        out0[2] = ((u8 *)key->md.data)[10];
        out0[3] = (u8)(len >> 8);
        out0[4] = (u8)(len);

        ret += len + 5;
        inp += frag;
    }

    aesni_multi_cbc_encrypt(ciph_d, &key->ks, n4x);

    OPENSSL_cleanse(blocks, sizeof(blocks));
    OPENSSL_cleanse(ctx, sizeof(*ctx));

    return ret;
}
# endif
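/*
 * Main EVP cipher entry point.  In plain CBC mode it simply encrypts or
 * decrypts.  In TLS mode (payload_length set via the
 * EVP_CTRL_AEAD_TLS1_AAD control) it additionally computes or verifies
 * the HMAC, using constant-time techniques on the decrypt side to
 * resist Lucky 13-style padding oracle attacks.
 */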
static int aesni_cbc_hmac_sha256_cipher(EVP_CIPHER_CTX *ctx,
                                        unsigned char *out,
                                        const unsigned char *in, size_t len)
{
    EVP_AES_HMAC_SHA256 *key = data(ctx);
    unsigned int l;
    size_t plen = key->payload_length, iv = 0, /* explicit IV in TLS 1.1 and
                                                * later */
        sha_off = 0;
# if defined(STITCHED_CALL)
    size_t aes_off = 0, blocks;

    sha_off = SHA256_CBLOCK - key->md.num;
# endif

    key->payload_length = NO_PAYLOAD_LENGTH;

    if (len % AES_BLOCK_SIZE)
        return 0;

    if (EVP_CIPHER_CTX_encrypting(ctx)) {
        if (plen == NO_PAYLOAD_LENGTH)
            plen = len;
        else if (len !=
                 ((plen + SHA256_DIGEST_LENGTH +
                   AES_BLOCK_SIZE) & -AES_BLOCK_SIZE))
            return 0;
        else if (key->aux.tls_ver >= TLS1_1_VERSION)
            iv = AES_BLOCK_SIZE;

# if defined(STITCHED_CALL)
        /*
         * The assembly stitch handles AVX-capable processors, but its
         * performance on AMD Jaguar is not optimal, ~40% worse, for
         * unknown reasons.  That processor supports AVX but not the
         * AMD-specific XOP extension, which can be used to identify it
         * and avoid invoking the stitch.  So once we have established
         * that the current CPU supports AVX, we also require that it is
         * either an XOP-capable Bulldozer-based or a GenuineIntel one.
         * SHAEXT-capable processors take the stitch unconditionally.
         */
        if (((OPENSSL_ia32cap_P[2] & (1 << 29)) || /* SHAEXT? */
             ((OPENSSL_ia32cap_P[1] & (1 << (60 - 32))) && /* AVX? */
              ((OPENSSL_ia32cap_P[1] & (1 << (43 - 32))) /* XOP? */
               | (OPENSSL_ia32cap_P[0] & (1 << 30))))) && /* "Intel CPU"? */
            plen > (sha_off + iv) &&
            (blocks = (plen - (sha_off + iv)) / SHA256_CBLOCK)) {
            SHA256_Update(&key->md, in + iv, sha_off);

            (void)aesni_cbc_sha256_enc(in, out, blocks, &key->ks,
                                       EVP_CIPHER_CTX_iv_noconst(ctx),
                                       &key->md, in + iv + sha_off);
            blocks *= SHA256_CBLOCK;
            aes_off += blocks;
            sha_off += blocks;
            key->md.Nh += blocks >> 29;
            key->md.Nl += blocks <<= 3;
            if (key->md.Nl < (unsigned int)blocks)
                key->md.Nh++;
        } else {
            sha_off = 0;
        }
# endif
        sha_off += iv;
        SHA256_Update(&key->md, in + sha_off, plen - sha_off);

        if (plen != len) {      /* "TLS" mode of operation */
            if (in != out)
                memcpy(out + aes_off, in + aes_off, plen - aes_off);

            /* calculate HMAC and append it to payload */
            SHA256_Final(out + plen, &key->md);
            key->md = key->tail;
            SHA256_Update(&key->md, out + plen, SHA256_DIGEST_LENGTH);
            SHA256_Final(out + plen, &key->md);

            /* pad the payload|hmac */
            plen += SHA256_DIGEST_LENGTH;
            for (l = len - plen - 1; plen < len; plen++)
                out[plen] = l;
            /* encrypt HMAC|padding at once */
            aesni_cbc_encrypt(out + aes_off, out + aes_off, len - aes_off,
                              &key->ks, EVP_CIPHER_CTX_iv_noconst(ctx), 1);
        } else {
            aesni_cbc_encrypt(in + aes_off, out + aes_off, len - aes_off,
                              &key->ks, EVP_CIPHER_CTX_iv_noconst(ctx), 1);
        }
    } else {
        union {
            unsigned int u[SHA256_DIGEST_LENGTH / sizeof(unsigned int)];
            unsigned char c[64 + SHA256_DIGEST_LENGTH];
        } mac, *pmac;

        /* arrange cache line alignment */
        pmac = (void *)(((size_t)mac.c + 63) & ((size_t)0 - 64));

        /* decrypt HMAC|padding at once */
        aesni_cbc_encrypt(in, out, len, &key->ks,
                          EVP_CIPHER_CTX_iv_noconst(ctx), 0);

        if (plen != NO_PAYLOAD_LENGTH) { /* "TLS" mode of operation */
            size_t inp_len, mask, j, i;
            unsigned int res, maxpad, pad, bitlen;
            int ret = 1;
            union {
                unsigned int u[SHA_LBLOCK];
                unsigned char c[SHA256_CBLOCK];
            } *data = (void *)key->md.data;

            if ((key->aux.tls_aad[plen - 4] << 8 | key->aux.tls_aad[plen - 3])
                >= TLS1_1_VERSION)
                iv = AES_BLOCK_SIZE;

            if (len < (iv + SHA256_DIGEST_LENGTH + 1))
                return 0;

            /* omit explicit iv */
            out += iv;
            len -= iv;

            /* figure out payload length */
            pad = out[len - 1];
            maxpad = len - (SHA256_DIGEST_LENGTH + 1);
            maxpad |= (255 - maxpad) >> (sizeof(maxpad) * 8 - 8);
            maxpad &= 255;

            mask = constant_time_ge(maxpad, pad);
            ret &= mask;
            /*
             * If pad is invalid then we will fail the above test but we must
             * continue anyway because we are in constant time code. However,
             * we'll use the maxpad value instead of the supplied pad to make
             * sure we perform well defined pointer arithmetic.
             */
            pad = constant_time_select(mask, pad, maxpad);

            inp_len = len - (SHA256_DIGEST_LENGTH + pad + 1);

            key->aux.tls_aad[plen - 2] = inp_len >> 8;
            key->aux.tls_aad[plen - 1] = inp_len;

            /* calculate HMAC */
            key->md = key->head;
            SHA256_Update(&key->md, key->aux.tls_aad, plen);

# if 1      /* see original reference version in #else */
            len -= SHA256_DIGEST_LENGTH; /* amend mac */
            if (len >= (256 + SHA256_CBLOCK)) {
                j = (len - (256 + SHA256_CBLOCK)) & (0 - SHA256_CBLOCK);
                j += SHA256_CBLOCK - key->md.num;
                SHA256_Update(&key->md, out, j);
                out += j;
                len -= j;
                inp_len -= j;
            }

            /* but pretend as if we hashed padded payload */
            bitlen = key->md.Nl + (inp_len << 3); /* at most 18 bits */
#  ifdef BSWAP4
            bitlen = BSWAP4(bitlen);
#  else
            mac.c[0] = 0;
            mac.c[1] = (unsigned char)(bitlen >> 16);
            mac.c[2] = (unsigned char)(bitlen >> 8);
            mac.c[3] = (unsigned char)bitlen;
            bitlen = mac.u[0];
#  endif

            pmac->u[0] = 0;
            pmac->u[1] = 0;
            pmac->u[2] = 0;
            pmac->u[3] = 0;
            pmac->u[4] = 0;
            pmac->u[5] = 0;
            pmac->u[6] = 0;
            pmac->u[7] = 0;
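            /*
             * Process the remaining ciphertext one byte at a time, but
             * mask each byte so that only genuine payload bytes, the
             * 0x80 terminator and the length word are absorbed into the
             * hash.  The amount of data processed is thus independent
             * of the (secret) padding length, which is the core of the
             * Lucky 13 mitigation.
             */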
            for (res = key->md.num, j = 0; j < len; j++) {
                size_t c = out[j];
                mask = (j - inp_len) >> (sizeof(j) * 8 - 8);
                c &= mask;
                c |= 0x80 & ~mask & ~((inp_len - j) >> (sizeof(j) * 8 - 8));
                data->c[res++] = (unsigned char)c;

                if (res != SHA256_CBLOCK)
                    continue;

                /* j is not incremented yet */
                mask = 0 - ((inp_len + 7 - j) >> (sizeof(j) * 8 - 1));
                data->u[SHA_LBLOCK - 1] |= bitlen & mask;
                sha256_block_data_order(&key->md, data, 1);
                mask &= 0 - ((j - inp_len - 72) >> (sizeof(j) * 8 - 1));
                pmac->u[0] |= key->md.h[0] & mask;
                pmac->u[1] |= key->md.h[1] & mask;
                pmac->u[2] |= key->md.h[2] & mask;
                pmac->u[3] |= key->md.h[3] & mask;
                pmac->u[4] |= key->md.h[4] & mask;
                pmac->u[5] |= key->md.h[5] & mask;
                pmac->u[6] |= key->md.h[6] & mask;
                pmac->u[7] |= key->md.h[7] & mask;
                res = 0;
            }

            for (i = res; i < SHA256_CBLOCK; i++, j++)
                data->c[i] = 0;

            if (res > SHA256_CBLOCK - 8) {
                mask = 0 - ((inp_len + 8 - j) >> (sizeof(j) * 8 - 1));
                data->u[SHA_LBLOCK - 1] |= bitlen & mask;
                sha256_block_data_order(&key->md, data, 1);
                mask &= 0 - ((j - inp_len - 73) >> (sizeof(j) * 8 - 1));
                pmac->u[0] |= key->md.h[0] & mask;
                pmac->u[1] |= key->md.h[1] & mask;
                pmac->u[2] |= key->md.h[2] & mask;
                pmac->u[3] |= key->md.h[3] & mask;
                pmac->u[4] |= key->md.h[4] & mask;
                pmac->u[5] |= key->md.h[5] & mask;
                pmac->u[6] |= key->md.h[6] & mask;
                pmac->u[7] |= key->md.h[7] & mask;

                memset(data, 0, SHA256_CBLOCK);
                j += 64;
            }
            data->u[SHA_LBLOCK - 1] = bitlen;
            sha256_block_data_order(&key->md, data, 1);
            mask = 0 - ((j - inp_len - 73) >> (sizeof(j) * 8 - 1));
            pmac->u[0] |= key->md.h[0] & mask;
            pmac->u[1] |= key->md.h[1] & mask;
            pmac->u[2] |= key->md.h[2] & mask;
            pmac->u[3] |= key->md.h[3] & mask;
            pmac->u[4] |= key->md.h[4] & mask;
            pmac->u[5] |= key->md.h[5] & mask;
            pmac->u[6] |= key->md.h[6] & mask;
            pmac->u[7] |= key->md.h[7] & mask;

#  ifdef BSWAP4
            pmac->u[0] = BSWAP4(pmac->u[0]);
            pmac->u[1] = BSWAP4(pmac->u[1]);
            pmac->u[2] = BSWAP4(pmac->u[2]);
            pmac->u[3] = BSWAP4(pmac->u[3]);
            pmac->u[4] = BSWAP4(pmac->u[4]);
            pmac->u[5] = BSWAP4(pmac->u[5]);
            pmac->u[6] = BSWAP4(pmac->u[6]);
            pmac->u[7] = BSWAP4(pmac->u[7]);
#  else
            for (i = 0; i < 8; i++) {
                res = pmac->u[i];
                pmac->c[4 * i + 0] = (unsigned char)(res >> 24);
                pmac->c[4 * i + 1] = (unsigned char)(res >> 16);
                pmac->c[4 * i + 2] = (unsigned char)(res >> 8);
                pmac->c[4 * i + 3] = (unsigned char)res;
            }
#  endif
            len += SHA256_DIGEST_LENGTH;
# else
            SHA256_Update(&key->md, out, inp_len);
            res = key->md.num;
            SHA256_Final(pmac->c, &key->md);

            {
                unsigned int inp_blocks, pad_blocks;

                /* but pretend as if we hashed padded payload */
                inp_blocks =
                    1 + ((SHA256_CBLOCK - 9 - res) >> (sizeof(res) * 8 - 1));
                res += (unsigned int)(len - inp_len);
                pad_blocks = res / SHA256_CBLOCK;
                res %= SHA256_CBLOCK;
                pad_blocks +=
                    1 + ((SHA256_CBLOCK - 9 - res) >> (sizeof(res) * 8 - 1));
                for (; inp_blocks < pad_blocks; inp_blocks++)
                    sha256_block_data_order(&key->md, data, 1);
            }
# endif     /* pre-lucky-13 reference version of above */
            key->md = key->tail;
            SHA256_Update(&key->md, pmac->c, SHA256_DIGEST_LENGTH);
            SHA256_Final(pmac->c, &key->md);

            /* verify HMAC */
            out += inp_len;
            len -= inp_len;
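            /*
             * Compare the transmitted MAC and padding against the
             * computed values in a single pass over a fixed-length
             * window, so that neither the comparison time nor the
             * memory access pattern reveals where the MAC actually
             * starts within the record.
             */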
# if 1      /* see original reference version in #else */
            {
                unsigned char *p =
                    out + len - 1 - maxpad - SHA256_DIGEST_LENGTH;
                size_t off = out - p;
                unsigned int c, cmask;

                maxpad += SHA256_DIGEST_LENGTH;
                for (res = 0, i = 0, j = 0; j < maxpad; j++) {
                    c = p[j];
                    cmask =
                        ((int)(j - off - SHA256_DIGEST_LENGTH)) >>
                        (sizeof(int) * 8 - 1);
                    res |= (c ^ pad) & ~cmask; /* ... and padding */
                    cmask &= ((int)(off - 1 - j)) >> (sizeof(int) * 8 - 1);
                    res |= (c ^ pmac->c[i]) & cmask;
                    i += 1 & cmask;
                }
                maxpad -= SHA256_DIGEST_LENGTH;

                res = 0 - ((0 - res) >> (sizeof(res) * 8 - 1));
                ret &= (int)~res;
            }
# else      /* pre-lucky-13 reference version of above */
            for (res = 0, i = 0; i < SHA256_DIGEST_LENGTH; i++)
                res |= out[i] ^ pmac->c[i];
            res = 0 - ((0 - res) >> (sizeof(res) * 8 - 1));
            ret &= (int)~res;

            /* verify padding */
            pad = (pad & ~res) | (maxpad & res);
            out = out + len - 1 - pad;
            for (res = 0, i = 0; i < pad; i++)
                res |= out[i] ^ pad;

            res = (0 - res) >> (sizeof(res) * 8 - 1);
            ret &= (int)~res;
# endif
            return ret;
        } else {
            SHA256_Update(&key->md, out, len);
        }
    }

    return 1;
}
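/*
 * Control interface: installs the HMAC key (precomputing the ipad and
 * opad hash states), consumes the 13-byte TLS AAD, and drives the
 * multi-block record interface.
 */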
static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg,
                                      void *ptr)
{
    EVP_AES_HMAC_SHA256 *key = data(ctx);
    unsigned int u_arg = (unsigned int)arg;

    switch (type) {
    case EVP_CTRL_AEAD_SET_MAC_KEY:
        {
            unsigned int i;
            unsigned char hmac_key[64];

            memset(hmac_key, 0, sizeof(hmac_key));

            if (arg < 0)
                return -1;

            if (u_arg > sizeof(hmac_key)) {
                SHA256_Init(&key->head);
                SHA256_Update(&key->head, ptr, arg);
                SHA256_Final(hmac_key, &key->head);
            } else {
                memcpy(hmac_key, ptr, arg);
            }

            for (i = 0; i < sizeof(hmac_key); i++)
                hmac_key[i] ^= 0x36; /* ipad */
            SHA256_Init(&key->head);
            SHA256_Update(&key->head, hmac_key, sizeof(hmac_key));

            for (i = 0; i < sizeof(hmac_key); i++)
                hmac_key[i] ^= 0x36 ^ 0x5c; /* opad */
            SHA256_Init(&key->tail);
            SHA256_Update(&key->tail, hmac_key, sizeof(hmac_key));

            OPENSSL_cleanse(hmac_key, sizeof(hmac_key));

            return 1;
        }
    case EVP_CTRL_AEAD_TLS1_AAD:
        {
            unsigned char *p = ptr;
            unsigned int len;

            if (arg != EVP_AEAD_TLS1_AAD_LEN)
                return -1;

            len = p[arg - 2] << 8 | p[arg - 1];

            if (EVP_CIPHER_CTX_encrypting(ctx)) {
                key->payload_length = len;
                if ((key->aux.tls_ver =
                     p[arg - 4] << 8 | p[arg - 3]) >= TLS1_1_VERSION) {
                    if (len < AES_BLOCK_SIZE)
                        return 0;
                    len -= AES_BLOCK_SIZE;
                    p[arg - 2] = len >> 8;
                    p[arg - 1] = len;
                }
                key->md = key->head;
                SHA256_Update(&key->md, p, arg);

                return (int)(((len + SHA256_DIGEST_LENGTH +
                               AES_BLOCK_SIZE) & -AES_BLOCK_SIZE)
                             - len);
            } else {
                memcpy(key->aux.tls_aad, ptr, arg);
                key->payload_length = arg;

                return SHA256_DIGEST_LENGTH;
            }
        }
# if !defined(OPENSSL_NO_MULTIBLOCK)
    case EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE:
        return (int)(5 + 16 + ((arg + 32 + 16) & -16));
    case EVP_CTRL_TLS1_1_MULTIBLOCK_AAD:
        {
            EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
                (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr;
            unsigned int n4x = 1, x4;
            unsigned int frag, last, packlen, inp_len;

            if (arg < 0)
                return -1;

            if (u_arg < sizeof(EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM))
                return -1;

            inp_len = param->inp[11] << 8 | param->inp[12];

            if (EVP_CIPHER_CTX_encrypting(ctx)) {
                if ((param->inp[9] << 8 | param->inp[10]) < TLS1_1_VERSION)
                    return -1;

                if (inp_len) {
                    if (inp_len < 4096)
                        return 0; /* too short */

                    if (inp_len >= 8192 && OPENSSL_ia32cap_P[2] & (1 << 5))
                        n4x = 2; /* AVX2 */
                } else if ((n4x = param->interleave / 4) && n4x <= 2)
                    inp_len = param->len;
                else
                    return -1;

                key->md = key->head;
                SHA256_Update(&key->md, param->inp, 13);

                x4 = 4 * n4x;
                n4x += 1;

                frag = inp_len >> n4x;
                last = inp_len + frag - (frag << n4x);
                if (last > frag && ((last + 13 + 9) % 64 < (x4 - 1))) {
                    frag++;
                    last -= x4 - 1;
                }

                packlen = 5 + 16 + ((frag + 32 + 16) & -16);
                packlen = (packlen << n4x) - packlen;
                packlen += 5 + 16 + ((last + 32 + 16) & -16);

                param->interleave = x4;

                return (int)packlen;
            } else
                return -1;      /* not yet */
        }
    case EVP_CTRL_TLS1_1_MULTIBLOCK_ENCRYPT:
        {
            EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *param =
                (EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM *) ptr;

            return (int)tls1_1_multi_block_encrypt(key, param->out,
                                                   param->inp, param->len,
                                                   param->interleave / 4);
        }
    case EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT:
# endif
    default:
        return -1;
    }
}
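/*
 * Cipher descriptors: block size 16, key length 16 or 32 bytes, IV
 * length 16.  Both advertise EVP_CIPH_FLAG_AEAD_CIPHER and
 * EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK so that the TLS layer routes MAC
 * processing and record coalescing through the control interface above.
 */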
static EVP_CIPHER aesni_128_cbc_hmac_sha256_cipher = {
# ifdef NID_aes_128_cbc_hmac_sha256
    NID_aes_128_cbc_hmac_sha256,
# else
    NID_undef,
# endif
    AES_BLOCK_SIZE, 16, AES_BLOCK_SIZE,
    EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 |
        EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
    aesni_cbc_hmac_sha256_init_key,
    aesni_cbc_hmac_sha256_cipher,
    NULL,
    sizeof(EVP_AES_HMAC_SHA256),
    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_set_asn1_iv,
    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_get_asn1_iv,
    aesni_cbc_hmac_sha256_ctrl,
    NULL
};

static EVP_CIPHER aesni_256_cbc_hmac_sha256_cipher = {
# ifdef NID_aes_256_cbc_hmac_sha256
    NID_aes_256_cbc_hmac_sha256,
# else
    NID_undef,
# endif
    AES_BLOCK_SIZE, 32, AES_BLOCK_SIZE,
    EVP_CIPH_CBC_MODE | EVP_CIPH_FLAG_DEFAULT_ASN1 |
        EVP_CIPH_FLAG_AEAD_CIPHER | EVP_CIPH_FLAG_TLS1_1_MULTIBLOCK,
    aesni_cbc_hmac_sha256_init_key,
    aesni_cbc_hmac_sha256_cipher,
    NULL,
    sizeof(EVP_AES_HMAC_SHA256),
    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_set_asn1_iv,
    EVP_CIPH_FLAG_DEFAULT_ASN1 ? NULL : EVP_CIPHER_get_asn1_iv,
    aesni_cbc_hmac_sha256_ctrl,
    NULL
};

const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void)
{
    return ((OPENSSL_ia32cap_P[1] & AESNI_CAPABLE) &&
            aesni_cbc_sha256_enc(NULL, NULL, 0, NULL, NULL, NULL, NULL) ?
            &aesni_128_cbc_hmac_sha256_cipher : NULL);
}

const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void)
{
    return ((OPENSSL_ia32cap_P[1] & AESNI_CAPABLE) &&
            aesni_cbc_sha256_enc(NULL, NULL, 0, NULL, NULL, NULL, NULL) ?
            &aesni_256_cbc_hmac_sha256_cipher : NULL);
}
#else
const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha256(void)
{
    return NULL;
}

const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha256(void)
{
    return NULL;
}
#endif                          /* AESNI_ASM */