1/* crypto/bn/bn_exp.c */ 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58/* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112#include "cryptlib.h" 113#include "constant_time_locl.h" 114#include "bn_lcl.h" 115 116#include <stdlib.h> 117#ifdef _WIN32 118# include <malloc.h> 119# ifndef alloca 120# define alloca _alloca 121# endif 122#elif defined(__GNUC__) 123# ifndef alloca 124# define alloca(s) __builtin_alloca((s)) 125# endif 126#endif 127 128/* maximum precomputation table size for *variable* sliding windows */ 129#define TABLE_SIZE 32 130 131/* this one works - simple but works */ 132int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 133{ 134 int i, bits, ret = 0; 135 BIGNUM *v, *rr; 136 137 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 138 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 139 BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 140 return -1; 141 } 142 143 BN_CTX_start(ctx); 144 if ((r == a) || (r == p)) 145 rr = BN_CTX_get(ctx); 146 else 147 rr = r; 148 v = BN_CTX_get(ctx); 149 if (rr == NULL || v == NULL) 150 goto err; 151 152 if (BN_copy(v, a) == NULL) 153 goto err; 154 bits = BN_num_bits(p); 155 156 if (BN_is_odd(p)) { 157 if (BN_copy(rr, a) == NULL) 158 goto err; 159 } else { 160 if (!BN_one(rr)) 161 goto err; 162 } 163 164 for (i = 1; i < bits; i++) { 165 if (!BN_sqr(v, v, ctx)) 166 goto err; 167 if (BN_is_bit_set(p, i)) { 168 if (!BN_mul(rr, rr, v, ctx)) 169 goto err; 170 } 171 } 172 if (r != rr) 173 BN_copy(r, rr); 174 ret = 1; 175 err: 176 BN_CTX_end(ctx); 177 bn_check_top(r); 178 return (ret); 179} 180 181int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 182 BN_CTX *ctx) 183{ 184 int ret; 185 186 bn_check_top(a); 187 bn_check_top(p); 188 bn_check_top(m); 189 190 /*- 191 * For even modulus m = 2^k*m_odd, it might make sense to compute 192 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 193 * exponentiation for the odd part), using appropriate exponent 194 * reductions, and combine the results using the CRT. 195 * 196 * For now, we use Montgomery only if the modulus is odd; otherwise, 197 * exponentiation using the reciprocal-based quick remaindering 198 * algorithm is used. 199 * 200 * (Timing obtained with expspeed.c [computations a^p mod m 201 * where a, p, m are of the same length: 256, 512, 1024, 2048, 202 * 4096, 8192 bits], compared to the running time of the 203 * standard algorithm: 204 * 205 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 206 * 55 .. 77 % [UltraSparc processor, but 207 * debug-solaris-sparcv8-gcc conf.] 208 * 209 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 210 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 211 * 212 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 213 * at 2048 and more bits, but at 512 and 1024 bits, it was 214 * slower even than the standard algorithm! 215 * 216 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 217 * should be obtained when the new Montgomery reduction code 218 * has been integrated into OpenSSL.) 219 */ 220 221#define MONT_MUL_MOD 222#define MONT_EXP_WORD 223#define RECP_MUL_MOD 224 225#ifdef MONT_MUL_MOD 226 /* 227 * I have finally been able to take out this pre-condition of the top bit 228 * being set. It was caused by an error in BN_div with negatives. There 229 * was also another problem when for a^b%m a >= m. eay 07-May-97 230 */ 231 /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 232 233 if (BN_is_odd(m)) { 234# ifdef MONT_EXP_WORD 235 if (a->top == 1 && !a->neg 236 && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) { 237 BN_ULONG A = a->d[0]; 238 ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL); 239 } else 240# endif 241 ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL); 242 } else 243#endif 244#ifdef RECP_MUL_MOD 245 { 246 ret = BN_mod_exp_recp(r, a, p, m, ctx); 247 } 248#else 249 { 250 ret = BN_mod_exp_simple(r, a, p, m, ctx); 251 } 252#endif 253 254 bn_check_top(r); 255 return (ret); 256} 257 258int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 259 const BIGNUM *m, BN_CTX *ctx) 260{ 261 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 262 int start = 1; 263 BIGNUM *aa; 264 /* Table of variables obtained from 'ctx' */ 265 BIGNUM *val[TABLE_SIZE]; 266 BN_RECP_CTX recp; 267 268 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 269 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 270 BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 271 return -1; 272 } 273 274 bits = BN_num_bits(p); 275 276 if (bits == 0) { 277 ret = BN_one(r); 278 return ret; 279 } 280 281 BN_CTX_start(ctx); 282 aa = BN_CTX_get(ctx); 283 val[0] = BN_CTX_get(ctx); 284 if (!aa || !val[0]) 285 goto err; 286 287 BN_RECP_CTX_init(&recp); 288 if (m->neg) { 289 /* ignore sign of 'm' */ 290 if (!BN_copy(aa, m)) 291 goto err; 292 aa->neg = 0; 293 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 294 goto err; 295 } else { 296 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 297 goto err; 298 } 299 300 if (!BN_nnmod(val[0], a, m, ctx)) 301 goto err; /* 1 */ 302 if (BN_is_zero(val[0])) { 303 BN_zero(r); 304 ret = 1; 305 goto err; 306 } 307 308 window = BN_window_bits_for_exponent_size(bits); 309 if (window > 1) { 310 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 311 goto err; /* 2 */ 312 j = 1 << (window - 1); 313 for (i = 1; i < j; i++) { 314 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 315 !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) 316 goto err; 317 } 318 } 319 320 start = 1; /* This is used to avoid multiplication etc 321 * when there is only the value '1' in the 322 * buffer. */ 323 wvalue = 0; /* The 'value' of the window */ 324 wstart = bits - 1; /* The top bit of the window */ 325 wend = 0; /* The bottom bit of the window */ 326 327 if (!BN_one(r)) 328 goto err; 329 330 for (;;) { 331 if (BN_is_bit_set(p, wstart) == 0) { 332 if (!start) 333 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 334 goto err; 335 if (wstart == 0) 336 break; 337 wstart--; 338 continue; 339 } 340 /* 341 * We now have wstart on a 'set' bit, we now need to work out how bit 342 * a window to do. To do this we need to scan forward until the last 343 * set bit before the end of the window 344 */ 345 j = wstart; 346 wvalue = 1; 347 wend = 0; 348 for (i = 1; i < window; i++) { 349 if (wstart - i < 0) 350 break; 351 if (BN_is_bit_set(p, wstart - i)) { 352 wvalue <<= (i - wend); 353 wvalue |= 1; 354 wend = i; 355 } 356 } 357 358 /* wend is the size of the current window */ 359 j = wend + 1; 360 /* add the 'bytes above' */ 361 if (!start) 362 for (i = 0; i < j; i++) { 363 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 364 goto err; 365 } 366 367 /* wvalue will be an odd number < 2^window */ 368 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) 369 goto err; 370 371 /* move the 'window' down further */ 372 wstart -= wend + 1; 373 wvalue = 0; 374 start = 0; 375 if (wstart < 0) 376 break; 377 } 378 ret = 1; 379 err: 380 BN_CTX_end(ctx); 381 BN_RECP_CTX_free(&recp); 382 bn_check_top(r); 383 return (ret); 384} 385 386int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 387 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 388{ 389 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 390 int start = 1; 391 BIGNUM *d, *r; 392 const BIGNUM *aa; 393 /* Table of variables obtained from 'ctx' */ 394 BIGNUM *val[TABLE_SIZE]; 395 BN_MONT_CTX *mont = NULL; 396 397 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 398 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 399 } 400 401 bn_check_top(a); 402 bn_check_top(p); 403 bn_check_top(m); 404 405 if (!BN_is_odd(m)) { 406 BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); 407 return (0); 408 } 409 bits = BN_num_bits(p); 410 if (bits == 0) { 411 ret = BN_one(rr); 412 return ret; 413 } 414 415 BN_CTX_start(ctx); 416 d = BN_CTX_get(ctx); 417 r = BN_CTX_get(ctx); 418 val[0] = BN_CTX_get(ctx); 419 if (!d || !r || !val[0]) 420 goto err; 421 422 /* 423 * If this is not done, things will break in the montgomery part 424 */ 425 426 if (in_mont != NULL) 427 mont = in_mont; 428 else { 429 if ((mont = BN_MONT_CTX_new()) == NULL) 430 goto err; 431 if (!BN_MONT_CTX_set(mont, m, ctx)) 432 goto err; 433 } 434 435 if (a->neg || BN_ucmp(a, m) >= 0) { 436 if (!BN_nnmod(val[0], a, m, ctx)) 437 goto err; 438 aa = val[0]; 439 } else 440 aa = a; 441 if (BN_is_zero(aa)) { 442 BN_zero(rr); 443 ret = 1; 444 goto err; 445 } 446 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 447 goto err; /* 1 */ 448 449 window = BN_window_bits_for_exponent_size(bits); 450 if (window > 1) { 451 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 452 goto err; /* 2 */ 453 j = 1 << (window - 1); 454 for (i = 1; i < j; i++) { 455 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 456 !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) 457 goto err; 458 } 459 } 460 461 start = 1; /* This is used to avoid multiplication etc 462 * when there is only the value '1' in the 463 * buffer. */ 464 wvalue = 0; /* The 'value' of the window */ 465 wstart = bits - 1; /* The top bit of the window */ 466 wend = 0; /* The bottom bit of the window */ 467 468 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 469 goto err; 470 for (;;) { 471 if (BN_is_bit_set(p, wstart) == 0) { 472 if (!start) { 473 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 474 goto err; 475 } 476 if (wstart == 0) 477 break; 478 wstart--; 479 continue; 480 } 481 /* 482 * We now have wstart on a 'set' bit, we now need to work out how bit 483 * a window to do. To do this we need to scan forward until the last 484 * set bit before the end of the window 485 */ 486 j = wstart; 487 wvalue = 1; 488 wend = 0; 489 for (i = 1; i < window; i++) { 490 if (wstart - i < 0) 491 break; 492 if (BN_is_bit_set(p, wstart - i)) { 493 wvalue <<= (i - wend); 494 wvalue |= 1; 495 wend = i; 496 } 497 } 498 499 /* wend is the size of the current window */ 500 j = wend + 1; 501 /* add the 'bytes above' */ 502 if (!start) 503 for (i = 0; i < j; i++) { 504 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 505 goto err; 506 } 507 508 /* wvalue will be an odd number < 2^window */ 509 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 510 goto err; 511 512 /* move the 'window' down further */ 513 wstart -= wend + 1; 514 wvalue = 0; 515 start = 0; 516 if (wstart < 0) 517 break; 518 } 519 if (!BN_from_montgomery(rr, r, mont, ctx)) 520 goto err; 521 ret = 1; 522 err: 523 if ((in_mont == NULL) && (mont != NULL)) 524 BN_MONT_CTX_free(mont); 525 BN_CTX_end(ctx); 526 bn_check_top(rr); 527 return (ret); 528} 529 530/* 531 * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific 532 * layout so that accessing any of these table values shows the same access 533 * pattern as far as cache lines are concerned. The following functions are 534 * used to transfer a BIGNUM from/to that table. 535 */ 536 537static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, 538 unsigned char *buf, int idx, 539 int window) 540{ 541 int i, j; 542 int width = 1 << window; 543 BN_ULONG *table = (BN_ULONG *)buf; 544 545 if (top > b->top) 546 top = b->top; /* this works because 'buf' is explicitly 547 * zeroed */ 548 for (i = 0, j = idx; i < top; i++, j += width) { 549 table[j] = b->d[i]; 550 } 551 552 return 1; 553} 554 555static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, 556 unsigned char *buf, int idx, 557 int window) 558{ 559 int i, j; 560 int width = 1 << window; 561 volatile BN_ULONG *table = (volatile BN_ULONG *)buf; 562 563 if (bn_wexpand(b, top) == NULL) 564 return 0; 565 566 if (window <= 3) { 567 for (i = 0; i < top; i++, table += width) { 568 BN_ULONG acc = 0; 569 570 for (j = 0; j < width; j++) { 571 acc |= table[j] & 572 ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 573 } 574 575 b->d[i] = acc; 576 } 577 } else { 578 int xstride = 1 << (window - 2); 579 BN_ULONG y0, y1, y2, y3; 580 581 i = idx >> (window - 2); /* equivalent of idx / xstride */ 582 idx &= xstride - 1; /* equivalent of idx % xstride */ 583 584 y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); 585 y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); 586 y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); 587 y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); 588 589 for (i = 0; i < top; i++, table += width) { 590 BN_ULONG acc = 0; 591 592 for (j = 0; j < xstride; j++) { 593 acc |= ( (table[j + 0 * xstride] & y0) | 594 (table[j + 1 * xstride] & y1) | 595 (table[j + 2 * xstride] & y2) | 596 (table[j + 3 * xstride] & y3) ) 597 & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 598 } 599 600 b->d[i] = acc; 601 } 602 } 603 604 b->top = top; 605 bn_correct_top(b); 606 return 1; 607} 608 609/* 610 * Given a pointer value, compute the next address that is a cache line 611 * multiple. 612 */ 613#define MOD_EXP_CTIME_ALIGN(x_) \ 614 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 615 616/* 617 * This variant of BN_mod_exp_mont() uses fixed windows and the special 618 * precomputation memory layout to limit data-dependency to a minimum to 619 * protect secret exponents (cf. the hyper-threading timing attacks pointed 620 * out by Colin Percival, 621 * http://www.daemong-consideredperthreading-considered-harmful/) 622 */ 623int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 624 const BIGNUM *m, BN_CTX *ctx, 625 BN_MONT_CTX *in_mont) 626{ 627 int i, bits, ret = 0, window, wvalue; 628 int top; 629 BN_MONT_CTX *mont = NULL; 630 631 int numPowers; 632 unsigned char *powerbufFree = NULL; 633 int powerbufLen = 0; 634 unsigned char *powerbuf = NULL; 635 BIGNUM tmp, am; 636 637 bn_check_top(a); 638 bn_check_top(p); 639 bn_check_top(m); 640 641 top = m->top; 642 643 if (!(m->d[0] & 1)) { 644 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS); 645 return (0); 646 } 647 bits = BN_num_bits(p); 648 if (bits == 0) { 649 ret = BN_one(rr); 650 return ret; 651 } 652 653 BN_CTX_start(ctx); 654 655 /* 656 * Allocate a montgomery context if it was not supplied by the caller. If 657 * this is not done, things will break in the montgomery part. 658 */ 659 if (in_mont != NULL) 660 mont = in_mont; 661 else { 662 if ((mont = BN_MONT_CTX_new()) == NULL) 663 goto err; 664 if (!BN_MONT_CTX_set(mont, m, ctx)) 665 goto err; 666 } 667 668 /* Get the window size to use with size of p. */ 669 window = BN_window_bits_for_ctime_exponent_size(bits); 670#if defined(OPENSSL_BN_ASM_MONT5) 671 if (window == 6 && bits <= 1024) 672 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 673#endif 674 675 /* 676 * Allocate a buffer large enough to hold all of the pre-computed powers 677 * of am, am itself and tmp. 678 */ 679 numPowers = 1 << window; 680 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 681 ((2 * top) > 682 numPowers ? (2 * top) : numPowers)); 683#ifdef alloca 684 if (powerbufLen < 3072) 685 powerbufFree = 686 alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 687 else 688#endif 689 if ((powerbufFree = 690 (unsigned char *)OPENSSL_malloc(powerbufLen + 691 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) 692 == NULL) 693 goto err; 694 695 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 696 memset(powerbuf, 0, powerbufLen); 697 698#ifdef alloca 699 if (powerbufLen < 3072) 700 powerbufFree = NULL; 701#endif 702 703 /* lay down tmp and am right after powers table */ 704 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 705 am.d = tmp.d + top; 706 tmp.top = am.top = 0; 707 tmp.dmax = am.dmax = top; 708 tmp.neg = am.neg = 0; 709 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 710 711 /* prepare a^0 in Montgomery domain */ 712#if 1 713 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 714 goto err; 715#else 716 tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 717 for (i = 1; i < top; i++) 718 tmp.d[i] = (~m->d[i]) & BN_MASK2; 719 tmp.top = top; 720#endif 721 722 /* prepare a^1 in Montgomery domain */ 723 if (a->neg || BN_ucmp(a, m) >= 0) { 724 if (!BN_mod(&am, a, m, ctx)) 725 goto err; 726 if (!BN_to_montgomery(&am, &am, mont, ctx)) 727 goto err; 728 } else if (!BN_to_montgomery(&am, a, mont, ctx)) 729 goto err; 730 731#if defined(OPENSSL_BN_ASM_MONT5) 732 if (window == 5 && top > 1) { 733 /* 734 * This optimization uses ideas from http://eprint.iacr.org/2011/239, 735 * specifically optimization of cache-timing attack countermeasures 736 * and pre-computation optimization. 737 */ 738 739 /* 740 * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 741 * 512-bit RSA is hardly relevant, we omit it to spare size... 742 */ 743 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 744 const void *table, const BN_ULONG *np, 745 const BN_ULONG *n0, int num, int power); 746 void bn_scatter5(const BN_ULONG *inp, size_t num, 747 void *table, size_t power); 748 void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); 749 750 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 751 752 /* 753 * BN_to_montgomery can contaminate words above .top [in 754 * BN_DEBUG[_DEBUG] build]... 755 */ 756 for (i = am.top; i < top; i++) 757 am.d[i] = 0; 758 for (i = tmp.top; i < top; i++) 759 tmp.d[i] = 0; 760 761 bn_scatter5(tmp.d, top, powerbuf, 0); 762 bn_scatter5(am.d, am.top, powerbuf, 1); 763 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 764 bn_scatter5(tmp.d, top, powerbuf, 2); 765 766# if 0 767 for (i = 3; i < 32; i++) { 768 /* Calculate a^i = a^(i-1) * a */ 769 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 770 bn_scatter5(tmp.d, top, powerbuf, i); 771 } 772# else 773 /* same as above, but uses squaring for 1/2 of operations */ 774 for (i = 4; i < 32; i *= 2) { 775 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 776 bn_scatter5(tmp.d, top, powerbuf, i); 777 } 778 for (i = 3; i < 8; i += 2) { 779 int j; 780 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 781 bn_scatter5(tmp.d, top, powerbuf, i); 782 for (j = 2 * i; j < 32; j *= 2) { 783 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 784 bn_scatter5(tmp.d, top, powerbuf, j); 785 } 786 } 787 for (; i < 16; i += 2) { 788 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 789 bn_scatter5(tmp.d, top, powerbuf, i); 790 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 791 bn_scatter5(tmp.d, top, powerbuf, 2 * i); 792 } 793 for (; i < 32; i += 2) { 794 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 795 bn_scatter5(tmp.d, top, powerbuf, i); 796 } 797# endif 798 bits--; 799 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 800 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 801 bn_gather5(tmp.d, top, powerbuf, wvalue); 802 803 /* 804 * Scan the exponent one window at a time starting from the most 805 * significant bits. 806 */ 807 while (bits >= 0) { 808 for (wvalue = 0, i = 0; i < 5; i++, bits--) 809 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 810 811 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 812 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 813 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 814 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 815 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 816 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 817 } 818 819 tmp.top = top; 820 bn_correct_top(&tmp); 821 } else 822#endif 823 { 824 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window)) 825 goto err; 826 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window)) 827 goto err; 828 829 /* 830 * If the window size is greater than 1, then calculate 831 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even 832 * powers could instead be computed as (a^(i/2))^2 to use the slight 833 * performance advantage of sqr over mul). 834 */ 835 if (window > 1) { 836 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 837 goto err; 838 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, 839 window)) 840 goto err; 841 for (i = 3; i < numPowers; i++) { 842 /* Calculate a^i = a^(i-1) * a */ 843 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) 844 goto err; 845 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, 846 window)) 847 goto err; 848 } 849 } 850 851 bits--; 852 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 853 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 854 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue, 855 window)) 856 goto err; 857 858 /* 859 * Scan the exponent one window at a time starting from the most 860 * significant bits. 861 */ 862 while (bits >= 0) { 863 wvalue = 0; /* The 'value' of the window */ 864 865 /* Scan the window, squaring the result as we go */ 866 for (i = 0; i < window; i++, bits--) { 867 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) 868 goto err; 869 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 870 } 871 872 /* 873 * Fetch the appropriate pre-computed value from the pre-buf 874 */ 875 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, 876 window)) 877 goto err; 878 879 /* Multiply the result into the intermediate result */ 880 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 881 goto err; 882 } 883 } 884 885 /* Convert the final result from montgomery to standard format */ 886 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 887 goto err; 888 ret = 1; 889 err: 890 if ((in_mont == NULL) && (mont != NULL)) 891 BN_MONT_CTX_free(mont); 892 if (powerbuf != NULL) { 893 OPENSSL_cleanse(powerbuf, powerbufLen); 894 if (powerbufFree) 895 OPENSSL_free(powerbufFree); 896 } 897 BN_CTX_end(ctx); 898 return (ret); 899} 900 901int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 902 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 903{ 904 BN_MONT_CTX *mont = NULL; 905 int b, bits, ret = 0; 906 int r_is_one; 907 BN_ULONG w, next_w; 908 BIGNUM *d, *r, *t; 909 BIGNUM *swap_tmp; 910#define BN_MOD_MUL_WORD(r, w, m) \ 911 (BN_mul_word(r, (w)) && \ 912 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 913 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 914 /* 915 * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is 916 * probably more overhead than always using BN_mod (which uses BN_copy if 917 * a similar test returns true). 918 */ 919 /* 920 * We can use BN_mod and do not need BN_nnmod because our accumulator is 921 * never negative (the result of BN_mod does not depend on the sign of 922 * the modulus). 923 */ 924#define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 925 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 926 927 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 928 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 929 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 930 return -1; 931 } 932 933 bn_check_top(p); 934 bn_check_top(m); 935 936 if (!BN_is_odd(m)) { 937 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); 938 return (0); 939 } 940 if (m->top == 1) 941 a %= m->d[0]; /* make sure that 'a' is reduced */ 942 943 bits = BN_num_bits(p); 944 if (bits == 0) { 945 /* x**0 mod 1 is still zero. */ 946 if (BN_is_one(m)) { 947 ret = 1; 948 BN_zero(rr); 949 } else 950 ret = BN_one(rr); 951 return ret; 952 } 953 if (a == 0) { 954 BN_zero(rr); 955 ret = 1; 956 return ret; 957 } 958 959 BN_CTX_start(ctx); 960 d = BN_CTX_get(ctx); 961 r = BN_CTX_get(ctx); 962 t = BN_CTX_get(ctx); 963 if (d == NULL || r == NULL || t == NULL) 964 goto err; 965 966 if (in_mont != NULL) 967 mont = in_mont; 968 else { 969 if ((mont = BN_MONT_CTX_new()) == NULL) 970 goto err; 971 if (!BN_MONT_CTX_set(mont, m, ctx)) 972 goto err; 973 } 974 975 r_is_one = 1; /* except for Montgomery factor */ 976 977 /* bits-1 >= 0 */ 978 979 /* The result is accumulated in the product r*w. */ 980 w = a; /* bit 'bits-1' of 'p' is always set */ 981 for (b = bits - 2; b >= 0; b--) { 982 /* First, square r*w. */ 983 next_w = w * w; 984 if ((next_w / w) != w) { /* overflow */ 985 if (r_is_one) { 986 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 987 goto err; 988 r_is_one = 0; 989 } else { 990 if (!BN_MOD_MUL_WORD(r, w, m)) 991 goto err; 992 } 993 next_w = 1; 994 } 995 w = next_w; 996 if (!r_is_one) { 997 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 998 goto err; 999 } 1000 1001 /* Second, multiply r*w by 'a' if exponent bit is set. */ 1002 if (BN_is_bit_set(p, b)) { 1003 next_w = w * a; 1004 if ((next_w / a) != w) { /* overflow */ 1005 if (r_is_one) { 1006 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1007 goto err; 1008 r_is_one = 0; 1009 } else { 1010 if (!BN_MOD_MUL_WORD(r, w, m)) 1011 goto err; 1012 } 1013 next_w = a; 1014 } 1015 w = next_w; 1016 } 1017 } 1018 1019 /* Finally, set r:=r*w. */ 1020 if (w != 1) { 1021 if (r_is_one) { 1022 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1023 goto err; 1024 r_is_one = 0; 1025 } else { 1026 if (!BN_MOD_MUL_WORD(r, w, m)) 1027 goto err; 1028 } 1029 } 1030 1031 if (r_is_one) { /* can happen only if a == 1 */ 1032 if (!BN_one(rr)) 1033 goto err; 1034 } else { 1035 if (!BN_from_montgomery(rr, r, mont, ctx)) 1036 goto err; 1037 } 1038 ret = 1; 1039 err: 1040 if ((in_mont == NULL) && (mont != NULL)) 1041 BN_MONT_CTX_free(mont); 1042 BN_CTX_end(ctx); 1043 bn_check_top(rr); 1044 return (ret); 1045} 1046 1047/* The old fallback, simple version :-) */ 1048int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 1049 const BIGNUM *m, BN_CTX *ctx) 1050{ 1051 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 1052 int start = 1; 1053 BIGNUM *d; 1054 /* Table of variables obtained from 'ctx' */ 1055 BIGNUM *val[TABLE_SIZE]; 1056 1057 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1058 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1059 BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1060 return -1; 1061 } 1062 1063 bits = BN_num_bits(p); 1064 1065 if (bits == 0) { 1066 ret = BN_one(r); 1067 return ret; 1068 } 1069 1070 BN_CTX_start(ctx); 1071 d = BN_CTX_get(ctx); 1072 val[0] = BN_CTX_get(ctx); 1073 if (!d || !val[0]) 1074 goto err; 1075 1076 if (!BN_nnmod(val[0], a, m, ctx)) 1077 goto err; /* 1 */ 1078 if (BN_is_zero(val[0])) { 1079 BN_zero(r); 1080 ret = 1; 1081 goto err; 1082 } 1083 1084 window = BN_window_bits_for_exponent_size(bits); 1085 if (window > 1) { 1086 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1087 goto err; /* 2 */ 1088 j = 1 << (window - 1); 1089 for (i = 1; i < j; i++) { 1090 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1091 !BN_mod_mul(val[i], val[i - 1], d, m, ctx)) 1092 goto err; 1093 } 1094 } 1095 1096 start = 1; /* This is used to avoid multiplication etc 1097 * when there is only the value '1' in the 1098 * buffer. */ 1099 wvalue = 0; /* The 'value' of the window */ 1100 wstart = bits - 1; /* The top bit of the window */ 1101 wend = 0; /* The bottom bit of the window */ 1102 1103 if (!BN_one(r)) 1104 goto err; 1105 1106 for (;;) { 1107 if (BN_is_bit_set(p, wstart) == 0) { 1108 if (!start) 1109 if (!BN_mod_mul(r, r, r, m, ctx)) 1110 goto err; 1111 if (wstart == 0) 1112 break; 1113 wstart--; 1114 continue; 1115 } 1116 /* 1117 * We now have wstart on a 'set' bit, we now need to work out how bit 1118 * a window to do. To do this we need to scan forward until the last 1119 * set bit before the end of the window 1120 */ 1121 j = wstart; 1122 wvalue = 1; 1123 wend = 0; 1124 for (i = 1; i < window; i++) { 1125 if (wstart - i < 0) 1126 break; 1127 if (BN_is_bit_set(p, wstart - i)) { 1128 wvalue <<= (i - wend); 1129 wvalue |= 1; 1130 wend = i; 1131 } 1132 } 1133 1134 /* wend is the size of the current window */ 1135 j = wend + 1; 1136 /* add the 'bytes above' */ 1137 if (!start) 1138 for (i = 0; i < j; i++) { 1139 if (!BN_mod_mul(r, r, r, m, ctx)) 1140 goto err; 1141 } 1142 1143 /* wvalue will be an odd number < 2^window */ 1144 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1145 goto err; 1146 1147 /* move the 'window' down further */ 1148 wstart -= wend + 1; 1149 wvalue = 0; 1150 start = 0; 1151 if (wstart < 0) 1152 break; 1153 } 1154 ret = 1; 1155 err: 1156 BN_CTX_end(ctx); 1157 bn_check_top(r); 1158 return (ret); 1159} 1160