bn_exp.c revision 295016
1/* crypto/bn/bn_exp.c */ 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58/* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112#include "cryptlib.h" 113#include "bn_lcl.h" 114 115#include <stdlib.h> 116#ifdef _WIN32 117# include <malloc.h> 118# ifndef alloca 119# define alloca _alloca 120# endif 121#elif defined(__GNUC__) 122# ifndef alloca 123# define alloca(s) __builtin_alloca((s)) 124# endif 125#endif 126 127/* maximum precomputation table size for *variable* sliding windows */ 128#define TABLE_SIZE 32 129 130/* this one works - simple but works */ 131int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 132{ 133 int i, bits, ret = 0; 134 BIGNUM *v, *rr; 135 136 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 137 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 138 BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 139 return -1; 140 } 141 142 BN_CTX_start(ctx); 143 if ((r == a) || (r == p)) 144 rr = BN_CTX_get(ctx); 145 else 146 rr = r; 147 v = BN_CTX_get(ctx); 148 if (rr == NULL || v == NULL) 149 goto err; 150 151 if (BN_copy(v, a) == NULL) 152 goto err; 153 bits = BN_num_bits(p); 154 155 if (BN_is_odd(p)) { 156 if (BN_copy(rr, a) == NULL) 157 goto err; 158 } else { 159 if (!BN_one(rr)) 160 goto err; 161 } 162 163 for (i = 1; i < bits; i++) { 164 if (!BN_sqr(v, v, ctx)) 165 goto err; 166 if (BN_is_bit_set(p, i)) { 167 if (!BN_mul(rr, rr, v, ctx)) 168 goto err; 169 } 170 } 171 if (r != rr) 172 BN_copy(r, rr); 173 ret = 1; 174 err: 175 BN_CTX_end(ctx); 176 bn_check_top(r); 177 return (ret); 178} 179 180int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 181 BN_CTX *ctx) 182{ 183 int ret; 184 185 bn_check_top(a); 186 bn_check_top(p); 187 bn_check_top(m); 188 189 /*- 190 * For even modulus m = 2^k*m_odd, it might make sense to compute 191 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 192 * exponentiation for the odd part), using appropriate exponent 193 * reductions, and combine the results using the CRT. 194 * 195 * For now, we use Montgomery only if the modulus is odd; otherwise, 196 * exponentiation using the reciprocal-based quick remaindering 197 * algorithm is used. 198 * 199 * (Timing obtained with expspeed.c [computations a^p mod m 200 * where a, p, m are of the same length: 256, 512, 1024, 2048, 201 * 4096, 8192 bits], compared to the running time of the 202 * standard algorithm: 203 * 204 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 205 * 55 .. 77 % [UltraSparc processor, but 206 * debug-solaris-sparcv8-gcc conf.] 207 * 208 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 209 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 210 * 211 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 212 * at 2048 and more bits, but at 512 and 1024 bits, it was 213 * slower even than the standard algorithm! 214 * 215 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 216 * should be obtained when the new Montgomery reduction code 217 * has been integrated into OpenSSL.) 218 */ 219 220#define MONT_MUL_MOD 221#define MONT_EXP_WORD 222#define RECP_MUL_MOD 223 224#ifdef MONT_MUL_MOD 225 /* 226 * I have finally been able to take out this pre-condition of the top bit 227 * being set. It was caused by an error in BN_div with negatives. There 228 * was also another problem when for a^b%m a >= m. eay 07-May-97 229 */ 230 /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 231 232 if (BN_is_odd(m)) { 233# ifdef MONT_EXP_WORD 234 if (a->top == 1 && !a->neg 235 && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) { 236 BN_ULONG A = a->d[0]; 237 ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL); 238 } else 239# endif 240 ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL); 241 } else 242#endif 243#ifdef RECP_MUL_MOD 244 { 245 ret = BN_mod_exp_recp(r, a, p, m, ctx); 246 } 247#else 248 { 249 ret = BN_mod_exp_simple(r, a, p, m, ctx); 250 } 251#endif 252 253 bn_check_top(r); 254 return (ret); 255} 256 257int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 258 const BIGNUM *m, BN_CTX *ctx) 259{ 260 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 261 int start = 1; 262 BIGNUM *aa; 263 /* Table of variables obtained from 'ctx' */ 264 BIGNUM *val[TABLE_SIZE]; 265 BN_RECP_CTX recp; 266 267 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 268 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 269 BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 270 return -1; 271 } 272 273 bits = BN_num_bits(p); 274 if (bits == 0) { 275 /* x**0 mod 1 is still zero. */ 276 if (BN_is_one(m)) { 277 ret = 1; 278 BN_zero(r); 279 } else { 280 ret = BN_one(r); 281 } 282 return ret; 283 } 284 285 BN_CTX_start(ctx); 286 aa = BN_CTX_get(ctx); 287 val[0] = BN_CTX_get(ctx); 288 if (!aa || !val[0]) 289 goto err; 290 291 BN_RECP_CTX_init(&recp); 292 if (m->neg) { 293 /* ignore sign of 'm' */ 294 if (!BN_copy(aa, m)) 295 goto err; 296 aa->neg = 0; 297 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 298 goto err; 299 } else { 300 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 301 goto err; 302 } 303 304 if (!BN_nnmod(val[0], a, m, ctx)) 305 goto err; /* 1 */ 306 if (BN_is_zero(val[0])) { 307 BN_zero(r); 308 ret = 1; 309 goto err; 310 } 311 312 window = BN_window_bits_for_exponent_size(bits); 313 if (window > 1) { 314 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 315 goto err; /* 2 */ 316 j = 1 << (window - 1); 317 for (i = 1; i < j; i++) { 318 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 319 !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) 320 goto err; 321 } 322 } 323 324 start = 1; /* This is used to avoid multiplication etc 325 * when there is only the value '1' in the 326 * buffer. */ 327 wvalue = 0; /* The 'value' of the window */ 328 wstart = bits - 1; /* The top bit of the window */ 329 wend = 0; /* The bottom bit of the window */ 330 331 if (!BN_one(r)) 332 goto err; 333 334 for (;;) { 335 if (BN_is_bit_set(p, wstart) == 0) { 336 if (!start) 337 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 338 goto err; 339 if (wstart == 0) 340 break; 341 wstart--; 342 continue; 343 } 344 /* 345 * We now have wstart on a 'set' bit, we now need to work out how bit 346 * a window to do. To do this we need to scan forward until the last 347 * set bit before the end of the window 348 */ 349 j = wstart; 350 wvalue = 1; 351 wend = 0; 352 for (i = 1; i < window; i++) { 353 if (wstart - i < 0) 354 break; 355 if (BN_is_bit_set(p, wstart - i)) { 356 wvalue <<= (i - wend); 357 wvalue |= 1; 358 wend = i; 359 } 360 } 361 362 /* wend is the size of the current window */ 363 j = wend + 1; 364 /* add the 'bytes above' */ 365 if (!start) 366 for (i = 0; i < j; i++) { 367 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 368 goto err; 369 } 370 371 /* wvalue will be an odd number < 2^window */ 372 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) 373 goto err; 374 375 /* move the 'window' down further */ 376 wstart -= wend + 1; 377 wvalue = 0; 378 start = 0; 379 if (wstart < 0) 380 break; 381 } 382 ret = 1; 383 err: 384 BN_CTX_end(ctx); 385 BN_RECP_CTX_free(&recp); 386 bn_check_top(r); 387 return (ret); 388} 389 390int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 391 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 392{ 393 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 394 int start = 1; 395 BIGNUM *d, *r; 396 const BIGNUM *aa; 397 /* Table of variables obtained from 'ctx' */ 398 BIGNUM *val[TABLE_SIZE]; 399 BN_MONT_CTX *mont = NULL; 400 401 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 402 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 403 } 404 405 bn_check_top(a); 406 bn_check_top(p); 407 bn_check_top(m); 408 409 if (!BN_is_odd(m)) { 410 BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); 411 return (0); 412 } 413 bits = BN_num_bits(p); 414 if (bits == 0) { 415 /* x**0 mod 1 is still zero. */ 416 if (BN_is_one(m)) { 417 ret = 1; 418 BN_zero(rr); 419 } else { 420 ret = BN_one(rr); 421 } 422 return ret; 423 } 424 425 BN_CTX_start(ctx); 426 d = BN_CTX_get(ctx); 427 r = BN_CTX_get(ctx); 428 val[0] = BN_CTX_get(ctx); 429 if (!d || !r || !val[0]) 430 goto err; 431 432 /* 433 * If this is not done, things will break in the montgomery part 434 */ 435 436 if (in_mont != NULL) 437 mont = in_mont; 438 else { 439 if ((mont = BN_MONT_CTX_new()) == NULL) 440 goto err; 441 if (!BN_MONT_CTX_set(mont, m, ctx)) 442 goto err; 443 } 444 445 if (a->neg || BN_ucmp(a, m) >= 0) { 446 if (!BN_nnmod(val[0], a, m, ctx)) 447 goto err; 448 aa = val[0]; 449 } else 450 aa = a; 451 if (BN_is_zero(aa)) { 452 BN_zero(rr); 453 ret = 1; 454 goto err; 455 } 456 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 457 goto err; /* 1 */ 458 459 window = BN_window_bits_for_exponent_size(bits); 460 if (window > 1) { 461 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 462 goto err; /* 2 */ 463 j = 1 << (window - 1); 464 for (i = 1; i < j; i++) { 465 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 466 !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) 467 goto err; 468 } 469 } 470 471 start = 1; /* This is used to avoid multiplication etc 472 * when there is only the value '1' in the 473 * buffer. */ 474 wvalue = 0; /* The 'value' of the window */ 475 wstart = bits - 1; /* The top bit of the window */ 476 wend = 0; /* The bottom bit of the window */ 477 478 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 479 goto err; 480 for (;;) { 481 if (BN_is_bit_set(p, wstart) == 0) { 482 if (!start) { 483 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 484 goto err; 485 } 486 if (wstart == 0) 487 break; 488 wstart--; 489 continue; 490 } 491 /* 492 * We now have wstart on a 'set' bit, we now need to work out how bit 493 * a window to do. To do this we need to scan forward until the last 494 * set bit before the end of the window 495 */ 496 j = wstart; 497 wvalue = 1; 498 wend = 0; 499 for (i = 1; i < window; i++) { 500 if (wstart - i < 0) 501 break; 502 if (BN_is_bit_set(p, wstart - i)) { 503 wvalue <<= (i - wend); 504 wvalue |= 1; 505 wend = i; 506 } 507 } 508 509 /* wend is the size of the current window */ 510 j = wend + 1; 511 /* add the 'bytes above' */ 512 if (!start) 513 for (i = 0; i < j; i++) { 514 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 515 goto err; 516 } 517 518 /* wvalue will be an odd number < 2^window */ 519 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 520 goto err; 521 522 /* move the 'window' down further */ 523 wstart -= wend + 1; 524 wvalue = 0; 525 start = 0; 526 if (wstart < 0) 527 break; 528 } 529 if (!BN_from_montgomery(rr, r, mont, ctx)) 530 goto err; 531 ret = 1; 532 err: 533 if ((in_mont == NULL) && (mont != NULL)) 534 BN_MONT_CTX_free(mont); 535 BN_CTX_end(ctx); 536 bn_check_top(rr); 537 return (ret); 538} 539 540/* 541 * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific 542 * layout so that accessing any of these table values shows the same access 543 * pattern as far as cache lines are concerned. The following functions are 544 * used to transfer a BIGNUM from/to that table. 545 */ 546 547static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, 548 unsigned char *buf, int idx, 549 int width) 550{ 551 size_t i, j; 552 553 if (top > b->top) 554 top = b->top; /* this works because 'buf' is explicitly 555 * zeroed */ 556 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 557 buf[j] = ((unsigned char *)b->d)[i]; 558 } 559 560 return 1; 561} 562 563static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, 564 unsigned char *buf, int idx, 565 int width) 566{ 567 size_t i, j; 568 569 if (bn_wexpand(b, top) == NULL) 570 return 0; 571 572 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 573 ((unsigned char *)b->d)[i] = buf[j]; 574 } 575 576 b->top = top; 577 bn_correct_top(b); 578 return 1; 579} 580 581/* 582 * Given a pointer value, compute the next address that is a cache line 583 * multiple. 584 */ 585#define MOD_EXP_CTIME_ALIGN(x_) \ 586 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 587 588/* 589 * This variant of BN_mod_exp_mont() uses fixed windows and the special 590 * precomputation memory layout to limit data-dependency to a minimum to 591 * protect secret exponents (cf. the hyper-threading timing attacks pointed 592 * out by Colin Percival, 593 * http://www.daemonology.net/hyperthreading-considered-harmful/) 594 */ 595int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 596 const BIGNUM *m, BN_CTX *ctx, 597 BN_MONT_CTX *in_mont) 598{ 599 int i, bits, ret = 0, window, wvalue; 600 int top; 601 BN_MONT_CTX *mont = NULL; 602 603 int numPowers; 604 unsigned char *powerbufFree = NULL; 605 int powerbufLen = 0; 606 unsigned char *powerbuf = NULL; 607 BIGNUM tmp, am; 608 609 bn_check_top(a); 610 bn_check_top(p); 611 bn_check_top(m); 612 613 if (!BN_is_odd(m)) { 614 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS); 615 return (0); 616 } 617 618 top = m->top; 619 620 bits = BN_num_bits(p); 621 if (bits == 0) { 622 /* x**0 mod 1 is still zero. */ 623 if (BN_is_one(m)) { 624 ret = 1; 625 BN_zero(rr); 626 } else { 627 ret = BN_one(rr); 628 } 629 return ret; 630 } 631 632 BN_CTX_start(ctx); 633 634 /* 635 * Allocate a montgomery context if it was not supplied by the caller. If 636 * this is not done, things will break in the montgomery part. 637 */ 638 if (in_mont != NULL) 639 mont = in_mont; 640 else { 641 if ((mont = BN_MONT_CTX_new()) == NULL) 642 goto err; 643 if (!BN_MONT_CTX_set(mont, m, ctx)) 644 goto err; 645 } 646 647 /* Get the window size to use with size of p. */ 648 window = BN_window_bits_for_ctime_exponent_size(bits); 649#if defined(OPENSSL_BN_ASM_MONT5) 650 if (window == 6 && bits <= 1024) 651 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 652#endif 653 654 /* 655 * Allocate a buffer large enough to hold all of the pre-computed powers 656 * of am, am itself and tmp. 657 */ 658 numPowers = 1 << window; 659 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 660 ((2 * top) > 661 numPowers ? (2 * top) : numPowers)); 662#ifdef alloca 663 if (powerbufLen < 3072) 664 powerbufFree = 665 alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 666 else 667#endif 668 if ((powerbufFree = 669 (unsigned char *)OPENSSL_malloc(powerbufLen + 670 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) 671 == NULL) 672 goto err; 673 674 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 675 memset(powerbuf, 0, powerbufLen); 676 677#ifdef alloca 678 if (powerbufLen < 3072) 679 powerbufFree = NULL; 680#endif 681 682 /* lay down tmp and am right after powers table */ 683 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 684 am.d = tmp.d + top; 685 tmp.top = am.top = 0; 686 tmp.dmax = am.dmax = top; 687 tmp.neg = am.neg = 0; 688 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 689 690 /* prepare a^0 in Montgomery domain */ 691#if 1 692 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 693 goto err; 694#else 695 tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 696 for (i = 1; i < top; i++) 697 tmp.d[i] = (~m->d[i]) & BN_MASK2; 698 tmp.top = top; 699#endif 700 701 /* prepare a^1 in Montgomery domain */ 702 if (a->neg || BN_ucmp(a, m) >= 0) { 703 if (!BN_mod(&am, a, m, ctx)) 704 goto err; 705 if (!BN_to_montgomery(&am, &am, mont, ctx)) 706 goto err; 707 } else if (!BN_to_montgomery(&am, a, mont, ctx)) 708 goto err; 709 710#if defined(OPENSSL_BN_ASM_MONT5) 711 if (window == 5 && top > 1) { 712 /* 713 * This optimization uses ideas from http://eprint.iacr.org/2011/239, 714 * specifically optimization of cache-timing attack countermeasures 715 * and pre-computation optimization. 716 */ 717 718 /* 719 * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 720 * 512-bit RSA is hardly relevant, we omit it to spare size... 721 */ 722 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 723 const void *table, const BN_ULONG *np, 724 const BN_ULONG *n0, int num, int power); 725 void bn_scatter5(const BN_ULONG *inp, size_t num, 726 void *table, size_t power); 727 void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); 728 729 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 730 731 /* 732 * BN_to_montgomery can contaminate words above .top [in 733 * BN_DEBUG[_DEBUG] build]... 734 */ 735 for (i = am.top; i < top; i++) 736 am.d[i] = 0; 737 for (i = tmp.top; i < top; i++) 738 tmp.d[i] = 0; 739 740 bn_scatter5(tmp.d, top, powerbuf, 0); 741 bn_scatter5(am.d, am.top, powerbuf, 1); 742 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 743 bn_scatter5(tmp.d, top, powerbuf, 2); 744 745# if 0 746 for (i = 3; i < 32; i++) { 747 /* Calculate a^i = a^(i-1) * a */ 748 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 749 bn_scatter5(tmp.d, top, powerbuf, i); 750 } 751# else 752 /* same as above, but uses squaring for 1/2 of operations */ 753 for (i = 4; i < 32; i *= 2) { 754 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 755 bn_scatter5(tmp.d, top, powerbuf, i); 756 } 757 for (i = 3; i < 8; i += 2) { 758 int j; 759 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 760 bn_scatter5(tmp.d, top, powerbuf, i); 761 for (j = 2 * i; j < 32; j *= 2) { 762 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 763 bn_scatter5(tmp.d, top, powerbuf, j); 764 } 765 } 766 for (; i < 16; i += 2) { 767 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 768 bn_scatter5(tmp.d, top, powerbuf, i); 769 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 770 bn_scatter5(tmp.d, top, powerbuf, 2 * i); 771 } 772 for (; i < 32; i += 2) { 773 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 774 bn_scatter5(tmp.d, top, powerbuf, i); 775 } 776# endif 777 bits--; 778 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 779 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 780 bn_gather5(tmp.d, top, powerbuf, wvalue); 781 782 /* 783 * Scan the exponent one window at a time starting from the most 784 * significant bits. 785 */ 786 while (bits >= 0) { 787 for (wvalue = 0, i = 0; i < 5; i++, bits--) 788 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 789 790 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 791 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 792 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 793 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 794 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 795 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 796 } 797 798 tmp.top = top; 799 bn_correct_top(&tmp); 800 } else 801#endif 802 { 803 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) 804 goto err; 805 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) 806 goto err; 807 808 /* 809 * If the window size is greater than 1, then calculate 810 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even 811 * powers could instead be computed as (a^(i/2))^2 to use the slight 812 * performance advantage of sqr over mul). 813 */ 814 if (window > 1) { 815 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 816 goto err; 817 if (!MOD_EXP_CTIME_COPY_TO_PREBUF 818 (&tmp, top, powerbuf, 2, numPowers)) 819 goto err; 820 for (i = 3; i < numPowers; i++) { 821 /* Calculate a^i = a^(i-1) * a */ 822 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) 823 goto err; 824 if (!MOD_EXP_CTIME_COPY_TO_PREBUF 825 (&tmp, top, powerbuf, i, numPowers)) 826 goto err; 827 } 828 } 829 830 bits--; 831 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 832 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 833 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF 834 (&tmp, top, powerbuf, wvalue, numPowers)) 835 goto err; 836 837 /* 838 * Scan the exponent one window at a time starting from the most 839 * significant bits. 840 */ 841 while (bits >= 0) { 842 wvalue = 0; /* The 'value' of the window */ 843 844 /* Scan the window, squaring the result as we go */ 845 for (i = 0; i < window; i++, bits--) { 846 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) 847 goto err; 848 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 849 } 850 851 /* 852 * Fetch the appropriate pre-computed value from the pre-buf 853 */ 854 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF 855 (&am, top, powerbuf, wvalue, numPowers)) 856 goto err; 857 858 /* Multiply the result into the intermediate result */ 859 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 860 goto err; 861 } 862 } 863 864 /* Convert the final result from montgomery to standard format */ 865 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 866 goto err; 867 ret = 1; 868 err: 869 if ((in_mont == NULL) && (mont != NULL)) 870 BN_MONT_CTX_free(mont); 871 if (powerbuf != NULL) { 872 OPENSSL_cleanse(powerbuf, powerbufLen); 873 if (powerbufFree) 874 OPENSSL_free(powerbufFree); 875 } 876 BN_CTX_end(ctx); 877 return (ret); 878} 879 880int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 881 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 882{ 883 BN_MONT_CTX *mont = NULL; 884 int b, bits, ret = 0; 885 int r_is_one; 886 BN_ULONG w, next_w; 887 BIGNUM *d, *r, *t; 888 BIGNUM *swap_tmp; 889#define BN_MOD_MUL_WORD(r, w, m) \ 890 (BN_mul_word(r, (w)) && \ 891 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 892 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 893 /* 894 * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is 895 * probably more overhead than always using BN_mod (which uses BN_copy if 896 * a similar test returns true). 897 */ 898 /* 899 * We can use BN_mod and do not need BN_nnmod because our accumulator is 900 * never negative (the result of BN_mod does not depend on the sign of 901 * the modulus). 902 */ 903#define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 904 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 905 906 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 907 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 908 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 909 return -1; 910 } 911 912 bn_check_top(p); 913 bn_check_top(m); 914 915 if (!BN_is_odd(m)) { 916 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); 917 return (0); 918 } 919 if (m->top == 1) 920 a %= m->d[0]; /* make sure that 'a' is reduced */ 921 922 bits = BN_num_bits(p); 923 if (bits == 0) { 924 /* x**0 mod 1 is still zero. */ 925 if (BN_is_one(m)) { 926 ret = 1; 927 BN_zero(rr); 928 } else { 929 ret = BN_one(rr); 930 } 931 return ret; 932 } 933 if (a == 0) { 934 BN_zero(rr); 935 ret = 1; 936 return ret; 937 } 938 939 BN_CTX_start(ctx); 940 d = BN_CTX_get(ctx); 941 r = BN_CTX_get(ctx); 942 t = BN_CTX_get(ctx); 943 if (d == NULL || r == NULL || t == NULL) 944 goto err; 945 946 if (in_mont != NULL) 947 mont = in_mont; 948 else { 949 if ((mont = BN_MONT_CTX_new()) == NULL) 950 goto err; 951 if (!BN_MONT_CTX_set(mont, m, ctx)) 952 goto err; 953 } 954 955 r_is_one = 1; /* except for Montgomery factor */ 956 957 /* bits-1 >= 0 */ 958 959 /* The result is accumulated in the product r*w. */ 960 w = a; /* bit 'bits-1' of 'p' is always set */ 961 for (b = bits - 2; b >= 0; b--) { 962 /* First, square r*w. */ 963 next_w = w * w; 964 if ((next_w / w) != w) { /* overflow */ 965 if (r_is_one) { 966 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 967 goto err; 968 r_is_one = 0; 969 } else { 970 if (!BN_MOD_MUL_WORD(r, w, m)) 971 goto err; 972 } 973 next_w = 1; 974 } 975 w = next_w; 976 if (!r_is_one) { 977 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 978 goto err; 979 } 980 981 /* Second, multiply r*w by 'a' if exponent bit is set. */ 982 if (BN_is_bit_set(p, b)) { 983 next_w = w * a; 984 if ((next_w / a) != w) { /* overflow */ 985 if (r_is_one) { 986 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 987 goto err; 988 r_is_one = 0; 989 } else { 990 if (!BN_MOD_MUL_WORD(r, w, m)) 991 goto err; 992 } 993 next_w = a; 994 } 995 w = next_w; 996 } 997 } 998 999 /* Finally, set r:=r*w. */ 1000 if (w != 1) { 1001 if (r_is_one) { 1002 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1003 goto err; 1004 r_is_one = 0; 1005 } else { 1006 if (!BN_MOD_MUL_WORD(r, w, m)) 1007 goto err; 1008 } 1009 } 1010 1011 if (r_is_one) { /* can happen only if a == 1 */ 1012 if (!BN_one(rr)) 1013 goto err; 1014 } else { 1015 if (!BN_from_montgomery(rr, r, mont, ctx)) 1016 goto err; 1017 } 1018 ret = 1; 1019 err: 1020 if ((in_mont == NULL) && (mont != NULL)) 1021 BN_MONT_CTX_free(mont); 1022 BN_CTX_end(ctx); 1023 bn_check_top(rr); 1024 return (ret); 1025} 1026 1027/* The old fallback, simple version :-) */ 1028int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 1029 const BIGNUM *m, BN_CTX *ctx) 1030{ 1031 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 1032 int start = 1; 1033 BIGNUM *d; 1034 /* Table of variables obtained from 'ctx' */ 1035 BIGNUM *val[TABLE_SIZE]; 1036 1037 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1038 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1039 BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1040 return -1; 1041 } 1042 1043 bits = BN_num_bits(p); 1044 if (bits == 0) { 1045 /* x**0 mod 1 is still zero. */ 1046 if (BN_is_one(m)) { 1047 ret = 1; 1048 BN_zero(r); 1049 } else { 1050 ret = BN_one(r); 1051 } 1052 return ret; 1053 } 1054 1055 BN_CTX_start(ctx); 1056 d = BN_CTX_get(ctx); 1057 val[0] = BN_CTX_get(ctx); 1058 if (!d || !val[0]) 1059 goto err; 1060 1061 if (!BN_nnmod(val[0], a, m, ctx)) 1062 goto err; /* 1 */ 1063 if (BN_is_zero(val[0])) { 1064 BN_zero(r); 1065 ret = 1; 1066 goto err; 1067 } 1068 1069 window = BN_window_bits_for_exponent_size(bits); 1070 if (window > 1) { 1071 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1072 goto err; /* 2 */ 1073 j = 1 << (window - 1); 1074 for (i = 1; i < j; i++) { 1075 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1076 !BN_mod_mul(val[i], val[i - 1], d, m, ctx)) 1077 goto err; 1078 } 1079 } 1080 1081 start = 1; /* This is used to avoid multiplication etc 1082 * when there is only the value '1' in the 1083 * buffer. */ 1084 wvalue = 0; /* The 'value' of the window */ 1085 wstart = bits - 1; /* The top bit of the window */ 1086 wend = 0; /* The bottom bit of the window */ 1087 1088 if (!BN_one(r)) 1089 goto err; 1090 1091 for (;;) { 1092 if (BN_is_bit_set(p, wstart) == 0) { 1093 if (!start) 1094 if (!BN_mod_mul(r, r, r, m, ctx)) 1095 goto err; 1096 if (wstart == 0) 1097 break; 1098 wstart--; 1099 continue; 1100 } 1101 /* 1102 * We now have wstart on a 'set' bit, we now need to work out how bit 1103 * a window to do. To do this we need to scan forward until the last 1104 * set bit before the end of the window 1105 */ 1106 j = wstart; 1107 wvalue = 1; 1108 wend = 0; 1109 for (i = 1; i < window; i++) { 1110 if (wstart - i < 0) 1111 break; 1112 if (BN_is_bit_set(p, wstart - i)) { 1113 wvalue <<= (i - wend); 1114 wvalue |= 1; 1115 wend = i; 1116 } 1117 } 1118 1119 /* wend is the size of the current window */ 1120 j = wend + 1; 1121 /* add the 'bytes above' */ 1122 if (!start) 1123 for (i = 0; i < j; i++) { 1124 if (!BN_mod_mul(r, r, r, m, ctx)) 1125 goto err; 1126 } 1127 1128 /* wvalue will be an odd number < 2^window */ 1129 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1130 goto err; 1131 1132 /* move the 'window' down further */ 1133 wstart -= wend + 1; 1134 wvalue = 0; 1135 start = 0; 1136 if (wstart < 0) 1137 break; 1138 } 1139 ret = 1; 1140 err: 1141 BN_CTX_end(ctx); 1142 bn_check_top(r); 1143 return (ret); 1144} 1145