bn_exp.c revision 291721
1/* crypto/bn/bn_exp.c */ 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58/* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112#include "cryptlib.h" 113#include "bn_lcl.h" 114 115#include <stdlib.h> 116#ifdef _WIN32 117# include <malloc.h> 118# ifndef alloca 119# define alloca _alloca 120# endif 121#elif defined(__GNUC__) 122# ifndef alloca 123# define alloca(s) __builtin_alloca((s)) 124# endif 125#endif 126 127/* maximum precomputation table size for *variable* sliding windows */ 128#define TABLE_SIZE 32 129 130/* this one works - simple but works */ 131int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 132{ 133 int i, bits, ret = 0; 134 BIGNUM *v, *rr; 135 136 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 137 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 138 BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 139 return -1; 140 } 141 142 BN_CTX_start(ctx); 143 if ((r == a) || (r == p)) 144 rr = BN_CTX_get(ctx); 145 else 146 rr = r; 147 v = BN_CTX_get(ctx); 148 if (rr == NULL || v == NULL) 149 goto err; 150 151 if (BN_copy(v, a) == NULL) 152 goto err; 153 bits = BN_num_bits(p); 154 155 if (BN_is_odd(p)) { 156 if (BN_copy(rr, a) == NULL) 157 goto err; 158 } else { 159 if (!BN_one(rr)) 160 goto err; 161 } 162 163 for (i = 1; i < bits; i++) { 164 if (!BN_sqr(v, v, ctx)) 165 goto err; 166 if (BN_is_bit_set(p, i)) { 167 if (!BN_mul(rr, rr, v, ctx)) 168 goto err; 169 } 170 } 171 if (r != rr) 172 BN_copy(r, rr); 173 ret = 1; 174 err: 175 BN_CTX_end(ctx); 176 bn_check_top(r); 177 return (ret); 178} 179 180int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 181 BN_CTX *ctx) 182{ 183 int ret; 184 185 bn_check_top(a); 186 bn_check_top(p); 187 bn_check_top(m); 188 189 /*- 190 * For even modulus m = 2^k*m_odd, it might make sense to compute 191 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 192 * exponentiation for the odd part), using appropriate exponent 193 * reductions, and combine the results using the CRT. 194 * 195 * For now, we use Montgomery only if the modulus is odd; otherwise, 196 * exponentiation using the reciprocal-based quick remaindering 197 * algorithm is used. 198 * 199 * (Timing obtained with expspeed.c [computations a^p mod m 200 * where a, p, m are of the same length: 256, 512, 1024, 2048, 201 * 4096, 8192 bits], compared to the running time of the 202 * standard algorithm: 203 * 204 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 205 * 55 .. 77 % [UltraSparc processor, but 206 * debug-solaris-sparcv8-gcc conf.] 207 * 208 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 209 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 210 * 211 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 212 * at 2048 and more bits, but at 512 and 1024 bits, it was 213 * slower even than the standard algorithm! 214 * 215 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 216 * should be obtained when the new Montgomery reduction code 217 * has been integrated into OpenSSL.) 218 */ 219 220#define MONT_MUL_MOD 221#define MONT_EXP_WORD 222#define RECP_MUL_MOD 223 224#ifdef MONT_MUL_MOD 225 /* 226 * I have finally been able to take out this pre-condition of the top bit 227 * being set. It was caused by an error in BN_div with negatives. There 228 * was also another problem when for a^b%m a >= m. eay 07-May-97 229 */ 230 /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 231 232 if (BN_is_odd(m)) { 233# ifdef MONT_EXP_WORD 234 if (a->top == 1 && !a->neg 235 && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) { 236 BN_ULONG A = a->d[0]; 237 ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL); 238 } else 239# endif 240 ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL); 241 } else 242#endif 243#ifdef RECP_MUL_MOD 244 { 245 ret = BN_mod_exp_recp(r, a, p, m, ctx); 246 } 247#else 248 { 249 ret = BN_mod_exp_simple(r, a, p, m, ctx); 250 } 251#endif 252 253 bn_check_top(r); 254 return (ret); 255} 256 257int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 258 const BIGNUM *m, BN_CTX *ctx) 259{ 260 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 261 int start = 1; 262 BIGNUM *aa; 263 /* Table of variables obtained from 'ctx' */ 264 BIGNUM *val[TABLE_SIZE]; 265 BN_RECP_CTX recp; 266 267 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 268 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 269 BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 270 return -1; 271 } 272 273 bits = BN_num_bits(p); 274 275 if (bits == 0) { 276 ret = BN_one(r); 277 return ret; 278 } 279 280 BN_CTX_start(ctx); 281 aa = BN_CTX_get(ctx); 282 val[0] = BN_CTX_get(ctx); 283 if (!aa || !val[0]) 284 goto err; 285 286 BN_RECP_CTX_init(&recp); 287 if (m->neg) { 288 /* ignore sign of 'm' */ 289 if (!BN_copy(aa, m)) 290 goto err; 291 aa->neg = 0; 292 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 293 goto err; 294 } else { 295 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 296 goto err; 297 } 298 299 if (!BN_nnmod(val[0], a, m, ctx)) 300 goto err; /* 1 */ 301 if (BN_is_zero(val[0])) { 302 BN_zero(r); 303 ret = 1; 304 goto err; 305 } 306 307 window = BN_window_bits_for_exponent_size(bits); 308 if (window > 1) { 309 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 310 goto err; /* 2 */ 311 j = 1 << (window - 1); 312 for (i = 1; i < j; i++) { 313 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 314 !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) 315 goto err; 316 } 317 } 318 319 start = 1; /* This is used to avoid multiplication etc 320 * when there is only the value '1' in the 321 * buffer. */ 322 wvalue = 0; /* The 'value' of the window */ 323 wstart = bits - 1; /* The top bit of the window */ 324 wend = 0; /* The bottom bit of the window */ 325 326 if (!BN_one(r)) 327 goto err; 328 329 for (;;) { 330 if (BN_is_bit_set(p, wstart) == 0) { 331 if (!start) 332 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 333 goto err; 334 if (wstart == 0) 335 break; 336 wstart--; 337 continue; 338 } 339 /* 340 * We now have wstart on a 'set' bit, we now need to work out how bit 341 * a window to do. To do this we need to scan forward until the last 342 * set bit before the end of the window 343 */ 344 j = wstart; 345 wvalue = 1; 346 wend = 0; 347 for (i = 1; i < window; i++) { 348 if (wstart - i < 0) 349 break; 350 if (BN_is_bit_set(p, wstart - i)) { 351 wvalue <<= (i - wend); 352 wvalue |= 1; 353 wend = i; 354 } 355 } 356 357 /* wend is the size of the current window */ 358 j = wend + 1; 359 /* add the 'bytes above' */ 360 if (!start) 361 for (i = 0; i < j; i++) { 362 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 363 goto err; 364 } 365 366 /* wvalue will be an odd number < 2^window */ 367 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) 368 goto err; 369 370 /* move the 'window' down further */ 371 wstart -= wend + 1; 372 wvalue = 0; 373 start = 0; 374 if (wstart < 0) 375 break; 376 } 377 ret = 1; 378 err: 379 BN_CTX_end(ctx); 380 BN_RECP_CTX_free(&recp); 381 bn_check_top(r); 382 return (ret); 383} 384 385int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 386 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 387{ 388 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 389 int start = 1; 390 BIGNUM *d, *r; 391 const BIGNUM *aa; 392 /* Table of variables obtained from 'ctx' */ 393 BIGNUM *val[TABLE_SIZE]; 394 BN_MONT_CTX *mont = NULL; 395 396 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 397 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 398 } 399 400 bn_check_top(a); 401 bn_check_top(p); 402 bn_check_top(m); 403 404 if (!BN_is_odd(m)) { 405 BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); 406 return (0); 407 } 408 bits = BN_num_bits(p); 409 if (bits == 0) { 410 ret = BN_one(rr); 411 return ret; 412 } 413 414 BN_CTX_start(ctx); 415 d = BN_CTX_get(ctx); 416 r = BN_CTX_get(ctx); 417 val[0] = BN_CTX_get(ctx); 418 if (!d || !r || !val[0]) 419 goto err; 420 421 /* 422 * If this is not done, things will break in the montgomery part 423 */ 424 425 if (in_mont != NULL) 426 mont = in_mont; 427 else { 428 if ((mont = BN_MONT_CTX_new()) == NULL) 429 goto err; 430 if (!BN_MONT_CTX_set(mont, m, ctx)) 431 goto err; 432 } 433 434 if (a->neg || BN_ucmp(a, m) >= 0) { 435 if (!BN_nnmod(val[0], a, m, ctx)) 436 goto err; 437 aa = val[0]; 438 } else 439 aa = a; 440 if (BN_is_zero(aa)) { 441 BN_zero(rr); 442 ret = 1; 443 goto err; 444 } 445 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 446 goto err; /* 1 */ 447 448 window = BN_window_bits_for_exponent_size(bits); 449 if (window > 1) { 450 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 451 goto err; /* 2 */ 452 j = 1 << (window - 1); 453 for (i = 1; i < j; i++) { 454 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 455 !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) 456 goto err; 457 } 458 } 459 460 start = 1; /* This is used to avoid multiplication etc 461 * when there is only the value '1' in the 462 * buffer. */ 463 wvalue = 0; /* The 'value' of the window */ 464 wstart = bits - 1; /* The top bit of the window */ 465 wend = 0; /* The bottom bit of the window */ 466 467 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 468 goto err; 469 for (;;) { 470 if (BN_is_bit_set(p, wstart) == 0) { 471 if (!start) { 472 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 473 goto err; 474 } 475 if (wstart == 0) 476 break; 477 wstart--; 478 continue; 479 } 480 /* 481 * We now have wstart on a 'set' bit, we now need to work out how bit 482 * a window to do. To do this we need to scan forward until the last 483 * set bit before the end of the window 484 */ 485 j = wstart; 486 wvalue = 1; 487 wend = 0; 488 for (i = 1; i < window; i++) { 489 if (wstart - i < 0) 490 break; 491 if (BN_is_bit_set(p, wstart - i)) { 492 wvalue <<= (i - wend); 493 wvalue |= 1; 494 wend = i; 495 } 496 } 497 498 /* wend is the size of the current window */ 499 j = wend + 1; 500 /* add the 'bytes above' */ 501 if (!start) 502 for (i = 0; i < j; i++) { 503 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 504 goto err; 505 } 506 507 /* wvalue will be an odd number < 2^window */ 508 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 509 goto err; 510 511 /* move the 'window' down further */ 512 wstart -= wend + 1; 513 wvalue = 0; 514 start = 0; 515 if (wstart < 0) 516 break; 517 } 518 if (!BN_from_montgomery(rr, r, mont, ctx)) 519 goto err; 520 ret = 1; 521 err: 522 if ((in_mont == NULL) && (mont != NULL)) 523 BN_MONT_CTX_free(mont); 524 BN_CTX_end(ctx); 525 bn_check_top(rr); 526 return (ret); 527} 528 529/* 530 * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific 531 * layout so that accessing any of these table values shows the same access 532 * pattern as far as cache lines are concerned. The following functions are 533 * used to transfer a BIGNUM from/to that table. 534 */ 535 536static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, 537 unsigned char *buf, int idx, 538 int width) 539{ 540 size_t i, j; 541 542 if (top > b->top) 543 top = b->top; /* this works because 'buf' is explicitly 544 * zeroed */ 545 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 546 buf[j] = ((unsigned char *)b->d)[i]; 547 } 548 549 return 1; 550} 551 552static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, 553 unsigned char *buf, int idx, 554 int width) 555{ 556 size_t i, j; 557 558 if (bn_wexpand(b, top) == NULL) 559 return 0; 560 561 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 562 ((unsigned char *)b->d)[i] = buf[j]; 563 } 564 565 b->top = top; 566 bn_correct_top(b); 567 return 1; 568} 569 570/* 571 * Given a pointer value, compute the next address that is a cache line 572 * multiple. 573 */ 574#define MOD_EXP_CTIME_ALIGN(x_) \ 575 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 576 577/* 578 * This variant of BN_mod_exp_mont() uses fixed windows and the special 579 * precomputation memory layout to limit data-dependency to a minimum to 580 * protect secret exponents (cf. the hyper-threading timing attacks pointed 581 * out by Colin Percival, 582 * http://www.daemong-consideredperthreading-considered-harmful/) 583 */ 584int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 585 const BIGNUM *m, BN_CTX *ctx, 586 BN_MONT_CTX *in_mont) 587{ 588 int i, bits, ret = 0, window, wvalue; 589 int top; 590 BN_MONT_CTX *mont = NULL; 591 592 int numPowers; 593 unsigned char *powerbufFree = NULL; 594 int powerbufLen = 0; 595 unsigned char *powerbuf = NULL; 596 BIGNUM tmp, am; 597 598 bn_check_top(a); 599 bn_check_top(p); 600 bn_check_top(m); 601 602 if (!BN_is_odd(m)) { 603 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS); 604 return (0); 605 } 606 607 top = m->top; 608 609 bits = BN_num_bits(p); 610 if (bits == 0) { 611 ret = BN_one(rr); 612 return ret; 613 } 614 615 BN_CTX_start(ctx); 616 617 /* 618 * Allocate a montgomery context if it was not supplied by the caller. If 619 * this is not done, things will break in the montgomery part. 620 */ 621 if (in_mont != NULL) 622 mont = in_mont; 623 else { 624 if ((mont = BN_MONT_CTX_new()) == NULL) 625 goto err; 626 if (!BN_MONT_CTX_set(mont, m, ctx)) 627 goto err; 628 } 629 630 /* Get the window size to use with size of p. */ 631 window = BN_window_bits_for_ctime_exponent_size(bits); 632#if defined(OPENSSL_BN_ASM_MONT5) 633 if (window == 6 && bits <= 1024) 634 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 635#endif 636 637 /* 638 * Allocate a buffer large enough to hold all of the pre-computed powers 639 * of am, am itself and tmp. 640 */ 641 numPowers = 1 << window; 642 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 643 ((2 * top) > 644 numPowers ? (2 * top) : numPowers)); 645#ifdef alloca 646 if (powerbufLen < 3072) 647 powerbufFree = 648 alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 649 else 650#endif 651 if ((powerbufFree = 652 (unsigned char *)OPENSSL_malloc(powerbufLen + 653 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) 654 == NULL) 655 goto err; 656 657 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 658 memset(powerbuf, 0, powerbufLen); 659 660#ifdef alloca 661 if (powerbufLen < 3072) 662 powerbufFree = NULL; 663#endif 664 665 /* lay down tmp and am right after powers table */ 666 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 667 am.d = tmp.d + top; 668 tmp.top = am.top = 0; 669 tmp.dmax = am.dmax = top; 670 tmp.neg = am.neg = 0; 671 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 672 673 /* prepare a^0 in Montgomery domain */ 674#if 1 675 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 676 goto err; 677#else 678 tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 679 for (i = 1; i < top; i++) 680 tmp.d[i] = (~m->d[i]) & BN_MASK2; 681 tmp.top = top; 682#endif 683 684 /* prepare a^1 in Montgomery domain */ 685 if (a->neg || BN_ucmp(a, m) >= 0) { 686 if (!BN_mod(&am, a, m, ctx)) 687 goto err; 688 if (!BN_to_montgomery(&am, &am, mont, ctx)) 689 goto err; 690 } else if (!BN_to_montgomery(&am, a, mont, ctx)) 691 goto err; 692 693#if defined(OPENSSL_BN_ASM_MONT5) 694 if (window == 5 && top > 1) { 695 /* 696 * This optimization uses ideas from http://eprint.iacr.org/2011/239, 697 * specifically optimization of cache-timing attack countermeasures 698 * and pre-computation optimization. 699 */ 700 701 /* 702 * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 703 * 512-bit RSA is hardly relevant, we omit it to spare size... 704 */ 705 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 706 const void *table, const BN_ULONG *np, 707 const BN_ULONG *n0, int num, int power); 708 void bn_scatter5(const BN_ULONG *inp, size_t num, 709 void *table, size_t power); 710 void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); 711 712 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 713 714 /* 715 * BN_to_montgomery can contaminate words above .top [in 716 * BN_DEBUG[_DEBUG] build]... 717 */ 718 for (i = am.top; i < top; i++) 719 am.d[i] = 0; 720 for (i = tmp.top; i < top; i++) 721 tmp.d[i] = 0; 722 723 bn_scatter5(tmp.d, top, powerbuf, 0); 724 bn_scatter5(am.d, am.top, powerbuf, 1); 725 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 726 bn_scatter5(tmp.d, top, powerbuf, 2); 727 728# if 0 729 for (i = 3; i < 32; i++) { 730 /* Calculate a^i = a^(i-1) * a */ 731 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 732 bn_scatter5(tmp.d, top, powerbuf, i); 733 } 734# else 735 /* same as above, but uses squaring for 1/2 of operations */ 736 for (i = 4; i < 32; i *= 2) { 737 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 738 bn_scatter5(tmp.d, top, powerbuf, i); 739 } 740 for (i = 3; i < 8; i += 2) { 741 int j; 742 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 743 bn_scatter5(tmp.d, top, powerbuf, i); 744 for (j = 2 * i; j < 32; j *= 2) { 745 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 746 bn_scatter5(tmp.d, top, powerbuf, j); 747 } 748 } 749 for (; i < 16; i += 2) { 750 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 751 bn_scatter5(tmp.d, top, powerbuf, i); 752 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 753 bn_scatter5(tmp.d, top, powerbuf, 2 * i); 754 } 755 for (; i < 32; i += 2) { 756 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 757 bn_scatter5(tmp.d, top, powerbuf, i); 758 } 759# endif 760 bits--; 761 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 762 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 763 bn_gather5(tmp.d, top, powerbuf, wvalue); 764 765 /* 766 * Scan the exponent one window at a time starting from the most 767 * significant bits. 768 */ 769 while (bits >= 0) { 770 for (wvalue = 0, i = 0; i < 5; i++, bits--) 771 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 772 773 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 774 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 775 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 776 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 777 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 778 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 779 } 780 781 tmp.top = top; 782 bn_correct_top(&tmp); 783 } else 784#endif 785 { 786 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) 787 goto err; 788 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) 789 goto err; 790 791 /* 792 * If the window size is greater than 1, then calculate 793 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even 794 * powers could instead be computed as (a^(i/2))^2 to use the slight 795 * performance advantage of sqr over mul). 796 */ 797 if (window > 1) { 798 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 799 goto err; 800 if (!MOD_EXP_CTIME_COPY_TO_PREBUF 801 (&tmp, top, powerbuf, 2, numPowers)) 802 goto err; 803 for (i = 3; i < numPowers; i++) { 804 /* Calculate a^i = a^(i-1) * a */ 805 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) 806 goto err; 807 if (!MOD_EXP_CTIME_COPY_TO_PREBUF 808 (&tmp, top, powerbuf, i, numPowers)) 809 goto err; 810 } 811 } 812 813 bits--; 814 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 815 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 816 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF 817 (&tmp, top, powerbuf, wvalue, numPowers)) 818 goto err; 819 820 /* 821 * Scan the exponent one window at a time starting from the most 822 * significant bits. 823 */ 824 while (bits >= 0) { 825 wvalue = 0; /* The 'value' of the window */ 826 827 /* Scan the window, squaring the result as we go */ 828 for (i = 0; i < window; i++, bits--) { 829 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) 830 goto err; 831 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 832 } 833 834 /* 835 * Fetch the appropriate pre-computed value from the pre-buf 836 */ 837 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF 838 (&am, top, powerbuf, wvalue, numPowers)) 839 goto err; 840 841 /* Multiply the result into the intermediate result */ 842 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 843 goto err; 844 } 845 } 846 847 /* Convert the final result from montgomery to standard format */ 848 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 849 goto err; 850 ret = 1; 851 err: 852 if ((in_mont == NULL) && (mont != NULL)) 853 BN_MONT_CTX_free(mont); 854 if (powerbuf != NULL) { 855 OPENSSL_cleanse(powerbuf, powerbufLen); 856 if (powerbufFree) 857 OPENSSL_free(powerbufFree); 858 } 859 BN_CTX_end(ctx); 860 return (ret); 861} 862 863int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 864 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 865{ 866 BN_MONT_CTX *mont = NULL; 867 int b, bits, ret = 0; 868 int r_is_one; 869 BN_ULONG w, next_w; 870 BIGNUM *d, *r, *t; 871 BIGNUM *swap_tmp; 872#define BN_MOD_MUL_WORD(r, w, m) \ 873 (BN_mul_word(r, (w)) && \ 874 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 875 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 876 /* 877 * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is 878 * probably more overhead than always using BN_mod (which uses BN_copy if 879 * a similar test returns true). 880 */ 881 /* 882 * We can use BN_mod and do not need BN_nnmod because our accumulator is 883 * never negative (the result of BN_mod does not depend on the sign of 884 * the modulus). 885 */ 886#define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 887 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 888 889 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 890 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 891 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 892 return -1; 893 } 894 895 bn_check_top(p); 896 bn_check_top(m); 897 898 if (!BN_is_odd(m)) { 899 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); 900 return (0); 901 } 902 if (m->top == 1) 903 a %= m->d[0]; /* make sure that 'a' is reduced */ 904 905 bits = BN_num_bits(p); 906 if (bits == 0) { 907 /* x**0 mod 1 is still zero. */ 908 if (BN_is_one(m)) { 909 ret = 1; 910 BN_zero(rr); 911 } else 912 ret = BN_one(rr); 913 return ret; 914 } 915 if (a == 0) { 916 BN_zero(rr); 917 ret = 1; 918 return ret; 919 } 920 921 BN_CTX_start(ctx); 922 d = BN_CTX_get(ctx); 923 r = BN_CTX_get(ctx); 924 t = BN_CTX_get(ctx); 925 if (d == NULL || r == NULL || t == NULL) 926 goto err; 927 928 if (in_mont != NULL) 929 mont = in_mont; 930 else { 931 if ((mont = BN_MONT_CTX_new()) == NULL) 932 goto err; 933 if (!BN_MONT_CTX_set(mont, m, ctx)) 934 goto err; 935 } 936 937 r_is_one = 1; /* except for Montgomery factor */ 938 939 /* bits-1 >= 0 */ 940 941 /* The result is accumulated in the product r*w. */ 942 w = a; /* bit 'bits-1' of 'p' is always set */ 943 for (b = bits - 2; b >= 0; b--) { 944 /* First, square r*w. */ 945 next_w = w * w; 946 if ((next_w / w) != w) { /* overflow */ 947 if (r_is_one) { 948 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 949 goto err; 950 r_is_one = 0; 951 } else { 952 if (!BN_MOD_MUL_WORD(r, w, m)) 953 goto err; 954 } 955 next_w = 1; 956 } 957 w = next_w; 958 if (!r_is_one) { 959 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 960 goto err; 961 } 962 963 /* Second, multiply r*w by 'a' if exponent bit is set. */ 964 if (BN_is_bit_set(p, b)) { 965 next_w = w * a; 966 if ((next_w / a) != w) { /* overflow */ 967 if (r_is_one) { 968 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 969 goto err; 970 r_is_one = 0; 971 } else { 972 if (!BN_MOD_MUL_WORD(r, w, m)) 973 goto err; 974 } 975 next_w = a; 976 } 977 w = next_w; 978 } 979 } 980 981 /* Finally, set r:=r*w. */ 982 if (w != 1) { 983 if (r_is_one) { 984 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 985 goto err; 986 r_is_one = 0; 987 } else { 988 if (!BN_MOD_MUL_WORD(r, w, m)) 989 goto err; 990 } 991 } 992 993 if (r_is_one) { /* can happen only if a == 1 */ 994 if (!BN_one(rr)) 995 goto err; 996 } else { 997 if (!BN_from_montgomery(rr, r, mont, ctx)) 998 goto err; 999 } 1000 ret = 1; 1001 err: 1002 if ((in_mont == NULL) && (mont != NULL)) 1003 BN_MONT_CTX_free(mont); 1004 BN_CTX_end(ctx); 1005 bn_check_top(rr); 1006 return (ret); 1007} 1008 1009/* The old fallback, simple version :-) */ 1010int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 1011 const BIGNUM *m, BN_CTX *ctx) 1012{ 1013 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 1014 int start = 1; 1015 BIGNUM *d; 1016 /* Table of variables obtained from 'ctx' */ 1017 BIGNUM *val[TABLE_SIZE]; 1018 1019 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1020 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1021 BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1022 return -1; 1023 } 1024 1025 bits = BN_num_bits(p); 1026 1027 if (bits == 0) { 1028 ret = BN_one(r); 1029 return ret; 1030 } 1031 1032 BN_CTX_start(ctx); 1033 d = BN_CTX_get(ctx); 1034 val[0] = BN_CTX_get(ctx); 1035 if (!d || !val[0]) 1036 goto err; 1037 1038 if (!BN_nnmod(val[0], a, m, ctx)) 1039 goto err; /* 1 */ 1040 if (BN_is_zero(val[0])) { 1041 BN_zero(r); 1042 ret = 1; 1043 goto err; 1044 } 1045 1046 window = BN_window_bits_for_exponent_size(bits); 1047 if (window > 1) { 1048 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1049 goto err; /* 2 */ 1050 j = 1 << (window - 1); 1051 for (i = 1; i < j; i++) { 1052 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1053 !BN_mod_mul(val[i], val[i - 1], d, m, ctx)) 1054 goto err; 1055 } 1056 } 1057 1058 start = 1; /* This is used to avoid multiplication etc 1059 * when there is only the value '1' in the 1060 * buffer. */ 1061 wvalue = 0; /* The 'value' of the window */ 1062 wstart = bits - 1; /* The top bit of the window */ 1063 wend = 0; /* The bottom bit of the window */ 1064 1065 if (!BN_one(r)) 1066 goto err; 1067 1068 for (;;) { 1069 if (BN_is_bit_set(p, wstart) == 0) { 1070 if (!start) 1071 if (!BN_mod_mul(r, r, r, m, ctx)) 1072 goto err; 1073 if (wstart == 0) 1074 break; 1075 wstart--; 1076 continue; 1077 } 1078 /* 1079 * We now have wstart on a 'set' bit, we now need to work out how bit 1080 * a window to do. To do this we need to scan forward until the last 1081 * set bit before the end of the window 1082 */ 1083 j = wstart; 1084 wvalue = 1; 1085 wend = 0; 1086 for (i = 1; i < window; i++) { 1087 if (wstart - i < 0) 1088 break; 1089 if (BN_is_bit_set(p, wstart - i)) { 1090 wvalue <<= (i - wend); 1091 wvalue |= 1; 1092 wend = i; 1093 } 1094 } 1095 1096 /* wend is the size of the current window */ 1097 j = wend + 1; 1098 /* add the 'bytes above' */ 1099 if (!start) 1100 for (i = 0; i < j; i++) { 1101 if (!BN_mod_mul(r, r, r, m, ctx)) 1102 goto err; 1103 } 1104 1105 /* wvalue will be an odd number < 2^window */ 1106 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1107 goto err; 1108 1109 /* move the 'window' down further */ 1110 wstart -= wend + 1; 1111 wvalue = 0; 1112 start = 0; 1113 if (wstart < 0) 1114 break; 1115 } 1116 ret = 1; 1117 err: 1118 BN_CTX_end(ctx); 1119 bn_check_top(r); 1120 return (ret); 1121} 1122