bn_exp.c revision 280304
1/* crypto/bn/bn_exp.c */ 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58/* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112#include "cryptlib.h" 113#include "bn_lcl.h" 114 115#include <stdlib.h> 116#ifdef _WIN32 117# include <malloc.h> 118# ifndef alloca 119# define alloca _alloca 120# endif 121#elif defined(__GNUC__) 122# ifndef alloca 123# define alloca(s) __builtin_alloca((s)) 124# endif 125#endif 126 127/* maximum precomputation table size for *variable* sliding windows */ 128#define TABLE_SIZE 32 129 130/* this one works - simple but works */ 131int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 132{ 133 int i, bits, ret = 0; 134 BIGNUM *v, *rr; 135 136 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 137 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 138 BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 139 return -1; 140 } 141 142 BN_CTX_start(ctx); 143 if ((r == a) || (r == p)) 144 rr = BN_CTX_get(ctx); 145 else 146 rr = r; 147 v = BN_CTX_get(ctx); 148 if (rr == NULL || v == NULL) 149 goto err; 150 151 if (BN_copy(v, a) == NULL) 152 goto err; 153 bits = BN_num_bits(p); 154 155 if (BN_is_odd(p)) { 156 if (BN_copy(rr, a) == NULL) 157 goto err; 158 } else { 159 if (!BN_one(rr)) 160 goto err; 161 } 162 163 for (i = 1; i < bits; i++) { 164 if (!BN_sqr(v, v, ctx)) 165 goto err; 166 if (BN_is_bit_set(p, i)) { 167 if (!BN_mul(rr, rr, v, ctx)) 168 goto err; 169 } 170 } 171 if (r != rr) 172 BN_copy(r, rr); 173 ret = 1; 174 err: 175 BN_CTX_end(ctx); 176 bn_check_top(r); 177 return (ret); 178} 179 180int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 181 BN_CTX *ctx) 182{ 183 int ret; 184 185 bn_check_top(a); 186 bn_check_top(p); 187 bn_check_top(m); 188 189 /*- 190 * For even modulus m = 2^k*m_odd, it might make sense to compute 191 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 192 * exponentiation for the odd part), using appropriate exponent 193 * reductions, and combine the results using the CRT. 194 * 195 * For now, we use Montgomery only if the modulus is odd; otherwise, 196 * exponentiation using the reciprocal-based quick remaindering 197 * algorithm is used. 198 * 199 * (Timing obtained with expspeed.c [computations a^p mod m 200 * where a, p, m are of the same length: 256, 512, 1024, 2048, 201 * 4096, 8192 bits], compared to the running time of the 202 * standard algorithm: 203 * 204 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 205 * 55 .. 77 % [UltraSparc processor, but 206 * debug-solaris-sparcv8-gcc conf.] 207 * 208 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 209 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 210 * 211 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 212 * at 2048 and more bits, but at 512 and 1024 bits, it was 213 * slower even than the standard algorithm! 214 * 215 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 216 * should be obtained when the new Montgomery reduction code 217 * has been integrated into OpenSSL.) 218 */ 219 220#define MONT_MUL_MOD 221#define MONT_EXP_WORD 222#define RECP_MUL_MOD 223 224#ifdef MONT_MUL_MOD 225 /* 226 * I have finally been able to take out this pre-condition of the top bit 227 * being set. It was caused by an error in BN_div with negatives. There 228 * was also another problem when for a^b%m a >= m. eay 07-May-97 229 */ 230 /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 231 232 if (BN_is_odd(m)) { 233# ifdef MONT_EXP_WORD 234 if (a->top == 1 && !a->neg 235 && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) { 236 BN_ULONG A = a->d[0]; 237 ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL); 238 } else 239# endif 240 ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL); 241 } else 242#endif 243#ifdef RECP_MUL_MOD 244 { 245 ret = BN_mod_exp_recp(r, a, p, m, ctx); 246 } 247#else 248 { 249 ret = BN_mod_exp_simple(r, a, p, m, ctx); 250 } 251#endif 252 253 bn_check_top(r); 254 return (ret); 255} 256 257int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 258 const BIGNUM *m, BN_CTX *ctx) 259{ 260 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 261 int start = 1; 262 BIGNUM *aa; 263 /* Table of variables obtained from 'ctx' */ 264 BIGNUM *val[TABLE_SIZE]; 265 BN_RECP_CTX recp; 266 267 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 268 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 269 BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 270 return -1; 271 } 272 273 bits = BN_num_bits(p); 274 275 if (bits == 0) { 276 ret = BN_one(r); 277 return ret; 278 } 279 280 BN_CTX_start(ctx); 281 aa = BN_CTX_get(ctx); 282 val[0] = BN_CTX_get(ctx); 283 if (!aa || !val[0]) 284 goto err; 285 286 BN_RECP_CTX_init(&recp); 287 if (m->neg) { 288 /* ignore sign of 'm' */ 289 if (!BN_copy(aa, m)) 290 goto err; 291 aa->neg = 0; 292 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 293 goto err; 294 } else { 295 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 296 goto err; 297 } 298 299 if (!BN_nnmod(val[0], a, m, ctx)) 300 goto err; /* 1 */ 301 if (BN_is_zero(val[0])) { 302 BN_zero(r); 303 ret = 1; 304 goto err; 305 } 306 307 window = BN_window_bits_for_exponent_size(bits); 308 if (window > 1) { 309 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 310 goto err; /* 2 */ 311 j = 1 << (window - 1); 312 for (i = 1; i < j; i++) { 313 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 314 !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) 315 goto err; 316 } 317 } 318 319 start = 1; /* This is used to avoid multiplication etc 320 * when there is only the value '1' in the 321 * buffer. */ 322 wvalue = 0; /* The 'value' of the window */ 323 wstart = bits - 1; /* The top bit of the window */ 324 wend = 0; /* The bottom bit of the window */ 325 326 if (!BN_one(r)) 327 goto err; 328 329 for (;;) { 330 if (BN_is_bit_set(p, wstart) == 0) { 331 if (!start) 332 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 333 goto err; 334 if (wstart == 0) 335 break; 336 wstart--; 337 continue; 338 } 339 /* 340 * We now have wstart on a 'set' bit, we now need to work out how bit 341 * a window to do. To do this we need to scan forward until the last 342 * set bit before the end of the window 343 */ 344 j = wstart; 345 wvalue = 1; 346 wend = 0; 347 for (i = 1; i < window; i++) { 348 if (wstart - i < 0) 349 break; 350 if (BN_is_bit_set(p, wstart - i)) { 351 wvalue <<= (i - wend); 352 wvalue |= 1; 353 wend = i; 354 } 355 } 356 357 /* wend is the size of the current window */ 358 j = wend + 1; 359 /* add the 'bytes above' */ 360 if (!start) 361 for (i = 0; i < j; i++) { 362 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 363 goto err; 364 } 365 366 /* wvalue will be an odd number < 2^window */ 367 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) 368 goto err; 369 370 /* move the 'window' down further */ 371 wstart -= wend + 1; 372 wvalue = 0; 373 start = 0; 374 if (wstart < 0) 375 break; 376 } 377 ret = 1; 378 err: 379 BN_CTX_end(ctx); 380 BN_RECP_CTX_free(&recp); 381 bn_check_top(r); 382 return (ret); 383} 384 385int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 386 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 387{ 388 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 389 int start = 1; 390 BIGNUM *d, *r; 391 const BIGNUM *aa; 392 /* Table of variables obtained from 'ctx' */ 393 BIGNUM *val[TABLE_SIZE]; 394 BN_MONT_CTX *mont = NULL; 395 396 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 397 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 398 } 399 400 bn_check_top(a); 401 bn_check_top(p); 402 bn_check_top(m); 403 404 if (!BN_is_odd(m)) { 405 BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); 406 return (0); 407 } 408 bits = BN_num_bits(p); 409 if (bits == 0) { 410 ret = BN_one(rr); 411 return ret; 412 } 413 414 BN_CTX_start(ctx); 415 d = BN_CTX_get(ctx); 416 r = BN_CTX_get(ctx); 417 val[0] = BN_CTX_get(ctx); 418 if (!d || !r || !val[0]) 419 goto err; 420 421 /* 422 * If this is not done, things will break in the montgomery part 423 */ 424 425 if (in_mont != NULL) 426 mont = in_mont; 427 else { 428 if ((mont = BN_MONT_CTX_new()) == NULL) 429 goto err; 430 if (!BN_MONT_CTX_set(mont, m, ctx)) 431 goto err; 432 } 433 434 if (a->neg || BN_ucmp(a, m) >= 0) { 435 if (!BN_nnmod(val[0], a, m, ctx)) 436 goto err; 437 aa = val[0]; 438 } else 439 aa = a; 440 if (BN_is_zero(aa)) { 441 BN_zero(rr); 442 ret = 1; 443 goto err; 444 } 445 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 446 goto err; /* 1 */ 447 448 window = BN_window_bits_for_exponent_size(bits); 449 if (window > 1) { 450 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 451 goto err; /* 2 */ 452 j = 1 << (window - 1); 453 for (i = 1; i < j; i++) { 454 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 455 !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) 456 goto err; 457 } 458 } 459 460 start = 1; /* This is used to avoid multiplication etc 461 * when there is only the value '1' in the 462 * buffer. */ 463 wvalue = 0; /* The 'value' of the window */ 464 wstart = bits - 1; /* The top bit of the window */ 465 wend = 0; /* The bottom bit of the window */ 466 467 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 468 goto err; 469 for (;;) { 470 if (BN_is_bit_set(p, wstart) == 0) { 471 if (!start) { 472 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 473 goto err; 474 } 475 if (wstart == 0) 476 break; 477 wstart--; 478 continue; 479 } 480 /* 481 * We now have wstart on a 'set' bit, we now need to work out how bit 482 * a window to do. To do this we need to scan forward until the last 483 * set bit before the end of the window 484 */ 485 j = wstart; 486 wvalue = 1; 487 wend = 0; 488 for (i = 1; i < window; i++) { 489 if (wstart - i < 0) 490 break; 491 if (BN_is_bit_set(p, wstart - i)) { 492 wvalue <<= (i - wend); 493 wvalue |= 1; 494 wend = i; 495 } 496 } 497 498 /* wend is the size of the current window */ 499 j = wend + 1; 500 /* add the 'bytes above' */ 501 if (!start) 502 for (i = 0; i < j; i++) { 503 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 504 goto err; 505 } 506 507 /* wvalue will be an odd number < 2^window */ 508 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 509 goto err; 510 511 /* move the 'window' down further */ 512 wstart -= wend + 1; 513 wvalue = 0; 514 start = 0; 515 if (wstart < 0) 516 break; 517 } 518 if (!BN_from_montgomery(rr, r, mont, ctx)) 519 goto err; 520 ret = 1; 521 err: 522 if ((in_mont == NULL) && (mont != NULL)) 523 BN_MONT_CTX_free(mont); 524 BN_CTX_end(ctx); 525 bn_check_top(rr); 526 return (ret); 527} 528 529/* 530 * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific 531 * layout so that accessing any of these table values shows the same access 532 * pattern as far as cache lines are concerned. The following functions are 533 * used to transfer a BIGNUM from/to that table. 534 */ 535 536static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, 537 unsigned char *buf, int idx, 538 int width) 539{ 540 size_t i, j; 541 542 if (top > b->top) 543 top = b->top; /* this works because 'buf' is explicitly 544 * zeroed */ 545 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 546 buf[j] = ((unsigned char *)b->d)[i]; 547 } 548 549 return 1; 550} 551 552static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, 553 unsigned char *buf, int idx, 554 int width) 555{ 556 size_t i, j; 557 558 if (bn_wexpand(b, top) == NULL) 559 return 0; 560 561 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 562 ((unsigned char *)b->d)[i] = buf[j]; 563 } 564 565 b->top = top; 566 bn_correct_top(b); 567 return 1; 568} 569 570/* 571 * Given a pointer value, compute the next address that is a cache line 572 * multiple. 573 */ 574#define MOD_EXP_CTIME_ALIGN(x_) \ 575 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 576 577/* 578 * This variant of BN_mod_exp_mont() uses fixed windows and the special 579 * precomputation memory layout to limit data-dependency to a minimum to 580 * protect secret exponents (cf. the hyper-threading timing attacks pointed 581 * out by Colin Percival, 582 * http://www.daemong-consideredperthreading-considered-harmful/) 583 */ 584int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 585 const BIGNUM *m, BN_CTX *ctx, 586 BN_MONT_CTX *in_mont) 587{ 588 int i, bits, ret = 0, window, wvalue; 589 int top; 590 BN_MONT_CTX *mont = NULL; 591 592 int numPowers; 593 unsigned char *powerbufFree = NULL; 594 int powerbufLen = 0; 595 unsigned char *powerbuf = NULL; 596 BIGNUM tmp, am; 597 598 bn_check_top(a); 599 bn_check_top(p); 600 bn_check_top(m); 601 602 top = m->top; 603 604 if (!(m->d[0] & 1)) { 605 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS); 606 return (0); 607 } 608 bits = BN_num_bits(p); 609 if (bits == 0) { 610 ret = BN_one(rr); 611 return ret; 612 } 613 614 BN_CTX_start(ctx); 615 616 /* 617 * Allocate a montgomery context if it was not supplied by the caller. If 618 * this is not done, things will break in the montgomery part. 619 */ 620 if (in_mont != NULL) 621 mont = in_mont; 622 else { 623 if ((mont = BN_MONT_CTX_new()) == NULL) 624 goto err; 625 if (!BN_MONT_CTX_set(mont, m, ctx)) 626 goto err; 627 } 628 629 /* Get the window size to use with size of p. */ 630 window = BN_window_bits_for_ctime_exponent_size(bits); 631#if defined(OPENSSL_BN_ASM_MONT5) 632 if (window == 6 && bits <= 1024) 633 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 634#endif 635 636 /* 637 * Allocate a buffer large enough to hold all of the pre-computed powers 638 * of am, am itself and tmp. 639 */ 640 numPowers = 1 << window; 641 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 642 ((2 * top) > 643 numPowers ? (2 * top) : numPowers)); 644#ifdef alloca 645 if (powerbufLen < 3072) 646 powerbufFree = 647 alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 648 else 649#endif 650 if ((powerbufFree = 651 (unsigned char *)OPENSSL_malloc(powerbufLen + 652 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) 653 == NULL) 654 goto err; 655 656 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 657 memset(powerbuf, 0, powerbufLen); 658 659#ifdef alloca 660 if (powerbufLen < 3072) 661 powerbufFree = NULL; 662#endif 663 664 /* lay down tmp and am right after powers table */ 665 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 666 am.d = tmp.d + top; 667 tmp.top = am.top = 0; 668 tmp.dmax = am.dmax = top; 669 tmp.neg = am.neg = 0; 670 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 671 672 /* prepare a^0 in Montgomery domain */ 673#if 1 674 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 675 goto err; 676#else 677 tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 678 for (i = 1; i < top; i++) 679 tmp.d[i] = (~m->d[i]) & BN_MASK2; 680 tmp.top = top; 681#endif 682 683 /* prepare a^1 in Montgomery domain */ 684 if (a->neg || BN_ucmp(a, m) >= 0) { 685 if (!BN_mod(&am, a, m, ctx)) 686 goto err; 687 if (!BN_to_montgomery(&am, &am, mont, ctx)) 688 goto err; 689 } else if (!BN_to_montgomery(&am, a, mont, ctx)) 690 goto err; 691 692#if defined(OPENSSL_BN_ASM_MONT5) 693 if (window == 5 && top > 1) { 694 /* 695 * This optimization uses ideas from http://eprint.iacr.org/2011/239, 696 * specifically optimization of cache-timing attack countermeasures 697 * and pre-computation optimization. 698 */ 699 700 /* 701 * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 702 * 512-bit RSA is hardly relevant, we omit it to spare size... 703 */ 704 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 705 const void *table, const BN_ULONG *np, 706 const BN_ULONG *n0, int num, int power); 707 void bn_scatter5(const BN_ULONG *inp, size_t num, 708 void *table, size_t power); 709 void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); 710 711 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 712 713 /* 714 * BN_to_montgomery can contaminate words above .top [in 715 * BN_DEBUG[_DEBUG] build]... 716 */ 717 for (i = am.top; i < top; i++) 718 am.d[i] = 0; 719 for (i = tmp.top; i < top; i++) 720 tmp.d[i] = 0; 721 722 bn_scatter5(tmp.d, top, powerbuf, 0); 723 bn_scatter5(am.d, am.top, powerbuf, 1); 724 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 725 bn_scatter5(tmp.d, top, powerbuf, 2); 726 727# if 0 728 for (i = 3; i < 32; i++) { 729 /* Calculate a^i = a^(i-1) * a */ 730 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 731 bn_scatter5(tmp.d, top, powerbuf, i); 732 } 733# else 734 /* same as above, but uses squaring for 1/2 of operations */ 735 for (i = 4; i < 32; i *= 2) { 736 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 737 bn_scatter5(tmp.d, top, powerbuf, i); 738 } 739 for (i = 3; i < 8; i += 2) { 740 int j; 741 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 742 bn_scatter5(tmp.d, top, powerbuf, i); 743 for (j = 2 * i; j < 32; j *= 2) { 744 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 745 bn_scatter5(tmp.d, top, powerbuf, j); 746 } 747 } 748 for (; i < 16; i += 2) { 749 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 750 bn_scatter5(tmp.d, top, powerbuf, i); 751 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 752 bn_scatter5(tmp.d, top, powerbuf, 2 * i); 753 } 754 for (; i < 32; i += 2) { 755 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 756 bn_scatter5(tmp.d, top, powerbuf, i); 757 } 758# endif 759 bits--; 760 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 761 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 762 bn_gather5(tmp.d, top, powerbuf, wvalue); 763 764 /* 765 * Scan the exponent one window at a time starting from the most 766 * significant bits. 767 */ 768 while (bits >= 0) { 769 for (wvalue = 0, i = 0; i < 5; i++, bits--) 770 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 771 772 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 773 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 774 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 775 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 776 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 777 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 778 } 779 780 tmp.top = top; 781 bn_correct_top(&tmp); 782 } else 783#endif 784 { 785 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) 786 goto err; 787 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) 788 goto err; 789 790 /* 791 * If the window size is greater than 1, then calculate 792 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even 793 * powers could instead be computed as (a^(i/2))^2 to use the slight 794 * performance advantage of sqr over mul). 795 */ 796 if (window > 1) { 797 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 798 goto err; 799 if (!MOD_EXP_CTIME_COPY_TO_PREBUF 800 (&tmp, top, powerbuf, 2, numPowers)) 801 goto err; 802 for (i = 3; i < numPowers; i++) { 803 /* Calculate a^i = a^(i-1) * a */ 804 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) 805 goto err; 806 if (!MOD_EXP_CTIME_COPY_TO_PREBUF 807 (&tmp, top, powerbuf, i, numPowers)) 808 goto err; 809 } 810 } 811 812 bits--; 813 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 814 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 815 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF 816 (&tmp, top, powerbuf, wvalue, numPowers)) 817 goto err; 818 819 /* 820 * Scan the exponent one window at a time starting from the most 821 * significant bits. 822 */ 823 while (bits >= 0) { 824 wvalue = 0; /* The 'value' of the window */ 825 826 /* Scan the window, squaring the result as we go */ 827 for (i = 0; i < window; i++, bits--) { 828 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) 829 goto err; 830 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 831 } 832 833 /* 834 * Fetch the appropriate pre-computed value from the pre-buf 835 */ 836 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF 837 (&am, top, powerbuf, wvalue, numPowers)) 838 goto err; 839 840 /* Multiply the result into the intermediate result */ 841 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 842 goto err; 843 } 844 } 845 846 /* Convert the final result from montgomery to standard format */ 847 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 848 goto err; 849 ret = 1; 850 err: 851 if ((in_mont == NULL) && (mont != NULL)) 852 BN_MONT_CTX_free(mont); 853 if (powerbuf != NULL) { 854 OPENSSL_cleanse(powerbuf, powerbufLen); 855 if (powerbufFree) 856 OPENSSL_free(powerbufFree); 857 } 858 BN_CTX_end(ctx); 859 return (ret); 860} 861 862int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 863 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 864{ 865 BN_MONT_CTX *mont = NULL; 866 int b, bits, ret = 0; 867 int r_is_one; 868 BN_ULONG w, next_w; 869 BIGNUM *d, *r, *t; 870 BIGNUM *swap_tmp; 871#define BN_MOD_MUL_WORD(r, w, m) \ 872 (BN_mul_word(r, (w)) && \ 873 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 874 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 875 /* 876 * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is 877 * probably more overhead than always using BN_mod (which uses BN_copy if 878 * a similar test returns true). 879 */ 880 /* 881 * We can use BN_mod and do not need BN_nnmod because our accumulator is 882 * never negative (the result of BN_mod does not depend on the sign of 883 * the modulus). 884 */ 885#define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 886 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 887 888 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 889 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 890 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 891 return -1; 892 } 893 894 bn_check_top(p); 895 bn_check_top(m); 896 897 if (!BN_is_odd(m)) { 898 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); 899 return (0); 900 } 901 if (m->top == 1) 902 a %= m->d[0]; /* make sure that 'a' is reduced */ 903 904 bits = BN_num_bits(p); 905 if (bits == 0) { 906 /* x**0 mod 1 is still zero. */ 907 if (BN_is_one(m)) { 908 ret = 1; 909 BN_zero(rr); 910 } else 911 ret = BN_one(rr); 912 return ret; 913 } 914 if (a == 0) { 915 BN_zero(rr); 916 ret = 1; 917 return ret; 918 } 919 920 BN_CTX_start(ctx); 921 d = BN_CTX_get(ctx); 922 r = BN_CTX_get(ctx); 923 t = BN_CTX_get(ctx); 924 if (d == NULL || r == NULL || t == NULL) 925 goto err; 926 927 if (in_mont != NULL) 928 mont = in_mont; 929 else { 930 if ((mont = BN_MONT_CTX_new()) == NULL) 931 goto err; 932 if (!BN_MONT_CTX_set(mont, m, ctx)) 933 goto err; 934 } 935 936 r_is_one = 1; /* except for Montgomery factor */ 937 938 /* bits-1 >= 0 */ 939 940 /* The result is accumulated in the product r*w. */ 941 w = a; /* bit 'bits-1' of 'p' is always set */ 942 for (b = bits - 2; b >= 0; b--) { 943 /* First, square r*w. */ 944 next_w = w * w; 945 if ((next_w / w) != w) { /* overflow */ 946 if (r_is_one) { 947 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 948 goto err; 949 r_is_one = 0; 950 } else { 951 if (!BN_MOD_MUL_WORD(r, w, m)) 952 goto err; 953 } 954 next_w = 1; 955 } 956 w = next_w; 957 if (!r_is_one) { 958 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 959 goto err; 960 } 961 962 /* Second, multiply r*w by 'a' if exponent bit is set. */ 963 if (BN_is_bit_set(p, b)) { 964 next_w = w * a; 965 if ((next_w / a) != w) { /* overflow */ 966 if (r_is_one) { 967 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 968 goto err; 969 r_is_one = 0; 970 } else { 971 if (!BN_MOD_MUL_WORD(r, w, m)) 972 goto err; 973 } 974 next_w = a; 975 } 976 w = next_w; 977 } 978 } 979 980 /* Finally, set r:=r*w. */ 981 if (w != 1) { 982 if (r_is_one) { 983 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 984 goto err; 985 r_is_one = 0; 986 } else { 987 if (!BN_MOD_MUL_WORD(r, w, m)) 988 goto err; 989 } 990 } 991 992 if (r_is_one) { /* can happen only if a == 1 */ 993 if (!BN_one(rr)) 994 goto err; 995 } else { 996 if (!BN_from_montgomery(rr, r, mont, ctx)) 997 goto err; 998 } 999 ret = 1; 1000 err: 1001 if ((in_mont == NULL) && (mont != NULL)) 1002 BN_MONT_CTX_free(mont); 1003 BN_CTX_end(ctx); 1004 bn_check_top(rr); 1005 return (ret); 1006} 1007 1008/* The old fallback, simple version :-) */ 1009int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 1010 const BIGNUM *m, BN_CTX *ctx) 1011{ 1012 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 1013 int start = 1; 1014 BIGNUM *d; 1015 /* Table of variables obtained from 'ctx' */ 1016 BIGNUM *val[TABLE_SIZE]; 1017 1018 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1019 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1020 BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1021 return -1; 1022 } 1023 1024 bits = BN_num_bits(p); 1025 1026 if (bits == 0) { 1027 ret = BN_one(r); 1028 return ret; 1029 } 1030 1031 BN_CTX_start(ctx); 1032 d = BN_CTX_get(ctx); 1033 val[0] = BN_CTX_get(ctx); 1034 if (!d || !val[0]) 1035 goto err; 1036 1037 if (!BN_nnmod(val[0], a, m, ctx)) 1038 goto err; /* 1 */ 1039 if (BN_is_zero(val[0])) { 1040 BN_zero(r); 1041 ret = 1; 1042 goto err; 1043 } 1044 1045 window = BN_window_bits_for_exponent_size(bits); 1046 if (window > 1) { 1047 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1048 goto err; /* 2 */ 1049 j = 1 << (window - 1); 1050 for (i = 1; i < j; i++) { 1051 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1052 !BN_mod_mul(val[i], val[i - 1], d, m, ctx)) 1053 goto err; 1054 } 1055 } 1056 1057 start = 1; /* This is used to avoid multiplication etc 1058 * when there is only the value '1' in the 1059 * buffer. */ 1060 wvalue = 0; /* The 'value' of the window */ 1061 wstart = bits - 1; /* The top bit of the window */ 1062 wend = 0; /* The bottom bit of the window */ 1063 1064 if (!BN_one(r)) 1065 goto err; 1066 1067 for (;;) { 1068 if (BN_is_bit_set(p, wstart) == 0) { 1069 if (!start) 1070 if (!BN_mod_mul(r, r, r, m, ctx)) 1071 goto err; 1072 if (wstart == 0) 1073 break; 1074 wstart--; 1075 continue; 1076 } 1077 /* 1078 * We now have wstart on a 'set' bit, we now need to work out how bit 1079 * a window to do. To do this we need to scan forward until the last 1080 * set bit before the end of the window 1081 */ 1082 j = wstart; 1083 wvalue = 1; 1084 wend = 0; 1085 for (i = 1; i < window; i++) { 1086 if (wstart - i < 0) 1087 break; 1088 if (BN_is_bit_set(p, wstart - i)) { 1089 wvalue <<= (i - wend); 1090 wvalue |= 1; 1091 wend = i; 1092 } 1093 } 1094 1095 /* wend is the size of the current window */ 1096 j = wend + 1; 1097 /* add the 'bytes above' */ 1098 if (!start) 1099 for (i = 0; i < j; i++) { 1100 if (!BN_mod_mul(r, r, r, m, ctx)) 1101 goto err; 1102 } 1103 1104 /* wvalue will be an odd number < 2^window */ 1105 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1106 goto err; 1107 1108 /* move the 'window' down further */ 1109 wstart -= wend + 1; 1110 wvalue = 0; 1111 start = 0; 1112 if (wstart < 0) 1113 break; 1114 } 1115 ret = 1; 1116 err: 1117 BN_CTX_end(ctx); 1118 bn_check_top(r); 1119 return (ret); 1120} 1121