1/* crypto/bn/bn_exp.c */ 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58/* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112#include "cryptlib.h" 113#include "constant_time_locl.h" 114#include "bn_lcl.h" 115 116#include <stdlib.h> 117#ifdef _WIN32 118# include <malloc.h> 119# ifndef alloca 120# define alloca _alloca 121# endif 122#elif defined(__GNUC__) 123# ifndef alloca 124# define alloca(s) __builtin_alloca((s)) 125# endif 126#endif 127 128/* maximum precomputation table size for *variable* sliding windows */ 129#define TABLE_SIZE 32 130 131/* this one works - simple but works */ 132int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 133{ 134 int i, bits, ret = 0; 135 BIGNUM *v, *rr; 136 137 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 138 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 139 BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 140 return -1; 141 } 142 143 BN_CTX_start(ctx); 144 if ((r == a) || (r == p)) 145 rr = BN_CTX_get(ctx); 146 else 147 rr = r; 148 v = BN_CTX_get(ctx); 149 if (rr == NULL || v == NULL) 150 goto err; 151 152 if (BN_copy(v, a) == NULL) 153 goto err; 154 bits = BN_num_bits(p); 155 156 if (BN_is_odd(p)) { 157 if (BN_copy(rr, a) == NULL) 158 goto err; 159 } else { 160 if (!BN_one(rr)) 161 goto err; 162 } 163 164 for (i = 1; i < bits; i++) { 165 if (!BN_sqr(v, v, ctx)) 166 goto err; 167 if (BN_is_bit_set(p, i)) { 168 if (!BN_mul(rr, rr, v, ctx)) 169 goto err; 170 } 171 } 172 if (r != rr) 173 BN_copy(r, rr); 174 ret = 1; 175 err: 176 BN_CTX_end(ctx); 177 bn_check_top(r); 178 return (ret); 179} 180 181int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 182 BN_CTX *ctx) 183{ 184 int ret; 185 186 bn_check_top(a); 187 bn_check_top(p); 188 bn_check_top(m); 189 190 /*- 191 * For even modulus m = 2^k*m_odd, it might make sense to compute 192 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 193 * exponentiation for the odd part), using appropriate exponent 194 * reductions, and combine the results using the CRT. 195 * 196 * For now, we use Montgomery only if the modulus is odd; otherwise, 197 * exponentiation using the reciprocal-based quick remaindering 198 * algorithm is used. 199 * 200 * (Timing obtained with expspeed.c [computations a^p mod m 201 * where a, p, m are of the same length: 256, 512, 1024, 2048, 202 * 4096, 8192 bits], compared to the running time of the 203 * standard algorithm: 204 * 205 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 206 * 55 .. 77 % [UltraSparc processor, but 207 * debug-solaris-sparcv8-gcc conf.] 208 * 209 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 210 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 211 * 212 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 213 * at 2048 and more bits, but at 512 and 1024 bits, it was 214 * slower even than the standard algorithm! 215 * 216 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 217 * should be obtained when the new Montgomery reduction code 218 * has been integrated into OpenSSL.) 219 */ 220 221#define MONT_MUL_MOD 222#define MONT_EXP_WORD 223#define RECP_MUL_MOD 224 225#ifdef MONT_MUL_MOD 226 /* 227 * I have finally been able to take out this pre-condition of the top bit 228 * being set. It was caused by an error in BN_div with negatives. There 229 * was also another problem when for a^b%m a >= m. eay 07-May-97 230 */ 231 /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 232 233 if (BN_is_odd(m)) { 234# ifdef MONT_EXP_WORD 235 if (a->top == 1 && !a->neg 236 && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) { 237 BN_ULONG A = a->d[0]; 238 ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL); 239 } else 240# endif 241 ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL); 242 } else 243#endif 244#ifdef RECP_MUL_MOD 245 { 246 ret = BN_mod_exp_recp(r, a, p, m, ctx); 247 } 248#else 249 { 250 ret = BN_mod_exp_simple(r, a, p, m, ctx); 251 } 252#endif 253 254 bn_check_top(r); 255 return (ret); 256} 257 258int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 259 const BIGNUM *m, BN_CTX *ctx) 260{ 261 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 262 int start = 1; 263 BIGNUM *aa; 264 /* Table of variables obtained from 'ctx' */ 265 BIGNUM *val[TABLE_SIZE]; 266 BN_RECP_CTX recp; 267 268 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 269 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 270 BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 271 return -1; 272 } 273 274 bits = BN_num_bits(p); 275 if (bits == 0) { 276 /* x**0 mod 1 is still zero. */ 277 if (BN_is_one(m)) { 278 ret = 1; 279 BN_zero(r); 280 } else { 281 ret = BN_one(r); 282 } 283 return ret; 284 } 285 286 BN_CTX_start(ctx); 287 aa = BN_CTX_get(ctx); 288 val[0] = BN_CTX_get(ctx); 289 if (!aa || !val[0]) 290 goto err; 291 292 BN_RECP_CTX_init(&recp); 293 if (m->neg) { 294 /* ignore sign of 'm' */ 295 if (!BN_copy(aa, m)) 296 goto err; 297 aa->neg = 0; 298 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 299 goto err; 300 } else { 301 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 302 goto err; 303 } 304 305 if (!BN_nnmod(val[0], a, m, ctx)) 306 goto err; /* 1 */ 307 if (BN_is_zero(val[0])) { 308 BN_zero(r); 309 ret = 1; 310 goto err; 311 } 312 313 window = BN_window_bits_for_exponent_size(bits); 314 if (window > 1) { 315 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 316 goto err; /* 2 */ 317 j = 1 << (window - 1); 318 for (i = 1; i < j; i++) { 319 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 320 !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) 321 goto err; 322 } 323 } 324 325 start = 1; /* This is used to avoid multiplication etc 326 * when there is only the value '1' in the 327 * buffer. */ 328 wvalue = 0; /* The 'value' of the window */ 329 wstart = bits - 1; /* The top bit of the window */ 330 wend = 0; /* The bottom bit of the window */ 331 332 if (!BN_one(r)) 333 goto err; 334 335 for (;;) { 336 if (BN_is_bit_set(p, wstart) == 0) { 337 if (!start) 338 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 339 goto err; 340 if (wstart == 0) 341 break; 342 wstart--; 343 continue; 344 } 345 /* 346 * We now have wstart on a 'set' bit, we now need to work out how bit 347 * a window to do. To do this we need to scan forward until the last 348 * set bit before the end of the window 349 */ 350 j = wstart; 351 wvalue = 1; 352 wend = 0; 353 for (i = 1; i < window; i++) { 354 if (wstart - i < 0) 355 break; 356 if (BN_is_bit_set(p, wstart - i)) { 357 wvalue <<= (i - wend); 358 wvalue |= 1; 359 wend = i; 360 } 361 } 362 363 /* wend is the size of the current window */ 364 j = wend + 1; 365 /* add the 'bytes above' */ 366 if (!start) 367 for (i = 0; i < j; i++) { 368 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 369 goto err; 370 } 371 372 /* wvalue will be an odd number < 2^window */ 373 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) 374 goto err; 375 376 /* move the 'window' down further */ 377 wstart -= wend + 1; 378 wvalue = 0; 379 start = 0; 380 if (wstart < 0) 381 break; 382 } 383 ret = 1; 384 err: 385 BN_CTX_end(ctx); 386 BN_RECP_CTX_free(&recp); 387 bn_check_top(r); 388 return (ret); 389} 390 391int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 392 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 393{ 394 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 395 int start = 1; 396 BIGNUM *d, *r; 397 const BIGNUM *aa; 398 /* Table of variables obtained from 'ctx' */ 399 BIGNUM *val[TABLE_SIZE]; 400 BN_MONT_CTX *mont = NULL; 401 402 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 403 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 404 } 405 406 bn_check_top(a); 407 bn_check_top(p); 408 bn_check_top(m); 409 410 if (!BN_is_odd(m)) { 411 BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); 412 return (0); 413 } 414 bits = BN_num_bits(p); 415 if (bits == 0) { 416 /* x**0 mod 1 is still zero. */ 417 if (BN_is_one(m)) { 418 ret = 1; 419 BN_zero(rr); 420 } else { 421 ret = BN_one(rr); 422 } 423 return ret; 424 } 425 426 BN_CTX_start(ctx); 427 d = BN_CTX_get(ctx); 428 r = BN_CTX_get(ctx); 429 val[0] = BN_CTX_get(ctx); 430 if (!d || !r || !val[0]) 431 goto err; 432 433 /* 434 * If this is not done, things will break in the montgomery part 435 */ 436 437 if (in_mont != NULL) 438 mont = in_mont; 439 else { 440 if ((mont = BN_MONT_CTX_new()) == NULL) 441 goto err; 442 if (!BN_MONT_CTX_set(mont, m, ctx)) 443 goto err; 444 } 445 446 if (a->neg || BN_ucmp(a, m) >= 0) { 447 if (!BN_nnmod(val[0], a, m, ctx)) 448 goto err; 449 aa = val[0]; 450 } else 451 aa = a; 452 if (BN_is_zero(aa)) { 453 BN_zero(rr); 454 ret = 1; 455 goto err; 456 } 457 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 458 goto err; /* 1 */ 459 460 window = BN_window_bits_for_exponent_size(bits); 461 if (window > 1) { 462 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 463 goto err; /* 2 */ 464 j = 1 << (window - 1); 465 for (i = 1; i < j; i++) { 466 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 467 !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) 468 goto err; 469 } 470 } 471 472 start = 1; /* This is used to avoid multiplication etc 473 * when there is only the value '1' in the 474 * buffer. */ 475 wvalue = 0; /* The 'value' of the window */ 476 wstart = bits - 1; /* The top bit of the window */ 477 wend = 0; /* The bottom bit of the window */ 478 479 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 480 goto err; 481 for (;;) { 482 if (BN_is_bit_set(p, wstart) == 0) { 483 if (!start) { 484 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 485 goto err; 486 } 487 if (wstart == 0) 488 break; 489 wstart--; 490 continue; 491 } 492 /* 493 * We now have wstart on a 'set' bit, we now need to work out how bit 494 * a window to do. To do this we need to scan forward until the last 495 * set bit before the end of the window 496 */ 497 j = wstart; 498 wvalue = 1; 499 wend = 0; 500 for (i = 1; i < window; i++) { 501 if (wstart - i < 0) 502 break; 503 if (BN_is_bit_set(p, wstart - i)) { 504 wvalue <<= (i - wend); 505 wvalue |= 1; 506 wend = i; 507 } 508 } 509 510 /* wend is the size of the current window */ 511 j = wend + 1; 512 /* add the 'bytes above' */ 513 if (!start) 514 for (i = 0; i < j; i++) { 515 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 516 goto err; 517 } 518 519 /* wvalue will be an odd number < 2^window */ 520 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 521 goto err; 522 523 /* move the 'window' down further */ 524 wstart -= wend + 1; 525 wvalue = 0; 526 start = 0; 527 if (wstart < 0) 528 break; 529 } 530 if (!BN_from_montgomery(rr, r, mont, ctx)) 531 goto err; 532 ret = 1; 533 err: 534 if ((in_mont == NULL) && (mont != NULL)) 535 BN_MONT_CTX_free(mont); 536 BN_CTX_end(ctx); 537 bn_check_top(rr); 538 return (ret); 539} 540 541/* 542 * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific 543 * layout so that accessing any of these table values shows the same access 544 * pattern as far as cache lines are concerned. The following functions are 545 * used to transfer a BIGNUM from/to that table. 546 */ 547 548static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, 549 unsigned char *buf, int idx, 550 int window) 551{ 552 int i, j; 553 int width = 1 << window; 554 BN_ULONG *table = (BN_ULONG *)buf; 555 556 if (top > b->top) 557 top = b->top; /* this works because 'buf' is explicitly 558 * zeroed */ 559 for (i = 0, j = idx; i < top; i++, j += width) { 560 table[j] = b->d[i]; 561 } 562 563 return 1; 564} 565 566static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, 567 unsigned char *buf, int idx, 568 int window) 569{ 570 int i, j; 571 int width = 1 << window; 572 volatile BN_ULONG *table = (volatile BN_ULONG *)buf; 573 574 if (bn_wexpand(b, top) == NULL) 575 return 0; 576 577 if (window <= 3) { 578 for (i = 0; i < top; i++, table += width) { 579 BN_ULONG acc = 0; 580 581 for (j = 0; j < width; j++) { 582 acc |= table[j] & 583 ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 584 } 585 586 b->d[i] = acc; 587 } 588 } else { 589 int xstride = 1 << (window - 2); 590 BN_ULONG y0, y1, y2, y3; 591 592 i = idx >> (window - 2); /* equivalent of idx / xstride */ 593 idx &= xstride - 1; /* equivalent of idx % xstride */ 594 595 y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); 596 y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); 597 y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); 598 y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); 599 600 for (i = 0; i < top; i++, table += width) { 601 BN_ULONG acc = 0; 602 603 for (j = 0; j < xstride; j++) { 604 acc |= ( (table[j + 0 * xstride] & y0) | 605 (table[j + 1 * xstride] & y1) | 606 (table[j + 2 * xstride] & y2) | 607 (table[j + 3 * xstride] & y3) ) 608 & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 609 } 610 611 b->d[i] = acc; 612 } 613 } 614 615 b->top = top; 616 bn_correct_top(b); 617 return 1; 618} 619 620/* 621 * Given a pointer value, compute the next address that is a cache line 622 * multiple. 623 */ 624#define MOD_EXP_CTIME_ALIGN(x_) \ 625 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 626 627/* 628 * This variant of BN_mod_exp_mont() uses fixed windows and the special 629 * precomputation memory layout to limit data-dependency to a minimum to 630 * protect secret exponents (cf. the hyper-threading timing attacks pointed 631 * out by Colin Percival, 632 * http://www.daemonology.net/hyperthreading-considered-harmful/) 633 */ 634int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 635 const BIGNUM *m, BN_CTX *ctx, 636 BN_MONT_CTX *in_mont) 637{ 638 int i, bits, ret = 0, window, wvalue; 639 int top; 640 BN_MONT_CTX *mont = NULL; 641 642 int numPowers; 643 unsigned char *powerbufFree = NULL; 644 int powerbufLen = 0; 645 unsigned char *powerbuf = NULL; 646 BIGNUM tmp, am; 647 648 bn_check_top(a); 649 bn_check_top(p); 650 bn_check_top(m); 651 652 if (!BN_is_odd(m)) { 653 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS); 654 return (0); 655 } 656 657 top = m->top; 658 659 bits = BN_num_bits(p); 660 if (bits == 0) { 661 /* x**0 mod 1 is still zero. */ 662 if (BN_is_one(m)) { 663 ret = 1; 664 BN_zero(rr); 665 } else { 666 ret = BN_one(rr); 667 } 668 return ret; 669 } 670 671 BN_CTX_start(ctx); 672 673 /* 674 * Allocate a montgomery context if it was not supplied by the caller. If 675 * this is not done, things will break in the montgomery part. 676 */ 677 if (in_mont != NULL) 678 mont = in_mont; 679 else { 680 if ((mont = BN_MONT_CTX_new()) == NULL) 681 goto err; 682 if (!BN_MONT_CTX_set(mont, m, ctx)) 683 goto err; 684 } 685 686 /* Get the window size to use with size of p. */ 687 window = BN_window_bits_for_ctime_exponent_size(bits); 688#if defined(OPENSSL_BN_ASM_MONT5) 689 if (window == 6 && bits <= 1024) 690 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 691#endif 692 693 /* 694 * Allocate a buffer large enough to hold all of the pre-computed powers 695 * of am, am itself and tmp. 696 */ 697 numPowers = 1 << window; 698 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 699 ((2 * top) > 700 numPowers ? (2 * top) : numPowers)); 701#ifdef alloca 702 if (powerbufLen < 3072) 703 powerbufFree = 704 alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 705 else 706#endif 707 if ((powerbufFree = 708 (unsigned char *)OPENSSL_malloc(powerbufLen + 709 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) 710 == NULL) 711 goto err; 712 713 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 714 memset(powerbuf, 0, powerbufLen); 715 716#ifdef alloca 717 if (powerbufLen < 3072) 718 powerbufFree = NULL; 719#endif 720 721 /* lay down tmp and am right after powers table */ 722 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 723 am.d = tmp.d + top; 724 tmp.top = am.top = 0; 725 tmp.dmax = am.dmax = top; 726 tmp.neg = am.neg = 0; 727 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 728 729 /* prepare a^0 in Montgomery domain */ 730#if 1 731 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 732 goto err; 733#else 734 tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 735 for (i = 1; i < top; i++) 736 tmp.d[i] = (~m->d[i]) & BN_MASK2; 737 tmp.top = top; 738#endif 739 740 /* prepare a^1 in Montgomery domain */ 741 if (a->neg || BN_ucmp(a, m) >= 0) { 742 if (!BN_mod(&am, a, m, ctx)) 743 goto err; 744 if (!BN_to_montgomery(&am, &am, mont, ctx)) 745 goto err; 746 } else if (!BN_to_montgomery(&am, a, mont, ctx)) 747 goto err; 748 749#if defined(OPENSSL_BN_ASM_MONT5) 750 if (window == 5 && top > 1) { 751 /* 752 * This optimization uses ideas from http://eprint.iacr.org/2011/239, 753 * specifically optimization of cache-timing attack countermeasures 754 * and pre-computation optimization. 755 */ 756 757 /* 758 * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 759 * 512-bit RSA is hardly relevant, we omit it to spare size... 760 */ 761 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 762 const void *table, const BN_ULONG *np, 763 const BN_ULONG *n0, int num, int power); 764 void bn_scatter5(const BN_ULONG *inp, size_t num, 765 void *table, size_t power); 766 void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); 767 768 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 769 770 /* 771 * BN_to_montgomery can contaminate words above .top [in 772 * BN_DEBUG[_DEBUG] build]... 773 */ 774 for (i = am.top; i < top; i++) 775 am.d[i] = 0; 776 for (i = tmp.top; i < top; i++) 777 tmp.d[i] = 0; 778 779 bn_scatter5(tmp.d, top, powerbuf, 0); 780 bn_scatter5(am.d, am.top, powerbuf, 1); 781 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 782 bn_scatter5(tmp.d, top, powerbuf, 2); 783 784# if 0 785 for (i = 3; i < 32; i++) { 786 /* Calculate a^i = a^(i-1) * a */ 787 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 788 bn_scatter5(tmp.d, top, powerbuf, i); 789 } 790# else 791 /* same as above, but uses squaring for 1/2 of operations */ 792 for (i = 4; i < 32; i *= 2) { 793 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 794 bn_scatter5(tmp.d, top, powerbuf, i); 795 } 796 for (i = 3; i < 8; i += 2) { 797 int j; 798 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 799 bn_scatter5(tmp.d, top, powerbuf, i); 800 for (j = 2 * i; j < 32; j *= 2) { 801 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 802 bn_scatter5(tmp.d, top, powerbuf, j); 803 } 804 } 805 for (; i < 16; i += 2) { 806 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 807 bn_scatter5(tmp.d, top, powerbuf, i); 808 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 809 bn_scatter5(tmp.d, top, powerbuf, 2 * i); 810 } 811 for (; i < 32; i += 2) { 812 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 813 bn_scatter5(tmp.d, top, powerbuf, i); 814 } 815# endif 816 bits--; 817 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 818 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 819 bn_gather5(tmp.d, top, powerbuf, wvalue); 820 821 /* 822 * Scan the exponent one window at a time starting from the most 823 * significant bits. 824 */ 825 while (bits >= 0) { 826 for (wvalue = 0, i = 0; i < 5; i++, bits--) 827 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 828 829 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 830 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 831 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 832 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 833 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 834 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 835 } 836 837 tmp.top = top; 838 bn_correct_top(&tmp); 839 } else 840#endif 841 { 842 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window)) 843 goto err; 844 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window)) 845 goto err; 846 847 /* 848 * If the window size is greater than 1, then calculate 849 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even 850 * powers could instead be computed as (a^(i/2))^2 to use the slight 851 * performance advantage of sqr over mul). 852 */ 853 if (window > 1) { 854 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 855 goto err; 856 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, 857 window)) 858 goto err; 859 for (i = 3; i < numPowers; i++) { 860 /* Calculate a^i = a^(i-1) * a */ 861 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) 862 goto err; 863 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, 864 window)) 865 goto err; 866 } 867 } 868 869 bits--; 870 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 871 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 872 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue, 873 window)) 874 goto err; 875 876 /* 877 * Scan the exponent one window at a time starting from the most 878 * significant bits. 879 */ 880 while (bits >= 0) { 881 wvalue = 0; /* The 'value' of the window */ 882 883 /* Scan the window, squaring the result as we go */ 884 for (i = 0; i < window; i++, bits--) { 885 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) 886 goto err; 887 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 888 } 889 890 /* 891 * Fetch the appropriate pre-computed value from the pre-buf 892 */ 893 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, 894 window)) 895 goto err; 896 897 /* Multiply the result into the intermediate result */ 898 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 899 goto err; 900 } 901 } 902 903 /* Convert the final result from montgomery to standard format */ 904 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 905 goto err; 906 ret = 1; 907 err: 908 if ((in_mont == NULL) && (mont != NULL)) 909 BN_MONT_CTX_free(mont); 910 if (powerbuf != NULL) { 911 OPENSSL_cleanse(powerbuf, powerbufLen); 912 if (powerbufFree) 913 OPENSSL_free(powerbufFree); 914 } 915 BN_CTX_end(ctx); 916 return (ret); 917} 918 919int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 920 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 921{ 922 BN_MONT_CTX *mont = NULL; 923 int b, bits, ret = 0; 924 int r_is_one; 925 BN_ULONG w, next_w; 926 BIGNUM *d, *r, *t; 927 BIGNUM *swap_tmp; 928#define BN_MOD_MUL_WORD(r, w, m) \ 929 (BN_mul_word(r, (w)) && \ 930 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 931 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 932 /* 933 * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is 934 * probably more overhead than always using BN_mod (which uses BN_copy if 935 * a similar test returns true). 936 */ 937 /* 938 * We can use BN_mod and do not need BN_nnmod because our accumulator is 939 * never negative (the result of BN_mod does not depend on the sign of 940 * the modulus). 941 */ 942#define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 943 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 944 945 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 946 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 947 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 948 return -1; 949 } 950 951 bn_check_top(p); 952 bn_check_top(m); 953 954 if (!BN_is_odd(m)) { 955 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); 956 return (0); 957 } 958 if (m->top == 1) 959 a %= m->d[0]; /* make sure that 'a' is reduced */ 960 961 bits = BN_num_bits(p); 962 if (bits == 0) { 963 /* x**0 mod 1 is still zero. */ 964 if (BN_is_one(m)) { 965 ret = 1; 966 BN_zero(rr); 967 } else { 968 ret = BN_one(rr); 969 } 970 return ret; 971 } 972 if (a == 0) { 973 BN_zero(rr); 974 ret = 1; 975 return ret; 976 } 977 978 BN_CTX_start(ctx); 979 d = BN_CTX_get(ctx); 980 r = BN_CTX_get(ctx); 981 t = BN_CTX_get(ctx); 982 if (d == NULL || r == NULL || t == NULL) 983 goto err; 984 985 if (in_mont != NULL) 986 mont = in_mont; 987 else { 988 if ((mont = BN_MONT_CTX_new()) == NULL) 989 goto err; 990 if (!BN_MONT_CTX_set(mont, m, ctx)) 991 goto err; 992 } 993 994 r_is_one = 1; /* except for Montgomery factor */ 995 996 /* bits-1 >= 0 */ 997 998 /* The result is accumulated in the product r*w. */ 999 w = a; /* bit 'bits-1' of 'p' is always set */ 1000 for (b = bits - 2; b >= 0; b--) { 1001 /* First, square r*w. */ 1002 next_w = w * w; 1003 if ((next_w / w) != w) { /* overflow */ 1004 if (r_is_one) { 1005 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1006 goto err; 1007 r_is_one = 0; 1008 } else { 1009 if (!BN_MOD_MUL_WORD(r, w, m)) 1010 goto err; 1011 } 1012 next_w = 1; 1013 } 1014 w = next_w; 1015 if (!r_is_one) { 1016 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 1017 goto err; 1018 } 1019 1020 /* Second, multiply r*w by 'a' if exponent bit is set. */ 1021 if (BN_is_bit_set(p, b)) { 1022 next_w = w * a; 1023 if ((next_w / a) != w) { /* overflow */ 1024 if (r_is_one) { 1025 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1026 goto err; 1027 r_is_one = 0; 1028 } else { 1029 if (!BN_MOD_MUL_WORD(r, w, m)) 1030 goto err; 1031 } 1032 next_w = a; 1033 } 1034 w = next_w; 1035 } 1036 } 1037 1038 /* Finally, set r:=r*w. */ 1039 if (w != 1) { 1040 if (r_is_one) { 1041 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1042 goto err; 1043 r_is_one = 0; 1044 } else { 1045 if (!BN_MOD_MUL_WORD(r, w, m)) 1046 goto err; 1047 } 1048 } 1049 1050 if (r_is_one) { /* can happen only if a == 1 */ 1051 if (!BN_one(rr)) 1052 goto err; 1053 } else { 1054 if (!BN_from_montgomery(rr, r, mont, ctx)) 1055 goto err; 1056 } 1057 ret = 1; 1058 err: 1059 if ((in_mont == NULL) && (mont != NULL)) 1060 BN_MONT_CTX_free(mont); 1061 BN_CTX_end(ctx); 1062 bn_check_top(rr); 1063 return (ret); 1064} 1065 1066/* The old fallback, simple version :-) */ 1067int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 1068 const BIGNUM *m, BN_CTX *ctx) 1069{ 1070 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 1071 int start = 1; 1072 BIGNUM *d; 1073 /* Table of variables obtained from 'ctx' */ 1074 BIGNUM *val[TABLE_SIZE]; 1075 1076 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1077 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1078 BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1079 return -1; 1080 } 1081 1082 bits = BN_num_bits(p); 1083 if (bits == 0) { 1084 /* x**0 mod 1 is still zero. */ 1085 if (BN_is_one(m)) { 1086 ret = 1; 1087 BN_zero(r); 1088 } else { 1089 ret = BN_one(r); 1090 } 1091 return ret; 1092 } 1093 1094 BN_CTX_start(ctx); 1095 d = BN_CTX_get(ctx); 1096 val[0] = BN_CTX_get(ctx); 1097 if (!d || !val[0]) 1098 goto err; 1099 1100 if (!BN_nnmod(val[0], a, m, ctx)) 1101 goto err; /* 1 */ 1102 if (BN_is_zero(val[0])) { 1103 BN_zero(r); 1104 ret = 1; 1105 goto err; 1106 } 1107 1108 window = BN_window_bits_for_exponent_size(bits); 1109 if (window > 1) { 1110 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1111 goto err; /* 2 */ 1112 j = 1 << (window - 1); 1113 for (i = 1; i < j; i++) { 1114 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1115 !BN_mod_mul(val[i], val[i - 1], d, m, ctx)) 1116 goto err; 1117 } 1118 } 1119 1120 start = 1; /* This is used to avoid multiplication etc 1121 * when there is only the value '1' in the 1122 * buffer. */ 1123 wvalue = 0; /* The 'value' of the window */ 1124 wstart = bits - 1; /* The top bit of the window */ 1125 wend = 0; /* The bottom bit of the window */ 1126 1127 if (!BN_one(r)) 1128 goto err; 1129 1130 for (;;) { 1131 if (BN_is_bit_set(p, wstart) == 0) { 1132 if (!start) 1133 if (!BN_mod_mul(r, r, r, m, ctx)) 1134 goto err; 1135 if (wstart == 0) 1136 break; 1137 wstart--; 1138 continue; 1139 } 1140 /* 1141 * We now have wstart on a 'set' bit, we now need to work out how bit 1142 * a window to do. To do this we need to scan forward until the last 1143 * set bit before the end of the window 1144 */ 1145 j = wstart; 1146 wvalue = 1; 1147 wend = 0; 1148 for (i = 1; i < window; i++) { 1149 if (wstart - i < 0) 1150 break; 1151 if (BN_is_bit_set(p, wstart - i)) { 1152 wvalue <<= (i - wend); 1153 wvalue |= 1; 1154 wend = i; 1155 } 1156 } 1157 1158 /* wend is the size of the current window */ 1159 j = wend + 1; 1160 /* add the 'bytes above' */ 1161 if (!start) 1162 for (i = 0; i < j; i++) { 1163 if (!BN_mod_mul(r, r, r, m, ctx)) 1164 goto err; 1165 } 1166 1167 /* wvalue will be an odd number < 2^window */ 1168 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1169 goto err; 1170 1171 /* move the 'window' down further */ 1172 wstart -= wend + 1; 1173 wvalue = 0; 1174 start = 0; 1175 if (wstart < 0) 1176 break; 1177 } 1178 ret = 1; 1179 err: 1180 BN_CTX_end(ctx); 1181 bn_check_top(r); 1182 return (ret); 1183} 1184