bn_exp.c revision 296317
1139825Simp/* crypto/bn/bn_exp.c */ 299657Sbenno/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 399657Sbenno * All rights reserved. 478342Sbenno * 578342Sbenno * This package is an SSL implementation written 678342Sbenno * by Eric Young (eay@cryptsoft.com). 778342Sbenno * The implementation was written so as to conform with Netscapes SSL. 878342Sbenno * 978342Sbenno * This library is free for commercial and non-commercial use as long as 1099657Sbenno * the following conditions are aheared to. The following conditions 1199657Sbenno * apply to all code found in this distribution, be it the RC4, RSA, 1299657Sbenno * lhash, DES, etc., code; not just the SSL code. The SSL documentation 1399657Sbenno * included with this distribution is covered by the same copyright terms 1478342Sbenno * except that the holder is Tim Hudson (tjh@cryptsoft.com). 1599657Sbenno * 1699657Sbenno * Copyright remains Eric Young's, and as such any Copyright notices in 1799657Sbenno * the code are not to be removed. 1899657Sbenno * If this package is used in a product, Eric Young should be given attribution 1999657Sbenno * as the author of the parts of the library used. 2099657Sbenno * This can be in the form of a textual message at program startup or 2199657Sbenno * in documentation (online or textual) provided with the package. 2299657Sbenno * 2399657Sbenno * Redistribution and use in source and binary forms, with or without 2499657Sbenno * modification, are permitted provided that the following conditions 2599657Sbenno * are met: 2699657Sbenno * 1. Redistributions of source code must retain the copyright 2799657Sbenno * notice, this list of conditions and the following disclaimer. 2878342Sbenno * 2. Redistributions in binary form must reproduce the above copyright 2978342Sbenno * notice, this list of conditions and the following disclaimer in the 30113038Sobrien * documentation and/or other materials provided with the distribution. 31113038Sobrien * 3. All advertising materials mentioning features or use of this software 3278342Sbenno * must display the following acknowledgement: 3399657Sbenno * "This product includes cryptographic software written by 3499657Sbenno * Eric Young (eay@cryptsoft.com)" 3599657Sbenno * The word 'cryptographic' can be left out if the rouines from the library 3678342Sbenno * being used are not cryptographic related :-). 3799657Sbenno * 4. If you include any Windows specific code (or a derivative thereof) from 3899657Sbenno * the apps directory (application code) you must include an acknowledgement: 3999657Sbenno * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 4099657Sbenno * 4199657Sbenno * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 4299657Sbenno * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 4399657Sbenno * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4499657Sbenno * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45108939Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46108939Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 4799657Sbenno * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 4899657Sbenno * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 4999657Sbenno * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50108939Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 5199657Sbenno * SUCH DAMAGE. 52112436Smux * 5399657Sbenno * The licence and distribution terms for any publically available version or 54109919Sbenno * derivative of this code cannot be changed. i.e. this code cannot simply be 5599657Sbenno * copied and put under another distribution licence 5699657Sbenno * [including the GNU Public Licence.] 5799657Sbenno */ 5899657Sbenno/* ==================================================================== 5999657Sbenno * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 6099657Sbenno * 6199657Sbenno * Redistribution and use in source and binary forms, with or without 6299657Sbenno * modification, are permitted provided that the following conditions 6399657Sbenno * are met: 6499657Sbenno * 6599657Sbenno * 1. Redistributions of source code must retain the above copyright 6699657Sbenno * notice, this list of conditions and the following disclaimer. 6799657Sbenno * 6899657Sbenno * 2. Redistributions in binary form must reproduce the above copyright 6999657Sbenno * notice, this list of conditions and the following disclaimer in 70117126Sscottl * the documentation and/or other materials provided with the 71117126Sscottl * distribution. 7299657Sbenno * 7399657Sbenno * 3. All advertising materials mentioning features or use of this 7499657Sbenno * software must display the following acknowledgment: 7599657Sbenno * "This product includes software developed by the OpenSSL Project 7699657Sbenno * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 7799657Sbenno * 7899657Sbenno * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 7999657Sbenno * endorse or promote products derived from this software without 8099657Sbenno * prior written permission. For written permission, please contact 8199657Sbenno * openssl-core@openssl.org. 8299657Sbenno * 83117126Sscottl * 5. Products derived from this software may not be called "OpenSSL" 84117126Sscottl * nor may "OpenSSL" appear in their names without prior written 85117126Sscottl * permission of the OpenSSL Project. 86117126Sscottl * 87117126Sscottl * 6. Redistributions of any form whatsoever must retain the following 88117126Sscottl * acknowledgment: 89117126Sscottl * "This product includes software developed by the OpenSSL Project 90117126Sscottl * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91117126Sscottl * 92117126Sscottl * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93117126Sscottl * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94117126Sscottl * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95117126Sscottl * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96117126Sscottl * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97117126Sscottl * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98117126Sscottl * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99117126Sscottl * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100117126Sscottl * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101117126Sscottl * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102117126Sscottl * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103117126Sscottl * OF THE POSSIBILITY OF SUCH DAMAGE. 104117126Sscottl * ==================================================================== 105117126Sscottl * 106117126Sscottl * This product includes cryptographic software written by Eric Young 107117126Sscottl * (eay@cryptsoft.com). This product includes software written by Tim 108117126Sscottl * Hudson (tjh@cryptsoft.com). 109117126Sscottl * 110117126Sscottl */ 111117126Sscottl 112117126Sscottl#include "cryptlib.h" 113117126Sscottl#include "constant_time_locl.h" 114117126Sscottl#include "bn_lcl.h" 115117126Sscottl 116117126Sscottl#include <stdlib.h> 117117126Sscottl#ifdef _WIN32 118117126Sscottl# include <malloc.h> 119117126Sscottl# ifndef alloca 120117126Sscottl# define alloca _alloca 121117126Sscottl# endif 122117126Sscottl#elif defined(__GNUC__) 12399657Sbenno# ifndef alloca 12499657Sbenno# define alloca(s) __builtin_alloca((s)) 12599657Sbenno# endif 12699657Sbenno#endif 12799657Sbenno 12899657Sbenno/* maximum precomputation table size for *variable* sliding windows */ 12999657Sbenno#define TABLE_SIZE 32 130117126Sscottl 131117126Sscottl/* this one works - simple but works */ 13299657Sbennoint BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 13399657Sbenno{ 13499657Sbenno int i, bits, ret = 0; 13599657Sbenno BIGNUM *v, *rr; 13699657Sbenno 13799657Sbenno if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 13899657Sbenno /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 13999657Sbenno BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 14099657Sbenno return -1; 14199657Sbenno } 14299657Sbenno 14399657Sbenno BN_CTX_start(ctx); 14499657Sbenno if ((r == a) || (r == p)) 14599657Sbenno rr = BN_CTX_get(ctx); 14699657Sbenno else 14799657Sbenno rr = r; 14899657Sbenno v = BN_CTX_get(ctx); 14999657Sbenno if (rr == NULL || v == NULL) 15099657Sbenno goto err; 15199657Sbenno 15299657Sbenno if (BN_copy(v, a) == NULL) 15399657Sbenno goto err; 15499657Sbenno bits = BN_num_bits(p); 15599657Sbenno 156117126Sscottl if (BN_is_odd(p)) { 157117126Sscottl if (BN_copy(rr, a) == NULL) 158117126Sscottl goto err; 159117126Sscottl } else { 160117126Sscottl if (!BN_one(rr)) 161117126Sscottl goto err; 162117126Sscottl } 16399657Sbenno 16499657Sbenno for (i = 1; i < bits; i++) { 16599657Sbenno if (!BN_sqr(v, v, ctx)) 16699657Sbenno goto err; 16799657Sbenno if (BN_is_bit_set(p, i)) { 16899657Sbenno if (!BN_mul(rr, rr, v, ctx)) 16999657Sbenno goto err; 170134934Sscottl } 171134934Sscottl } 172134934Sscottl if (r != rr) 173134934Sscottl BN_copy(r, rr); 174134934Sscottl ret = 1; 17599657Sbenno err: 17699657Sbenno BN_CTX_end(ctx); 17799657Sbenno bn_check_top(r); 17899657Sbenno return (ret); 17999657Sbenno} 18099657Sbenno 18199657Sbennoint BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 18299657Sbenno BN_CTX *ctx) 18399657Sbenno{ 184112436Smux int ret; 185112436Smux 18699657Sbenno bn_check_top(a); 18799657Sbenno bn_check_top(p); 18899657Sbenno bn_check_top(m); 18999657Sbenno 19099657Sbenno /*- 19199657Sbenno * For even modulus m = 2^k*m_odd, it might make sense to compute 19299657Sbenno * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 19399657Sbenno * exponentiation for the odd part), using appropriate exponent 19499657Sbenno * reductions, and combine the results using the CRT. 19599657Sbenno * 19699657Sbenno * For now, we use Montgomery only if the modulus is odd; otherwise, 19799657Sbenno * exponentiation using the reciprocal-based quick remaindering 19899657Sbenno * algorithm is used. 19999657Sbenno * 20099657Sbenno * (Timing obtained with expspeed.c [computations a^p mod m 20199657Sbenno * where a, p, m are of the same length: 256, 512, 1024, 2048, 20299657Sbenno * 4096, 8192 bits], compared to the running time of the 20399657Sbenno * standard algorithm: 204112436Smux * 20599657Sbenno * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 20699657Sbenno * 55 .. 77 % [UltraSparc processor, but 20799657Sbenno * debug-solaris-sparcv8-gcc conf.] 20899657Sbenno * 20999657Sbenno * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 21099657Sbenno * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 21199657Sbenno * 21299657Sbenno * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 21399657Sbenno * at 2048 and more bits, but at 512 and 1024 bits, it was 21499657Sbenno * slower even than the standard algorithm! 21599657Sbenno * 21699657Sbenno * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 21799657Sbenno * should be obtained when the new Montgomery reduction code 21899657Sbenno * has been integrated into OpenSSL.) 21999657Sbenno */ 22099657Sbenno 22199657Sbenno#define MONT_MUL_MOD 22299657Sbenno#define MONT_EXP_WORD 22399657Sbenno#define RECP_MUL_MOD 22499657Sbenno 22599657Sbenno#ifdef MONT_MUL_MOD 22699657Sbenno /* 22799657Sbenno * I have finally been able to take out this pre-condition of the top bit 22899657Sbenno * being set. It was caused by an error in BN_div with negatives. There 22999657Sbenno * was also another problem when for a^b%m a >= m. eay 07-May-97 23099657Sbenno */ 23199657Sbenno /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 23299657Sbenno 23399657Sbenno if (BN_is_odd(m)) { 23499657Sbenno# ifdef MONT_EXP_WORD 23599657Sbenno if (a->top == 1 && !a->neg 23699657Sbenno && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) { 23799657Sbenno BN_ULONG A = a->d[0]; 23899657Sbenno ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL); 23999657Sbenno } else 24099657Sbenno# endif 24199657Sbenno ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL); 24299657Sbenno } else 24399657Sbenno#endif 24499657Sbenno#ifdef RECP_MUL_MOD 24599657Sbenno { 24699657Sbenno ret = BN_mod_exp_recp(r, a, p, m, ctx); 24799657Sbenno } 24899657Sbenno#else 24999657Sbenno { 25099657Sbenno ret = BN_mod_exp_simple(r, a, p, m, ctx); 25199657Sbenno } 25299657Sbenno#endif 25399657Sbenno 25499657Sbenno bn_check_top(r); 25599657Sbenno return (ret); 256118081Smux} 257118081Smux 258118081Smuxint BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 259118081Smux const BIGNUM *m, BN_CTX *ctx) 260118081Smux{ 261118081Smux int i, j, bits, ret = 0, wstart, wend, window, wvalue; 262118081Smux int start = 1; 263118081Smux BIGNUM *aa; 264118081Smux /* Table of variables obtained from 'ctx' */ 26599657Sbenno BIGNUM *val[TABLE_SIZE]; 266170421Smarcel BN_RECP_CTX recp; 267170421Smarcel 268170421Smarcel if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 269170421Smarcel /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 270170421Smarcel BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 271170421Smarcel return -1; 272170421Smarcel } 273170421Smarcel 274170421Smarcel bits = BN_num_bits(p); 275170421Smarcel if (bits == 0) { 276170421Smarcel /* x**0 mod 1 is still zero. */ 277170421Smarcel if (BN_is_one(m)) { 27899657Sbenno ret = 1; 27999657Sbenno BN_zero(r); 28099657Sbenno } else { 28199657Sbenno ret = BN_one(r); 28299657Sbenno } 28399657Sbenno return ret; 284118081Smux } 28599657Sbenno 28699657Sbenno BN_CTX_start(ctx); 28799657Sbenno aa = BN_CTX_get(ctx); 28899657Sbenno val[0] = BN_CTX_get(ctx); 28999657Sbenno if (!aa || !val[0]) 29099657Sbenno goto err; 29199657Sbenno 292170421Smarcel BN_RECP_CTX_init(&recp); 293170421Smarcel if (m->neg) { 294170421Smarcel /* ignore sign of 'm' */ 29599657Sbenno if (!BN_copy(aa, m)) 29699657Sbenno goto err; 29799657Sbenno aa->neg = 0; 29899657Sbenno if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 299108939Sgrehan goto err; 30099657Sbenno } else { 30199657Sbenno if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 30278342Sbenno goto err; 30399657Sbenno } 30478342Sbenno 30599657Sbenno if (!BN_nnmod(val[0], a, m, ctx)) 30699657Sbenno goto err; /* 1 */ 307170421Smarcel if (BN_is_zero(val[0])) { 308170421Smarcel BN_zero(r); 30999657Sbenno ret = 1; 310170421Smarcel goto err; 31199657Sbenno } 31299657Sbenno 31378342Sbenno window = BN_window_bits_for_exponent_size(bits); 31499657Sbenno if (window > 1) { 31599657Sbenno if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 31699657Sbenno goto err; /* 2 */ 31799657Sbenno j = 1 << (window - 1); 31899657Sbenno for (i = 1; i < j; i++) { 31999657Sbenno if (((val[i] = BN_CTX_get(ctx)) == NULL) || 32099657Sbenno !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) 32199657Sbenno goto err; 32299657Sbenno } 32399657Sbenno } 324143063Sjoerg 32599657Sbenno start = 1; /* This is used to avoid multiplication etc 32699657Sbenno * when there is only the value '1' in the 32799657Sbenno * buffer. */ 32899657Sbenno wvalue = 0; /* The 'value' of the window */ 32999657Sbenno wstart = bits - 1; /* The top bit of the window */ 33099657Sbenno wend = 0; /* The bottom bit of the window */ 33199657Sbenno 33299657Sbenno if (!BN_one(r)) 33399657Sbenno goto err; 33499657Sbenno 33599657Sbenno for (;;) { 33699657Sbenno if (BN_is_bit_set(p, wstart) == 0) { 33799657Sbenno if (!start) 33899657Sbenno if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 33999657Sbenno goto err; 34099657Sbenno if (wstart == 0) 34199657Sbenno break; 34299657Sbenno wstart--; 34399657Sbenno continue; 34499657Sbenno } 34599657Sbenno /* 34699657Sbenno * We now have wstart on a 'set' bit, we now need to work out how bit 34799657Sbenno * a window to do. To do this we need to scan forward until the last 34899657Sbenno * set bit before the end of the window 34999657Sbenno */ 35099657Sbenno j = wstart; 35199657Sbenno wvalue = 1; 35299657Sbenno wend = 0; 35399657Sbenno for (i = 1; i < window; i++) { 35499657Sbenno if (wstart - i < 0) 35599657Sbenno break; 35699657Sbenno if (BN_is_bit_set(p, wstart - i)) { 35799657Sbenno wvalue <<= (i - wend); 35899657Sbenno wvalue |= 1; 35999657Sbenno wend = i; 36099657Sbenno } 36199657Sbenno } 36299657Sbenno 36399657Sbenno /* wend is the size of the current window */ 36499657Sbenno j = wend + 1; 36599657Sbenno /* add the 'bytes above' */ 36699657Sbenno if (!start) 36799657Sbenno for (i = 0; i < j; i++) { 368108939Sgrehan if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 369108939Sgrehan goto err; 37099657Sbenno } 371108939Sgrehan 372108939Sgrehan /* wvalue will be an odd number < 2^window */ 373108939Sgrehan if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) 374108939Sgrehan goto err; 375108939Sgrehan 37699657Sbenno /* move the 'window' down further */ 377108939Sgrehan wstart -= wend + 1; 37899657Sbenno wvalue = 0; 379108939Sgrehan start = 0; 38078342Sbenno if (wstart < 0) 38199657Sbenno break; 38299657Sbenno } 383108939Sgrehan ret = 1; 384108939Sgrehan err: 385108939Sgrehan BN_CTX_end(ctx); 386108939Sgrehan BN_RECP_CTX_free(&recp); 38799657Sbenno bn_check_top(r); 388108939Sgrehan return (ret); 389108939Sgrehan} 390108939Sgrehan 391108939Sgrehanint BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 392108939Sgrehan const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 393108939Sgrehan{ 394108939Sgrehan int i, j, bits, ret = 0, wstart, wend, window, wvalue; 395108939Sgrehan int start = 1; 396108939Sgrehan BIGNUM *d, *r; 397108939Sgrehan const BIGNUM *aa; 398108939Sgrehan /* Table of variables obtained from 'ctx' */ 399108939Sgrehan BIGNUM *val[TABLE_SIZE]; 400108939Sgrehan BN_MONT_CTX *mont = NULL; 401108939Sgrehan 402108939Sgrehan if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 403108939Sgrehan return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 404108939Sgrehan } 405108939Sgrehan 406108939Sgrehan bn_check_top(a); 407108939Sgrehan bn_check_top(p); 408108939Sgrehan bn_check_top(m); 409108939Sgrehan 410108939Sgrehan if (!BN_is_odd(m)) { 411108939Sgrehan BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); 412108939Sgrehan return (0); 413108939Sgrehan } 414108939Sgrehan bits = BN_num_bits(p); 415108939Sgrehan if (bits == 0) { 416108939Sgrehan /* x**0 mod 1 is still zero. */ 417108939Sgrehan if (BN_is_one(m)) { 418108939Sgrehan ret = 1; 419108939Sgrehan BN_zero(rr); 420108939Sgrehan } else { 421108939Sgrehan ret = BN_one(rr); 422108939Sgrehan } 423108939Sgrehan return ret; 424108939Sgrehan } 425108939Sgrehan 426108939Sgrehan BN_CTX_start(ctx); 427108939Sgrehan d = BN_CTX_get(ctx); 428108939Sgrehan r = BN_CTX_get(ctx); 429108939Sgrehan val[0] = BN_CTX_get(ctx); 430108939Sgrehan if (!d || !r || !val[0]) 431108939Sgrehan goto err; 432108939Sgrehan 433108939Sgrehan /* 434108939Sgrehan * If this is not done, things will break in the montgomery part 435108939Sgrehan */ 436108939Sgrehan 437108939Sgrehan if (in_mont != NULL) 438108939Sgrehan mont = in_mont; 439108939Sgrehan else { 440108939Sgrehan if ((mont = BN_MONT_CTX_new()) == NULL) 441108939Sgrehan goto err; 442108939Sgrehan if (!BN_MONT_CTX_set(mont, m, ctx)) 443108939Sgrehan goto err; 444108939Sgrehan } 445108939Sgrehan 446108939Sgrehan if (a->neg || BN_ucmp(a, m) >= 0) { 447108939Sgrehan if (!BN_nnmod(val[0], a, m, ctx)) 448108939Sgrehan goto err; 449108939Sgrehan aa = val[0]; 450108939Sgrehan } else 451108939Sgrehan aa = a; 452108939Sgrehan if (BN_is_zero(aa)) { 453108939Sgrehan BN_zero(rr); 454108939Sgrehan ret = 1; 455108939Sgrehan goto err; 456108939Sgrehan } 457108939Sgrehan if (!BN_to_montgomery(val[0], aa, mont, ctx)) 458108939Sgrehan goto err; /* 1 */ 459108939Sgrehan 460108939Sgrehan window = BN_window_bits_for_exponent_size(bits); 461108939Sgrehan if (window > 1) { 462108939Sgrehan if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 463108939Sgrehan goto err; /* 2 */ 464108939Sgrehan j = 1 << (window - 1); 465108939Sgrehan for (i = 1; i < j; i++) { 466108939Sgrehan if (((val[i] = BN_CTX_get(ctx)) == NULL) || 467108939Sgrehan !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) 468108939Sgrehan goto err; 469108939Sgrehan } 470108939Sgrehan } 471108939Sgrehan 472108939Sgrehan start = 1; /* This is used to avoid multiplication etc 473108939Sgrehan * when there is only the value '1' in the 474108939Sgrehan * buffer. */ 475108939Sgrehan wvalue = 0; /* The 'value' of the window */ 476108939Sgrehan wstart = bits - 1; /* The top bit of the window */ 477143063Sjoerg wend = 0; /* The bottom bit of the window */ 478108939Sgrehan 479108939Sgrehan if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 480108939Sgrehan goto err; 481108939Sgrehan for (;;) { 482108939Sgrehan if (BN_is_bit_set(p, wstart) == 0) { 483108939Sgrehan if (!start) { 484113255Sdes if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 485108939Sgrehan goto err; 486108939Sgrehan } 487108939Sgrehan if (wstart == 0) 488108939Sgrehan break; 489108939Sgrehan wstart--; 490108939Sgrehan continue; 491108939Sgrehan } 492110335Sharti /* 493110335Sharti * We now have wstart on a 'set' bit, we now need to work out how bit 494110335Sharti * a window to do. To do this we need to scan forward until the last 495110335Sharti * set bit before the end of the window 496110335Sharti */ 497110335Sharti j = wstart; 498108939Sgrehan wvalue = 1; 499108939Sgrehan wend = 0; 500108939Sgrehan for (i = 1; i < window; i++) { 501108939Sgrehan if (wstart - i < 0) 502108939Sgrehan break; 503108939Sgrehan if (BN_is_bit_set(p, wstart - i)) { 504108939Sgrehan wvalue <<= (i - wend); 505108939Sgrehan wvalue |= 1; 506108939Sgrehan wend = i; 507108939Sgrehan } 508108939Sgrehan } 509108939Sgrehan 510108939Sgrehan /* wend is the size of the current window */ 511108939Sgrehan j = wend + 1; 512108939Sgrehan /* add the 'bytes above' */ 513108939Sgrehan if (!start) 514108939Sgrehan for (i = 0; i < j; i++) { 515140314Sscottl if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 516140314Sscottl goto err; 517140314Sscottl } 518140314Sscottl 519140314Sscottl /* wvalue will be an odd number < 2^window */ 520140314Sscottl if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 521140314Sscottl goto err; 522140314Sscottl 523147851Sgrehan /* move the 'window' down further */ 524147851Sgrehan wstart -= wend + 1; 525140314Sscottl wvalue = 0; 526140314Sscottl start = 0; 527140314Sscottl if (wstart < 0) 528140314Sscottl break; 529140314Sscottl } 530140314Sscottl if (!BN_from_montgomery(rr, r, mont, ctx)) 531140314Sscottl goto err; 532140314Sscottl ret = 1; 533140314Sscottl err: 534140314Sscottl if ((in_mont == NULL) && (mont != NULL)) 535140314Sscottl BN_MONT_CTX_free(mont); 536140314Sscottl BN_CTX_end(ctx); 537140314Sscottl bn_check_top(rr); 538140314Sscottl return (ret); 539140314Sscottl} 540140314Sscottl 541140314Sscottl/* 542140314Sscottl * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific 543140314Sscottl * layout so that accessing any of these table values shows the same access 544140314Sscottl * pattern as far as cache lines are concerned. The following functions are 545140314Sscottl * used to transfer a BIGNUM from/to that table. 546108939Sgrehan */ 547108939Sgrehan 548108939Sgrehanstatic int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, 549108939Sgrehan unsigned char *buf, int idx, 550108939Sgrehan int window) 551108939Sgrehan{ 552108939Sgrehan int i, j; 553108939Sgrehan int width = 1 << window; 554108939Sgrehan BN_ULONG *table = (BN_ULONG *)buf; 555143063Sjoerg 556108939Sgrehan if (top > b->top) 557108939Sgrehan top = b->top; /* this works because 'buf' is explicitly 558108939Sgrehan * zeroed */ 559108939Sgrehan for (i = 0, j = idx; i < top; i++, j += width) { 560108939Sgrehan table[j] = b->d[i]; 561108939Sgrehan } 562108939Sgrehan 563113703Sgrehan return 1; 564108939Sgrehan} 565108939Sgrehan 566108939Sgrehanstatic int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, 567108939Sgrehan unsigned char *buf, int idx, 568108939Sgrehan int window) 569108939Sgrehan{ 570108939Sgrehan int i, j; 571108939Sgrehan int width = 1 << window; 572108939Sgrehan volatile BN_ULONG *table = (volatile BN_ULONG *)buf; 573108939Sgrehan 574108939Sgrehan if (bn_wexpand(b, top) == NULL) 575108939Sgrehan return 0; 576108939Sgrehan 577108939Sgrehan if (window <= 3) { 578108939Sgrehan for (i = 0; i < top; i++, table += width) { 579108939Sgrehan BN_ULONG acc = 0; 580108939Sgrehan 581108939Sgrehan for (j = 0; j < width; j++) { 582108939Sgrehan acc |= table[j] & 583108939Sgrehan ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 584108939Sgrehan } 585110335Sharti 586110335Sharti b->d[i] = acc; 587110335Sharti } 588108939Sgrehan } else { 589110335Sharti int xstride = 1 << (window - 2); 590108939Sgrehan BN_ULONG y0, y1, y2, y3; 591110335Sharti 592110335Sharti i = idx >> (window - 2); /* equivalent of idx / xstride */ 593108939Sgrehan idx &= xstride - 1; /* equivalent of idx % xstride */ 594108939Sgrehan 595108939Sgrehan y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); 596108939Sgrehan y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); 597108939Sgrehan y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); 598108939Sgrehan y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); 599108939Sgrehan 600108939Sgrehan for (i = 0; i < top; i++, table += width) { 601108939Sgrehan BN_ULONG acc = 0; 602108939Sgrehan 603108939Sgrehan for (j = 0; j < xstride; j++) { 604108939Sgrehan acc |= ( (table[j + 0 * xstride] & y0) | 605108939Sgrehan (table[j + 1 * xstride] & y1) | 606108939Sgrehan (table[j + 2 * xstride] & y2) | 607108939Sgrehan (table[j + 3 * xstride] & y3) ) 608108939Sgrehan & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 609108939Sgrehan } 610108939Sgrehan 61199657Sbenno b->d[i] = acc; 612143634Sgrehan } 613109935Sbenno } 61499657Sbenno 615109935Sbenno b->top = top; 616109935Sbenno bn_correct_top(b); 617109935Sbenno return 1; 61899657Sbenno} 619143634Sgrehan 620109919Sbenno/* 621109919Sbenno * Given a pointer value, compute the next address that is a cache line 622109935Sbenno * multiple. 623109919Sbenno */ 624#define MOD_EXP_CTIME_ALIGN(x_) \ 625 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 626 627/* 628 * This variant of BN_mod_exp_mont() uses fixed windows and the special 629 * precomputation memory layout to limit data-dependency to a minimum to 630 * protect secret exponents (cf. the hyper-threading timing attacks pointed 631 * out by Colin Percival, 632 * http://www.daemonology.net/hyperthreading-considered-harmful/) 633 */ 634int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 635 const BIGNUM *m, BN_CTX *ctx, 636 BN_MONT_CTX *in_mont) 637{ 638 int i, bits, ret = 0, window, wvalue; 639 int top; 640 BN_MONT_CTX *mont = NULL; 641 642 int numPowers; 643 unsigned char *powerbufFree = NULL; 644 int powerbufLen = 0; 645 unsigned char *powerbuf = NULL; 646 BIGNUM tmp, am; 647 648 bn_check_top(a); 649 bn_check_top(p); 650 bn_check_top(m); 651 652 if (!BN_is_odd(m)) { 653 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS); 654 return (0); 655 } 656 657 top = m->top; 658 659 bits = BN_num_bits(p); 660 if (bits == 0) { 661 /* x**0 mod 1 is still zero. */ 662 if (BN_is_one(m)) { 663 ret = 1; 664 BN_zero(rr); 665 } else { 666 ret = BN_one(rr); 667 } 668 return ret; 669 } 670 671 BN_CTX_start(ctx); 672 673 /* 674 * Allocate a montgomery context if it was not supplied by the caller. If 675 * this is not done, things will break in the montgomery part. 676 */ 677 if (in_mont != NULL) 678 mont = in_mont; 679 else { 680 if ((mont = BN_MONT_CTX_new()) == NULL) 681 goto err; 682 if (!BN_MONT_CTX_set(mont, m, ctx)) 683 goto err; 684 } 685 686 /* Get the window size to use with size of p. */ 687 window = BN_window_bits_for_ctime_exponent_size(bits); 688#if defined(OPENSSL_BN_ASM_MONT5) 689 if (window == 6 && bits <= 1024) 690 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 691#endif 692 693 /* 694 * Allocate a buffer large enough to hold all of the pre-computed powers 695 * of am, am itself and tmp. 696 */ 697 numPowers = 1 << window; 698 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 699 ((2 * top) > 700 numPowers ? (2 * top) : numPowers)); 701#ifdef alloca 702 if (powerbufLen < 3072) 703 powerbufFree = 704 alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 705 else 706#endif 707 if ((powerbufFree = 708 (unsigned char *)OPENSSL_malloc(powerbufLen + 709 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) 710 == NULL) 711 goto err; 712 713 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 714 memset(powerbuf, 0, powerbufLen); 715 716#ifdef alloca 717 if (powerbufLen < 3072) 718 powerbufFree = NULL; 719#endif 720 721 /* lay down tmp and am right after powers table */ 722 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 723 am.d = tmp.d + top; 724 tmp.top = am.top = 0; 725 tmp.dmax = am.dmax = top; 726 tmp.neg = am.neg = 0; 727 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 728 729 /* prepare a^0 in Montgomery domain */ 730#if 1 731 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 732 goto err; 733#else 734 tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 735 for (i = 1; i < top; i++) 736 tmp.d[i] = (~m->d[i]) & BN_MASK2; 737 tmp.top = top; 738#endif 739 740 /* prepare a^1 in Montgomery domain */ 741 if (a->neg || BN_ucmp(a, m) >= 0) { 742 if (!BN_mod(&am, a, m, ctx)) 743 goto err; 744 if (!BN_to_montgomery(&am, &am, mont, ctx)) 745 goto err; 746 } else if (!BN_to_montgomery(&am, a, mont, ctx)) 747 goto err; 748 749#if defined(OPENSSL_BN_ASM_MONT5) 750 if (window == 5 && top > 1) { 751 /* 752 * This optimization uses ideas from http://eprint.iacr.org/2011/239, 753 * specifically optimization of cache-timing attack countermeasures 754 * and pre-computation optimization. 755 */ 756 757 /* 758 * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 759 * 512-bit RSA is hardly relevant, we omit it to spare size... 760 */ 761 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 762 const void *table, const BN_ULONG *np, 763 const BN_ULONG *n0, int num, int power); 764 void bn_scatter5(const BN_ULONG *inp, size_t num, 765 void *table, size_t power); 766 void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); 767 768 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 769 770 /* 771 * BN_to_montgomery can contaminate words above .top [in 772 * BN_DEBUG[_DEBUG] build]... 773 */ 774 for (i = am.top; i < top; i++) 775 am.d[i] = 0; 776 for (i = tmp.top; i < top; i++) 777 tmp.d[i] = 0; 778 779 bn_scatter5(tmp.d, top, powerbuf, 0); 780 bn_scatter5(am.d, am.top, powerbuf, 1); 781 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 782 bn_scatter5(tmp.d, top, powerbuf, 2); 783 784# if 0 785 for (i = 3; i < 32; i++) { 786 /* Calculate a^i = a^(i-1) * a */ 787 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 788 bn_scatter5(tmp.d, top, powerbuf, i); 789 } 790# else 791 /* same as above, but uses squaring for 1/2 of operations */ 792 for (i = 4; i < 32; i *= 2) { 793 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 794 bn_scatter5(tmp.d, top, powerbuf, i); 795 } 796 for (i = 3; i < 8; i += 2) { 797 int j; 798 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 799 bn_scatter5(tmp.d, top, powerbuf, i); 800 for (j = 2 * i; j < 32; j *= 2) { 801 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 802 bn_scatter5(tmp.d, top, powerbuf, j); 803 } 804 } 805 for (; i < 16; i += 2) { 806 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 807 bn_scatter5(tmp.d, top, powerbuf, i); 808 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 809 bn_scatter5(tmp.d, top, powerbuf, 2 * i); 810 } 811 for (; i < 32; i += 2) { 812 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 813 bn_scatter5(tmp.d, top, powerbuf, i); 814 } 815# endif 816 bits--; 817 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 818 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 819 bn_gather5(tmp.d, top, powerbuf, wvalue); 820 821 /* 822 * Scan the exponent one window at a time starting from the most 823 * significant bits. 824 */ 825 while (bits >= 0) { 826 for (wvalue = 0, i = 0; i < 5; i++, bits--) 827 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 828 829 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 830 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 831 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 832 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 833 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 834 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 835 } 836 837 tmp.top = top; 838 bn_correct_top(&tmp); 839 } else 840#endif 841 { 842 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window)) 843 goto err; 844 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window)) 845 goto err; 846 847 /* 848 * If the window size is greater than 1, then calculate 849 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even 850 * powers could instead be computed as (a^(i/2))^2 to use the slight 851 * performance advantage of sqr over mul). 852 */ 853 if (window > 1) { 854 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 855 goto err; 856 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, 857 window)) 858 goto err; 859 for (i = 3; i < numPowers; i++) { 860 /* Calculate a^i = a^(i-1) * a */ 861 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) 862 goto err; 863 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, 864 window)) 865 goto err; 866 } 867 } 868 869 bits--; 870 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 871 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 872 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue, 873 window)) 874 goto err; 875 876 /* 877 * Scan the exponent one window at a time starting from the most 878 * significant bits. 879 */ 880 while (bits >= 0) { 881 wvalue = 0; /* The 'value' of the window */ 882 883 /* Scan the window, squaring the result as we go */ 884 for (i = 0; i < window; i++, bits--) { 885 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) 886 goto err; 887 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 888 } 889 890 /* 891 * Fetch the appropriate pre-computed value from the pre-buf 892 */ 893 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, 894 window)) 895 goto err; 896 897 /* Multiply the result into the intermediate result */ 898 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 899 goto err; 900 } 901 } 902 903 /* Convert the final result from montgomery to standard format */ 904 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 905 goto err; 906 ret = 1; 907 err: 908 if ((in_mont == NULL) && (mont != NULL)) 909 BN_MONT_CTX_free(mont); 910 if (powerbuf != NULL) { 911 OPENSSL_cleanse(powerbuf, powerbufLen); 912 if (powerbufFree) 913 OPENSSL_free(powerbufFree); 914 } 915 BN_CTX_end(ctx); 916 return (ret); 917} 918 919int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 920 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 921{ 922 BN_MONT_CTX *mont = NULL; 923 int b, bits, ret = 0; 924 int r_is_one; 925 BN_ULONG w, next_w; 926 BIGNUM *d, *r, *t; 927 BIGNUM *swap_tmp; 928#define BN_MOD_MUL_WORD(r, w, m) \ 929 (BN_mul_word(r, (w)) && \ 930 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 931 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 932 /* 933 * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is 934 * probably more overhead than always using BN_mod (which uses BN_copy if 935 * a similar test returns true). 936 */ 937 /* 938 * We can use BN_mod and do not need BN_nnmod because our accumulator is 939 * never negative (the result of BN_mod does not depend on the sign of 940 * the modulus). 941 */ 942#define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 943 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 944 945 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 946 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 947 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 948 return -1; 949 } 950 951 bn_check_top(p); 952 bn_check_top(m); 953 954 if (!BN_is_odd(m)) { 955 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); 956 return (0); 957 } 958 if (m->top == 1) 959 a %= m->d[0]; /* make sure that 'a' is reduced */ 960 961 bits = BN_num_bits(p); 962 if (bits == 0) { 963 /* x**0 mod 1 is still zero. */ 964 if (BN_is_one(m)) { 965 ret = 1; 966 BN_zero(rr); 967 } else { 968 ret = BN_one(rr); 969 } 970 return ret; 971 } 972 if (a == 0) { 973 BN_zero(rr); 974 ret = 1; 975 return ret; 976 } 977 978 BN_CTX_start(ctx); 979 d = BN_CTX_get(ctx); 980 r = BN_CTX_get(ctx); 981 t = BN_CTX_get(ctx); 982 if (d == NULL || r == NULL || t == NULL) 983 goto err; 984 985 if (in_mont != NULL) 986 mont = in_mont; 987 else { 988 if ((mont = BN_MONT_CTX_new()) == NULL) 989 goto err; 990 if (!BN_MONT_CTX_set(mont, m, ctx)) 991 goto err; 992 } 993 994 r_is_one = 1; /* except for Montgomery factor */ 995 996 /* bits-1 >= 0 */ 997 998 /* The result is accumulated in the product r*w. */ 999 w = a; /* bit 'bits-1' of 'p' is always set */ 1000 for (b = bits - 2; b >= 0; b--) { 1001 /* First, square r*w. */ 1002 next_w = w * w; 1003 if ((next_w / w) != w) { /* overflow */ 1004 if (r_is_one) { 1005 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1006 goto err; 1007 r_is_one = 0; 1008 } else { 1009 if (!BN_MOD_MUL_WORD(r, w, m)) 1010 goto err; 1011 } 1012 next_w = 1; 1013 } 1014 w = next_w; 1015 if (!r_is_one) { 1016 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 1017 goto err; 1018 } 1019 1020 /* Second, multiply r*w by 'a' if exponent bit is set. */ 1021 if (BN_is_bit_set(p, b)) { 1022 next_w = w * a; 1023 if ((next_w / a) != w) { /* overflow */ 1024 if (r_is_one) { 1025 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1026 goto err; 1027 r_is_one = 0; 1028 } else { 1029 if (!BN_MOD_MUL_WORD(r, w, m)) 1030 goto err; 1031 } 1032 next_w = a; 1033 } 1034 w = next_w; 1035 } 1036 } 1037 1038 /* Finally, set r:=r*w. */ 1039 if (w != 1) { 1040 if (r_is_one) { 1041 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1042 goto err; 1043 r_is_one = 0; 1044 } else { 1045 if (!BN_MOD_MUL_WORD(r, w, m)) 1046 goto err; 1047 } 1048 } 1049 1050 if (r_is_one) { /* can happen only if a == 1 */ 1051 if (!BN_one(rr)) 1052 goto err; 1053 } else { 1054 if (!BN_from_montgomery(rr, r, mont, ctx)) 1055 goto err; 1056 } 1057 ret = 1; 1058 err: 1059 if ((in_mont == NULL) && (mont != NULL)) 1060 BN_MONT_CTX_free(mont); 1061 BN_CTX_end(ctx); 1062 bn_check_top(rr); 1063 return (ret); 1064} 1065 1066/* The old fallback, simple version :-) */ 1067int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 1068 const BIGNUM *m, BN_CTX *ctx) 1069{ 1070 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 1071 int start = 1; 1072 BIGNUM *d; 1073 /* Table of variables obtained from 'ctx' */ 1074 BIGNUM *val[TABLE_SIZE]; 1075 1076 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1077 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1078 BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1079 return -1; 1080 } 1081 1082 bits = BN_num_bits(p); 1083 if (bits == 0) { 1084 /* x**0 mod 1 is still zero. */ 1085 if (BN_is_one(m)) { 1086 ret = 1; 1087 BN_zero(r); 1088 } else { 1089 ret = BN_one(r); 1090 } 1091 return ret; 1092 } 1093 1094 BN_CTX_start(ctx); 1095 d = BN_CTX_get(ctx); 1096 val[0] = BN_CTX_get(ctx); 1097 if (!d || !val[0]) 1098 goto err; 1099 1100 if (!BN_nnmod(val[0], a, m, ctx)) 1101 goto err; /* 1 */ 1102 if (BN_is_zero(val[0])) { 1103 BN_zero(r); 1104 ret = 1; 1105 goto err; 1106 } 1107 1108 window = BN_window_bits_for_exponent_size(bits); 1109 if (window > 1) { 1110 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1111 goto err; /* 2 */ 1112 j = 1 << (window - 1); 1113 for (i = 1; i < j; i++) { 1114 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1115 !BN_mod_mul(val[i], val[i - 1], d, m, ctx)) 1116 goto err; 1117 } 1118 } 1119 1120 start = 1; /* This is used to avoid multiplication etc 1121 * when there is only the value '1' in the 1122 * buffer. */ 1123 wvalue = 0; /* The 'value' of the window */ 1124 wstart = bits - 1; /* The top bit of the window */ 1125 wend = 0; /* The bottom bit of the window */ 1126 1127 if (!BN_one(r)) 1128 goto err; 1129 1130 for (;;) { 1131 if (BN_is_bit_set(p, wstart) == 0) { 1132 if (!start) 1133 if (!BN_mod_mul(r, r, r, m, ctx)) 1134 goto err; 1135 if (wstart == 0) 1136 break; 1137 wstart--; 1138 continue; 1139 } 1140 /* 1141 * We now have wstart on a 'set' bit, we now need to work out how bit 1142 * a window to do. To do this we need to scan forward until the last 1143 * set bit before the end of the window 1144 */ 1145 j = wstart; 1146 wvalue = 1; 1147 wend = 0; 1148 for (i = 1; i < window; i++) { 1149 if (wstart - i < 0) 1150 break; 1151 if (BN_is_bit_set(p, wstart - i)) { 1152 wvalue <<= (i - wend); 1153 wvalue |= 1; 1154 wend = i; 1155 } 1156 } 1157 1158 /* wend is the size of the current window */ 1159 j = wend + 1; 1160 /* add the 'bytes above' */ 1161 if (!start) 1162 for (i = 0; i < j; i++) { 1163 if (!BN_mod_mul(r, r, r, m, ctx)) 1164 goto err; 1165 } 1166 1167 /* wvalue will be an odd number < 2^window */ 1168 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1169 goto err; 1170 1171 /* move the 'window' down further */ 1172 wstart -= wend + 1; 1173 wvalue = 0; 1174 start = 0; 1175 if (wstart < 0) 1176 break; 1177 } 1178 ret = 1; 1179 err: 1180 BN_CTX_end(ctx); 1181 bn_check_top(r); 1182 return (ret); 1183} 1184