155714Skris/* crypto/bn/bn_exp.c */ 255714Skris/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 355714Skris * All rights reserved. 455714Skris * 555714Skris * This package is an SSL implementation written 655714Skris * by Eric Young (eay@cryptsoft.com). 755714Skris * The implementation was written so as to conform with Netscapes SSL. 8296341Sdelphij * 955714Skris * This library is free for commercial and non-commercial use as long as 1055714Skris * the following conditions are aheared to. The following conditions 1155714Skris * apply to all code found in this distribution, be it the RC4, RSA, 1255714Skris * lhash, DES, etc., code; not just the SSL code. The SSL documentation 1355714Skris * included with this distribution is covered by the same copyright terms 1455714Skris * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15296341Sdelphij * 1655714Skris * Copyright remains Eric Young's, and as such any Copyright notices in 1755714Skris * the code are not to be removed. 1855714Skris * If this package is used in a product, Eric Young should be given attribution 1955714Skris * as the author of the parts of the library used. 2055714Skris * This can be in the form of a textual message at program startup or 2155714Skris * in documentation (online or textual) provided with the package. 22296341Sdelphij * 2355714Skris * Redistribution and use in source and binary forms, with or without 2455714Skris * modification, are permitted provided that the following conditions 2555714Skris * are met: 2655714Skris * 1. Redistributions of source code must retain the copyright 2755714Skris * notice, this list of conditions and the following disclaimer. 2855714Skris * 2. Redistributions in binary form must reproduce the above copyright 2955714Skris * notice, this list of conditions and the following disclaimer in the 3055714Skris * documentation and/or other materials provided with the distribution. 3155714Skris * 3. All advertising materials mentioning features or use of this software 3255714Skris * must display the following acknowledgement: 3355714Skris * "This product includes cryptographic software written by 3455714Skris * Eric Young (eay@cryptsoft.com)" 3555714Skris * The word 'cryptographic' can be left out if the rouines from the library 3655714Skris * being used are not cryptographic related :-). 37296341Sdelphij * 4. If you include any Windows specific code (or a derivative thereof) from 3855714Skris * the apps directory (application code) you must include an acknowledgement: 3955714Skris * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40296341Sdelphij * 4155714Skris * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 4255714Skris * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 4355714Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4455714Skris * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 4555714Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4655714Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 4755714Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 4855714Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 4955714Skris * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 5055714Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 5155714Skris * SUCH DAMAGE. 52296341Sdelphij * 5355714Skris * The licence and distribution terms for any publically available version or 5455714Skris * derivative of this code cannot be changed. i.e. this code cannot simply be 5555714Skris * copied and put under another distribution licence 5655714Skris * [including the GNU Public Licence.] 5755714Skris */ 5868651Skris/* ==================================================================== 59160814Ssimon * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 6068651Skris * 6168651Skris * Redistribution and use in source and binary forms, with or without 6268651Skris * modification, are permitted provided that the following conditions 6368651Skris * are met: 6468651Skris * 6568651Skris * 1. Redistributions of source code must retain the above copyright 66296341Sdelphij * notice, this list of conditions and the following disclaimer. 6768651Skris * 6868651Skris * 2. Redistributions in binary form must reproduce the above copyright 6968651Skris * notice, this list of conditions and the following disclaimer in 7068651Skris * the documentation and/or other materials provided with the 7168651Skris * distribution. 7268651Skris * 7368651Skris * 3. All advertising materials mentioning features or use of this 7468651Skris * software must display the following acknowledgment: 7568651Skris * "This product includes software developed by the OpenSSL Project 7668651Skris * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 7768651Skris * 7868651Skris * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 7968651Skris * endorse or promote products derived from this software without 8068651Skris * prior written permission. For written permission, please contact 8168651Skris * openssl-core@openssl.org. 8268651Skris * 8368651Skris * 5. Products derived from this software may not be called "OpenSSL" 8468651Skris * nor may "OpenSSL" appear in their names without prior written 8568651Skris * permission of the OpenSSL Project. 8668651Skris * 8768651Skris * 6. Redistributions of any form whatsoever must retain the following 8868651Skris * acknowledgment: 8968651Skris * "This product includes software developed by the OpenSSL Project 9068651Skris * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 9168651Skris * 9268651Skris * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 9368651Skris * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 9468651Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 9568651Skris * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 9668651Skris * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 9768651Skris * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 9868651Skris * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 9968651Skris * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 10068651Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 10168651Skris * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 10268651Skris * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 10368651Skris * OF THE POSSIBILITY OF SUCH DAMAGE. 10468651Skris * ==================================================================== 10568651Skris * 10668651Skris * This product includes cryptographic software written by Eric Young 10768651Skris * (eay@cryptsoft.com). This product includes software written by Tim 10868651Skris * Hudson (tjh@cryptsoft.com). 10968651Skris * 11068651Skris */ 11155714Skris 11255714Skris#include "cryptlib.h" 113296341Sdelphij#include "constant_time_locl.h" 11455714Skris#include "bn_lcl.h" 11555714Skris 116238405Sjkim#include <stdlib.h> 117238405Sjkim#ifdef _WIN32 118238405Sjkim# include <malloc.h> 119238405Sjkim# ifndef alloca 120238405Sjkim# define alloca _alloca 121238405Sjkim# endif 122238405Sjkim#elif defined(__GNUC__) 123238405Sjkim# ifndef alloca 124238405Sjkim# define alloca(s) __builtin_alloca((s)) 125238405Sjkim# endif 126238405Sjkim#endif 127238405Sjkim 128160814Ssimon/* maximum precomputation table size for *variable* sliding windows */ 129296341Sdelphij#define TABLE_SIZE 32 13068651Skris 13155714Skris/* this one works - simple but works */ 132109998Smarkmint BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 133296341Sdelphij{ 134296341Sdelphij int i, bits, ret = 0; 135296341Sdelphij BIGNUM *v, *rr; 13655714Skris 137296341Sdelphij if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 138296341Sdelphij /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 139296341Sdelphij BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 140296341Sdelphij return -1; 141296341Sdelphij } 142160814Ssimon 143296341Sdelphij BN_CTX_start(ctx); 144296341Sdelphij if ((r == a) || (r == p)) 145296341Sdelphij rr = BN_CTX_get(ctx); 146296341Sdelphij else 147296341Sdelphij rr = r; 148296341Sdelphij v = BN_CTX_get(ctx); 149296341Sdelphij if (rr == NULL || v == NULL) 150296341Sdelphij goto err; 15155714Skris 152296341Sdelphij if (BN_copy(v, a) == NULL) 153296341Sdelphij goto err; 154296341Sdelphij bits = BN_num_bits(p); 15555714Skris 156296341Sdelphij if (BN_is_odd(p)) { 157296341Sdelphij if (BN_copy(rr, a) == NULL) 158296341Sdelphij goto err; 159296341Sdelphij } else { 160296341Sdelphij if (!BN_one(rr)) 161296341Sdelphij goto err; 162296341Sdelphij } 16355714Skris 164296341Sdelphij for (i = 1; i < bits; i++) { 165296341Sdelphij if (!BN_sqr(v, v, ctx)) 166296341Sdelphij goto err; 167296341Sdelphij if (BN_is_bit_set(p, i)) { 168296341Sdelphij if (!BN_mul(rr, rr, v, ctx)) 169296341Sdelphij goto err; 170296341Sdelphij } 171296341Sdelphij } 172296341Sdelphij if (r != rr) 173296341Sdelphij BN_copy(r, rr); 174296341Sdelphij ret = 1; 175296341Sdelphij err: 176296341Sdelphij BN_CTX_end(ctx); 177296341Sdelphij bn_check_top(r); 178296341Sdelphij return (ret); 179296341Sdelphij} 18055714Skris 181109998Smarkmint BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 182296341Sdelphij BN_CTX *ctx) 183296341Sdelphij{ 184296341Sdelphij int ret; 18555714Skris 186296341Sdelphij bn_check_top(a); 187296341Sdelphij bn_check_top(p); 188296341Sdelphij bn_check_top(m); 18955714Skris 190296341Sdelphij /*- 191296341Sdelphij * For even modulus m = 2^k*m_odd, it might make sense to compute 192296341Sdelphij * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 193296341Sdelphij * exponentiation for the odd part), using appropriate exponent 194296341Sdelphij * reductions, and combine the results using the CRT. 195296341Sdelphij * 196296341Sdelphij * For now, we use Montgomery only if the modulus is odd; otherwise, 197296341Sdelphij * exponentiation using the reciprocal-based quick remaindering 198296341Sdelphij * algorithm is used. 199296341Sdelphij * 200296341Sdelphij * (Timing obtained with expspeed.c [computations a^p mod m 201296341Sdelphij * where a, p, m are of the same length: 256, 512, 1024, 2048, 202296341Sdelphij * 4096, 8192 bits], compared to the running time of the 203296341Sdelphij * standard algorithm: 204296341Sdelphij * 205296341Sdelphij * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 206296341Sdelphij * 55 .. 77 % [UltraSparc processor, but 207296341Sdelphij * debug-solaris-sparcv8-gcc conf.] 208296341Sdelphij * 209296341Sdelphij * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 210296341Sdelphij * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 211296341Sdelphij * 212296341Sdelphij * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 213296341Sdelphij * at 2048 and more bits, but at 512 and 1024 bits, it was 214296341Sdelphij * slower even than the standard algorithm! 215296341Sdelphij * 216296341Sdelphij * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 217296341Sdelphij * should be obtained when the new Montgomery reduction code 218296341Sdelphij * has been integrated into OpenSSL.) 219296341Sdelphij */ 22059191Skris 221109998Smarkm#define MONT_MUL_MOD 222109998Smarkm#define MONT_EXP_WORD 223109998Smarkm#define RECP_MUL_MOD 224109998Smarkm 22555714Skris#ifdef MONT_MUL_MOD 226296341Sdelphij /* 227296341Sdelphij * I have finally been able to take out this pre-condition of the top bit 228296341Sdelphij * being set. It was caused by an error in BN_div with negatives. There 229296341Sdelphij * was also another problem when for a^b%m a >= m. eay 07-May-97 230296341Sdelphij */ 231296341Sdelphij /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 23255714Skris 233296341Sdelphij if (BN_is_odd(m)) { 234296341Sdelphij# ifdef MONT_EXP_WORD 235296341Sdelphij if (a->top == 1 && !a->neg 236296341Sdelphij && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) { 237296341Sdelphij BN_ULONG A = a->d[0]; 238296341Sdelphij ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL); 239296341Sdelphij } else 240296341Sdelphij# endif 241296341Sdelphij ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL); 242296341Sdelphij } else 24355714Skris#endif 24455714Skris#ifdef RECP_MUL_MOD 245296341Sdelphij { 246296341Sdelphij ret = BN_mod_exp_recp(r, a, p, m, ctx); 247296341Sdelphij } 24855714Skris#else 249296341Sdelphij { 250296341Sdelphij ret = BN_mod_exp_simple(r, a, p, m, ctx); 251296341Sdelphij } 25255714Skris#endif 25355714Skris 254296341Sdelphij bn_check_top(r); 255296341Sdelphij return (ret); 256296341Sdelphij} 25755714Skris 25855714Skrisint BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 259296341Sdelphij const BIGNUM *m, BN_CTX *ctx) 260296341Sdelphij{ 261296341Sdelphij int i, j, bits, ret = 0, wstart, wend, window, wvalue; 262296341Sdelphij int start = 1; 263296341Sdelphij BIGNUM *aa; 264296341Sdelphij /* Table of variables obtained from 'ctx' */ 265296341Sdelphij BIGNUM *val[TABLE_SIZE]; 266296341Sdelphij BN_RECP_CTX recp; 26755714Skris 268296341Sdelphij if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 269296341Sdelphij /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 270296341Sdelphij BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 271296341Sdelphij return -1; 272296341Sdelphij } 273160814Ssimon 274296341Sdelphij bits = BN_num_bits(p); 27555714Skris 276296341Sdelphij if (bits == 0) { 277296341Sdelphij ret = BN_one(r); 278296341Sdelphij return ret; 279296341Sdelphij } 28059191Skris 281296341Sdelphij BN_CTX_start(ctx); 282296341Sdelphij aa = BN_CTX_get(ctx); 283296341Sdelphij val[0] = BN_CTX_get(ctx); 284296341Sdelphij if (!aa || !val[0]) 285296341Sdelphij goto err; 28659191Skris 287296341Sdelphij BN_RECP_CTX_init(&recp); 288296341Sdelphij if (m->neg) { 289296341Sdelphij /* ignore sign of 'm' */ 290296341Sdelphij if (!BN_copy(aa, m)) 291296341Sdelphij goto err; 292296341Sdelphij aa->neg = 0; 293296341Sdelphij if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 294296341Sdelphij goto err; 295296341Sdelphij } else { 296296341Sdelphij if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 297296341Sdelphij goto err; 298296341Sdelphij } 29955714Skris 300296341Sdelphij if (!BN_nnmod(val[0], a, m, ctx)) 301296341Sdelphij goto err; /* 1 */ 302296341Sdelphij if (BN_is_zero(val[0])) { 303296341Sdelphij BN_zero(r); 304296341Sdelphij ret = 1; 305296341Sdelphij goto err; 306296341Sdelphij } 30755714Skris 308296341Sdelphij window = BN_window_bits_for_exponent_size(bits); 309296341Sdelphij if (window > 1) { 310296341Sdelphij if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 311296341Sdelphij goto err; /* 2 */ 312296341Sdelphij j = 1 << (window - 1); 313296341Sdelphij for (i = 1; i < j; i++) { 314296341Sdelphij if (((val[i] = BN_CTX_get(ctx)) == NULL) || 315296341Sdelphij !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) 316296341Sdelphij goto err; 317296341Sdelphij } 318296341Sdelphij } 31955714Skris 320296341Sdelphij start = 1; /* This is used to avoid multiplication etc 321296341Sdelphij * when there is only the value '1' in the 322296341Sdelphij * buffer. */ 323296341Sdelphij wvalue = 0; /* The 'value' of the window */ 324296341Sdelphij wstart = bits - 1; /* The top bit of the window */ 325296341Sdelphij wend = 0; /* The bottom bit of the window */ 32655714Skris 327296341Sdelphij if (!BN_one(r)) 328296341Sdelphij goto err; 32955714Skris 330296341Sdelphij for (;;) { 331296341Sdelphij if (BN_is_bit_set(p, wstart) == 0) { 332296341Sdelphij if (!start) 333296341Sdelphij if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 334296341Sdelphij goto err; 335296341Sdelphij if (wstart == 0) 336296341Sdelphij break; 337296341Sdelphij wstart--; 338296341Sdelphij continue; 339296341Sdelphij } 340296341Sdelphij /* 341296341Sdelphij * We now have wstart on a 'set' bit, we now need to work out how bit 342296341Sdelphij * a window to do. To do this we need to scan forward until the last 343296341Sdelphij * set bit before the end of the window 344296341Sdelphij */ 345296341Sdelphij j = wstart; 346296341Sdelphij wvalue = 1; 347296341Sdelphij wend = 0; 348296341Sdelphij for (i = 1; i < window; i++) { 349296341Sdelphij if (wstart - i < 0) 350296341Sdelphij break; 351296341Sdelphij if (BN_is_bit_set(p, wstart - i)) { 352296341Sdelphij wvalue <<= (i - wend); 353296341Sdelphij wvalue |= 1; 354296341Sdelphij wend = i; 355296341Sdelphij } 356296341Sdelphij } 35755714Skris 358296341Sdelphij /* wend is the size of the current window */ 359296341Sdelphij j = wend + 1; 360296341Sdelphij /* add the 'bytes above' */ 361296341Sdelphij if (!start) 362296341Sdelphij for (i = 0; i < j; i++) { 363296341Sdelphij if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) 364296341Sdelphij goto err; 365296341Sdelphij } 36655714Skris 367296341Sdelphij /* wvalue will be an odd number < 2^window */ 368296341Sdelphij if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) 369296341Sdelphij goto err; 37068651Skris 371296341Sdelphij /* move the 'window' down further */ 372296341Sdelphij wstart -= wend + 1; 373296341Sdelphij wvalue = 0; 374296341Sdelphij start = 0; 375296341Sdelphij if (wstart < 0) 376296341Sdelphij break; 377296341Sdelphij } 378296341Sdelphij ret = 1; 379296341Sdelphij err: 380296341Sdelphij BN_CTX_end(ctx); 381296341Sdelphij BN_RECP_CTX_free(&recp); 382296341Sdelphij bn_check_top(r); 383296341Sdelphij return (ret); 384296341Sdelphij} 385296341Sdelphij 386109998Smarkmint BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 387296341Sdelphij const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 388296341Sdelphij{ 389296341Sdelphij int i, j, bits, ret = 0, wstart, wend, window, wvalue; 390296341Sdelphij int start = 1; 391296341Sdelphij BIGNUM *d, *r; 392296341Sdelphij const BIGNUM *aa; 393296341Sdelphij /* Table of variables obtained from 'ctx' */ 394296341Sdelphij BIGNUM *val[TABLE_SIZE]; 395296341Sdelphij BN_MONT_CTX *mont = NULL; 39655714Skris 397296341Sdelphij if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 398296341Sdelphij return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 399296341Sdelphij } 400160814Ssimon 401296341Sdelphij bn_check_top(a); 402296341Sdelphij bn_check_top(p); 403296341Sdelphij bn_check_top(m); 40455714Skris 405296341Sdelphij if (!BN_is_odd(m)) { 406296341Sdelphij BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); 407296341Sdelphij return (0); 408296341Sdelphij } 409296341Sdelphij bits = BN_num_bits(p); 410296341Sdelphij if (bits == 0) { 411296341Sdelphij ret = BN_one(rr); 412296341Sdelphij return ret; 413296341Sdelphij } 414109998Smarkm 415296341Sdelphij BN_CTX_start(ctx); 416296341Sdelphij d = BN_CTX_get(ctx); 417296341Sdelphij r = BN_CTX_get(ctx); 418296341Sdelphij val[0] = BN_CTX_get(ctx); 419296341Sdelphij if (!d || !r || !val[0]) 420296341Sdelphij goto err; 42155714Skris 422296341Sdelphij /* 423296341Sdelphij * If this is not done, things will break in the montgomery part 424296341Sdelphij */ 42555714Skris 426296341Sdelphij if (in_mont != NULL) 427296341Sdelphij mont = in_mont; 428296341Sdelphij else { 429296341Sdelphij if ((mont = BN_MONT_CTX_new()) == NULL) 430296341Sdelphij goto err; 431296341Sdelphij if (!BN_MONT_CTX_set(mont, m, ctx)) 432296341Sdelphij goto err; 433296341Sdelphij } 43455714Skris 435296341Sdelphij if (a->neg || BN_ucmp(a, m) >= 0) { 436296341Sdelphij if (!BN_nnmod(val[0], a, m, ctx)) 437296341Sdelphij goto err; 438296341Sdelphij aa = val[0]; 439296341Sdelphij } else 440296341Sdelphij aa = a; 441296341Sdelphij if (BN_is_zero(aa)) { 442296341Sdelphij BN_zero(rr); 443296341Sdelphij ret = 1; 444296341Sdelphij goto err; 445296341Sdelphij } 446296341Sdelphij if (!BN_to_montgomery(val[0], aa, mont, ctx)) 447296341Sdelphij goto err; /* 1 */ 44855714Skris 449296341Sdelphij window = BN_window_bits_for_exponent_size(bits); 450296341Sdelphij if (window > 1) { 451296341Sdelphij if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 452296341Sdelphij goto err; /* 2 */ 453296341Sdelphij j = 1 << (window - 1); 454296341Sdelphij for (i = 1; i < j; i++) { 455296341Sdelphij if (((val[i] = BN_CTX_get(ctx)) == NULL) || 456296341Sdelphij !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) 457296341Sdelphij goto err; 458296341Sdelphij } 459296341Sdelphij } 46055714Skris 461296341Sdelphij start = 1; /* This is used to avoid multiplication etc 462296341Sdelphij * when there is only the value '1' in the 463296341Sdelphij * buffer. */ 464296341Sdelphij wvalue = 0; /* The 'value' of the window */ 465296341Sdelphij wstart = bits - 1; /* The top bit of the window */ 466296341Sdelphij wend = 0; /* The bottom bit of the window */ 46755714Skris 468296341Sdelphij if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 469296341Sdelphij goto err; 470296341Sdelphij for (;;) { 471296341Sdelphij if (BN_is_bit_set(p, wstart) == 0) { 472296341Sdelphij if (!start) { 473296341Sdelphij if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 474296341Sdelphij goto err; 475296341Sdelphij } 476296341Sdelphij if (wstart == 0) 477296341Sdelphij break; 478296341Sdelphij wstart--; 479296341Sdelphij continue; 480296341Sdelphij } 481296341Sdelphij /* 482296341Sdelphij * We now have wstart on a 'set' bit, we now need to work out how bit 483296341Sdelphij * a window to do. To do this we need to scan forward until the last 484296341Sdelphij * set bit before the end of the window 485296341Sdelphij */ 486296341Sdelphij j = wstart; 487296341Sdelphij wvalue = 1; 488296341Sdelphij wend = 0; 489296341Sdelphij for (i = 1; i < window; i++) { 490296341Sdelphij if (wstart - i < 0) 491296341Sdelphij break; 492296341Sdelphij if (BN_is_bit_set(p, wstart - i)) { 493296341Sdelphij wvalue <<= (i - wend); 494296341Sdelphij wvalue |= 1; 495296341Sdelphij wend = i; 496296341Sdelphij } 497296341Sdelphij } 49855714Skris 499296341Sdelphij /* wend is the size of the current window */ 500296341Sdelphij j = wend + 1; 501296341Sdelphij /* add the 'bytes above' */ 502296341Sdelphij if (!start) 503296341Sdelphij for (i = 0; i < j; i++) { 504296341Sdelphij if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 505296341Sdelphij goto err; 506296341Sdelphij } 50755714Skris 508296341Sdelphij /* wvalue will be an odd number < 2^window */ 509296341Sdelphij if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 510296341Sdelphij goto err; 51155714Skris 512296341Sdelphij /* move the 'window' down further */ 513296341Sdelphij wstart -= wend + 1; 514296341Sdelphij wvalue = 0; 515296341Sdelphij start = 0; 516296341Sdelphij if (wstart < 0) 517296341Sdelphij break; 518296341Sdelphij } 519296341Sdelphij if (!BN_from_montgomery(rr, r, mont, ctx)) 520296341Sdelphij goto err; 521296341Sdelphij ret = 1; 522296341Sdelphij err: 523296341Sdelphij if ((in_mont == NULL) && (mont != NULL)) 524296341Sdelphij BN_MONT_CTX_free(mont); 525296341Sdelphij BN_CTX_end(ctx); 526296341Sdelphij bn_check_top(rr); 527296341Sdelphij return (ret); 528296341Sdelphij} 529160814Ssimon 530296341Sdelphij/* 531296341Sdelphij * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific 532296341Sdelphij * layout so that accessing any of these table values shows the same access 533296341Sdelphij * pattern as far as cache lines are concerned. The following functions are 534296341Sdelphij * used to transfer a BIGNUM from/to that table. 535296341Sdelphij */ 536160814Ssimon 537296341Sdelphijstatic int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, 538296341Sdelphij unsigned char *buf, int idx, 539296341Sdelphij int window) 540296341Sdelphij{ 541296341Sdelphij int i, j; 542296341Sdelphij int width = 1 << window; 543296341Sdelphij BN_ULONG *table = (BN_ULONG *)buf; 544160814Ssimon 545296341Sdelphij if (top > b->top) 546296341Sdelphij top = b->top; /* this works because 'buf' is explicitly 547296341Sdelphij * zeroed */ 548296341Sdelphij for (i = 0, j = idx; i < top; i++, j += width) { 549296341Sdelphij table[j] = b->d[i]; 550296341Sdelphij } 551160814Ssimon 552296341Sdelphij return 1; 553296341Sdelphij} 554160814Ssimon 555296341Sdelphijstatic int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, 556296341Sdelphij unsigned char *buf, int idx, 557296341Sdelphij int window) 558296341Sdelphij{ 559296341Sdelphij int i, j; 560296341Sdelphij int width = 1 << window; 561296341Sdelphij volatile BN_ULONG *table = (volatile BN_ULONG *)buf; 562160814Ssimon 563296341Sdelphij if (bn_wexpand(b, top) == NULL) 564296341Sdelphij return 0; 565160814Ssimon 566296341Sdelphij if (window <= 3) { 567296341Sdelphij for (i = 0; i < top; i++, table += width) { 568296341Sdelphij BN_ULONG acc = 0; 569160814Ssimon 570296341Sdelphij for (j = 0; j < width; j++) { 571296341Sdelphij acc |= table[j] & 572296341Sdelphij ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 573296341Sdelphij } 574160814Ssimon 575296341Sdelphij b->d[i] = acc; 576296341Sdelphij } 577296341Sdelphij } else { 578296341Sdelphij int xstride = 1 << (window - 2); 579296341Sdelphij BN_ULONG y0, y1, y2, y3; 580296341Sdelphij 581296341Sdelphij i = idx >> (window - 2); /* equivalent of idx / xstride */ 582296341Sdelphij idx &= xstride - 1; /* equivalent of idx % xstride */ 583296341Sdelphij 584296341Sdelphij y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); 585296341Sdelphij y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); 586296341Sdelphij y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); 587296341Sdelphij y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); 588296341Sdelphij 589296341Sdelphij for (i = 0; i < top; i++, table += width) { 590296341Sdelphij BN_ULONG acc = 0; 591296341Sdelphij 592296341Sdelphij for (j = 0; j < xstride; j++) { 593296341Sdelphij acc |= ( (table[j + 0 * xstride] & y0) | 594296341Sdelphij (table[j + 1 * xstride] & y1) | 595296341Sdelphij (table[j + 2 * xstride] & y2) | 596296341Sdelphij (table[j + 3 * xstride] & y3) ) 597296341Sdelphij & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 598296341Sdelphij } 599296341Sdelphij 600296341Sdelphij b->d[i] = acc; 601296341Sdelphij } 602296341Sdelphij } 603296341Sdelphij 604296341Sdelphij b->top = top; 605296341Sdelphij bn_correct_top(b); 606296341Sdelphij return 1; 607296341Sdelphij} 608296341Sdelphij 609296341Sdelphij/* 610296341Sdelphij * Given a pointer value, compute the next address that is a cache line 611296341Sdelphij * multiple. 612296341Sdelphij */ 613160814Ssimon#define MOD_EXP_CTIME_ALIGN(x_) \ 614296341Sdelphij ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 615160814Ssimon 616296341Sdelphij/* 617296341Sdelphij * This variant of BN_mod_exp_mont() uses fixed windows and the special 618296341Sdelphij * precomputation memory layout to limit data-dependency to a minimum to 619296341Sdelphij * protect secret exponents (cf. the hyper-threading timing attacks pointed 620296341Sdelphij * out by Colin Percival, 621296341Sdelphij * http://www.daemong-consideredperthreading-considered-harmful/) 622160814Ssimon */ 623160814Ssimonint BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 624296341Sdelphij const BIGNUM *m, BN_CTX *ctx, 625296341Sdelphij BN_MONT_CTX *in_mont) 626296341Sdelphij{ 627296341Sdelphij int i, bits, ret = 0, window, wvalue; 628296341Sdelphij int top; 629296341Sdelphij BN_MONT_CTX *mont = NULL; 630160814Ssimon 631296341Sdelphij int numPowers; 632296341Sdelphij unsigned char *powerbufFree = NULL; 633296341Sdelphij int powerbufLen = 0; 634296341Sdelphij unsigned char *powerbuf = NULL; 635296341Sdelphij BIGNUM tmp, am; 636160814Ssimon 637296341Sdelphij bn_check_top(a); 638296341Sdelphij bn_check_top(p); 639296341Sdelphij bn_check_top(m); 640160814Ssimon 641296341Sdelphij top = m->top; 642160814Ssimon 643296341Sdelphij if (!(m->d[0] & 1)) { 644296341Sdelphij BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS); 645296341Sdelphij return (0); 646296341Sdelphij } 647296341Sdelphij bits = BN_num_bits(p); 648296341Sdelphij if (bits == 0) { 649296341Sdelphij ret = BN_one(rr); 650296341Sdelphij return ret; 651296341Sdelphij } 652160814Ssimon 653296341Sdelphij BN_CTX_start(ctx); 654160814Ssimon 655296341Sdelphij /* 656296341Sdelphij * Allocate a montgomery context if it was not supplied by the caller. If 657296341Sdelphij * this is not done, things will break in the montgomery part. 658296341Sdelphij */ 659296341Sdelphij if (in_mont != NULL) 660296341Sdelphij mont = in_mont; 661296341Sdelphij else { 662296341Sdelphij if ((mont = BN_MONT_CTX_new()) == NULL) 663296341Sdelphij goto err; 664296341Sdelphij if (!BN_MONT_CTX_set(mont, m, ctx)) 665296341Sdelphij goto err; 666296341Sdelphij } 667160814Ssimon 668296341Sdelphij /* Get the window size to use with size of p. */ 669296341Sdelphij window = BN_window_bits_for_ctime_exponent_size(bits); 670238405Sjkim#if defined(OPENSSL_BN_ASM_MONT5) 671296341Sdelphij if (window == 6 && bits <= 1024) 672296341Sdelphij window = 5; /* ~5% improvement of 2048-bit RSA sign */ 673238405Sjkim#endif 674160814Ssimon 675296341Sdelphij /* 676296341Sdelphij * Allocate a buffer large enough to hold all of the pre-computed powers 677296341Sdelphij * of am, am itself and tmp. 678296341Sdelphij */ 679296341Sdelphij numPowers = 1 << window; 680296341Sdelphij powerbufLen = sizeof(m->d[0]) * (top * numPowers + 681296341Sdelphij ((2 * top) > 682296341Sdelphij numPowers ? (2 * top) : numPowers)); 683238405Sjkim#ifdef alloca 684296341Sdelphij if (powerbufLen < 3072) 685296341Sdelphij powerbufFree = 686296341Sdelphij alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 687296341Sdelphij else 688238405Sjkim#endif 689296341Sdelphij if ((powerbufFree = 690296341Sdelphij (unsigned char *)OPENSSL_malloc(powerbufLen + 691296341Sdelphij MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) 692296341Sdelphij == NULL) 693296341Sdelphij goto err; 694160814Ssimon 695296341Sdelphij powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 696296341Sdelphij memset(powerbuf, 0, powerbufLen); 697296341Sdelphij 698238405Sjkim#ifdef alloca 699296341Sdelphij if (powerbufLen < 3072) 700296341Sdelphij powerbufFree = NULL; 701238405Sjkim#endif 702160814Ssimon 703296341Sdelphij /* lay down tmp and am right after powers table */ 704296341Sdelphij tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 705296341Sdelphij am.d = tmp.d + top; 706296341Sdelphij tmp.top = am.top = 0; 707296341Sdelphij tmp.dmax = am.dmax = top; 708296341Sdelphij tmp.neg = am.neg = 0; 709296341Sdelphij tmp.flags = am.flags = BN_FLG_STATIC_DATA; 710160814Ssimon 711296341Sdelphij /* prepare a^0 in Montgomery domain */ 712238405Sjkim#if 1 713296341Sdelphij if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 714296341Sdelphij goto err; 715238405Sjkim#else 716296341Sdelphij tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 717296341Sdelphij for (i = 1; i < top; i++) 718296341Sdelphij tmp.d[i] = (~m->d[i]) & BN_MASK2; 719296341Sdelphij tmp.top = top; 720238405Sjkim#endif 721238405Sjkim 722296341Sdelphij /* prepare a^1 in Montgomery domain */ 723296341Sdelphij if (a->neg || BN_ucmp(a, m) >= 0) { 724296341Sdelphij if (!BN_mod(&am, a, m, ctx)) 725296341Sdelphij goto err; 726296341Sdelphij if (!BN_to_montgomery(&am, &am, mont, ctx)) 727296341Sdelphij goto err; 728296341Sdelphij } else if (!BN_to_montgomery(&am, a, mont, ctx)) 729296341Sdelphij goto err; 730160814Ssimon 731238405Sjkim#if defined(OPENSSL_BN_ASM_MONT5) 732296341Sdelphij if (window == 5 && top > 1) { 733296341Sdelphij /* 734296341Sdelphij * This optimization uses ideas from http://eprint.iacr.org/2011/239, 735296341Sdelphij * specifically optimization of cache-timing attack countermeasures 736296341Sdelphij * and pre-computation optimization. 737296341Sdelphij */ 738238405Sjkim 739296341Sdelphij /* 740296341Sdelphij * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 741296341Sdelphij * 512-bit RSA is hardly relevant, we omit it to spare size... 742296341Sdelphij */ 743296341Sdelphij void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 744296341Sdelphij const void *table, const BN_ULONG *np, 745296341Sdelphij const BN_ULONG *n0, int num, int power); 746296341Sdelphij void bn_scatter5(const BN_ULONG *inp, size_t num, 747296341Sdelphij void *table, size_t power); 748296341Sdelphij void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); 749238405Sjkim 750296341Sdelphij BN_ULONG *np = mont->N.d, *n0 = mont->n0; 751238405Sjkim 752296341Sdelphij /* 753296341Sdelphij * BN_to_montgomery can contaminate words above .top [in 754296341Sdelphij * BN_DEBUG[_DEBUG] build]... 755296341Sdelphij */ 756296341Sdelphij for (i = am.top; i < top; i++) 757296341Sdelphij am.d[i] = 0; 758296341Sdelphij for (i = tmp.top; i < top; i++) 759296341Sdelphij tmp.d[i] = 0; 760238405Sjkim 761296341Sdelphij bn_scatter5(tmp.d, top, powerbuf, 0); 762296341Sdelphij bn_scatter5(am.d, am.top, powerbuf, 1); 763296341Sdelphij bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 764296341Sdelphij bn_scatter5(tmp.d, top, powerbuf, 2); 765238405Sjkim 766296341Sdelphij# if 0 767296341Sdelphij for (i = 3; i < 32; i++) { 768296341Sdelphij /* Calculate a^i = a^(i-1) * a */ 769296341Sdelphij bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 770296341Sdelphij bn_scatter5(tmp.d, top, powerbuf, i); 771296341Sdelphij } 772296341Sdelphij# else 773296341Sdelphij /* same as above, but uses squaring for 1/2 of operations */ 774296341Sdelphij for (i = 4; i < 32; i *= 2) { 775296341Sdelphij bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 776296341Sdelphij bn_scatter5(tmp.d, top, powerbuf, i); 777296341Sdelphij } 778296341Sdelphij for (i = 3; i < 8; i += 2) { 779296341Sdelphij int j; 780296341Sdelphij bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 781296341Sdelphij bn_scatter5(tmp.d, top, powerbuf, i); 782296341Sdelphij for (j = 2 * i; j < 32; j *= 2) { 783296341Sdelphij bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 784296341Sdelphij bn_scatter5(tmp.d, top, powerbuf, j); 785296341Sdelphij } 786296341Sdelphij } 787296341Sdelphij for (; i < 16; i += 2) { 788296341Sdelphij bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 789296341Sdelphij bn_scatter5(tmp.d, top, powerbuf, i); 790296341Sdelphij bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 791296341Sdelphij bn_scatter5(tmp.d, top, powerbuf, 2 * i); 792296341Sdelphij } 793296341Sdelphij for (; i < 32; i += 2) { 794296341Sdelphij bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 795296341Sdelphij bn_scatter5(tmp.d, top, powerbuf, i); 796296341Sdelphij } 797296341Sdelphij# endif 798296341Sdelphij bits--; 799296341Sdelphij for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 800296341Sdelphij wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 801296341Sdelphij bn_gather5(tmp.d, top, powerbuf, wvalue); 802238405Sjkim 803296341Sdelphij /* 804296341Sdelphij * Scan the exponent one window at a time starting from the most 805296341Sdelphij * significant bits. 806296341Sdelphij */ 807296341Sdelphij while (bits >= 0) { 808296341Sdelphij for (wvalue = 0, i = 0; i < 5; i++, bits--) 809296341Sdelphij wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 810238405Sjkim 811296341Sdelphij bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 812296341Sdelphij bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 813296341Sdelphij bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 814296341Sdelphij bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 815296341Sdelphij bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 816296341Sdelphij bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 817296341Sdelphij } 818238405Sjkim 819296341Sdelphij tmp.top = top; 820296341Sdelphij bn_correct_top(&tmp); 821296341Sdelphij } else 822238405Sjkim#endif 823296341Sdelphij { 824296341Sdelphij if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window)) 825296341Sdelphij goto err; 826296341Sdelphij if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window)) 827296341Sdelphij goto err; 828238405Sjkim 829296341Sdelphij /* 830296341Sdelphij * If the window size is greater than 1, then calculate 831296341Sdelphij * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even 832296341Sdelphij * powers could instead be computed as (a^(i/2))^2 to use the slight 833296341Sdelphij * performance advantage of sqr over mul). 834296341Sdelphij */ 835296341Sdelphij if (window > 1) { 836296341Sdelphij if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 837296341Sdelphij goto err; 838296341Sdelphij if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, 839296341Sdelphij window)) 840296341Sdelphij goto err; 841296341Sdelphij for (i = 3; i < numPowers; i++) { 842296341Sdelphij /* Calculate a^i = a^(i-1) * a */ 843296341Sdelphij if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) 844296341Sdelphij goto err; 845296341Sdelphij if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, 846296341Sdelphij window)) 847296341Sdelphij goto err; 848296341Sdelphij } 849296341Sdelphij } 850160814Ssimon 851296341Sdelphij bits--; 852296341Sdelphij for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 853296341Sdelphij wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 854296341Sdelphij if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue, 855296341Sdelphij window)) 856296341Sdelphij goto err; 857160814Ssimon 858296341Sdelphij /* 859296341Sdelphij * Scan the exponent one window at a time starting from the most 860296341Sdelphij * significant bits. 861296341Sdelphij */ 862296341Sdelphij while (bits >= 0) { 863296341Sdelphij wvalue = 0; /* The 'value' of the window */ 864160814Ssimon 865296341Sdelphij /* Scan the window, squaring the result as we go */ 866296341Sdelphij for (i = 0; i < window; i++, bits--) { 867296341Sdelphij if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) 868296341Sdelphij goto err; 869296341Sdelphij wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 870296341Sdelphij } 871160814Ssimon 872296341Sdelphij /* 873296341Sdelphij * Fetch the appropriate pre-computed value from the pre-buf 874296341Sdelphij */ 875296341Sdelphij if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, 876296341Sdelphij window)) 877296341Sdelphij goto err; 878296341Sdelphij 879296341Sdelphij /* Multiply the result into the intermediate result */ 880296341Sdelphij if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 881296341Sdelphij goto err; 882296341Sdelphij } 883296341Sdelphij } 884296341Sdelphij 885296341Sdelphij /* Convert the final result from montgomery to standard format */ 886296341Sdelphij if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 887296341Sdelphij goto err; 888296341Sdelphij ret = 1; 889296341Sdelphij err: 890296341Sdelphij if ((in_mont == NULL) && (mont != NULL)) 891296341Sdelphij BN_MONT_CTX_free(mont); 892296341Sdelphij if (powerbuf != NULL) { 893296341Sdelphij OPENSSL_cleanse(powerbuf, powerbufLen); 894296341Sdelphij if (powerbufFree) 895296341Sdelphij OPENSSL_free(powerbufFree); 896296341Sdelphij } 897296341Sdelphij BN_CTX_end(ctx); 898296341Sdelphij return (ret); 899296341Sdelphij} 900296341Sdelphij 90168651Skrisint BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 90268651Skris const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 903296341Sdelphij{ 904296341Sdelphij BN_MONT_CTX *mont = NULL; 905296341Sdelphij int b, bits, ret = 0; 906296341Sdelphij int r_is_one; 907296341Sdelphij BN_ULONG w, next_w; 908296341Sdelphij BIGNUM *d, *r, *t; 909296341Sdelphij BIGNUM *swap_tmp; 91068651Skris#define BN_MOD_MUL_WORD(r, w, m) \ 911296341Sdelphij (BN_mul_word(r, (w)) && \ 912296341Sdelphij (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 913296341Sdelphij (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 914296341Sdelphij /* 915296341Sdelphij * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is 916296341Sdelphij * probably more overhead than always using BN_mod (which uses BN_copy if 917296341Sdelphij * a similar test returns true). 918296341Sdelphij */ 919296341Sdelphij /* 920296341Sdelphij * We can use BN_mod and do not need BN_nnmod because our accumulator is 921296341Sdelphij * never negative (the result of BN_mod does not depend on the sign of 922296341Sdelphij * the modulus). 923296341Sdelphij */ 92468651Skris#define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 925296341Sdelphij (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 92668651Skris 927296341Sdelphij if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 928296341Sdelphij /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 929296341Sdelphij BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 930296341Sdelphij return -1; 931296341Sdelphij } 932160814Ssimon 933296341Sdelphij bn_check_top(p); 934296341Sdelphij bn_check_top(m); 93568651Skris 936296341Sdelphij if (!BN_is_odd(m)) { 937296341Sdelphij BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); 938296341Sdelphij return (0); 939296341Sdelphij } 940296341Sdelphij if (m->top == 1) 941296341Sdelphij a %= m->d[0]; /* make sure that 'a' is reduced */ 942109998Smarkm 943296341Sdelphij bits = BN_num_bits(p); 944296341Sdelphij if (bits == 0) { 945296341Sdelphij /* x**0 mod 1 is still zero. */ 946296341Sdelphij if (BN_is_one(m)) { 947296341Sdelphij ret = 1; 948296341Sdelphij BN_zero(rr); 949296341Sdelphij } else 950296341Sdelphij ret = BN_one(rr); 951296341Sdelphij return ret; 952296341Sdelphij } 953296341Sdelphij if (a == 0) { 954296341Sdelphij BN_zero(rr); 955296341Sdelphij ret = 1; 956296341Sdelphij return ret; 957296341Sdelphij } 958109998Smarkm 959296341Sdelphij BN_CTX_start(ctx); 960296341Sdelphij d = BN_CTX_get(ctx); 961296341Sdelphij r = BN_CTX_get(ctx); 962296341Sdelphij t = BN_CTX_get(ctx); 963296341Sdelphij if (d == NULL || r == NULL || t == NULL) 964296341Sdelphij goto err; 96568651Skris 966296341Sdelphij if (in_mont != NULL) 967296341Sdelphij mont = in_mont; 968296341Sdelphij else { 969296341Sdelphij if ((mont = BN_MONT_CTX_new()) == NULL) 970296341Sdelphij goto err; 971296341Sdelphij if (!BN_MONT_CTX_set(mont, m, ctx)) 972296341Sdelphij goto err; 973296341Sdelphij } 97468651Skris 975296341Sdelphij r_is_one = 1; /* except for Montgomery factor */ 97668651Skris 977296341Sdelphij /* bits-1 >= 0 */ 97868651Skris 979296341Sdelphij /* The result is accumulated in the product r*w. */ 980296341Sdelphij w = a; /* bit 'bits-1' of 'p' is always set */ 981296341Sdelphij for (b = bits - 2; b >= 0; b--) { 982296341Sdelphij /* First, square r*w. */ 983296341Sdelphij next_w = w * w; 984296341Sdelphij if ((next_w / w) != w) { /* overflow */ 985296341Sdelphij if (r_is_one) { 986296341Sdelphij if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 987296341Sdelphij goto err; 988296341Sdelphij r_is_one = 0; 989296341Sdelphij } else { 990296341Sdelphij if (!BN_MOD_MUL_WORD(r, w, m)) 991296341Sdelphij goto err; 992296341Sdelphij } 993296341Sdelphij next_w = 1; 994296341Sdelphij } 995296341Sdelphij w = next_w; 996296341Sdelphij if (!r_is_one) { 997296341Sdelphij if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 998296341Sdelphij goto err; 999296341Sdelphij } 100068651Skris 1001296341Sdelphij /* Second, multiply r*w by 'a' if exponent bit is set. */ 1002296341Sdelphij if (BN_is_bit_set(p, b)) { 1003296341Sdelphij next_w = w * a; 1004296341Sdelphij if ((next_w / a) != w) { /* overflow */ 1005296341Sdelphij if (r_is_one) { 1006296341Sdelphij if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1007296341Sdelphij goto err; 1008296341Sdelphij r_is_one = 0; 1009296341Sdelphij } else { 1010296341Sdelphij if (!BN_MOD_MUL_WORD(r, w, m)) 1011296341Sdelphij goto err; 1012296341Sdelphij } 1013296341Sdelphij next_w = a; 1014296341Sdelphij } 1015296341Sdelphij w = next_w; 1016296341Sdelphij } 1017296341Sdelphij } 101868651Skris 1019296341Sdelphij /* Finally, set r:=r*w. */ 1020296341Sdelphij if (w != 1) { 1021296341Sdelphij if (r_is_one) { 1022296341Sdelphij if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1023296341Sdelphij goto err; 1024296341Sdelphij r_is_one = 0; 1025296341Sdelphij } else { 1026296341Sdelphij if (!BN_MOD_MUL_WORD(r, w, m)) 1027296341Sdelphij goto err; 1028296341Sdelphij } 1029296341Sdelphij } 103068651Skris 1031296341Sdelphij if (r_is_one) { /* can happen only if a == 1 */ 1032296341Sdelphij if (!BN_one(rr)) 1033296341Sdelphij goto err; 1034296341Sdelphij } else { 1035296341Sdelphij if (!BN_from_montgomery(rr, r, mont, ctx)) 1036296341Sdelphij goto err; 1037296341Sdelphij } 1038296341Sdelphij ret = 1; 1039296341Sdelphij err: 1040296341Sdelphij if ((in_mont == NULL) && (mont != NULL)) 1041296341Sdelphij BN_MONT_CTX_free(mont); 1042296341Sdelphij BN_CTX_end(ctx); 1043296341Sdelphij bn_check_top(rr); 1044296341Sdelphij return (ret); 1045296341Sdelphij} 104668651Skris 104755714Skris/* The old fallback, simple version :-) */ 1048160814Ssimonint BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 1049296341Sdelphij const BIGNUM *m, BN_CTX *ctx) 1050296341Sdelphij{ 1051296341Sdelphij int i, j, bits, ret = 0, wstart, wend, window, wvalue; 1052296341Sdelphij int start = 1; 1053296341Sdelphij BIGNUM *d; 1054296341Sdelphij /* Table of variables obtained from 'ctx' */ 1055296341Sdelphij BIGNUM *val[TABLE_SIZE]; 105655714Skris 1057296341Sdelphij if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1058296341Sdelphij /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1059296341Sdelphij BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1060296341Sdelphij return -1; 1061296341Sdelphij } 1062160814Ssimon 1063296341Sdelphij bits = BN_num_bits(p); 106455714Skris 1065296341Sdelphij if (bits == 0) { 1066296341Sdelphij ret = BN_one(r); 1067296341Sdelphij return ret; 1068296341Sdelphij } 106955714Skris 1070296341Sdelphij BN_CTX_start(ctx); 1071296341Sdelphij d = BN_CTX_get(ctx); 1072296341Sdelphij val[0] = BN_CTX_get(ctx); 1073296341Sdelphij if (!d || !val[0]) 1074296341Sdelphij goto err; 107559191Skris 1076296341Sdelphij if (!BN_nnmod(val[0], a, m, ctx)) 1077296341Sdelphij goto err; /* 1 */ 1078296341Sdelphij if (BN_is_zero(val[0])) { 1079296341Sdelphij BN_zero(r); 1080296341Sdelphij ret = 1; 1081296341Sdelphij goto err; 1082296341Sdelphij } 108355714Skris 1084296341Sdelphij window = BN_window_bits_for_exponent_size(bits); 1085296341Sdelphij if (window > 1) { 1086296341Sdelphij if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1087296341Sdelphij goto err; /* 2 */ 1088296341Sdelphij j = 1 << (window - 1); 1089296341Sdelphij for (i = 1; i < j; i++) { 1090296341Sdelphij if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1091296341Sdelphij !BN_mod_mul(val[i], val[i - 1], d, m, ctx)) 1092296341Sdelphij goto err; 1093296341Sdelphij } 1094296341Sdelphij } 109555714Skris 1096296341Sdelphij start = 1; /* This is used to avoid multiplication etc 1097296341Sdelphij * when there is only the value '1' in the 1098296341Sdelphij * buffer. */ 1099296341Sdelphij wvalue = 0; /* The 'value' of the window */ 1100296341Sdelphij wstart = bits - 1; /* The top bit of the window */ 1101296341Sdelphij wend = 0; /* The bottom bit of the window */ 110255714Skris 1103296341Sdelphij if (!BN_one(r)) 1104296341Sdelphij goto err; 110555714Skris 1106296341Sdelphij for (;;) { 1107296341Sdelphij if (BN_is_bit_set(p, wstart) == 0) { 1108296341Sdelphij if (!start) 1109296341Sdelphij if (!BN_mod_mul(r, r, r, m, ctx)) 1110296341Sdelphij goto err; 1111296341Sdelphij if (wstart == 0) 1112296341Sdelphij break; 1113296341Sdelphij wstart--; 1114296341Sdelphij continue; 1115296341Sdelphij } 1116296341Sdelphij /* 1117296341Sdelphij * We now have wstart on a 'set' bit, we now need to work out how bit 1118296341Sdelphij * a window to do. To do this we need to scan forward until the last 1119296341Sdelphij * set bit before the end of the window 1120296341Sdelphij */ 1121296341Sdelphij j = wstart; 1122296341Sdelphij wvalue = 1; 1123296341Sdelphij wend = 0; 1124296341Sdelphij for (i = 1; i < window; i++) { 1125296341Sdelphij if (wstart - i < 0) 1126296341Sdelphij break; 1127296341Sdelphij if (BN_is_bit_set(p, wstart - i)) { 1128296341Sdelphij wvalue <<= (i - wend); 1129296341Sdelphij wvalue |= 1; 1130296341Sdelphij wend = i; 1131296341Sdelphij } 1132296341Sdelphij } 113355714Skris 1134296341Sdelphij /* wend is the size of the current window */ 1135296341Sdelphij j = wend + 1; 1136296341Sdelphij /* add the 'bytes above' */ 1137296341Sdelphij if (!start) 1138296341Sdelphij for (i = 0; i < j; i++) { 1139296341Sdelphij if (!BN_mod_mul(r, r, r, m, ctx)) 1140296341Sdelphij goto err; 1141296341Sdelphij } 114255714Skris 1143296341Sdelphij /* wvalue will be an odd number < 2^window */ 1144296341Sdelphij if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1145296341Sdelphij goto err; 1146296341Sdelphij 1147296341Sdelphij /* move the 'window' down further */ 1148296341Sdelphij wstart -= wend + 1; 1149296341Sdelphij wvalue = 0; 1150296341Sdelphij start = 0; 1151296341Sdelphij if (wstart < 0) 1152296341Sdelphij break; 1153296341Sdelphij } 1154296341Sdelphij ret = 1; 1155296341Sdelphij err: 1156296341Sdelphij BN_CTX_end(ctx); 1157296341Sdelphij bn_check_top(r); 1158296341Sdelphij return (ret); 1159296341Sdelphij} 1160