bn_exp.c revision 279264
1186904Ssam/* crypto/bn/bn_exp.c */ 2186904Ssam/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3186904Ssam * All rights reserved. 4186904Ssam * 5186904Ssam * This package is an SSL implementation written 6186904Ssam * by Eric Young (eay@cryptsoft.com). 7186904Ssam * The implementation was written so as to conform with Netscapes SSL. 8186904Ssam * 9186904Ssam * This library is free for commercial and non-commercial use as long as 10186904Ssam * the following conditions are aheared to. The following conditions 11186904Ssam * apply to all code found in this distribution, be it the RC4, RSA, 12186904Ssam * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13186904Ssam * included with this distribution is covered by the same copyright terms 14186904Ssam * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15186904Ssam * 16186904Ssam * Copyright remains Eric Young's, and as such any Copyright notices in 17186904Ssam * the code are not to be removed. 18186904Ssam * If this package is used in a product, Eric Young should be given attribution 19186904Ssam * as the author of the parts of the library used. 20186904Ssam * This can be in the form of a textual message at program startup or 21186904Ssam * in documentation (online or textual) provided with the package. 22186904Ssam * 23186904Ssam * Redistribution and use in source and binary forms, with or without 24186904Ssam * modification, are permitted provided that the following conditions 25186904Ssam * are met: 26186904Ssam * 1. Redistributions of source code must retain the copyright 27186904Ssam * notice, this list of conditions and the following disclaimer. 28186904Ssam * 2. Redistributions in binary form must reproduce the above copyright 29186904Ssam * notice, this list of conditions and the following disclaimer in the 30186904Ssam * documentation and/or other materials provided with the distribution. 31186904Ssam * 3. All advertising materials mentioning features or use of this software 32186904Ssam * must display the following acknowledgement: 33186904Ssam * "This product includes cryptographic software written by 34186904Ssam * Eric Young (eay@cryptsoft.com)" 35186904Ssam * The word 'cryptographic' can be left out if the rouines from the library 36190455Ssam * being used are not cryptographic related :-). 37186904Ssam * 4. If you include any Windows specific code (or a derivative thereof) from 38186904Ssam * the apps directory (application code) you must include an acknowledgement: 39186904Ssam * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40186904Ssam * 41186904Ssam * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42186904Ssam * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43186904Ssam * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44186904Ssam * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45186904Ssam * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46186904Ssam * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47186904Ssam * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48186904Ssam * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49186904Ssam * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50186904Ssam * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51186904Ssam * SUCH DAMAGE. 52186904Ssam * 53186904Ssam * The licence and distribution terms for any publically available version or 54186904Ssam * derivative of this code cannot be changed. i.e. this code cannot simply be 55186904Ssam * copied and put under another distribution licence 56186904Ssam * [including the GNU Public Licence.] 57186904Ssam */ 58186904Ssam/* ==================================================================== 59186904Ssam * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60186904Ssam * 61186904Ssam * Redistribution and use in source and binary forms, with or without 62186904Ssam * modification, are permitted provided that the following conditions 63186904Ssam * are met: 64186904Ssam * 65186904Ssam * 1. Redistributions of source code must retain the above copyright 66186904Ssam * notice, this list of conditions and the following disclaimer. 67186904Ssam * 68186904Ssam * 2. Redistributions in binary form must reproduce the above copyright 69186904Ssam * notice, this list of conditions and the following disclaimer in 70186904Ssam * the documentation and/or other materials provided with the 71186904Ssam * distribution. 72186904Ssam * 73186904Ssam * 3. All advertising materials mentioning features or use of this 74186904Ssam * software must display the following acknowledgment: 75186904Ssam * "This product includes software developed by the OpenSSL Project 76186904Ssam * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77186904Ssam * 78186904Ssam * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79186904Ssam * endorse or promote products derived from this software without 80186904Ssam * prior written permission. For written permission, please contact 81187899Ssam * openssl-core@openssl.org. 82187899Ssam * 83187899Ssam * 5. Products derived from this software may not be called "OpenSSL" 84188782Ssam * nor may "OpenSSL" appear in their names without prior written 85188782Ssam * permission of the OpenSSL Project. 86188782Ssam * 87188782Ssam * 6. Redistributions of any form whatsoever must retain the following 88188782Ssam * acknowledgment: 89188782Ssam * "This product includes software developed by the OpenSSL Project 90187899Ssam * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91187899Ssam * 92187899Ssam * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93187899Ssam * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94187899Ssam * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95187899Ssam * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96186904Ssam * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97190455Ssam * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98190455Ssam * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99190455Ssam * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100190455Ssam * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101190455Ssam * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102190455Ssam * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103190455Ssam * OF THE POSSIBILITY OF SUCH DAMAGE. 104190455Ssam * ==================================================================== 105190455Ssam * 106190455Ssam * This product includes cryptographic software written by Eric Young 107186904Ssam * (eay@cryptsoft.com). This product includes software written by Tim 108186904Ssam * Hudson (tjh@cryptsoft.com). 109186904Ssam * 110186904Ssam */ 111186904Ssam 112186904Ssam 113186904Ssam#include "cryptlib.h" 114186904Ssam#include "bn_lcl.h" 115186904Ssam 116186904Ssam#include <stdlib.h> 117186904Ssam#ifdef _WIN32 118186904Ssam# include <malloc.h> 119187899Ssam# ifndef alloca 120187899Ssam# define alloca _alloca 121187899Ssam# endif 122187899Ssam#elif defined(__GNUC__) 123187899Ssam# ifndef alloca 124187899Ssam# define alloca(s) __builtin_alloca((s)) 125187899Ssam# endif 126186904Ssam#endif 127186904Ssam 128186904Ssam/* maximum precomputation table size for *variable* sliding windows */ 129186904Ssam#define TABLE_SIZE 32 130186904Ssam 131186904Ssam/* this one works - simple but works */ 132186904Ssamint BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 133186904Ssam { 134186904Ssam int i,bits,ret=0; 135186904Ssam BIGNUM *v,*rr; 136186904Ssam 137186904Ssam if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) 138186904Ssam { 139186904Ssam /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 140186904Ssam BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 141186904Ssam return -1; 142186904Ssam } 143186904Ssam 144186904Ssam BN_CTX_start(ctx); 145186904Ssam if ((r == a) || (r == p)) 146186904Ssam rr = BN_CTX_get(ctx); 147186904Ssam else 148186904Ssam rr = r; 149186904Ssam v = BN_CTX_get(ctx); 150186904Ssam if (rr == NULL || v == NULL) goto err; 151186904Ssam 152186904Ssam if (BN_copy(v,a) == NULL) goto err; 153186904Ssam bits=BN_num_bits(p); 154189980Ssam 155186904Ssam if (BN_is_odd(p)) 156186904Ssam { if (BN_copy(rr,a) == NULL) goto err; } 157186904Ssam else { if (!BN_one(rr)) goto err; } 158186904Ssam 159186904Ssam for (i=1; i<bits; i++) 160186904Ssam { 161187899Ssam if (!BN_sqr(v,v,ctx)) goto err; 162187899Ssam if (BN_is_bit_set(p,i)) 163187899Ssam { 164187899Ssam if (!BN_mul(rr,rr,v,ctx)) goto err; 165187899Ssam } 166187899Ssam } 167188782Ssam ret=1; 168188782Ssamerr: 169186904Ssam if (r != rr) BN_copy(r,rr); 170186904Ssam BN_CTX_end(ctx); 171186904Ssam bn_check_top(r); 172186904Ssam return(ret); 173186904Ssam } 174186904Ssam 175186904Ssam 176186904Ssamint BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 177186904Ssam BN_CTX *ctx) 178186904Ssam { 179186904Ssam int ret; 180186904Ssam 181186904Ssam bn_check_top(a); 182186904Ssam bn_check_top(p); 183186904Ssam bn_check_top(m); 184186904Ssam 185186904Ssam /* For even modulus m = 2^k*m_odd, it might make sense to compute 186186904Ssam * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 187186904Ssam * exponentiation for the odd part), using appropriate exponent 188186904Ssam * reductions, and combine the results using the CRT. 189186904Ssam * 190186904Ssam * For now, we use Montgomery only if the modulus is odd; otherwise, 191186904Ssam * exponentiation using the reciprocal-based quick remaindering 192186904Ssam * algorithm is used. 193186904Ssam * 194188467Ssam * (Timing obtained with expspeed.c [computations a^p mod m 195188467Ssam * where a, p, m are of the same length: 256, 512, 1024, 2048, 196188467Ssam * 4096, 8192 bits], compared to the running time of the 197188467Ssam * standard algorithm: 198188467Ssam * 199188467Ssam * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 200188467Ssam * 55 .. 77 % [UltraSparc processor, but 201188467Ssam * debug-solaris-sparcv8-gcc conf.] 202188467Ssam * 203186904Ssam * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 204186904Ssam * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 205186904Ssam * 206186904Ssam * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 207186904Ssam * at 2048 and more bits, but at 512 and 1024 bits, it was 208186904Ssam * slower even than the standard algorithm! 209186904Ssam * 210188467Ssam * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 211186904Ssam * should be obtained when the new Montgomery reduction code 212186904Ssam * has been integrated into OpenSSL.) 213186904Ssam */ 214188467Ssam 215186904Ssam#define MONT_MUL_MOD 216186904Ssam#define MONT_EXP_WORD 217186904Ssam#define RECP_MUL_MOD 218186904Ssam 219186904Ssam#ifdef MONT_MUL_MOD 220186904Ssam /* I have finally been able to take out this pre-condition of 221186904Ssam * the top bit being set. It was caused by an error in BN_div 222186904Ssam * with negatives. There was also another problem when for a^b%m 223186904Ssam * a >= m. eay 07-May-97 */ 224186904Ssam/* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 225186904Ssam 226186904Ssam if (BN_is_odd(m)) 227186904Ssam { 228186904Ssam# ifdef MONT_EXP_WORD 229186904Ssam if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) 230186904Ssam { 231186904Ssam BN_ULONG A = a->d[0]; 232188467Ssam ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL); 233188467Ssam } 234188467Ssam else 235188467Ssam# endif 236186904Ssam ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL); 237186904Ssam } 238186904Ssam else 239186904Ssam#endif 240186904Ssam#ifdef RECP_MUL_MOD 241186904Ssam { ret=BN_mod_exp_recp(r,a,p,m,ctx); } 242186904Ssam#else 243186904Ssam { ret=BN_mod_exp_simple(r,a,p,m,ctx); } 244186904Ssam#endif 245186904Ssam 246186904Ssam bn_check_top(r); 247186904Ssam return(ret); 248186904Ssam } 249186904Ssam 250186904Ssam 251186904Ssamint BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 252186904Ssam const BIGNUM *m, BN_CTX *ctx) 253188925Ssam { 254188925Ssam int i,j,bits,ret=0,wstart,wend,window,wvalue; 255186904Ssam int start=1; 256186904Ssam BIGNUM *aa; 257188925Ssam /* Table of variables obtained from 'ctx' */ 258188925Ssam BIGNUM *val[TABLE_SIZE]; 259188925Ssam BN_RECP_CTX recp; 260188925Ssam 261186904Ssam if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) 262186904Ssam { 263186904Ssam /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 264186904Ssam BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 265186904Ssam return -1; 266186904Ssam } 267186904Ssam 268186904Ssam bits=BN_num_bits(p); 269186904Ssam 270186904Ssam if (bits == 0) 271186904Ssam { 272186904Ssam ret = BN_one(r); 273186904Ssam return ret; 274186904Ssam } 275186904Ssam 276186904Ssam BN_CTX_start(ctx); 277186904Ssam aa = BN_CTX_get(ctx); 278186904Ssam val[0] = BN_CTX_get(ctx); 279186904Ssam if(!aa || !val[0]) goto err; 280186904Ssam 281186904Ssam BN_RECP_CTX_init(&recp); 282186904Ssam if (m->neg) 283186904Ssam { 284186904Ssam /* ignore sign of 'm' */ 285186904Ssam if (!BN_copy(aa, m)) goto err; 286186904Ssam aa->neg = 0; 287186904Ssam if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err; 288186904Ssam } 289186904Ssam else 290186904Ssam { 291186904Ssam if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err; 292186904Ssam } 293186904Ssam 294186904Ssam if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */ 295186904Ssam if (BN_is_zero(val[0])) 296186904Ssam { 297186904Ssam BN_zero(r); 298186904Ssam ret = 1; 299186904Ssam goto err; 300186904Ssam } 301186904Ssam 302186904Ssam window = BN_window_bits_for_exponent_size(bits); 303186904Ssam if (window > 1) 304186904Ssam { 305186904Ssam if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx)) 306186904Ssam goto err; /* 2 */ 307186904Ssam j=1<<(window-1); 308186904Ssam for (i=1; i<j; i++) 309186904Ssam { 310186904Ssam if(((val[i] = BN_CTX_get(ctx)) == NULL) || 311186904Ssam !BN_mod_mul_reciprocal(val[i],val[i-1], 312186904Ssam aa,&recp,ctx)) 313186904Ssam goto err; 314186904Ssam } 315186904Ssam } 316186904Ssam 317186904Ssam start=1; /* This is used to avoid multiplication etc 318186904Ssam * when there is only the value '1' in the 319186904Ssam * buffer. */ 320186904Ssam wvalue=0; /* The 'value' of the window */ 321186904Ssam wstart=bits-1; /* The top bit of the window */ 322186904Ssam wend=0; /* The bottom bit of the window */ 323186904Ssam 324186904Ssam if (!BN_one(r)) goto err; 325186904Ssam 326186904Ssam for (;;) 327186904Ssam { 328186904Ssam if (BN_is_bit_set(p,wstart) == 0) 329186904Ssam { 330186904Ssam if (!start) 331186904Ssam if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx)) 332186904Ssam goto err; 333186904Ssam if (wstart == 0) break; 334186904Ssam wstart--; 335186904Ssam continue; 336186904Ssam } 337186904Ssam /* We now have wstart on a 'set' bit, we now need to work out 338186904Ssam * how bit a window to do. To do this we need to scan 339186904Ssam * forward until the last set bit before the end of the 340186904Ssam * window */ 341186904Ssam j=wstart; 342186904Ssam wvalue=1; 343186904Ssam wend=0; 344186904Ssam for (i=1; i<window; i++) 345186904Ssam { 346186904Ssam if (wstart-i < 0) break; 347191016Ssam if (BN_is_bit_set(p,wstart-i)) 348186904Ssam { 349186904Ssam wvalue<<=(i-wend); 350186904Ssam wvalue|=1; 351186904Ssam wend=i; 352186904Ssam } 353186904Ssam } 354186904Ssam 355186904Ssam /* wend is the size of the current window */ 356186904Ssam j=wend+1; 357186904Ssam /* add the 'bytes above' */ 358186904Ssam if (!start) 359186904Ssam for (i=0; i<j; i++) 360186904Ssam { 361186904Ssam if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx)) 362186904Ssam goto err; 363186904Ssam } 364186904Ssam 365186904Ssam /* wvalue will be an odd number < 2^window */ 366186904Ssam if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx)) 367186904Ssam goto err; 368186904Ssam 369186904Ssam /* move the 'window' down further */ 370186904Ssam wstart-=wend+1; 371186904Ssam wvalue=0; 372186904Ssam start=0; 373186904Ssam if (wstart < 0) break; 374186904Ssam } 375186904Ssam ret=1; 376186904Ssamerr: 377186904Ssam BN_CTX_end(ctx); 378186904Ssam BN_RECP_CTX_free(&recp); 379186904Ssam bn_check_top(r); 380186904Ssam return(ret); 381186904Ssam } 382186904Ssam 383186904Ssam 384189980Ssamint BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 385186904Ssam const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 386186904Ssam { 387186904Ssam int i,j,bits,ret=0,wstart,wend,window,wvalue; 388186904Ssam int start=1; 389186904Ssam BIGNUM *d,*r; 390186904Ssam const BIGNUM *aa; 391189980Ssam /* Table of variables obtained from 'ctx' */ 392186904Ssam BIGNUM *val[TABLE_SIZE]; 393186904Ssam BN_MONT_CTX *mont=NULL; 394186904Ssam 395186904Ssam if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) 396189980Ssam { 397189980Ssam return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 398189980Ssam } 399189981Ssam 400189981Ssam bn_check_top(a); 401189981Ssam bn_check_top(p); 402189980Ssam bn_check_top(m); 403189980Ssam 404189980Ssam if (!BN_is_odd(m)) 405189980Ssam { 406189980Ssam BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); 407189980Ssam return(0); 408189980Ssam } 409189981Ssam bits=BN_num_bits(p); 410189981Ssam if (bits == 0) 411189981Ssam { 412189980Ssam ret = BN_one(rr); 413189980Ssam return ret; 414189980Ssam } 415189980Ssam 416189980Ssam BN_CTX_start(ctx); 417189980Ssam d = BN_CTX_get(ctx); 418189981Ssam r = BN_CTX_get(ctx); 419189981Ssam val[0] = BN_CTX_get(ctx); 420189981Ssam if (!d || !r || !val[0]) goto err; 421189980Ssam 422189980Ssam /* If this is not done, things will break in the montgomery 423189980Ssam * part */ 424189980Ssam 425189980Ssam if (in_mont != NULL) 426186904Ssam mont=in_mont; 427186904Ssam else 428186904Ssam { 429186904Ssam if ((mont=BN_MONT_CTX_new()) == NULL) goto err; 430189980Ssam if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; 431186904Ssam } 432186904Ssam 433186904Ssam if (a->neg || BN_ucmp(a,m) >= 0) 434186904Ssam { 435189980Ssam if (!BN_nnmod(val[0],a,m,ctx)) 436189980Ssam goto err; 437186904Ssam aa= val[0]; 438186904Ssam } 439186904Ssam else 440189980Ssam aa=a; 441189980Ssam if (BN_is_zero(aa)) 442189980Ssam { 443189980Ssam BN_zero(rr); 444189980Ssam ret = 1; 445189980Ssam goto err; 446189980Ssam } 447189980Ssam if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */ 448189980Ssam 449189980Ssam window = BN_window_bits_for_exponent_size(bits); 450186904Ssam if (window > 1) 451189980Ssam { 452186904Ssam if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */ 453186904Ssam j=1<<(window-1); 454186904Ssam for (i=1; i<j; i++) 455186904Ssam { 456189980Ssam if(((val[i] = BN_CTX_get(ctx)) == NULL) || 457189980Ssam !BN_mod_mul_montgomery(val[i],val[i-1], 458189980Ssam d,mont,ctx)) 459189980Ssam goto err; 460189980Ssam } 461189980Ssam } 462189980Ssam 463189980Ssam start=1; /* This is used to avoid multiplication etc 464186904Ssam * when there is only the value '1' in the 465186904Ssam * buffer. */ 466186904Ssam wvalue=0; /* The 'value' of the window */ 467186904Ssam wstart=bits-1; /* The top bit of the window */ 468186904Ssam wend=0; /* The bottom bit of the window */ 469189980Ssam 470191017Ssam if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; 471186904Ssam for (;;) 472186904Ssam { 473186904Ssam if (BN_is_bit_set(p,wstart) == 0) 474186904Ssam { 475186904Ssam if (!start) 476186904Ssam { 477186904Ssam if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) 478186904Ssam goto err; 479186904Ssam } 480189980Ssam if (wstart == 0) break; 481186904Ssam wstart--; 482186904Ssam continue; 483186904Ssam } 484186904Ssam /* We now have wstart on a 'set' bit, we now need to work out 485186904Ssam * how bit a window to do. To do this we need to scan 486186904Ssam * forward until the last set bit before the end of the 487186904Ssam * window */ 488186904Ssam j=wstart; 489186904Ssam wvalue=1; 490186904Ssam wend=0; 491186904Ssam for (i=1; i<window; i++) 492186904Ssam { 493186904Ssam if (wstart-i < 0) break; 494186904Ssam if (BN_is_bit_set(p,wstart-i)) 495186904Ssam { 496186904Ssam wvalue<<=(i-wend); 497186904Ssam wvalue|=1; 498186904Ssam wend=i; 499186904Ssam } 500186904Ssam } 501186904Ssam 502186904Ssam /* wend is the size of the current window */ 503186904Ssam j=wend+1; 504186904Ssam /* add the 'bytes above' */ 505186904Ssam if (!start) 506186904Ssam for (i=0; i<j; i++) 507186904Ssam { 508186904Ssam if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) 509186904Ssam goto err; 510186904Ssam } 511186904Ssam 512186904Ssam /* wvalue will be an odd number < 2^window */ 513186904Ssam if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx)) 514186904Ssam goto err; 515186904Ssam 516189980Ssam /* move the 'window' down further */ 517186904Ssam wstart-=wend+1; 518186904Ssam wvalue=0; 519189980Ssam start=0; 520189980Ssam if (wstart < 0) break; 521186904Ssam } 522186904Ssam if (!BN_from_montgomery(rr,r,mont,ctx)) goto err; 523189980Ssam ret=1; 524189980Ssamerr: 525189980Ssam if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); 526189980Ssam BN_CTX_end(ctx); 527189980Ssam bn_check_top(rr); 528189980Ssam return(ret); 529189980Ssam } 530189980Ssam 531189980Ssam 532189980Ssam/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout 533189980Ssam * so that accessing any of these table values shows the same access pattern as far 534189980Ssam * as cache lines are concerned. The following functions are used to transfer a BIGNUM 535186904Ssam * from/to that table. */ 536186904Ssam 537186904Ssamstatic int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width) 538186904Ssam { 539186904Ssam size_t i, j; 540186904Ssam 541186904Ssam if (top > b->top) 542186904Ssam top = b->top; /* this works because 'buf' is explicitly zeroed */ 543186904Ssam for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) 544186904Ssam { 545186904Ssam buf[j] = ((unsigned char*)b->d)[i]; 546186904Ssam } 547186904Ssam 548186904Ssam return 1; 549186904Ssam } 550186904Ssam 551186904Ssamstatic int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width) 552186904Ssam { 553186904Ssam size_t i, j; 554186904Ssam 555186904Ssam if (bn_wexpand(b, top) == NULL) 556186904Ssam return 0; 557186904Ssam 558186904Ssam for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width) 559186904Ssam { 560186904Ssam ((unsigned char*)b->d)[i] = buf[j]; 561186904Ssam } 562186904Ssam 563186904Ssam b->top = top; 564186904Ssam bn_correct_top(b); 565186904Ssam return 1; 566186904Ssam } 567186904Ssam 568186904Ssam/* Given a pointer value, compute the next address that is a cache line multiple. */ 569186904Ssam#define MOD_EXP_CTIME_ALIGN(x_) \ 570186904Ssam ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 571186904Ssam 572186904Ssam/* This variant of BN_mod_exp_mont() uses fixed windows and the special 573186904Ssam * precomputation memory layout to limit data-dependency to a minimum 574186904Ssam * to protect secret exponents (cf. the hyper-threading timing attacks 575186904Ssam * pointed out by Colin Percival, 576186904Ssam * http://www.daemonology.net/hyperthreading-considered-harmful/) 577186904Ssam */ 578186904Ssamint BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 579186904Ssam const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 580186904Ssam { 581186904Ssam int i,bits,ret=0,window,wvalue; 582186904Ssam int top; 583186904Ssam BN_MONT_CTX *mont=NULL; 584186904Ssam 585186904Ssam int numPowers; 586186904Ssam unsigned char *powerbufFree=NULL; 587186904Ssam int powerbufLen = 0; 588186904Ssam unsigned char *powerbuf=NULL; 589186904Ssam BIGNUM tmp, am; 590186904Ssam 591186904Ssam bn_check_top(a); 592186904Ssam bn_check_top(p); 593186904Ssam bn_check_top(m); 594186904Ssam 595186904Ssam top = m->top; 596186904Ssam 597186904Ssam if (!(m->d[0] & 1)) 598186904Ssam { 599186904Ssam BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS); 600186904Ssam return(0); 601186904Ssam } 602186904Ssam bits=BN_num_bits(p); 603186904Ssam if (bits == 0) 604186904Ssam { 605186904Ssam ret = BN_one(rr); 606186904Ssam return ret; 607186904Ssam } 608186904Ssam 609186904Ssam BN_CTX_start(ctx); 610186904Ssam 611186904Ssam /* Allocate a montgomery context if it was not supplied by the caller. 612186904Ssam * If this is not done, things will break in the montgomery part. 613186904Ssam */ 614186904Ssam if (in_mont != NULL) 615186904Ssam mont=in_mont; 616186904Ssam else 617186904Ssam { 618186904Ssam if ((mont=BN_MONT_CTX_new()) == NULL) goto err; 619186904Ssam if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; 620186904Ssam } 621186904Ssam 622186904Ssam /* Get the window size to use with size of p. */ 623186904Ssam window = BN_window_bits_for_ctime_exponent_size(bits); 624186904Ssam#if defined(OPENSSL_BN_ASM_MONT5) 625186904Ssam if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */ 626186904Ssam#endif 627186904Ssam 628186904Ssam /* Allocate a buffer large enough to hold all of the pre-computed 629186904Ssam * powers of am, am itself and tmp. 630186904Ssam */ 631186904Ssam numPowers = 1 << window; 632186904Ssam powerbufLen = sizeof(m->d[0])*(top*numPowers + 633186904Ssam ((2*top)>numPowers?(2*top):numPowers)); 634186904Ssam#ifdef alloca 635186904Ssam if (powerbufLen < 3072) 636186904Ssam powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 637186904Ssam else 638186904Ssam#endif 639186904Ssam if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) 640186904Ssam goto err; 641186904Ssam 642186904Ssam powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 643186904Ssam memset(powerbuf, 0, powerbufLen); 644186904Ssam 645186904Ssam#ifdef alloca 646186904Ssam if (powerbufLen < 3072) 647186904Ssam powerbufFree = NULL; 648186904Ssam#endif 649186904Ssam 650186904Ssam /* lay down tmp and am right after powers table */ 651186904Ssam tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers); 652186904Ssam am.d = tmp.d + top; 653186904Ssam tmp.top = am.top = 0; 654186904Ssam tmp.dmax = am.dmax = top; 655186904Ssam tmp.neg = am.neg = 0; 656186904Ssam tmp.flags = am.flags = BN_FLG_STATIC_DATA; 657186904Ssam 658186904Ssam /* prepare a^0 in Montgomery domain */ 659186904Ssam#if 1 660189980Ssam if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx)) goto err; 661186904Ssam#else 662186904Ssam tmp.d[0] = (0-m->d[0])&BN_MASK2; /* 2^(top*BN_BITS2) - m */ 663186904Ssam for (i=1;i<top;i++) 664186904Ssam tmp.d[i] = (~m->d[i])&BN_MASK2; 665186904Ssam tmp.top = top; 666186904Ssam#endif 667186904Ssam 668189980Ssam /* prepare a^1 in Montgomery domain */ 669189980Ssam if (a->neg || BN_ucmp(a,m) >= 0) 670186904Ssam { 671189980Ssam if (!BN_mod(&am,a,m,ctx)) goto err; 672186904Ssam if (!BN_to_montgomery(&am,&am,mont,ctx)) goto err; 673189980Ssam } 674189980Ssam else if (!BN_to_montgomery(&am,a,mont,ctx)) goto err; 675186904Ssam 676186904Ssam#if defined(OPENSSL_BN_ASM_MONT5) 677186904Ssam /* This optimization uses ideas from http://eprint.iacr.org/2011/239, 678186904Ssam * specifically optimization of cache-timing attack countermeasures 679186904Ssam * and pre-computation optimization. */ 680186904Ssam 681186904Ssam /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 682186904Ssam * 512-bit RSA is hardly relevant, we omit it to spare size... */ 683186904Ssam if (window==5 && top>1) 684186904Ssam { 685186904Ssam void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap, 686186904Ssam const void *table,const BN_ULONG *np, 687186904Ssam const BN_ULONG *n0,int num,int power); 688186904Ssam void bn_scatter5(const BN_ULONG *inp,size_t num, 689186904Ssam void *table,size_t power); 690186904Ssam void bn_gather5(BN_ULONG *out,size_t num, 691186904Ssam void *table,size_t power); 692186904Ssam 693186904Ssam BN_ULONG *np=mont->N.d, *n0=mont->n0; 694186904Ssam 695186904Ssam /* BN_to_montgomery can contaminate words above .top 696186904Ssam * [in BN_DEBUG[_DEBUG] build]... */ 697186904Ssam for (i=am.top; i<top; i++) am.d[i]=0; 698186904Ssam for (i=tmp.top; i<top; i++) tmp.d[i]=0; 699186904Ssam 700186904Ssam bn_scatter5(tmp.d,top,powerbuf,0); 701186904Ssam bn_scatter5(am.d,am.top,powerbuf,1); 702186904Ssam bn_mul_mont(tmp.d,am.d,am.d,np,n0,top); 703186904Ssam bn_scatter5(tmp.d,top,powerbuf,2); 704186904Ssam 705186904Ssam#if 0 706186904Ssam for (i=3; i<32; i++) 707186904Ssam { 708186904Ssam /* Calculate a^i = a^(i-1) * a */ 709186904Ssam bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); 710186904Ssam bn_scatter5(tmp.d,top,powerbuf,i); 711186904Ssam } 712186904Ssam#else 713186904Ssam /* same as above, but uses squaring for 1/2 of operations */ 714186904Ssam for (i=4; i<32; i*=2) 715186904Ssam { 716186904Ssam bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 717186904Ssam bn_scatter5(tmp.d,top,powerbuf,i); 718190384Ssam } 719190384Ssam for (i=3; i<8; i+=2) 720186904Ssam { 721186904Ssam int j; 722186904Ssam bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); 723186904Ssam bn_scatter5(tmp.d,top,powerbuf,i); 724186904Ssam for (j=2*i; j<32; j*=2) 725186904Ssam { 726186904Ssam bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 727186904Ssam bn_scatter5(tmp.d,top,powerbuf,j); 728186904Ssam } 729186904Ssam } 730186904Ssam for (; i<16; i+=2) 731186904Ssam { 732186904Ssam bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); 733186904Ssam bn_scatter5(tmp.d,top,powerbuf,i); 734186904Ssam bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 735186904Ssam bn_scatter5(tmp.d,top,powerbuf,2*i); 736186904Ssam } 737186904Ssam for (; i<32; i+=2) 738186904Ssam { 739186904Ssam bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1); 740190384Ssam bn_scatter5(tmp.d,top,powerbuf,i); 741186904Ssam } 742186904Ssam#endif 743186904Ssam bits--; 744190384Ssam for (wvalue=0, i=bits%5; i>=0; i--,bits--) 745186904Ssam wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); 746190384Ssam bn_gather5(tmp.d,top,powerbuf,wvalue); 747190384Ssam 748186904Ssam /* Scan the exponent one window at a time starting from the most 749186904Ssam * significant bits. 750186904Ssam */ 751186904Ssam while (bits >= 0) 752186904Ssam { 753186904Ssam for (wvalue=0, i=0; i<5; i++,bits--) 754186904Ssam wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); 755186904Ssam 756186904Ssam bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 757186904Ssam bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 758186904Ssam bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 759186904Ssam bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 760186904Ssam bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top); 761186904Ssam bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue); 762186904Ssam } 763186904Ssam 764189980Ssam tmp.top=top; 765186904Ssam bn_correct_top(&tmp); 766186904Ssam } 767186904Ssam else 768186904Ssam#endif 769186904Ssam { 770186904Ssam if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err; 771186904Ssam if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers)) goto err; 772186904Ssam 773186904Ssam /* If the window size is greater than 1, then calculate 774186904Ssam * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 775186904Ssam * (even powers could instead be computed as (a^(i/2))^2 776186904Ssam * to use the slight performance advantage of sqr over mul). 777186904Ssam */ 778189980Ssam if (window > 1) 779186904Ssam { 780186904Ssam if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx)) goto err; 781186904Ssam if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err; 782186904Ssam for (i=3; i<numPowers; i++) 783186904Ssam { 784186904Ssam /* Calculate a^i = a^(i-1) * a */ 785186904Ssam if (!BN_mod_mul_montgomery(&tmp,&am,&tmp,mont,ctx)) 786189980Ssam goto err; 787186904Ssam if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, numPowers)) goto err; 788186904Ssam } 789186904Ssam } 790186904Ssam 791186904Ssam bits--; 792186904Ssam for (wvalue=0, i=bits%window; i>=0; i--,bits--) 793186904Ssam wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); 794190384Ssam if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err; 795186904Ssam 796186904Ssam /* Scan the exponent one window at a time starting from the most 797186904Ssam * significant bits. 798190384Ssam */ 799 while (bits >= 0) 800 { 801 wvalue=0; /* The 'value' of the window */ 802 803 /* Scan the window, squaring the result as we go */ 804 for (i=0; i<window; i++,bits--) 805 { 806 if (!BN_mod_mul_montgomery(&tmp,&tmp,&tmp,mont,ctx)) goto err; 807 wvalue = (wvalue<<1)+BN_is_bit_set(p,bits); 808 } 809 810 /* Fetch the appropriate pre-computed value from the pre-buf */ 811 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err; 812 813 /* Multiply the result into the intermediate result */ 814 if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err; 815 } 816 } 817 818 /* Convert the final result from montgomery to standard format */ 819 if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err; 820 ret=1; 821err: 822 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); 823 if (powerbuf!=NULL) 824 { 825 OPENSSL_cleanse(powerbuf,powerbufLen); 826 if (powerbufFree) OPENSSL_free(powerbufFree); 827 } 828 BN_CTX_end(ctx); 829 return(ret); 830 } 831 832int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 833 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 834 { 835 BN_MONT_CTX *mont = NULL; 836 int b, bits, ret=0; 837 int r_is_one; 838 BN_ULONG w, next_w; 839 BIGNUM *d, *r, *t; 840 BIGNUM *swap_tmp; 841#define BN_MOD_MUL_WORD(r, w, m) \ 842 (BN_mul_word(r, (w)) && \ 843 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 844 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 845 /* BN_MOD_MUL_WORD is only used with 'w' large, 846 * so the BN_ucmp test is probably more overhead 847 * than always using BN_mod (which uses BN_copy if 848 * a similar test returns true). */ 849 /* We can use BN_mod and do not need BN_nnmod because our 850 * accumulator is never negative (the result of BN_mod does 851 * not depend on the sign of the modulus). 852 */ 853#define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 854 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 855 856 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) 857 { 858 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 859 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 860 return -1; 861 } 862 863 bn_check_top(p); 864 bn_check_top(m); 865 866 if (!BN_is_odd(m)) 867 { 868 BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS); 869 return(0); 870 } 871 if (m->top == 1) 872 a %= m->d[0]; /* make sure that 'a' is reduced */ 873 874 bits = BN_num_bits(p); 875 if (bits == 0) 876 { 877 /* x**0 mod 1 is still zero. */ 878 if (BN_is_one(m)) 879 { 880 ret = 1; 881 BN_zero(rr); 882 } 883 else 884 ret = BN_one(rr); 885 return ret; 886 } 887 if (a == 0) 888 { 889 BN_zero(rr); 890 ret = 1; 891 return ret; 892 } 893 894 BN_CTX_start(ctx); 895 d = BN_CTX_get(ctx); 896 r = BN_CTX_get(ctx); 897 t = BN_CTX_get(ctx); 898 if (d == NULL || r == NULL || t == NULL) goto err; 899 900 if (in_mont != NULL) 901 mont=in_mont; 902 else 903 { 904 if ((mont = BN_MONT_CTX_new()) == NULL) goto err; 905 if (!BN_MONT_CTX_set(mont, m, ctx)) goto err; 906 } 907 908 r_is_one = 1; /* except for Montgomery factor */ 909 910 /* bits-1 >= 0 */ 911 912 /* The result is accumulated in the product r*w. */ 913 w = a; /* bit 'bits-1' of 'p' is always set */ 914 for (b = bits-2; b >= 0; b--) 915 { 916 /* First, square r*w. */ 917 next_w = w*w; 918 if ((next_w/w) != w) /* overflow */ 919 { 920 if (r_is_one) 921 { 922 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; 923 r_is_one = 0; 924 } 925 else 926 { 927 if (!BN_MOD_MUL_WORD(r, w, m)) goto err; 928 } 929 next_w = 1; 930 } 931 w = next_w; 932 if (!r_is_one) 933 { 934 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err; 935 } 936 937 /* Second, multiply r*w by 'a' if exponent bit is set. */ 938 if (BN_is_bit_set(p, b)) 939 { 940 next_w = w*a; 941 if ((next_w/a) != w) /* overflow */ 942 { 943 if (r_is_one) 944 { 945 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; 946 r_is_one = 0; 947 } 948 else 949 { 950 if (!BN_MOD_MUL_WORD(r, w, m)) goto err; 951 } 952 next_w = a; 953 } 954 w = next_w; 955 } 956 } 957 958 /* Finally, set r:=r*w. */ 959 if (w != 1) 960 { 961 if (r_is_one) 962 { 963 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err; 964 r_is_one = 0; 965 } 966 else 967 { 968 if (!BN_MOD_MUL_WORD(r, w, m)) goto err; 969 } 970 } 971 972 if (r_is_one) /* can happen only if a == 1*/ 973 { 974 if (!BN_one(rr)) goto err; 975 } 976 else 977 { 978 if (!BN_from_montgomery(rr, r, mont, ctx)) goto err; 979 } 980 ret = 1; 981err: 982 if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); 983 BN_CTX_end(ctx); 984 bn_check_top(rr); 985 return(ret); 986 } 987 988 989/* The old fallback, simple version :-) */ 990int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 991 const BIGNUM *m, BN_CTX *ctx) 992 { 993 int i,j,bits,ret=0,wstart,wend,window,wvalue; 994 int start=1; 995 BIGNUM *d; 996 /* Table of variables obtained from 'ctx' */ 997 BIGNUM *val[TABLE_SIZE]; 998 999 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) 1000 { 1001 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1002 BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1003 return -1; 1004 } 1005 1006 bits=BN_num_bits(p); 1007 1008 if (bits == 0) 1009 { 1010 ret = BN_one(r); 1011 return ret; 1012 } 1013 1014 BN_CTX_start(ctx); 1015 d = BN_CTX_get(ctx); 1016 val[0] = BN_CTX_get(ctx); 1017 if(!d || !val[0]) goto err; 1018 1019 if (!BN_nnmod(val[0],a,m,ctx)) goto err; /* 1 */ 1020 if (BN_is_zero(val[0])) 1021 { 1022 BN_zero(r); 1023 ret = 1; 1024 goto err; 1025 } 1026 1027 window = BN_window_bits_for_exponent_size(bits); 1028 if (window > 1) 1029 { 1030 if (!BN_mod_mul(d,val[0],val[0],m,ctx)) 1031 goto err; /* 2 */ 1032 j=1<<(window-1); 1033 for (i=1; i<j; i++) 1034 { 1035 if(((val[i] = BN_CTX_get(ctx)) == NULL) || 1036 !BN_mod_mul(val[i],val[i-1],d,m,ctx)) 1037 goto err; 1038 } 1039 } 1040 1041 start=1; /* This is used to avoid multiplication etc 1042 * when there is only the value '1' in the 1043 * buffer. */ 1044 wvalue=0; /* The 'value' of the window */ 1045 wstart=bits-1; /* The top bit of the window */ 1046 wend=0; /* The bottom bit of the window */ 1047 1048 if (!BN_one(r)) goto err; 1049 1050 for (;;) 1051 { 1052 if (BN_is_bit_set(p,wstart) == 0) 1053 { 1054 if (!start) 1055 if (!BN_mod_mul(r,r,r,m,ctx)) 1056 goto err; 1057 if (wstart == 0) break; 1058 wstart--; 1059 continue; 1060 } 1061 /* We now have wstart on a 'set' bit, we now need to work out 1062 * how bit a window to do. To do this we need to scan 1063 * forward until the last set bit before the end of the 1064 * window */ 1065 j=wstart; 1066 wvalue=1; 1067 wend=0; 1068 for (i=1; i<window; i++) 1069 { 1070 if (wstart-i < 0) break; 1071 if (BN_is_bit_set(p,wstart-i)) 1072 { 1073 wvalue<<=(i-wend); 1074 wvalue|=1; 1075 wend=i; 1076 } 1077 } 1078 1079 /* wend is the size of the current window */ 1080 j=wend+1; 1081 /* add the 'bytes above' */ 1082 if (!start) 1083 for (i=0; i<j; i++) 1084 { 1085 if (!BN_mod_mul(r,r,r,m,ctx)) 1086 goto err; 1087 } 1088 1089 /* wvalue will be an odd number < 2^window */ 1090 if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx)) 1091 goto err; 1092 1093 /* move the 'window' down further */ 1094 wstart-=wend+1; 1095 wvalue=0; 1096 start=0; 1097 if (wstart < 0) break; 1098 } 1099 ret=1; 1100err: 1101 BN_CTX_end(ctx); 1102 bn_check_top(r); 1103 return(ret); 1104 } 1105