bn_exp.c revision 279264
1186904Ssam/* crypto/bn/bn_exp.c */
2186904Ssam/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3186904Ssam * All rights reserved.
4186904Ssam *
5186904Ssam * This package is an SSL implementation written
6186904Ssam * by Eric Young (eay@cryptsoft.com).
7186904Ssam * The implementation was written so as to conform with Netscapes SSL.
8186904Ssam *
9186904Ssam * This library is free for commercial and non-commercial use as long as
10186904Ssam * the following conditions are aheared to.  The following conditions
11186904Ssam * apply to all code found in this distribution, be it the RC4, RSA,
12186904Ssam * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13186904Ssam * included with this distribution is covered by the same copyright terms
14186904Ssam * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15186904Ssam *
16186904Ssam * Copyright remains Eric Young's, and as such any Copyright notices in
17186904Ssam * the code are not to be removed.
18186904Ssam * If this package is used in a product, Eric Young should be given attribution
19186904Ssam * as the author of the parts of the library used.
20186904Ssam * This can be in the form of a textual message at program startup or
21186904Ssam * in documentation (online or textual) provided with the package.
22186904Ssam *
23186904Ssam * Redistribution and use in source and binary forms, with or without
24186904Ssam * modification, are permitted provided that the following conditions
25186904Ssam * are met:
26186904Ssam * 1. Redistributions of source code must retain the copyright
27186904Ssam *    notice, this list of conditions and the following disclaimer.
28186904Ssam * 2. Redistributions in binary form must reproduce the above copyright
29186904Ssam *    notice, this list of conditions and the following disclaimer in the
30186904Ssam *    documentation and/or other materials provided with the distribution.
31186904Ssam * 3. All advertising materials mentioning features or use of this software
32186904Ssam *    must display the following acknowledgement:
33186904Ssam *    "This product includes cryptographic software written by
34186904Ssam *     Eric Young (eay@cryptsoft.com)"
35186904Ssam *    The word 'cryptographic' can be left out if the rouines from the library
36190455Ssam *    being used are not cryptographic related :-).
37186904Ssam * 4. If you include any Windows specific code (or a derivative thereof) from
38186904Ssam *    the apps directory (application code) you must include an acknowledgement:
39186904Ssam *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40186904Ssam *
41186904Ssam * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42186904Ssam * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43186904Ssam * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44186904Ssam * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45186904Ssam * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46186904Ssam * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47186904Ssam * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48186904Ssam * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49186904Ssam * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50186904Ssam * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51186904Ssam * SUCH DAMAGE.
52186904Ssam *
53186904Ssam * The licence and distribution terms for any publically available version or
54186904Ssam * derivative of this code cannot be changed.  i.e. this code cannot simply be
55186904Ssam * copied and put under another distribution licence
56186904Ssam * [including the GNU Public Licence.]
57186904Ssam */
58186904Ssam/* ====================================================================
59186904Ssam * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60186904Ssam *
61186904Ssam * Redistribution and use in source and binary forms, with or without
62186904Ssam * modification, are permitted provided that the following conditions
63186904Ssam * are met:
64186904Ssam *
65186904Ssam * 1. Redistributions of source code must retain the above copyright
66186904Ssam *    notice, this list of conditions and the following disclaimer.
67186904Ssam *
68186904Ssam * 2. Redistributions in binary form must reproduce the above copyright
69186904Ssam *    notice, this list of conditions and the following disclaimer in
70186904Ssam *    the documentation and/or other materials provided with the
71186904Ssam *    distribution.
72186904Ssam *
73186904Ssam * 3. All advertising materials mentioning features or use of this
74186904Ssam *    software must display the following acknowledgment:
75186904Ssam *    "This product includes software developed by the OpenSSL Project
76186904Ssam *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77186904Ssam *
78186904Ssam * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79186904Ssam *    endorse or promote products derived from this software without
80186904Ssam *    prior written permission. For written permission, please contact
81187899Ssam *    openssl-core@openssl.org.
82187899Ssam *
83187899Ssam * 5. Products derived from this software may not be called "OpenSSL"
84188782Ssam *    nor may "OpenSSL" appear in their names without prior written
85188782Ssam *    permission of the OpenSSL Project.
86188782Ssam *
87188782Ssam * 6. Redistributions of any form whatsoever must retain the following
88188782Ssam *    acknowledgment:
89188782Ssam *    "This product includes software developed by the OpenSSL Project
90187899Ssam *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91187899Ssam *
92187899Ssam * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93187899Ssam * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94187899Ssam * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95187899Ssam * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96186904Ssam * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97190455Ssam * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98190455Ssam * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99190455Ssam * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100190455Ssam * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101190455Ssam * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102190455Ssam * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103190455Ssam * OF THE POSSIBILITY OF SUCH DAMAGE.
104190455Ssam * ====================================================================
105190455Ssam *
106190455Ssam * This product includes cryptographic software written by Eric Young
107186904Ssam * (eay@cryptsoft.com).  This product includes software written by Tim
108186904Ssam * Hudson (tjh@cryptsoft.com).
109186904Ssam *
110186904Ssam */
111186904Ssam
112186904Ssam
113186904Ssam#include "cryptlib.h"
114186904Ssam#include "bn_lcl.h"
115186904Ssam
116186904Ssam#include <stdlib.h>
117186904Ssam#ifdef _WIN32
118186904Ssam# include <malloc.h>
119187899Ssam# ifndef alloca
120187899Ssam#  define alloca _alloca
121187899Ssam# endif
122187899Ssam#elif defined(__GNUC__)
123187899Ssam# ifndef alloca
124187899Ssam#  define alloca(s) __builtin_alloca((s))
125187899Ssam# endif
126186904Ssam#endif
127186904Ssam
128186904Ssam/* maximum precomputation table size for *variable* sliding windows */
129186904Ssam#define TABLE_SIZE	32
130186904Ssam
131186904Ssam/* this one works - simple but works */
132186904Ssamint BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
133186904Ssam	{
134186904Ssam	int i,bits,ret=0;
135186904Ssam	BIGNUM *v,*rr;
136186904Ssam
137186904Ssam	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
138186904Ssam		{
139186904Ssam		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
140186904Ssam		BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
141186904Ssam		return -1;
142186904Ssam		}
143186904Ssam
144186904Ssam	BN_CTX_start(ctx);
145186904Ssam	if ((r == a) || (r == p))
146186904Ssam		rr = BN_CTX_get(ctx);
147186904Ssam	else
148186904Ssam		rr = r;
149186904Ssam	v = BN_CTX_get(ctx);
150186904Ssam	if (rr == NULL || v == NULL) goto err;
151186904Ssam
152186904Ssam	if (BN_copy(v,a) == NULL) goto err;
153186904Ssam	bits=BN_num_bits(p);
154189980Ssam
155186904Ssam	if (BN_is_odd(p))
156186904Ssam		{ if (BN_copy(rr,a) == NULL) goto err; }
157186904Ssam	else	{ if (!BN_one(rr)) goto err; }
158186904Ssam
159186904Ssam	for (i=1; i<bits; i++)
160186904Ssam		{
161187899Ssam		if (!BN_sqr(v,v,ctx)) goto err;
162187899Ssam		if (BN_is_bit_set(p,i))
163187899Ssam			{
164187899Ssam			if (!BN_mul(rr,rr,v,ctx)) goto err;
165187899Ssam			}
166187899Ssam		}
167188782Ssam	ret=1;
168188782Ssamerr:
169186904Ssam	if (r != rr) BN_copy(r,rr);
170186904Ssam	BN_CTX_end(ctx);
171186904Ssam	bn_check_top(r);
172186904Ssam	return(ret);
173186904Ssam	}
174186904Ssam
175186904Ssam
176186904Ssamint BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
177186904Ssam	       BN_CTX *ctx)
178186904Ssam	{
179186904Ssam	int ret;
180186904Ssam
181186904Ssam	bn_check_top(a);
182186904Ssam	bn_check_top(p);
183186904Ssam	bn_check_top(m);
184186904Ssam
185186904Ssam	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
186186904Ssam	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
187186904Ssam	 * exponentiation for the odd part), using appropriate exponent
188186904Ssam	 * reductions, and combine the results using the CRT.
189186904Ssam	 *
190186904Ssam	 * For now, we use Montgomery only if the modulus is odd; otherwise,
191186904Ssam	 * exponentiation using the reciprocal-based quick remaindering
192186904Ssam	 * algorithm is used.
193186904Ssam	 *
194188467Ssam	 * (Timing obtained with expspeed.c [computations  a^p mod m
195188467Ssam	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
196188467Ssam	 * 4096, 8192 bits], compared to the running time of the
197188467Ssam	 * standard algorithm:
198188467Ssam	 *
199188467Ssam	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
200188467Ssam         *                     55 .. 77 %  [UltraSparc processor, but
201188467Ssam	 *                                  debug-solaris-sparcv8-gcc conf.]
202188467Ssam	 *
203186904Ssam	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
204186904Ssam	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
205186904Ssam	 *
206186904Ssam	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
207186904Ssam	 * at 2048 and more bits, but at 512 and 1024 bits, it was
208186904Ssam	 * slower even than the standard algorithm!
209186904Ssam	 *
210188467Ssam	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
211186904Ssam	 * should be obtained when the new Montgomery reduction code
212186904Ssam	 * has been integrated into OpenSSL.)
213186904Ssam	 */
214188467Ssam
215186904Ssam#define MONT_MUL_MOD
216186904Ssam#define MONT_EXP_WORD
217186904Ssam#define RECP_MUL_MOD
218186904Ssam
219186904Ssam#ifdef MONT_MUL_MOD
220186904Ssam	/* I have finally been able to take out this pre-condition of
221186904Ssam	 * the top bit being set.  It was caused by an error in BN_div
222186904Ssam	 * with negatives.  There was also another problem when for a^b%m
223186904Ssam	 * a >= m.  eay 07-May-97 */
224186904Ssam/*	if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
225186904Ssam
226186904Ssam	if (BN_is_odd(m))
227186904Ssam		{
228186904Ssam#  ifdef MONT_EXP_WORD
229186904Ssam		if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
230186904Ssam			{
231186904Ssam			BN_ULONG A = a->d[0];
232188467Ssam			ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
233188467Ssam			}
234188467Ssam		else
235188467Ssam#  endif
236186904Ssam			ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
237186904Ssam		}
238186904Ssam	else
239186904Ssam#endif
240186904Ssam#ifdef RECP_MUL_MOD
241186904Ssam		{ ret=BN_mod_exp_recp(r,a,p,m,ctx); }
242186904Ssam#else
243186904Ssam		{ ret=BN_mod_exp_simple(r,a,p,m,ctx); }
244186904Ssam#endif
245186904Ssam
246186904Ssam	bn_check_top(r);
247186904Ssam	return(ret);
248186904Ssam	}
249186904Ssam
250186904Ssam
251186904Ssamint BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
252186904Ssam		    const BIGNUM *m, BN_CTX *ctx)
253188925Ssam	{
254188925Ssam	int i,j,bits,ret=0,wstart,wend,window,wvalue;
255186904Ssam	int start=1;
256186904Ssam	BIGNUM *aa;
257188925Ssam	/* Table of variables obtained from 'ctx' */
258188925Ssam	BIGNUM *val[TABLE_SIZE];
259188925Ssam	BN_RECP_CTX recp;
260188925Ssam
261186904Ssam	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
262186904Ssam		{
263186904Ssam		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
264186904Ssam		BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
265186904Ssam		return -1;
266186904Ssam		}
267186904Ssam
268186904Ssam	bits=BN_num_bits(p);
269186904Ssam
270186904Ssam	if (bits == 0)
271186904Ssam		{
272186904Ssam		ret = BN_one(r);
273186904Ssam		return ret;
274186904Ssam		}
275186904Ssam
276186904Ssam	BN_CTX_start(ctx);
277186904Ssam	aa = BN_CTX_get(ctx);
278186904Ssam	val[0] = BN_CTX_get(ctx);
279186904Ssam	if(!aa || !val[0]) goto err;
280186904Ssam
281186904Ssam	BN_RECP_CTX_init(&recp);
282186904Ssam	if (m->neg)
283186904Ssam		{
284186904Ssam		/* ignore sign of 'm' */
285186904Ssam		if (!BN_copy(aa, m)) goto err;
286186904Ssam		aa->neg = 0;
287186904Ssam		if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
288186904Ssam		}
289186904Ssam	else
290186904Ssam		{
291186904Ssam		if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
292186904Ssam		}
293186904Ssam
294186904Ssam	if (!BN_nnmod(val[0],a,m,ctx)) goto err;		/* 1 */
295186904Ssam	if (BN_is_zero(val[0]))
296186904Ssam		{
297186904Ssam		BN_zero(r);
298186904Ssam		ret = 1;
299186904Ssam		goto err;
300186904Ssam		}
301186904Ssam
302186904Ssam	window = BN_window_bits_for_exponent_size(bits);
303186904Ssam	if (window > 1)
304186904Ssam		{
305186904Ssam		if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx))
306186904Ssam			goto err;				/* 2 */
307186904Ssam		j=1<<(window-1);
308186904Ssam		for (i=1; i<j; i++)
309186904Ssam			{
310186904Ssam			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
311186904Ssam					!BN_mod_mul_reciprocal(val[i],val[i-1],
312186904Ssam						aa,&recp,ctx))
313186904Ssam				goto err;
314186904Ssam			}
315186904Ssam		}
316186904Ssam
317186904Ssam	start=1;	/* This is used to avoid multiplication etc
318186904Ssam			 * when there is only the value '1' in the
319186904Ssam			 * buffer. */
320186904Ssam	wvalue=0;	/* The 'value' of the window */
321186904Ssam	wstart=bits-1;	/* The top bit of the window */
322186904Ssam	wend=0;		/* The bottom bit of the window */
323186904Ssam
324186904Ssam	if (!BN_one(r)) goto err;
325186904Ssam
326186904Ssam	for (;;)
327186904Ssam		{
328186904Ssam		if (BN_is_bit_set(p,wstart) == 0)
329186904Ssam			{
330186904Ssam			if (!start)
331186904Ssam				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
332186904Ssam				goto err;
333186904Ssam			if (wstart == 0) break;
334186904Ssam			wstart--;
335186904Ssam			continue;
336186904Ssam			}
337186904Ssam		/* We now have wstart on a 'set' bit, we now need to work out
338186904Ssam		 * how bit a window to do.  To do this we need to scan
339186904Ssam		 * forward until the last set bit before the end of the
340186904Ssam		 * window */
341186904Ssam		j=wstart;
342186904Ssam		wvalue=1;
343186904Ssam		wend=0;
344186904Ssam		for (i=1; i<window; i++)
345186904Ssam			{
346186904Ssam			if (wstart-i < 0) break;
347191016Ssam			if (BN_is_bit_set(p,wstart-i))
348186904Ssam				{
349186904Ssam				wvalue<<=(i-wend);
350186904Ssam				wvalue|=1;
351186904Ssam				wend=i;
352186904Ssam				}
353186904Ssam			}
354186904Ssam
355186904Ssam		/* wend is the size of the current window */
356186904Ssam		j=wend+1;
357186904Ssam		/* add the 'bytes above' */
358186904Ssam		if (!start)
359186904Ssam			for (i=0; i<j; i++)
360186904Ssam				{
361186904Ssam				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
362186904Ssam					goto err;
363186904Ssam				}
364186904Ssam
365186904Ssam		/* wvalue will be an odd number < 2^window */
366186904Ssam		if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx))
367186904Ssam			goto err;
368186904Ssam
369186904Ssam		/* move the 'window' down further */
370186904Ssam		wstart-=wend+1;
371186904Ssam		wvalue=0;
372186904Ssam		start=0;
373186904Ssam		if (wstart < 0) break;
374186904Ssam		}
375186904Ssam	ret=1;
376186904Ssamerr:
377186904Ssam	BN_CTX_end(ctx);
378186904Ssam	BN_RECP_CTX_free(&recp);
379186904Ssam	bn_check_top(r);
380186904Ssam	return(ret);
381186904Ssam	}
382186904Ssam
383186904Ssam
384189980Ssamint BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
385186904Ssam		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
386186904Ssam	{
387186904Ssam	int i,j,bits,ret=0,wstart,wend,window,wvalue;
388186904Ssam	int start=1;
389186904Ssam	BIGNUM *d,*r;
390186904Ssam	const BIGNUM *aa;
391189980Ssam	/* Table of variables obtained from 'ctx' */
392186904Ssam	BIGNUM *val[TABLE_SIZE];
393186904Ssam	BN_MONT_CTX *mont=NULL;
394186904Ssam
395186904Ssam	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
396189980Ssam		{
397189980Ssam		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
398189980Ssam		}
399189981Ssam
400189981Ssam	bn_check_top(a);
401189981Ssam	bn_check_top(p);
402189980Ssam	bn_check_top(m);
403189980Ssam
404189980Ssam	if (!BN_is_odd(m))
405189980Ssam		{
406189980Ssam		BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
407189980Ssam		return(0);
408189980Ssam		}
409189981Ssam	bits=BN_num_bits(p);
410189981Ssam	if (bits == 0)
411189981Ssam		{
412189980Ssam		ret = BN_one(rr);
413189980Ssam		return ret;
414189980Ssam		}
415189980Ssam
416189980Ssam	BN_CTX_start(ctx);
417189980Ssam	d = BN_CTX_get(ctx);
418189981Ssam	r = BN_CTX_get(ctx);
419189981Ssam	val[0] = BN_CTX_get(ctx);
420189981Ssam	if (!d || !r || !val[0]) goto err;
421189980Ssam
422189980Ssam	/* If this is not done, things will break in the montgomery
423189980Ssam	 * part */
424189980Ssam
425189980Ssam	if (in_mont != NULL)
426186904Ssam		mont=in_mont;
427186904Ssam	else
428186904Ssam		{
429186904Ssam		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
430189980Ssam		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
431186904Ssam		}
432186904Ssam
433186904Ssam	if (a->neg || BN_ucmp(a,m) >= 0)
434186904Ssam		{
435189980Ssam		if (!BN_nnmod(val[0],a,m,ctx))
436189980Ssam			goto err;
437186904Ssam		aa= val[0];
438186904Ssam		}
439186904Ssam	else
440189980Ssam		aa=a;
441189980Ssam	if (BN_is_zero(aa))
442189980Ssam		{
443189980Ssam		BN_zero(rr);
444189980Ssam		ret = 1;
445189980Ssam		goto err;
446189980Ssam		}
447189980Ssam	if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
448189980Ssam
449189980Ssam	window = BN_window_bits_for_exponent_size(bits);
450186904Ssam	if (window > 1)
451189980Ssam		{
452186904Ssam		if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
453186904Ssam		j=1<<(window-1);
454186904Ssam		for (i=1; i<j; i++)
455186904Ssam			{
456189980Ssam			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
457189980Ssam					!BN_mod_mul_montgomery(val[i],val[i-1],
458189980Ssam						d,mont,ctx))
459189980Ssam				goto err;
460189980Ssam			}
461189980Ssam		}
462189980Ssam
463189980Ssam	start=1;	/* This is used to avoid multiplication etc
464186904Ssam			 * when there is only the value '1' in the
465186904Ssam			 * buffer. */
466186904Ssam	wvalue=0;	/* The 'value' of the window */
467186904Ssam	wstart=bits-1;	/* The top bit of the window */
468186904Ssam	wend=0;		/* The bottom bit of the window */
469189980Ssam
470191017Ssam	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
471186904Ssam	for (;;)
472186904Ssam		{
473186904Ssam		if (BN_is_bit_set(p,wstart) == 0)
474186904Ssam			{
475186904Ssam			if (!start)
476186904Ssam				{
477186904Ssam				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
478186904Ssam				goto err;
479186904Ssam				}
480189980Ssam			if (wstart == 0) break;
481186904Ssam			wstart--;
482186904Ssam			continue;
483186904Ssam			}
484186904Ssam		/* We now have wstart on a 'set' bit, we now need to work out
485186904Ssam		 * how bit a window to do.  To do this we need to scan
486186904Ssam		 * forward until the last set bit before the end of the
487186904Ssam		 * window */
488186904Ssam		j=wstart;
489186904Ssam		wvalue=1;
490186904Ssam		wend=0;
491186904Ssam		for (i=1; i<window; i++)
492186904Ssam			{
493186904Ssam			if (wstart-i < 0) break;
494186904Ssam			if (BN_is_bit_set(p,wstart-i))
495186904Ssam				{
496186904Ssam				wvalue<<=(i-wend);
497186904Ssam				wvalue|=1;
498186904Ssam				wend=i;
499186904Ssam				}
500186904Ssam			}
501186904Ssam
502186904Ssam		/* wend is the size of the current window */
503186904Ssam		j=wend+1;
504186904Ssam		/* add the 'bytes above' */
505186904Ssam		if (!start)
506186904Ssam			for (i=0; i<j; i++)
507186904Ssam				{
508186904Ssam				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
509186904Ssam					goto err;
510186904Ssam				}
511186904Ssam
512186904Ssam		/* wvalue will be an odd number < 2^window */
513186904Ssam		if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
514186904Ssam			goto err;
515186904Ssam
516189980Ssam		/* move the 'window' down further */
517186904Ssam		wstart-=wend+1;
518186904Ssam		wvalue=0;
519189980Ssam		start=0;
520189980Ssam		if (wstart < 0) break;
521186904Ssam		}
522186904Ssam	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
523189980Ssam	ret=1;
524189980Ssamerr:
525189980Ssam	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
526189980Ssam	BN_CTX_end(ctx);
527189980Ssam	bn_check_top(rr);
528189980Ssam	return(ret);
529189980Ssam	}
530189980Ssam
531189980Ssam
532189980Ssam/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
533189980Ssam * so that accessing any of these table values shows the same access pattern as far
534189980Ssam * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
535186904Ssam * from/to that table. */
536186904Ssam
537186904Ssamstatic int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width)
538186904Ssam	{
539186904Ssam	size_t i, j;
540186904Ssam
541186904Ssam	if (top > b->top)
542186904Ssam		top = b->top; /* this works because 'buf' is explicitly zeroed */
543186904Ssam	for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
544186904Ssam		{
545186904Ssam		buf[j] = ((unsigned char*)b->d)[i];
546186904Ssam		}
547186904Ssam
548186904Ssam	return 1;
549186904Ssam	}
550186904Ssam
551186904Ssamstatic int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
552186904Ssam	{
553186904Ssam	size_t i, j;
554186904Ssam
555186904Ssam	if (bn_wexpand(b, top) == NULL)
556186904Ssam		return 0;
557186904Ssam
558186904Ssam	for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
559186904Ssam		{
560186904Ssam		((unsigned char*)b->d)[i] = buf[j];
561186904Ssam		}
562186904Ssam
563186904Ssam	b->top = top;
564186904Ssam	bn_correct_top(b);
565186904Ssam	return 1;
566186904Ssam	}
567186904Ssam
568186904Ssam/* Given a pointer value, compute the next address that is a cache line multiple. */
569186904Ssam#define MOD_EXP_CTIME_ALIGN(x_) \
570186904Ssam	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
571186904Ssam
572186904Ssam/* This variant of BN_mod_exp_mont() uses fixed windows and the special
573186904Ssam * precomputation memory layout to limit data-dependency to a minimum
574186904Ssam * to protect secret exponents (cf. the hyper-threading timing attacks
575186904Ssam * pointed out by Colin Percival,
576186904Ssam * http://www.daemonology.net/hyperthreading-considered-harmful/)
577186904Ssam */
578186904Ssamint BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
579186904Ssam		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
580186904Ssam	{
581186904Ssam	int i,bits,ret=0,window,wvalue;
582186904Ssam	int top;
583186904Ssam	BN_MONT_CTX *mont=NULL;
584186904Ssam
585186904Ssam	int numPowers;
586186904Ssam	unsigned char *powerbufFree=NULL;
587186904Ssam	int powerbufLen = 0;
588186904Ssam	unsigned char *powerbuf=NULL;
589186904Ssam	BIGNUM tmp, am;
590186904Ssam
591186904Ssam	bn_check_top(a);
592186904Ssam	bn_check_top(p);
593186904Ssam	bn_check_top(m);
594186904Ssam
595186904Ssam	top = m->top;
596186904Ssam
597186904Ssam	if (!(m->d[0] & 1))
598186904Ssam		{
599186904Ssam		BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
600186904Ssam		return(0);
601186904Ssam		}
602186904Ssam	bits=BN_num_bits(p);
603186904Ssam	if (bits == 0)
604186904Ssam		{
605186904Ssam		ret = BN_one(rr);
606186904Ssam		return ret;
607186904Ssam		}
608186904Ssam
609186904Ssam	BN_CTX_start(ctx);
610186904Ssam
611186904Ssam	/* Allocate a montgomery context if it was not supplied by the caller.
612186904Ssam	 * If this is not done, things will break in the montgomery part.
613186904Ssam 	 */
614186904Ssam	if (in_mont != NULL)
615186904Ssam		mont=in_mont;
616186904Ssam	else
617186904Ssam		{
618186904Ssam		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
619186904Ssam		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
620186904Ssam		}
621186904Ssam
622186904Ssam	/* Get the window size to use with size of p. */
623186904Ssam	window = BN_window_bits_for_ctime_exponent_size(bits);
624186904Ssam#if defined(OPENSSL_BN_ASM_MONT5)
625186904Ssam	if (window==6 && bits<=1024) window=5;	/* ~5% improvement of 2048-bit RSA sign */
626186904Ssam#endif
627186904Ssam
628186904Ssam	/* Allocate a buffer large enough to hold all of the pre-computed
629186904Ssam	 * powers of am, am itself and tmp.
630186904Ssam	 */
631186904Ssam	numPowers = 1 << window;
632186904Ssam	powerbufLen = sizeof(m->d[0])*(top*numPowers +
633186904Ssam				((2*top)>numPowers?(2*top):numPowers));
634186904Ssam#ifdef alloca
635186904Ssam	if (powerbufLen < 3072)
636186904Ssam		powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
637186904Ssam	else
638186904Ssam#endif
639186904Ssam	if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
640186904Ssam		goto err;
641186904Ssam
642186904Ssam	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
643186904Ssam	memset(powerbuf, 0, powerbufLen);
644186904Ssam
645186904Ssam#ifdef alloca
646186904Ssam	if (powerbufLen < 3072)
647186904Ssam		powerbufFree = NULL;
648186904Ssam#endif
649186904Ssam
650186904Ssam	/* lay down tmp and am right after powers table */
651186904Ssam	tmp.d     = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers);
652186904Ssam	am.d      = tmp.d + top;
653186904Ssam	tmp.top   = am.top  = 0;
654186904Ssam	tmp.dmax  = am.dmax = top;
655186904Ssam	tmp.neg   = am.neg  = 0;
656186904Ssam	tmp.flags = am.flags = BN_FLG_STATIC_DATA;
657186904Ssam
658186904Ssam	/* prepare a^0 in Montgomery domain */
659186904Ssam#if 1
660189980Ssam 	if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx))	goto err;
661186904Ssam#else
662186904Ssam	tmp.d[0] = (0-m->d[0])&BN_MASK2;	/* 2^(top*BN_BITS2) - m */
663186904Ssam	for (i=1;i<top;i++)
664186904Ssam		tmp.d[i] = (~m->d[i])&BN_MASK2;
665186904Ssam	tmp.top = top;
666186904Ssam#endif
667186904Ssam
668189980Ssam	/* prepare a^1 in Montgomery domain */
669189980Ssam	if (a->neg || BN_ucmp(a,m) >= 0)
670186904Ssam		{
671189980Ssam		if (!BN_mod(&am,a,m,ctx))			goto err;
672186904Ssam		if (!BN_to_montgomery(&am,&am,mont,ctx))	goto err;
673189980Ssam		}
674189980Ssam	else	if (!BN_to_montgomery(&am,a,mont,ctx))		goto err;
675186904Ssam
676186904Ssam#if defined(OPENSSL_BN_ASM_MONT5)
677186904Ssam    /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
678186904Ssam     * specifically optimization of cache-timing attack countermeasures
679186904Ssam     * and pre-computation optimization. */
680186904Ssam
681186904Ssam    /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
682186904Ssam     * 512-bit RSA is hardly relevant, we omit it to spare size... */
683186904Ssam    if (window==5 && top>1)
684186904Ssam	{
685186904Ssam	void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap,
686186904Ssam			const void *table,const BN_ULONG *np,
687186904Ssam			const BN_ULONG *n0,int num,int power);
688186904Ssam	void bn_scatter5(const BN_ULONG *inp,size_t num,
689186904Ssam			void *table,size_t power);
690186904Ssam	void bn_gather5(BN_ULONG *out,size_t num,
691186904Ssam			void *table,size_t power);
692186904Ssam
693186904Ssam	BN_ULONG *np=mont->N.d, *n0=mont->n0;
694186904Ssam
695186904Ssam	/* BN_to_montgomery can contaminate words above .top
696186904Ssam	 * [in BN_DEBUG[_DEBUG] build]... */
697186904Ssam	for (i=am.top; i<top; i++)	am.d[i]=0;
698186904Ssam	for (i=tmp.top; i<top; i++)	tmp.d[i]=0;
699186904Ssam
700186904Ssam	bn_scatter5(tmp.d,top,powerbuf,0);
701186904Ssam	bn_scatter5(am.d,am.top,powerbuf,1);
702186904Ssam	bn_mul_mont(tmp.d,am.d,am.d,np,n0,top);
703186904Ssam	bn_scatter5(tmp.d,top,powerbuf,2);
704186904Ssam
705186904Ssam#if 0
706186904Ssam	for (i=3; i<32; i++)
707186904Ssam		{
708186904Ssam		/* Calculate a^i = a^(i-1) * a */
709186904Ssam		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
710186904Ssam		bn_scatter5(tmp.d,top,powerbuf,i);
711186904Ssam		}
712186904Ssam#else
713186904Ssam	/* same as above, but uses squaring for 1/2 of operations */
714186904Ssam	for (i=4; i<32; i*=2)
715186904Ssam		{
716186904Ssam		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
717186904Ssam		bn_scatter5(tmp.d,top,powerbuf,i);
718190384Ssam		}
719190384Ssam	for (i=3; i<8; i+=2)
720186904Ssam		{
721186904Ssam		int j;
722186904Ssam		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
723186904Ssam		bn_scatter5(tmp.d,top,powerbuf,i);
724186904Ssam		for (j=2*i; j<32; j*=2)
725186904Ssam			{
726186904Ssam			bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
727186904Ssam			bn_scatter5(tmp.d,top,powerbuf,j);
728186904Ssam			}
729186904Ssam		}
730186904Ssam	for (; i<16; i+=2)
731186904Ssam		{
732186904Ssam		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
733186904Ssam		bn_scatter5(tmp.d,top,powerbuf,i);
734186904Ssam		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
735186904Ssam		bn_scatter5(tmp.d,top,powerbuf,2*i);
736186904Ssam		}
737186904Ssam	for (; i<32; i+=2)
738186904Ssam		{
739186904Ssam		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
740190384Ssam		bn_scatter5(tmp.d,top,powerbuf,i);
741186904Ssam		}
742186904Ssam#endif
743186904Ssam	bits--;
744190384Ssam	for (wvalue=0, i=bits%5; i>=0; i--,bits--)
745186904Ssam		wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
746190384Ssam	bn_gather5(tmp.d,top,powerbuf,wvalue);
747190384Ssam
748186904Ssam	/* Scan the exponent one window at a time starting from the most
749186904Ssam	 * significant bits.
750186904Ssam	 */
751186904Ssam	while (bits >= 0)
752186904Ssam		{
753186904Ssam		for (wvalue=0, i=0; i<5; i++,bits--)
754186904Ssam			wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
755186904Ssam
756186904Ssam		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
757186904Ssam		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
758186904Ssam		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
759186904Ssam		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
760186904Ssam		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
761186904Ssam		bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
762186904Ssam		}
763186904Ssam
764189980Ssam	tmp.top=top;
765186904Ssam	bn_correct_top(&tmp);
766186904Ssam	}
767186904Ssam    else
768186904Ssam#endif
769186904Ssam	{
770186904Ssam	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err;
771186904Ssam	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am,  top, powerbuf, 1, numPowers)) goto err;
772186904Ssam
773186904Ssam	/* If the window size is greater than 1, then calculate
774186904Ssam	 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
775186904Ssam	 * (even powers could instead be computed as (a^(i/2))^2
776186904Ssam	 * to use the slight performance advantage of sqr over mul).
777186904Ssam	 */
778189980Ssam	if (window > 1)
779186904Ssam		{
780186904Ssam		if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx))	goto err;
781186904Ssam		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err;
782186904Ssam		for (i=3; i<numPowers; i++)
783186904Ssam			{
784186904Ssam			/* Calculate a^i = a^(i-1) * a */
785186904Ssam			if (!BN_mod_mul_montgomery(&tmp,&am,&tmp,mont,ctx))
786189980Ssam				goto err;
787186904Ssam			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, numPowers)) goto err;
788186904Ssam			}
789186904Ssam		}
790186904Ssam
791186904Ssam	bits--;
792186904Ssam	for (wvalue=0, i=bits%window; i>=0; i--,bits--)
793186904Ssam		wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
794190384Ssam	if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err;
795186904Ssam
796186904Ssam	/* Scan the exponent one window at a time starting from the most
797186904Ssam	 * significant bits.
798190384Ssam	 */
799 	while (bits >= 0)
800  		{
801 		wvalue=0; /* The 'value' of the window */
802
803 		/* Scan the window, squaring the result as we go */
804 		for (i=0; i<window; i++,bits--)
805 			{
806			if (!BN_mod_mul_montgomery(&tmp,&tmp,&tmp,mont,ctx))	goto err;
807			wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
808  			}
809
810		/* Fetch the appropriate pre-computed value from the pre-buf */
811		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err;
812
813 		/* Multiply the result into the intermediate result */
814 		if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err;
815  		}
816	}
817
818 	/* Convert the final result from montgomery to standard format */
819	if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err;
820	ret=1;
821err:
822	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
823	if (powerbuf!=NULL)
824		{
825		OPENSSL_cleanse(powerbuf,powerbufLen);
826		if (powerbufFree) OPENSSL_free(powerbufFree);
827		}
828	BN_CTX_end(ctx);
829	return(ret);
830	}
831
832int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
833                         const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
834	{
835	BN_MONT_CTX *mont = NULL;
836	int b, bits, ret=0;
837	int r_is_one;
838	BN_ULONG w, next_w;
839	BIGNUM *d, *r, *t;
840	BIGNUM *swap_tmp;
841#define BN_MOD_MUL_WORD(r, w, m) \
842		(BN_mul_word(r, (w)) && \
843		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
844			(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
845		/* BN_MOD_MUL_WORD is only used with 'w' large,
846		 * so the BN_ucmp test is probably more overhead
847		 * than always using BN_mod (which uses BN_copy if
848		 * a similar test returns true). */
849		/* We can use BN_mod and do not need BN_nnmod because our
850		 * accumulator is never negative (the result of BN_mod does
851		 * not depend on the sign of the modulus).
852		 */
853#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
854		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
855
856	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
857		{
858		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
859		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
860		return -1;
861		}
862
863	bn_check_top(p);
864	bn_check_top(m);
865
866	if (!BN_is_odd(m))
867		{
868		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
869		return(0);
870		}
871	if (m->top == 1)
872		a %= m->d[0]; /* make sure that 'a' is reduced */
873
874	bits = BN_num_bits(p);
875	if (bits == 0)
876		{
877		/* x**0 mod 1 is still zero. */
878		if (BN_is_one(m))
879			{
880			ret = 1;
881			BN_zero(rr);
882			}
883		else
884			ret = BN_one(rr);
885		return ret;
886		}
887	if (a == 0)
888		{
889		BN_zero(rr);
890		ret = 1;
891		return ret;
892		}
893
894	BN_CTX_start(ctx);
895	d = BN_CTX_get(ctx);
896	r = BN_CTX_get(ctx);
897	t = BN_CTX_get(ctx);
898	if (d == NULL || r == NULL || t == NULL) goto err;
899
900	if (in_mont != NULL)
901		mont=in_mont;
902	else
903		{
904		if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
905		if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
906		}
907
908	r_is_one = 1; /* except for Montgomery factor */
909
910	/* bits-1 >= 0 */
911
912	/* The result is accumulated in the product r*w. */
913	w = a; /* bit 'bits-1' of 'p' is always set */
914	for (b = bits-2; b >= 0; b--)
915		{
916		/* First, square r*w. */
917		next_w = w*w;
918		if ((next_w/w) != w) /* overflow */
919			{
920			if (r_is_one)
921				{
922				if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
923				r_is_one = 0;
924				}
925			else
926				{
927				if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
928				}
929			next_w = 1;
930			}
931		w = next_w;
932		if (!r_is_one)
933			{
934			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
935			}
936
937		/* Second, multiply r*w by 'a' if exponent bit is set. */
938		if (BN_is_bit_set(p, b))
939			{
940			next_w = w*a;
941			if ((next_w/a) != w) /* overflow */
942				{
943				if (r_is_one)
944					{
945					if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
946					r_is_one = 0;
947					}
948				else
949					{
950					if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
951					}
952				next_w = a;
953				}
954			w = next_w;
955			}
956		}
957
958	/* Finally, set r:=r*w. */
959	if (w != 1)
960		{
961		if (r_is_one)
962			{
963			if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
964			r_is_one = 0;
965			}
966		else
967			{
968			if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
969			}
970		}
971
972	if (r_is_one) /* can happen only if a == 1*/
973		{
974		if (!BN_one(rr)) goto err;
975		}
976	else
977		{
978		if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
979		}
980	ret = 1;
981err:
982	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
983	BN_CTX_end(ctx);
984	bn_check_top(rr);
985	return(ret);
986	}
987
988
989/* The old fallback, simple version :-) */
990int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
991		const BIGNUM *m, BN_CTX *ctx)
992	{
993	int i,j,bits,ret=0,wstart,wend,window,wvalue;
994	int start=1;
995	BIGNUM *d;
996	/* Table of variables obtained from 'ctx' */
997	BIGNUM *val[TABLE_SIZE];
998
999	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
1000		{
1001		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1002		BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1003		return -1;
1004		}
1005
1006	bits=BN_num_bits(p);
1007
1008	if (bits == 0)
1009		{
1010		ret = BN_one(r);
1011		return ret;
1012		}
1013
1014	BN_CTX_start(ctx);
1015	d = BN_CTX_get(ctx);
1016	val[0] = BN_CTX_get(ctx);
1017	if(!d || !val[0]) goto err;
1018
1019	if (!BN_nnmod(val[0],a,m,ctx)) goto err;		/* 1 */
1020	if (BN_is_zero(val[0]))
1021		{
1022		BN_zero(r);
1023		ret = 1;
1024		goto err;
1025		}
1026
1027	window = BN_window_bits_for_exponent_size(bits);
1028	if (window > 1)
1029		{
1030		if (!BN_mod_mul(d,val[0],val[0],m,ctx))
1031			goto err;				/* 2 */
1032		j=1<<(window-1);
1033		for (i=1; i<j; i++)
1034			{
1035			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
1036					!BN_mod_mul(val[i],val[i-1],d,m,ctx))
1037				goto err;
1038			}
1039		}
1040
1041	start=1;	/* This is used to avoid multiplication etc
1042			 * when there is only the value '1' in the
1043			 * buffer. */
1044	wvalue=0;	/* The 'value' of the window */
1045	wstart=bits-1;	/* The top bit of the window */
1046	wend=0;		/* The bottom bit of the window */
1047
1048	if (!BN_one(r)) goto err;
1049
1050	for (;;)
1051		{
1052		if (BN_is_bit_set(p,wstart) == 0)
1053			{
1054			if (!start)
1055				if (!BN_mod_mul(r,r,r,m,ctx))
1056				goto err;
1057			if (wstart == 0) break;
1058			wstart--;
1059			continue;
1060			}
1061		/* We now have wstart on a 'set' bit, we now need to work out
1062		 * how bit a window to do.  To do this we need to scan
1063		 * forward until the last set bit before the end of the
1064		 * window */
1065		j=wstart;
1066		wvalue=1;
1067		wend=0;
1068		for (i=1; i<window; i++)
1069			{
1070			if (wstart-i < 0) break;
1071			if (BN_is_bit_set(p,wstart-i))
1072				{
1073				wvalue<<=(i-wend);
1074				wvalue|=1;
1075				wend=i;
1076				}
1077			}
1078
1079		/* wend is the size of the current window */
1080		j=wend+1;
1081		/* add the 'bytes above' */
1082		if (!start)
1083			for (i=0; i<j; i++)
1084				{
1085				if (!BN_mod_mul(r,r,r,m,ctx))
1086					goto err;
1087				}
1088
1089		/* wvalue will be an odd number < 2^window */
1090		if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
1091			goto err;
1092
1093		/* move the 'window' down further */
1094		wstart-=wend+1;
1095		wvalue=0;
1096		start=0;
1097		if (wstart < 0) break;
1098		}
1099	ret=1;
1100err:
1101	BN_CTX_end(ctx);
1102	bn_check_top(r);
1103	return(ret);
1104	}
1105