155714Skris/* crypto/bn/bn_exp.c */
255714Skris/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
355714Skris * All rights reserved.
455714Skris *
555714Skris * This package is an SSL implementation written
655714Skris * by Eric Young (eay@cryptsoft.com).
755714Skris * The implementation was written so as to conform with Netscapes SSL.
855714Skris *
955714Skris * This library is free for commercial and non-commercial use as long as
1055714Skris * the following conditions are aheared to.  The following conditions
1155714Skris * apply to all code found in this distribution, be it the RC4, RSA,
1255714Skris * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
1355714Skris * included with this distribution is covered by the same copyright terms
1455714Skris * except that the holder is Tim Hudson (tjh@cryptsoft.com).
1555714Skris *
1655714Skris * Copyright remains Eric Young's, and as such any Copyright notices in
1755714Skris * the code are not to be removed.
1855714Skris * If this package is used in a product, Eric Young should be given attribution
1955714Skris * as the author of the parts of the library used.
2055714Skris * This can be in the form of a textual message at program startup or
2155714Skris * in documentation (online or textual) provided with the package.
2255714Skris *
2355714Skris * Redistribution and use in source and binary forms, with or without
2455714Skris * modification, are permitted provided that the following conditions
2555714Skris * are met:
2655714Skris * 1. Redistributions of source code must retain the copyright
2755714Skris *    notice, this list of conditions and the following disclaimer.
2855714Skris * 2. Redistributions in binary form must reproduce the above copyright
2955714Skris *    notice, this list of conditions and the following disclaimer in the
3055714Skris *    documentation and/or other materials provided with the distribution.
3155714Skris * 3. All advertising materials mentioning features or use of this software
3255714Skris *    must display the following acknowledgement:
3355714Skris *    "This product includes cryptographic software written by
3455714Skris *     Eric Young (eay@cryptsoft.com)"
3555714Skris *    The word 'cryptographic' can be left out if the rouines from the library
3655714Skris *    being used are not cryptographic related :-).
3755714Skris * 4. If you include any Windows specific code (or a derivative thereof) from
3855714Skris *    the apps directory (application code) you must include an acknowledgement:
3955714Skris *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
4055714Skris *
4155714Skris * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
4255714Skris * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4355714Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4455714Skris * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
4555714Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4655714Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4755714Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4855714Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4955714Skris * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
5055714Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5155714Skris * SUCH DAMAGE.
5255714Skris *
5355714Skris * The licence and distribution terms for any publically available version or
5455714Skris * derivative of this code cannot be changed.  i.e. this code cannot simply be
5555714Skris * copied and put under another distribution licence
5655714Skris * [including the GNU Public Licence.]
5755714Skris */
5868651Skris/* ====================================================================
59160814Ssimon * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
6068651Skris *
6168651Skris * Redistribution and use in source and binary forms, with or without
6268651Skris * modification, are permitted provided that the following conditions
6368651Skris * are met:
6468651Skris *
6568651Skris * 1. Redistributions of source code must retain the above copyright
6668651Skris *    notice, this list of conditions and the following disclaimer.
6768651Skris *
6868651Skris * 2. Redistributions in binary form must reproduce the above copyright
6968651Skris *    notice, this list of conditions and the following disclaimer in
7068651Skris *    the documentation and/or other materials provided with the
7168651Skris *    distribution.
7268651Skris *
7368651Skris * 3. All advertising materials mentioning features or use of this
7468651Skris *    software must display the following acknowledgment:
7568651Skris *    "This product includes software developed by the OpenSSL Project
7668651Skris *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
7768651Skris *
7868651Skris * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
7968651Skris *    endorse or promote products derived from this software without
8068651Skris *    prior written permission. For written permission, please contact
8168651Skris *    openssl-core@openssl.org.
8268651Skris *
8368651Skris * 5. Products derived from this software may not be called "OpenSSL"
8468651Skris *    nor may "OpenSSL" appear in their names without prior written
8568651Skris *    permission of the OpenSSL Project.
8668651Skris *
8768651Skris * 6. Redistributions of any form whatsoever must retain the following
8868651Skris *    acknowledgment:
8968651Skris *    "This product includes software developed by the OpenSSL Project
9068651Skris *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
9168651Skris *
9268651Skris * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
9368651Skris * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9468651Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
9568651Skris * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
9668651Skris * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
9768651Skris * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
9868651Skris * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
9968651Skris * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
10068651Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
10168651Skris * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
10268651Skris * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
10368651Skris * OF THE POSSIBILITY OF SUCH DAMAGE.
10468651Skris * ====================================================================
10568651Skris *
10668651Skris * This product includes cryptographic software written by Eric Young
10768651Skris * (eay@cryptsoft.com).  This product includes software written by Tim
10868651Skris * Hudson (tjh@cryptsoft.com).
10968651Skris *
11068651Skris */
11155714Skris
11268651Skris
11355714Skris#include "cryptlib.h"
11455714Skris#include "bn_lcl.h"
11555714Skris
116238405Sjkim#include <stdlib.h>
117238405Sjkim#ifdef _WIN32
118238405Sjkim# include <malloc.h>
119238405Sjkim# ifndef alloca
120238405Sjkim#  define alloca _alloca
121238405Sjkim# endif
122238405Sjkim#elif defined(__GNUC__)
123238405Sjkim# ifndef alloca
124238405Sjkim#  define alloca(s) __builtin_alloca((s))
125238405Sjkim# endif
126238405Sjkim#endif
127238405Sjkim
128160814Ssimon/* maximum precomputation table size for *variable* sliding windows */
12968651Skris#define TABLE_SIZE	32
13068651Skris
13155714Skris/* this one works - simple but works */
132109998Smarkmint BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
13355714Skris	{
13459191Skris	int i,bits,ret=0;
13555714Skris	BIGNUM *v,*rr;
13655714Skris
137194206Ssimon	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
138160814Ssimon		{
139194206Ssimon		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
140160814Ssimon		BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
141160814Ssimon		return -1;
142160814Ssimon		}
143160814Ssimon
14459191Skris	BN_CTX_start(ctx);
14555714Skris	if ((r == a) || (r == p))
14659191Skris		rr = BN_CTX_get(ctx);
14755714Skris	else
14859191Skris		rr = r;
149205128Ssimon	v = BN_CTX_get(ctx);
150205128Ssimon	if (rr == NULL || v == NULL) goto err;
15155714Skris
15255714Skris	if (BN_copy(v,a) == NULL) goto err;
15355714Skris	bits=BN_num_bits(p);
15455714Skris
15555714Skris	if (BN_is_odd(p))
15655714Skris		{ if (BN_copy(rr,a) == NULL) goto err; }
15755714Skris	else	{ if (!BN_one(rr)) goto err; }
15855714Skris
15955714Skris	for (i=1; i<bits; i++)
16055714Skris		{
16155714Skris		if (!BN_sqr(v,v,ctx)) goto err;
16255714Skris		if (BN_is_bit_set(p,i))
16355714Skris			{
16455714Skris			if (!BN_mul(rr,rr,v,ctx)) goto err;
16555714Skris			}
16655714Skris		}
16755714Skris	ret=1;
16855714Skriserr:
16955714Skris	if (r != rr) BN_copy(r,rr);
17059191Skris	BN_CTX_end(ctx);
171160814Ssimon	bn_check_top(r);
17255714Skris	return(ret);
17355714Skris	}
17455714Skris
17568651Skris
176109998Smarkmint BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
17755714Skris	       BN_CTX *ctx)
17855714Skris	{
17955714Skris	int ret;
18055714Skris
18155714Skris	bn_check_top(a);
18255714Skris	bn_check_top(p);
18355714Skris	bn_check_top(m);
18455714Skris
185109998Smarkm	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
186109998Smarkm	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
187109998Smarkm	 * exponentiation for the odd part), using appropriate exponent
188109998Smarkm	 * reductions, and combine the results using the CRT.
189109998Smarkm	 *
190109998Smarkm	 * For now, we use Montgomery only if the modulus is odd; otherwise,
191109998Smarkm	 * exponentiation using the reciprocal-based quick remaindering
192109998Smarkm	 * algorithm is used.
193109998Smarkm	 *
194109998Smarkm	 * (Timing obtained with expspeed.c [computations  a^p mod m
195109998Smarkm	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
196109998Smarkm	 * 4096, 8192 bits], compared to the running time of the
197109998Smarkm	 * standard algorithm:
198109998Smarkm	 *
199109998Smarkm	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
200109998Smarkm         *                     55 .. 77 %  [UltraSparc processor, but
201109998Smarkm	 *                                  debug-solaris-sparcv8-gcc conf.]
202109998Smarkm	 *
203109998Smarkm	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
204109998Smarkm	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
205109998Smarkm	 *
206109998Smarkm	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
207109998Smarkm	 * at 2048 and more bits, but at 512 and 1024 bits, it was
208109998Smarkm	 * slower even than the standard algorithm!
209109998Smarkm	 *
210109998Smarkm	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
211109998Smarkm	 * should be obtained when the new Montgomery reduction code
212109998Smarkm	 * has been integrated into OpenSSL.)
213109998Smarkm	 */
21459191Skris
215109998Smarkm#define MONT_MUL_MOD
216109998Smarkm#define MONT_EXP_WORD
217109998Smarkm#define RECP_MUL_MOD
218109998Smarkm
21955714Skris#ifdef MONT_MUL_MOD
22055714Skris	/* I have finally been able to take out this pre-condition of
22155714Skris	 * the top bit being set.  It was caused by an error in BN_div
22255714Skris	 * with negatives.  There was also another problem when for a^b%m
22355714Skris	 * a >= m.  eay 07-May-97 */
22455714Skris/*	if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
22555714Skris
22655714Skris	if (BN_is_odd(m))
22768651Skris		{
228109998Smarkm#  ifdef MONT_EXP_WORD
229194206Ssimon		if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
23068651Skris			{
23168651Skris			BN_ULONG A = a->d[0];
23268651Skris			ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
23368651Skris			}
23468651Skris		else
235109998Smarkm#  endif
23668651Skris			ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
23768651Skris		}
23855714Skris	else
23955714Skris#endif
24055714Skris#ifdef RECP_MUL_MOD
24155714Skris		{ ret=BN_mod_exp_recp(r,a,p,m,ctx); }
24255714Skris#else
24355714Skris		{ ret=BN_mod_exp_simple(r,a,p,m,ctx); }
24455714Skris#endif
24555714Skris
246160814Ssimon	bn_check_top(r);
24755714Skris	return(ret);
24855714Skris	}
24955714Skris
25068651Skris
25155714Skrisint BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
25255714Skris		    const BIGNUM *m, BN_CTX *ctx)
25355714Skris	{
25455714Skris	int i,j,bits,ret=0,wstart,wend,window,wvalue;
255160814Ssimon	int start=1;
25655714Skris	BIGNUM *aa;
257160814Ssimon	/* Table of variables obtained from 'ctx' */
258160814Ssimon	BIGNUM *val[TABLE_SIZE];
25955714Skris	BN_RECP_CTX recp;
26055714Skris
261194206Ssimon	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
262160814Ssimon		{
263194206Ssimon		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
264160814Ssimon		BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
265160814Ssimon		return -1;
266160814Ssimon		}
267160814Ssimon
26855714Skris	bits=BN_num_bits(p);
26955714Skris
27055714Skris	if (bits == 0)
27155714Skris		{
272109998Smarkm		ret = BN_one(r);
273109998Smarkm		return ret;
27455714Skris		}
27559191Skris
27659191Skris	BN_CTX_start(ctx);
277160814Ssimon	aa = BN_CTX_get(ctx);
278160814Ssimon	val[0] = BN_CTX_get(ctx);
279160814Ssimon	if(!aa || !val[0]) goto err;
28059191Skris
28155714Skris	BN_RECP_CTX_init(&recp);
282109998Smarkm	if (m->neg)
283109998Smarkm		{
284109998Smarkm		/* ignore sign of 'm' */
285109998Smarkm		if (!BN_copy(aa, m)) goto err;
286109998Smarkm		aa->neg = 0;
287109998Smarkm		if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
288109998Smarkm		}
289109998Smarkm	else
290109998Smarkm		{
291109998Smarkm		if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
292109998Smarkm		}
29355714Skris
294160814Ssimon	if (!BN_nnmod(val[0],a,m,ctx)) goto err;		/* 1 */
295160814Ssimon	if (BN_is_zero(val[0]))
296109998Smarkm		{
297160814Ssimon		BN_zero(r);
298160814Ssimon		ret = 1;
299109998Smarkm		goto err;
300109998Smarkm		}
30155714Skris
30268651Skris	window = BN_window_bits_for_exponent_size(bits);
30368651Skris	if (window > 1)
30455714Skris		{
305160814Ssimon		if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx))
30668651Skris			goto err;				/* 2 */
30768651Skris		j=1<<(window-1);
30868651Skris		for (i=1; i<j; i++)
30968651Skris			{
310160814Ssimon			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
311160814Ssimon					!BN_mod_mul_reciprocal(val[i],val[i-1],
312160814Ssimon						aa,&recp,ctx))
31368651Skris				goto err;
31468651Skris			}
31555714Skris		}
31668651Skris
31755714Skris	start=1;	/* This is used to avoid multiplication etc
31855714Skris			 * when there is only the value '1' in the
31955714Skris			 * buffer. */
32055714Skris	wvalue=0;	/* The 'value' of the window */
32155714Skris	wstart=bits-1;	/* The top bit of the window */
32255714Skris	wend=0;		/* The bottom bit of the window */
32355714Skris
32455714Skris	if (!BN_one(r)) goto err;
32555714Skris
32655714Skris	for (;;)
32755714Skris		{
32855714Skris		if (BN_is_bit_set(p,wstart) == 0)
32955714Skris			{
33055714Skris			if (!start)
33155714Skris				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
33255714Skris				goto err;
33355714Skris			if (wstart == 0) break;
33455714Skris			wstart--;
33555714Skris			continue;
33655714Skris			}
33755714Skris		/* We now have wstart on a 'set' bit, we now need to work out
33855714Skris		 * how bit a window to do.  To do this we need to scan
33955714Skris		 * forward until the last set bit before the end of the
34055714Skris		 * window */
34155714Skris		j=wstart;
34255714Skris		wvalue=1;
34355714Skris		wend=0;
34455714Skris		for (i=1; i<window; i++)
34555714Skris			{
34655714Skris			if (wstart-i < 0) break;
34755714Skris			if (BN_is_bit_set(p,wstart-i))
34855714Skris				{
34955714Skris				wvalue<<=(i-wend);
35055714Skris				wvalue|=1;
35155714Skris				wend=i;
35255714Skris				}
35355714Skris			}
35455714Skris
35555714Skris		/* wend is the size of the current window */
35655714Skris		j=wend+1;
35755714Skris		/* add the 'bytes above' */
35855714Skris		if (!start)
35955714Skris			for (i=0; i<j; i++)
36055714Skris				{
36155714Skris				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
36255714Skris					goto err;
36355714Skris				}
36455714Skris
36555714Skris		/* wvalue will be an odd number < 2^window */
366160814Ssimon		if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx))
36755714Skris			goto err;
36855714Skris
36955714Skris		/* move the 'window' down further */
37055714Skris		wstart-=wend+1;
37155714Skris		wvalue=0;
37255714Skris		start=0;
37355714Skris		if (wstart < 0) break;
37455714Skris		}
37555714Skris	ret=1;
37655714Skriserr:
37759191Skris	BN_CTX_end(ctx);
37855714Skris	BN_RECP_CTX_free(&recp);
379160814Ssimon	bn_check_top(r);
38055714Skris	return(ret);
38155714Skris	}
38255714Skris
38368651Skris
384109998Smarkmint BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
38555714Skris		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
38655714Skris	{
38755714Skris	int i,j,bits,ret=0,wstart,wend,window,wvalue;
388160814Ssimon	int start=1;
38955714Skris	BIGNUM *d,*r;
390109998Smarkm	const BIGNUM *aa;
391160814Ssimon	/* Table of variables obtained from 'ctx' */
392160814Ssimon	BIGNUM *val[TABLE_SIZE];
39355714Skris	BN_MONT_CTX *mont=NULL;
39455714Skris
395194206Ssimon	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
396160814Ssimon		{
397160814Ssimon		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
398160814Ssimon		}
399160814Ssimon
40055714Skris	bn_check_top(a);
40155714Skris	bn_check_top(p);
40255714Skris	bn_check_top(m);
40355714Skris
404160814Ssimon	if (!BN_is_odd(m))
40555714Skris		{
40655714Skris		BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
40755714Skris		return(0);
40855714Skris		}
40955714Skris	bits=BN_num_bits(p);
41055714Skris	if (bits == 0)
41155714Skris		{
412109998Smarkm		ret = BN_one(rr);
413109998Smarkm		return ret;
41455714Skris		}
415109998Smarkm
41659191Skris	BN_CTX_start(ctx);
41759191Skris	d = BN_CTX_get(ctx);
41859191Skris	r = BN_CTX_get(ctx);
419160814Ssimon	val[0] = BN_CTX_get(ctx);
420160814Ssimon	if (!d || !r || !val[0]) goto err;
42155714Skris
42255714Skris	/* If this is not done, things will break in the montgomery
42355714Skris	 * part */
42455714Skris
42555714Skris	if (in_mont != NULL)
42655714Skris		mont=in_mont;
42755714Skris	else
42855714Skris		{
42955714Skris		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
43055714Skris		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
43155714Skris		}
43255714Skris
433109998Smarkm	if (a->neg || BN_ucmp(a,m) >= 0)
43455714Skris		{
435160814Ssimon		if (!BN_nnmod(val[0],a,m,ctx))
43668651Skris			goto err;
437160814Ssimon		aa= val[0];
43855714Skris		}
43955714Skris	else
44055714Skris		aa=a;
441109998Smarkm	if (BN_is_zero(aa))
442109998Smarkm		{
443160814Ssimon		BN_zero(rr);
444160814Ssimon		ret = 1;
445109998Smarkm		goto err;
446109998Smarkm		}
447160814Ssimon	if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
44855714Skris
44968651Skris	window = BN_window_bits_for_exponent_size(bits);
45068651Skris	if (window > 1)
45155714Skris		{
452160814Ssimon		if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
45368651Skris		j=1<<(window-1);
45468651Skris		for (i=1; i<j; i++)
45568651Skris			{
456160814Ssimon			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
457160814Ssimon					!BN_mod_mul_montgomery(val[i],val[i-1],
458160814Ssimon						d,mont,ctx))
45968651Skris				goto err;
46068651Skris			}
46155714Skris		}
46255714Skris
46355714Skris	start=1;	/* This is used to avoid multiplication etc
46455714Skris			 * when there is only the value '1' in the
46555714Skris			 * buffer. */
46655714Skris	wvalue=0;	/* The 'value' of the window */
46755714Skris	wstart=bits-1;	/* The top bit of the window */
46855714Skris	wend=0;		/* The bottom bit of the window */
46955714Skris
47068651Skris	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
47155714Skris	for (;;)
47255714Skris		{
47355714Skris		if (BN_is_bit_set(p,wstart) == 0)
47455714Skris			{
47555714Skris			if (!start)
47655714Skris				{
47755714Skris				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
47855714Skris				goto err;
47955714Skris				}
48055714Skris			if (wstart == 0) break;
48155714Skris			wstart--;
48255714Skris			continue;
48355714Skris			}
48455714Skris		/* We now have wstart on a 'set' bit, we now need to work out
48555714Skris		 * how bit a window to do.  To do this we need to scan
48655714Skris		 * forward until the last set bit before the end of the
48755714Skris		 * window */
48855714Skris		j=wstart;
48955714Skris		wvalue=1;
49055714Skris		wend=0;
49155714Skris		for (i=1; i<window; i++)
49255714Skris			{
49355714Skris			if (wstart-i < 0) break;
49455714Skris			if (BN_is_bit_set(p,wstart-i))
49555714Skris				{
49655714Skris				wvalue<<=(i-wend);
49755714Skris				wvalue|=1;
49855714Skris				wend=i;
49955714Skris				}
50055714Skris			}
50155714Skris
50255714Skris		/* wend is the size of the current window */
50355714Skris		j=wend+1;
50455714Skris		/* add the 'bytes above' */
50555714Skris		if (!start)
50655714Skris			for (i=0; i<j; i++)
50755714Skris				{
50855714Skris				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
50955714Skris					goto err;
51055714Skris				}
51155714Skris
51255714Skris		/* wvalue will be an odd number < 2^window */
513160814Ssimon		if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
51455714Skris			goto err;
51555714Skris
51655714Skris		/* move the 'window' down further */
51755714Skris		wstart-=wend+1;
51855714Skris		wvalue=0;
51955714Skris		start=0;
52055714Skris		if (wstart < 0) break;
52155714Skris		}
52268651Skris	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
52355714Skris	ret=1;
52455714Skriserr:
52555714Skris	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
52659191Skris	BN_CTX_end(ctx);
527160814Ssimon	bn_check_top(rr);
52855714Skris	return(ret);
52955714Skris	}
53055714Skris
531160814Ssimon
532160814Ssimon/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
533160814Ssimon * so that accessing any of these table values shows the same access pattern as far
534160814Ssimon * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
535160814Ssimon * from/to that table. */
536160814Ssimon
537238405Sjkimstatic int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width)
538160814Ssimon	{
539160814Ssimon	size_t i, j;
540160814Ssimon
541238405Sjkim	if (top > b->top)
542238405Sjkim		top = b->top; /* this works because 'buf' is explicitly zeroed */
543160814Ssimon	for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
544160814Ssimon		{
545160814Ssimon		buf[j] = ((unsigned char*)b->d)[i];
546160814Ssimon		}
547160814Ssimon
548160814Ssimon	return 1;
549160814Ssimon	}
550160814Ssimon
551160814Ssimonstatic int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
552160814Ssimon	{
553160814Ssimon	size_t i, j;
554160814Ssimon
555160814Ssimon	if (bn_wexpand(b, top) == NULL)
556160814Ssimon		return 0;
557160814Ssimon
558160814Ssimon	for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
559160814Ssimon		{
560160814Ssimon		((unsigned char*)b->d)[i] = buf[j];
561160814Ssimon		}
562160814Ssimon
563160814Ssimon	b->top = top;
564160814Ssimon	bn_correct_top(b);
565160814Ssimon	return 1;
566160814Ssimon	}
567160814Ssimon
568160814Ssimon/* Given a pointer value, compute the next address that is a cache line multiple. */
569160814Ssimon#define MOD_EXP_CTIME_ALIGN(x_) \
570238405Sjkim	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
571160814Ssimon
572160814Ssimon/* This variant of BN_mod_exp_mont() uses fixed windows and the special
573160814Ssimon * precomputation memory layout to limit data-dependency to a minimum
574160814Ssimon * to protect secret exponents (cf. the hyper-threading timing attacks
575160814Ssimon * pointed out by Colin Percival,
576160814Ssimon * http://www.daemonology.net/hyperthreading-considered-harmful/)
577160814Ssimon */
578160814Ssimonint BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
579160814Ssimon		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
580160814Ssimon	{
581238405Sjkim	int i,bits,ret=0,window,wvalue;
582160814Ssimon	int top;
583160814Ssimon	BN_MONT_CTX *mont=NULL;
584160814Ssimon
585160814Ssimon	int numPowers;
586160814Ssimon	unsigned char *powerbufFree=NULL;
587160814Ssimon	int powerbufLen = 0;
588160814Ssimon	unsigned char *powerbuf=NULL;
589238405Sjkim	BIGNUM tmp, am;
590160814Ssimon
591160814Ssimon	bn_check_top(a);
592160814Ssimon	bn_check_top(p);
593160814Ssimon	bn_check_top(m);
594160814Ssimon
595160814Ssimon	top = m->top;
596160814Ssimon
597160814Ssimon	if (!(m->d[0] & 1))
598160814Ssimon		{
599160814Ssimon		BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
600160814Ssimon		return(0);
601160814Ssimon		}
602160814Ssimon	bits=BN_num_bits(p);
603160814Ssimon	if (bits == 0)
604160814Ssimon		{
605160814Ssimon		ret = BN_one(rr);
606160814Ssimon		return ret;
607160814Ssimon		}
608160814Ssimon
609160814Ssimon	BN_CTX_start(ctx);
610160814Ssimon
611160814Ssimon	/* Allocate a montgomery context if it was not supplied by the caller.
612160814Ssimon	 * If this is not done, things will break in the montgomery part.
613160814Ssimon 	 */
614160814Ssimon	if (in_mont != NULL)
615160814Ssimon		mont=in_mont;
616160814Ssimon	else
617160814Ssimon		{
618160814Ssimon		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
619160814Ssimon		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
620160814Ssimon		}
621160814Ssimon
622160814Ssimon	/* Get the window size to use with size of p. */
623160814Ssimon	window = BN_window_bits_for_ctime_exponent_size(bits);
624238405Sjkim#if defined(OPENSSL_BN_ASM_MONT5)
625238405Sjkim	if (window==6 && bits<=1024) window=5;	/* ~5% improvement of 2048-bit RSA sign */
626238405Sjkim#endif
627160814Ssimon
628160814Ssimon	/* Allocate a buffer large enough to hold all of the pre-computed
629238405Sjkim	 * powers of am, am itself and tmp.
630160814Ssimon	 */
631160814Ssimon	numPowers = 1 << window;
632238405Sjkim	powerbufLen = sizeof(m->d[0])*(top*numPowers +
633238405Sjkim				((2*top)>numPowers?(2*top):numPowers));
634238405Sjkim#ifdef alloca
635238405Sjkim	if (powerbufLen < 3072)
636238405Sjkim		powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
637238405Sjkim	else
638238405Sjkim#endif
639160814Ssimon	if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
640160814Ssimon		goto err;
641160814Ssimon
642160814Ssimon	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
643160814Ssimon	memset(powerbuf, 0, powerbufLen);
644160814Ssimon
645238405Sjkim#ifdef alloca
646238405Sjkim	if (powerbufLen < 3072)
647238405Sjkim		powerbufFree = NULL;
648238405Sjkim#endif
649160814Ssimon
650238405Sjkim	/* lay down tmp and am right after powers table */
651238405Sjkim	tmp.d     = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers);
652238405Sjkim	am.d      = tmp.d + top;
653238405Sjkim	tmp.top   = am.top  = 0;
654238405Sjkim	tmp.dmax  = am.dmax = top;
655238405Sjkim	tmp.neg   = am.neg  = 0;
656238405Sjkim	tmp.flags = am.flags = BN_FLG_STATIC_DATA;
657160814Ssimon
658238405Sjkim	/* prepare a^0 in Montgomery domain */
659238405Sjkim#if 1
660238405Sjkim 	if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx))	goto err;
661238405Sjkim#else
662238405Sjkim	tmp.d[0] = (0-m->d[0])&BN_MASK2;	/* 2^(top*BN_BITS2) - m */
663238405Sjkim	for (i=1;i<top;i++)
664238405Sjkim		tmp.d[i] = (~m->d[i])&BN_MASK2;
665238405Sjkim	tmp.top = top;
666238405Sjkim#endif
667238405Sjkim
668238405Sjkim	/* prepare a^1 in Montgomery domain */
669160814Ssimon	if (a->neg || BN_ucmp(a,m) >= 0)
670160814Ssimon		{
671238405Sjkim		if (!BN_mod(&am,a,m,ctx))			goto err;
672238405Sjkim		if (!BN_to_montgomery(&am,&am,mont,ctx))	goto err;
673160814Ssimon		}
674238405Sjkim	else	if (!BN_to_montgomery(&am,a,mont,ctx))		goto err;
675160814Ssimon
676238405Sjkim#if defined(OPENSSL_BN_ASM_MONT5)
677238405Sjkim    /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
678238405Sjkim     * specifically optimization of cache-timing attack countermeasures
679238405Sjkim     * and pre-computation optimization. */
680238405Sjkim
681238405Sjkim    /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
682238405Sjkim     * 512-bit RSA is hardly relevant, we omit it to spare size... */
683279264Sdelphij    if (window==5 && top>1)
684238405Sjkim	{
685238405Sjkim	void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap,
686238405Sjkim			const void *table,const BN_ULONG *np,
687238405Sjkim			const BN_ULONG *n0,int num,int power);
688238405Sjkim	void bn_scatter5(const BN_ULONG *inp,size_t num,
689238405Sjkim			void *table,size_t power);
690238405Sjkim	void bn_gather5(BN_ULONG *out,size_t num,
691238405Sjkim			void *table,size_t power);
692238405Sjkim
693238405Sjkim	BN_ULONG *np=mont->N.d, *n0=mont->n0;
694238405Sjkim
695238405Sjkim	/* BN_to_montgomery can contaminate words above .top
696238405Sjkim	 * [in BN_DEBUG[_DEBUG] build]... */
697238405Sjkim	for (i=am.top; i<top; i++)	am.d[i]=0;
698238405Sjkim	for (i=tmp.top; i<top; i++)	tmp.d[i]=0;
699238405Sjkim
700238405Sjkim	bn_scatter5(tmp.d,top,powerbuf,0);
701238405Sjkim	bn_scatter5(am.d,am.top,powerbuf,1);
702238405Sjkim	bn_mul_mont(tmp.d,am.d,am.d,np,n0,top);
703238405Sjkim	bn_scatter5(tmp.d,top,powerbuf,2);
704238405Sjkim
705238405Sjkim#if 0
706238405Sjkim	for (i=3; i<32; i++)
707238405Sjkim		{
708238405Sjkim		/* Calculate a^i = a^(i-1) * a */
709238405Sjkim		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
710238405Sjkim		bn_scatter5(tmp.d,top,powerbuf,i);
711238405Sjkim		}
712238405Sjkim#else
713238405Sjkim	/* same as above, but uses squaring for 1/2 of operations */
714238405Sjkim	for (i=4; i<32; i*=2)
715238405Sjkim		{
716238405Sjkim		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
717238405Sjkim		bn_scatter5(tmp.d,top,powerbuf,i);
718238405Sjkim		}
719238405Sjkim	for (i=3; i<8; i+=2)
720238405Sjkim		{
721238405Sjkim		int j;
722238405Sjkim		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
723238405Sjkim		bn_scatter5(tmp.d,top,powerbuf,i);
724238405Sjkim		for (j=2*i; j<32; j*=2)
725238405Sjkim			{
726238405Sjkim			bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
727238405Sjkim			bn_scatter5(tmp.d,top,powerbuf,j);
728238405Sjkim			}
729238405Sjkim		}
730238405Sjkim	for (; i<16; i+=2)
731238405Sjkim		{
732238405Sjkim		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
733238405Sjkim		bn_scatter5(tmp.d,top,powerbuf,i);
734238405Sjkim		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
735238405Sjkim		bn_scatter5(tmp.d,top,powerbuf,2*i);
736238405Sjkim		}
737238405Sjkim	for (; i<32; i+=2)
738238405Sjkim		{
739238405Sjkim		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
740238405Sjkim		bn_scatter5(tmp.d,top,powerbuf,i);
741238405Sjkim		}
742238405Sjkim#endif
743238405Sjkim	bits--;
744238405Sjkim	for (wvalue=0, i=bits%5; i>=0; i--,bits--)
745238405Sjkim		wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
746238405Sjkim	bn_gather5(tmp.d,top,powerbuf,wvalue);
747238405Sjkim
748238405Sjkim	/* Scan the exponent one window at a time starting from the most
749238405Sjkim	 * significant bits.
750238405Sjkim	 */
751238405Sjkim	while (bits >= 0)
752238405Sjkim		{
753238405Sjkim		for (wvalue=0, i=0; i<5; i++,bits--)
754238405Sjkim			wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
755238405Sjkim
756238405Sjkim		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
757238405Sjkim		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
758238405Sjkim		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
759238405Sjkim		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
760238405Sjkim		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
761238405Sjkim		bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
762238405Sjkim		}
763238405Sjkim
764238405Sjkim	tmp.top=top;
765238405Sjkim	bn_correct_top(&tmp);
766238405Sjkim	}
767238405Sjkim    else
768238405Sjkim#endif
769238405Sjkim	{
770238405Sjkim	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err;
771238405Sjkim	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am,  top, powerbuf, 1, numPowers)) goto err;
772238405Sjkim
773160814Ssimon	/* If the window size is greater than 1, then calculate
774160814Ssimon	 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
775160814Ssimon	 * (even powers could instead be computed as (a^(i/2))^2
776160814Ssimon	 * to use the slight performance advantage of sqr over mul).
777160814Ssimon	 */
778160814Ssimon	if (window > 1)
779160814Ssimon		{
780238405Sjkim		if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx))	goto err;
781238405Sjkim		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err;
782238405Sjkim		for (i=3; i<numPowers; i++)
783160814Ssimon			{
784160814Ssimon			/* Calculate a^i = a^(i-1) * a */
785238405Sjkim			if (!BN_mod_mul_montgomery(&tmp,&am,&tmp,mont,ctx))
786160814Ssimon				goto err;
787238405Sjkim			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, numPowers)) goto err;
788160814Ssimon			}
789160814Ssimon		}
790160814Ssimon
791238405Sjkim	bits--;
792238405Sjkim	for (wvalue=0, i=bits%window; i>=0; i--,bits--)
793238405Sjkim		wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
794238405Sjkim	if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err;
795238405Sjkim
796238405Sjkim	/* Scan the exponent one window at a time starting from the most
797238405Sjkim	 * significant bits.
798238405Sjkim	 */
799238405Sjkim 	while (bits >= 0)
800160814Ssimon  		{
801160814Ssimon 		wvalue=0; /* The 'value' of the window */
802160814Ssimon
803160814Ssimon 		/* Scan the window, squaring the result as we go */
804238405Sjkim 		for (i=0; i<window; i++,bits--)
805160814Ssimon 			{
806238405Sjkim			if (!BN_mod_mul_montgomery(&tmp,&tmp,&tmp,mont,ctx))	goto err;
807238405Sjkim			wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
808160814Ssimon  			}
809160814Ssimon
810160814Ssimon		/* Fetch the appropriate pre-computed value from the pre-buf */
811238405Sjkim		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err;
812160814Ssimon
813160814Ssimon 		/* Multiply the result into the intermediate result */
814238405Sjkim 		if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err;
815160814Ssimon  		}
816238405Sjkim	}
817160814Ssimon
818160814Ssimon 	/* Convert the final result from montgomery to standard format */
819238405Sjkim	if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err;
820160814Ssimon	ret=1;
821160814Ssimonerr:
822160814Ssimon	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
823160814Ssimon	if (powerbuf!=NULL)
824160814Ssimon		{
825160814Ssimon		OPENSSL_cleanse(powerbuf,powerbufLen);
826238405Sjkim		if (powerbufFree) OPENSSL_free(powerbufFree);
827160814Ssimon		}
828160814Ssimon	BN_CTX_end(ctx);
829160814Ssimon	return(ret);
830160814Ssimon	}
831160814Ssimon
83268651Skrisint BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
83368651Skris                         const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
83468651Skris	{
83568651Skris	BN_MONT_CTX *mont = NULL;
83668651Skris	int b, bits, ret=0;
83768651Skris	int r_is_one;
83868651Skris	BN_ULONG w, next_w;
83968651Skris	BIGNUM *d, *r, *t;
84068651Skris	BIGNUM *swap_tmp;
84168651Skris#define BN_MOD_MUL_WORD(r, w, m) \
84268651Skris		(BN_mul_word(r, (w)) && \
84368651Skris		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
84468651Skris			(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
84568651Skris		/* BN_MOD_MUL_WORD is only used with 'w' large,
846109998Smarkm		 * so the BN_ucmp test is probably more overhead
847109998Smarkm		 * than always using BN_mod (which uses BN_copy if
848109998Smarkm		 * a similar test returns true). */
849109998Smarkm		/* We can use BN_mod and do not need BN_nnmod because our
850109998Smarkm		 * accumulator is never negative (the result of BN_mod does
851109998Smarkm		 * not depend on the sign of the modulus).
852109998Smarkm		 */
85368651Skris#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
85468651Skris		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
85568651Skris
856194206Ssimon	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
857160814Ssimon		{
858194206Ssimon		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
859160814Ssimon		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
860160814Ssimon		return -1;
861160814Ssimon		}
862160814Ssimon
86368651Skris	bn_check_top(p);
86468651Skris	bn_check_top(m);
86568651Skris
866160814Ssimon	if (!BN_is_odd(m))
86768651Skris		{
86868651Skris		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
86968651Skris		return(0);
87068651Skris		}
871109998Smarkm	if (m->top == 1)
872109998Smarkm		a %= m->d[0]; /* make sure that 'a' is reduced */
873109998Smarkm
87468651Skris	bits = BN_num_bits(p);
87568651Skris	if (bits == 0)
87668651Skris		{
877279264Sdelphij		/* x**0 mod 1 is still zero. */
878279264Sdelphij		if (BN_is_one(m))
879279264Sdelphij			{
880279264Sdelphij			ret = 1;
881279264Sdelphij			BN_zero(rr);
882279264Sdelphij			}
883279264Sdelphij		else
884279264Sdelphij			ret = BN_one(rr);
885109998Smarkm		return ret;
88668651Skris		}
887109998Smarkm	if (a == 0)
888109998Smarkm		{
889160814Ssimon		BN_zero(rr);
890160814Ssimon		ret = 1;
891109998Smarkm		return ret;
892109998Smarkm		}
893109998Smarkm
89468651Skris	BN_CTX_start(ctx);
89568651Skris	d = BN_CTX_get(ctx);
89668651Skris	r = BN_CTX_get(ctx);
89768651Skris	t = BN_CTX_get(ctx);
89868651Skris	if (d == NULL || r == NULL || t == NULL) goto err;
89968651Skris
90068651Skris	if (in_mont != NULL)
90168651Skris		mont=in_mont;
90268651Skris	else
90368651Skris		{
90468651Skris		if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
90568651Skris		if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
90668651Skris		}
90768651Skris
90868651Skris	r_is_one = 1; /* except for Montgomery factor */
90968651Skris
91068651Skris	/* bits-1 >= 0 */
91168651Skris
91268651Skris	/* The result is accumulated in the product r*w. */
91368651Skris	w = a; /* bit 'bits-1' of 'p' is always set */
91468651Skris	for (b = bits-2; b >= 0; b--)
91568651Skris		{
91668651Skris		/* First, square r*w. */
91768651Skris		next_w = w*w;
91868651Skris		if ((next_w/w) != w) /* overflow */
91968651Skris			{
92068651Skris			if (r_is_one)
92168651Skris				{
92268651Skris				if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
92368651Skris				r_is_one = 0;
92468651Skris				}
92568651Skris			else
92668651Skris				{
92768651Skris				if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
92868651Skris				}
92968651Skris			next_w = 1;
93068651Skris			}
93168651Skris		w = next_w;
93268651Skris		if (!r_is_one)
93368651Skris			{
93468651Skris			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
93568651Skris			}
93668651Skris
93768651Skris		/* Second, multiply r*w by 'a' if exponent bit is set. */
93868651Skris		if (BN_is_bit_set(p, b))
93968651Skris			{
94068651Skris			next_w = w*a;
94168651Skris			if ((next_w/a) != w) /* overflow */
94268651Skris				{
94368651Skris				if (r_is_one)
94468651Skris					{
94568651Skris					if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
94668651Skris					r_is_one = 0;
94768651Skris					}
94868651Skris				else
94968651Skris					{
95068651Skris					if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
95168651Skris					}
95268651Skris				next_w = a;
95368651Skris				}
95468651Skris			w = next_w;
95568651Skris			}
95668651Skris		}
95768651Skris
95868651Skris	/* Finally, set r:=r*w. */
95968651Skris	if (w != 1)
96068651Skris		{
96168651Skris		if (r_is_one)
96268651Skris			{
96368651Skris			if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
96468651Skris			r_is_one = 0;
96568651Skris			}
96668651Skris		else
96768651Skris			{
96868651Skris			if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
96968651Skris			}
97068651Skris		}
97168651Skris
97268651Skris	if (r_is_one) /* can happen only if a == 1*/
97368651Skris		{
97468651Skris		if (!BN_one(rr)) goto err;
97568651Skris		}
97668651Skris	else
97768651Skris		{
97868651Skris		if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
97968651Skris		}
98068651Skris	ret = 1;
98168651Skriserr:
98268651Skris	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
98368651Skris	BN_CTX_end(ctx);
984160814Ssimon	bn_check_top(rr);
98568651Skris	return(ret);
98668651Skris	}
98768651Skris
98868651Skris
98955714Skris/* The old fallback, simple version :-) */
990160814Ssimonint BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
991160814Ssimon		const BIGNUM *m, BN_CTX *ctx)
99255714Skris	{
993160814Ssimon	int i,j,bits,ret=0,wstart,wend,window,wvalue;
99455714Skris	int start=1;
99555714Skris	BIGNUM *d;
996160814Ssimon	/* Table of variables obtained from 'ctx' */
997160814Ssimon	BIGNUM *val[TABLE_SIZE];
99855714Skris
999194206Ssimon	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
1000160814Ssimon		{
1001194206Ssimon		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1002160814Ssimon		BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1003160814Ssimon		return -1;
1004160814Ssimon		}
1005160814Ssimon
100655714Skris	bits=BN_num_bits(p);
100755714Skris
100855714Skris	if (bits == 0)
100955714Skris		{
1010109998Smarkm		ret = BN_one(r);
1011109998Smarkm		return ret;
101255714Skris		}
101355714Skris
101459191Skris	BN_CTX_start(ctx);
1015160814Ssimon	d = BN_CTX_get(ctx);
1016160814Ssimon	val[0] = BN_CTX_get(ctx);
1017160814Ssimon	if(!d || !val[0]) goto err;
101859191Skris
1019160814Ssimon	if (!BN_nnmod(val[0],a,m,ctx)) goto err;		/* 1 */
1020160814Ssimon	if (BN_is_zero(val[0]))
1021109998Smarkm		{
1022160814Ssimon		BN_zero(r);
1023160814Ssimon		ret = 1;
1024109998Smarkm		goto err;
1025109998Smarkm		}
102655714Skris
102768651Skris	window = BN_window_bits_for_exponent_size(bits);
102868651Skris	if (window > 1)
102955714Skris		{
1030160814Ssimon		if (!BN_mod_mul(d,val[0],val[0],m,ctx))
103168651Skris			goto err;				/* 2 */
103268651Skris		j=1<<(window-1);
103368651Skris		for (i=1; i<j; i++)
103468651Skris			{
1035160814Ssimon			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
1036160814Ssimon					!BN_mod_mul(val[i],val[i-1],d,m,ctx))
103768651Skris				goto err;
103868651Skris			}
103955714Skris		}
104055714Skris
104155714Skris	start=1;	/* This is used to avoid multiplication etc
104255714Skris			 * when there is only the value '1' in the
104355714Skris			 * buffer. */
104455714Skris	wvalue=0;	/* The 'value' of the window */
104555714Skris	wstart=bits-1;	/* The top bit of the window */
104655714Skris	wend=0;		/* The bottom bit of the window */
104755714Skris
104855714Skris	if (!BN_one(r)) goto err;
104955714Skris
105055714Skris	for (;;)
105155714Skris		{
105255714Skris		if (BN_is_bit_set(p,wstart) == 0)
105355714Skris			{
105455714Skris			if (!start)
105555714Skris				if (!BN_mod_mul(r,r,r,m,ctx))
105655714Skris				goto err;
105755714Skris			if (wstart == 0) break;
105855714Skris			wstart--;
105955714Skris			continue;
106055714Skris			}
106155714Skris		/* We now have wstart on a 'set' bit, we now need to work out
106255714Skris		 * how bit a window to do.  To do this we need to scan
106355714Skris		 * forward until the last set bit before the end of the
106455714Skris		 * window */
106555714Skris		j=wstart;
106655714Skris		wvalue=1;
106755714Skris		wend=0;
106855714Skris		for (i=1; i<window; i++)
106955714Skris			{
107055714Skris			if (wstart-i < 0) break;
107155714Skris			if (BN_is_bit_set(p,wstart-i))
107255714Skris				{
107355714Skris				wvalue<<=(i-wend);
107455714Skris				wvalue|=1;
107555714Skris				wend=i;
107655714Skris				}
107755714Skris			}
107855714Skris
107955714Skris		/* wend is the size of the current window */
108055714Skris		j=wend+1;
108155714Skris		/* add the 'bytes above' */
108255714Skris		if (!start)
108355714Skris			for (i=0; i<j; i++)
108455714Skris				{
108555714Skris				if (!BN_mod_mul(r,r,r,m,ctx))
108655714Skris					goto err;
108755714Skris				}
108855714Skris
108955714Skris		/* wvalue will be an odd number < 2^window */
1090160814Ssimon		if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
109155714Skris			goto err;
109255714Skris
109355714Skris		/* move the 'window' down further */
109455714Skris		wstart-=wend+1;
109555714Skris		wvalue=0;
109655714Skris		start=0;
109755714Skris		if (wstart < 0) break;
109855714Skris		}
109955714Skris	ret=1;
110055714Skriserr:
110159191Skris	BN_CTX_end(ctx);
1102160814Ssimon	bn_check_top(r);
110355714Skris	return(ret);
110455714Skris	}
1105