155714Skris/* crypto/bn/bn_exp.c */
255714Skris/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
355714Skris * All rights reserved.
455714Skris *
555714Skris * This package is an SSL implementation written
655714Skris * by Eric Young (eay@cryptsoft.com).
755714Skris * The implementation was written so as to conform with Netscapes SSL.
8296341Sdelphij *
955714Skris * This library is free for commercial and non-commercial use as long as
1055714Skris * the following conditions are aheared to.  The following conditions
1155714Skris * apply to all code found in this distribution, be it the RC4, RSA,
1255714Skris * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
1355714Skris * included with this distribution is covered by the same copyright terms
1455714Skris * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15296341Sdelphij *
1655714Skris * Copyright remains Eric Young's, and as such any Copyright notices in
1755714Skris * the code are not to be removed.
1855714Skris * If this package is used in a product, Eric Young should be given attribution
1955714Skris * as the author of the parts of the library used.
2055714Skris * This can be in the form of a textual message at program startup or
2155714Skris * in documentation (online or textual) provided with the package.
22296341Sdelphij *
2355714Skris * Redistribution and use in source and binary forms, with or without
2455714Skris * modification, are permitted provided that the following conditions
2555714Skris * are met:
2655714Skris * 1. Redistributions of source code must retain the copyright
2755714Skris *    notice, this list of conditions and the following disclaimer.
2855714Skris * 2. Redistributions in binary form must reproduce the above copyright
2955714Skris *    notice, this list of conditions and the following disclaimer in the
3055714Skris *    documentation and/or other materials provided with the distribution.
3155714Skris * 3. All advertising materials mentioning features or use of this software
3255714Skris *    must display the following acknowledgement:
3355714Skris *    "This product includes cryptographic software written by
3455714Skris *     Eric Young (eay@cryptsoft.com)"
3555714Skris *    The word 'cryptographic' can be left out if the rouines from the library
3655714Skris *    being used are not cryptographic related :-).
37296341Sdelphij * 4. If you include any Windows specific code (or a derivative thereof) from
3855714Skris *    the apps directory (application code) you must include an acknowledgement:
3955714Skris *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40296341Sdelphij *
4155714Skris * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
4255714Skris * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4355714Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4455714Skris * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
4555714Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4655714Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4755714Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4855714Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4955714Skris * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
5055714Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5155714Skris * SUCH DAMAGE.
52296341Sdelphij *
5355714Skris * The licence and distribution terms for any publically available version or
5455714Skris * derivative of this code cannot be changed.  i.e. this code cannot simply be
5555714Skris * copied and put under another distribution licence
5655714Skris * [including the GNU Public Licence.]
5755714Skris */
5868651Skris/* ====================================================================
59160814Ssimon * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
6068651Skris *
6168651Skris * Redistribution and use in source and binary forms, with or without
6268651Skris * modification, are permitted provided that the following conditions
6368651Skris * are met:
6468651Skris *
6568651Skris * 1. Redistributions of source code must retain the above copyright
66296341Sdelphij *    notice, this list of conditions and the following disclaimer.
6768651Skris *
6868651Skris * 2. Redistributions in binary form must reproduce the above copyright
6968651Skris *    notice, this list of conditions and the following disclaimer in
7068651Skris *    the documentation and/or other materials provided with the
7168651Skris *    distribution.
7268651Skris *
7368651Skris * 3. All advertising materials mentioning features or use of this
7468651Skris *    software must display the following acknowledgment:
7568651Skris *    "This product includes software developed by the OpenSSL Project
7668651Skris *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
7768651Skris *
7868651Skris * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
7968651Skris *    endorse or promote products derived from this software without
8068651Skris *    prior written permission. For written permission, please contact
8168651Skris *    openssl-core@openssl.org.
8268651Skris *
8368651Skris * 5. Products derived from this software may not be called "OpenSSL"
8468651Skris *    nor may "OpenSSL" appear in their names without prior written
8568651Skris *    permission of the OpenSSL Project.
8668651Skris *
8768651Skris * 6. Redistributions of any form whatsoever must retain the following
8868651Skris *    acknowledgment:
8968651Skris *    "This product includes software developed by the OpenSSL Project
9068651Skris *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
9168651Skris *
9268651Skris * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
9368651Skris * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
9468651Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
9568651Skris * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
9668651Skris * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
9768651Skris * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
9868651Skris * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
9968651Skris * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
10068651Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
10168651Skris * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
10268651Skris * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
10368651Skris * OF THE POSSIBILITY OF SUCH DAMAGE.
10468651Skris * ====================================================================
10568651Skris *
10668651Skris * This product includes cryptographic software written by Eric Young
10768651Skris * (eay@cryptsoft.com).  This product includes software written by Tim
10868651Skris * Hudson (tjh@cryptsoft.com).
10968651Skris *
11068651Skris */
11155714Skris
11255714Skris#include "cryptlib.h"
113296341Sdelphij#include "constant_time_locl.h"
11455714Skris#include "bn_lcl.h"
11555714Skris
116238405Sjkim#include <stdlib.h>
117238405Sjkim#ifdef _WIN32
118238405Sjkim# include <malloc.h>
119238405Sjkim# ifndef alloca
120238405Sjkim#  define alloca _alloca
121238405Sjkim# endif
122238405Sjkim#elif defined(__GNUC__)
123238405Sjkim# ifndef alloca
124238405Sjkim#  define alloca(s) __builtin_alloca((s))
125238405Sjkim# endif
126238405Sjkim#endif
127238405Sjkim
128160814Ssimon/* maximum precomputation table size for *variable* sliding windows */
129296341Sdelphij#define TABLE_SIZE      32
13068651Skris
13155714Skris/* this one works - simple but works */
132109998Smarkmint BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
133296341Sdelphij{
134296341Sdelphij    int i, bits, ret = 0;
135296341Sdelphij    BIGNUM *v, *rr;
13655714Skris
137296341Sdelphij    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
138296341Sdelphij        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
139296341Sdelphij        BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
140296341Sdelphij        return -1;
141296341Sdelphij    }
142160814Ssimon
143296341Sdelphij    BN_CTX_start(ctx);
144296341Sdelphij    if ((r == a) || (r == p))
145296341Sdelphij        rr = BN_CTX_get(ctx);
146296341Sdelphij    else
147296341Sdelphij        rr = r;
148296341Sdelphij    v = BN_CTX_get(ctx);
149296341Sdelphij    if (rr == NULL || v == NULL)
150296341Sdelphij        goto err;
15155714Skris
152296341Sdelphij    if (BN_copy(v, a) == NULL)
153296341Sdelphij        goto err;
154296341Sdelphij    bits = BN_num_bits(p);
15555714Skris
156296341Sdelphij    if (BN_is_odd(p)) {
157296341Sdelphij        if (BN_copy(rr, a) == NULL)
158296341Sdelphij            goto err;
159296341Sdelphij    } else {
160296341Sdelphij        if (!BN_one(rr))
161296341Sdelphij            goto err;
162296341Sdelphij    }
16355714Skris
164296341Sdelphij    for (i = 1; i < bits; i++) {
165296341Sdelphij        if (!BN_sqr(v, v, ctx))
166296341Sdelphij            goto err;
167296341Sdelphij        if (BN_is_bit_set(p, i)) {
168296341Sdelphij            if (!BN_mul(rr, rr, v, ctx))
169296341Sdelphij                goto err;
170296341Sdelphij        }
171296341Sdelphij    }
172296341Sdelphij    if (r != rr)
173296341Sdelphij        BN_copy(r, rr);
174296341Sdelphij    ret = 1;
175296341Sdelphij err:
176296341Sdelphij    BN_CTX_end(ctx);
177296341Sdelphij    bn_check_top(r);
178296341Sdelphij    return (ret);
179296341Sdelphij}
18055714Skris
181109998Smarkmint BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
182296341Sdelphij               BN_CTX *ctx)
183296341Sdelphij{
184296341Sdelphij    int ret;
18555714Skris
186296341Sdelphij    bn_check_top(a);
187296341Sdelphij    bn_check_top(p);
188296341Sdelphij    bn_check_top(m);
18955714Skris
190296341Sdelphij    /*-
191296341Sdelphij     * For even modulus  m = 2^k*m_odd,  it might make sense to compute
192296341Sdelphij     * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
193296341Sdelphij     * exponentiation for the odd part), using appropriate exponent
194296341Sdelphij     * reductions, and combine the results using the CRT.
195296341Sdelphij     *
196296341Sdelphij     * For now, we use Montgomery only if the modulus is odd; otherwise,
197296341Sdelphij     * exponentiation using the reciprocal-based quick remaindering
198296341Sdelphij     * algorithm is used.
199296341Sdelphij     *
200296341Sdelphij     * (Timing obtained with expspeed.c [computations  a^p mod m
201296341Sdelphij     * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
202296341Sdelphij     * 4096, 8192 bits], compared to the running time of the
203296341Sdelphij     * standard algorithm:
204296341Sdelphij     *
205296341Sdelphij     *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
206296341Sdelphij     *                     55 .. 77 %  [UltraSparc processor, but
207296341Sdelphij     *                                  debug-solaris-sparcv8-gcc conf.]
208296341Sdelphij     *
209296341Sdelphij     *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
210296341Sdelphij     *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
211296341Sdelphij     *
212296341Sdelphij     * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
213296341Sdelphij     * at 2048 and more bits, but at 512 and 1024 bits, it was
214296341Sdelphij     * slower even than the standard algorithm!
215296341Sdelphij     *
216296341Sdelphij     * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
217296341Sdelphij     * should be obtained when the new Montgomery reduction code
218296341Sdelphij     * has been integrated into OpenSSL.)
219296341Sdelphij     */
22059191Skris
221109998Smarkm#define MONT_MUL_MOD
222109998Smarkm#define MONT_EXP_WORD
223109998Smarkm#define RECP_MUL_MOD
224109998Smarkm
22555714Skris#ifdef MONT_MUL_MOD
226296341Sdelphij    /*
227296341Sdelphij     * I have finally been able to take out this pre-condition of the top bit
228296341Sdelphij     * being set.  It was caused by an error in BN_div with negatives.  There
229296341Sdelphij     * was also another problem when for a^b%m a >= m.  eay 07-May-97
230296341Sdelphij     */
231296341Sdelphij    /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
23255714Skris
233296341Sdelphij    if (BN_is_odd(m)) {
234296341Sdelphij# ifdef MONT_EXP_WORD
235296341Sdelphij        if (a->top == 1 && !a->neg
236296341Sdelphij            && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
237296341Sdelphij            BN_ULONG A = a->d[0];
238296341Sdelphij            ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
239296341Sdelphij        } else
240296341Sdelphij# endif
241296341Sdelphij            ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
242296341Sdelphij    } else
24355714Skris#endif
24455714Skris#ifdef RECP_MUL_MOD
245296341Sdelphij    {
246296341Sdelphij        ret = BN_mod_exp_recp(r, a, p, m, ctx);
247296341Sdelphij    }
24855714Skris#else
249296341Sdelphij    {
250296341Sdelphij        ret = BN_mod_exp_simple(r, a, p, m, ctx);
251296341Sdelphij    }
25255714Skris#endif
25355714Skris
254296341Sdelphij    bn_check_top(r);
255296341Sdelphij    return (ret);
256296341Sdelphij}
25755714Skris
25855714Skrisint BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
259296341Sdelphij                    const BIGNUM *m, BN_CTX *ctx)
260296341Sdelphij{
261296341Sdelphij    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
262296341Sdelphij    int start = 1;
263296341Sdelphij    BIGNUM *aa;
264296341Sdelphij    /* Table of variables obtained from 'ctx' */
265296341Sdelphij    BIGNUM *val[TABLE_SIZE];
266296341Sdelphij    BN_RECP_CTX recp;
26755714Skris
268296341Sdelphij    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
269296341Sdelphij        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
270296341Sdelphij        BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
271296341Sdelphij        return -1;
272296341Sdelphij    }
273160814Ssimon
274296341Sdelphij    bits = BN_num_bits(p);
27555714Skris
276296341Sdelphij    if (bits == 0) {
277296341Sdelphij        ret = BN_one(r);
278296341Sdelphij        return ret;
279296341Sdelphij    }
28059191Skris
281296341Sdelphij    BN_CTX_start(ctx);
282296341Sdelphij    aa = BN_CTX_get(ctx);
283296341Sdelphij    val[0] = BN_CTX_get(ctx);
284296341Sdelphij    if (!aa || !val[0])
285296341Sdelphij        goto err;
28659191Skris
287296341Sdelphij    BN_RECP_CTX_init(&recp);
288296341Sdelphij    if (m->neg) {
289296341Sdelphij        /* ignore sign of 'm' */
290296341Sdelphij        if (!BN_copy(aa, m))
291296341Sdelphij            goto err;
292296341Sdelphij        aa->neg = 0;
293296341Sdelphij        if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
294296341Sdelphij            goto err;
295296341Sdelphij    } else {
296296341Sdelphij        if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
297296341Sdelphij            goto err;
298296341Sdelphij    }
29955714Skris
300296341Sdelphij    if (!BN_nnmod(val[0], a, m, ctx))
301296341Sdelphij        goto err;               /* 1 */
302296341Sdelphij    if (BN_is_zero(val[0])) {
303296341Sdelphij        BN_zero(r);
304296341Sdelphij        ret = 1;
305296341Sdelphij        goto err;
306296341Sdelphij    }
30755714Skris
308296341Sdelphij    window = BN_window_bits_for_exponent_size(bits);
309296341Sdelphij    if (window > 1) {
310296341Sdelphij        if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
311296341Sdelphij            goto err;           /* 2 */
312296341Sdelphij        j = 1 << (window - 1);
313296341Sdelphij        for (i = 1; i < j; i++) {
314296341Sdelphij            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
315296341Sdelphij                !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
316296341Sdelphij                goto err;
317296341Sdelphij        }
318296341Sdelphij    }
31955714Skris
320296341Sdelphij    start = 1;                  /* This is used to avoid multiplication etc
321296341Sdelphij                                 * when there is only the value '1' in the
322296341Sdelphij                                 * buffer. */
323296341Sdelphij    wvalue = 0;                 /* The 'value' of the window */
324296341Sdelphij    wstart = bits - 1;          /* The top bit of the window */
325296341Sdelphij    wend = 0;                   /* The bottom bit of the window */
32655714Skris
327296341Sdelphij    if (!BN_one(r))
328296341Sdelphij        goto err;
32955714Skris
330296341Sdelphij    for (;;) {
331296341Sdelphij        if (BN_is_bit_set(p, wstart) == 0) {
332296341Sdelphij            if (!start)
333296341Sdelphij                if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
334296341Sdelphij                    goto err;
335296341Sdelphij            if (wstart == 0)
336296341Sdelphij                break;
337296341Sdelphij            wstart--;
338296341Sdelphij            continue;
339296341Sdelphij        }
340296341Sdelphij        /*
341296341Sdelphij         * We now have wstart on a 'set' bit, we now need to work out how bit
342296341Sdelphij         * a window to do.  To do this we need to scan forward until the last
343296341Sdelphij         * set bit before the end of the window
344296341Sdelphij         */
345296341Sdelphij        j = wstart;
346296341Sdelphij        wvalue = 1;
347296341Sdelphij        wend = 0;
348296341Sdelphij        for (i = 1; i < window; i++) {
349296341Sdelphij            if (wstart - i < 0)
350296341Sdelphij                break;
351296341Sdelphij            if (BN_is_bit_set(p, wstart - i)) {
352296341Sdelphij                wvalue <<= (i - wend);
353296341Sdelphij                wvalue |= 1;
354296341Sdelphij                wend = i;
355296341Sdelphij            }
356296341Sdelphij        }
35755714Skris
358296341Sdelphij        /* wend is the size of the current window */
359296341Sdelphij        j = wend + 1;
360296341Sdelphij        /* add the 'bytes above' */
361296341Sdelphij        if (!start)
362296341Sdelphij            for (i = 0; i < j; i++) {
363296341Sdelphij                if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
364296341Sdelphij                    goto err;
365296341Sdelphij            }
36655714Skris
367296341Sdelphij        /* wvalue will be an odd number < 2^window */
368296341Sdelphij        if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
369296341Sdelphij            goto err;
37068651Skris
371296341Sdelphij        /* move the 'window' down further */
372296341Sdelphij        wstart -= wend + 1;
373296341Sdelphij        wvalue = 0;
374296341Sdelphij        start = 0;
375296341Sdelphij        if (wstart < 0)
376296341Sdelphij            break;
377296341Sdelphij    }
378296341Sdelphij    ret = 1;
379296341Sdelphij err:
380296341Sdelphij    BN_CTX_end(ctx);
381296341Sdelphij    BN_RECP_CTX_free(&recp);
382296341Sdelphij    bn_check_top(r);
383296341Sdelphij    return (ret);
384296341Sdelphij}
385296341Sdelphij
386109998Smarkmint BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
387296341Sdelphij                    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
388296341Sdelphij{
389296341Sdelphij    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
390296341Sdelphij    int start = 1;
391296341Sdelphij    BIGNUM *d, *r;
392296341Sdelphij    const BIGNUM *aa;
393296341Sdelphij    /* Table of variables obtained from 'ctx' */
394296341Sdelphij    BIGNUM *val[TABLE_SIZE];
395296341Sdelphij    BN_MONT_CTX *mont = NULL;
39655714Skris
397296341Sdelphij    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
398296341Sdelphij        return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
399296341Sdelphij    }
400160814Ssimon
401296341Sdelphij    bn_check_top(a);
402296341Sdelphij    bn_check_top(p);
403296341Sdelphij    bn_check_top(m);
40455714Skris
405296341Sdelphij    if (!BN_is_odd(m)) {
406296341Sdelphij        BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
407296341Sdelphij        return (0);
408296341Sdelphij    }
409296341Sdelphij    bits = BN_num_bits(p);
410296341Sdelphij    if (bits == 0) {
411296341Sdelphij        ret = BN_one(rr);
412296341Sdelphij        return ret;
413296341Sdelphij    }
414109998Smarkm
415296341Sdelphij    BN_CTX_start(ctx);
416296341Sdelphij    d = BN_CTX_get(ctx);
417296341Sdelphij    r = BN_CTX_get(ctx);
418296341Sdelphij    val[0] = BN_CTX_get(ctx);
419296341Sdelphij    if (!d || !r || !val[0])
420296341Sdelphij        goto err;
42155714Skris
422296341Sdelphij    /*
423296341Sdelphij     * If this is not done, things will break in the montgomery part
424296341Sdelphij     */
42555714Skris
426296341Sdelphij    if (in_mont != NULL)
427296341Sdelphij        mont = in_mont;
428296341Sdelphij    else {
429296341Sdelphij        if ((mont = BN_MONT_CTX_new()) == NULL)
430296341Sdelphij            goto err;
431296341Sdelphij        if (!BN_MONT_CTX_set(mont, m, ctx))
432296341Sdelphij            goto err;
433296341Sdelphij    }
43455714Skris
435296341Sdelphij    if (a->neg || BN_ucmp(a, m) >= 0) {
436296341Sdelphij        if (!BN_nnmod(val[0], a, m, ctx))
437296341Sdelphij            goto err;
438296341Sdelphij        aa = val[0];
439296341Sdelphij    } else
440296341Sdelphij        aa = a;
441296341Sdelphij    if (BN_is_zero(aa)) {
442296341Sdelphij        BN_zero(rr);
443296341Sdelphij        ret = 1;
444296341Sdelphij        goto err;
445296341Sdelphij    }
446296341Sdelphij    if (!BN_to_montgomery(val[0], aa, mont, ctx))
447296341Sdelphij        goto err;               /* 1 */
44855714Skris
449296341Sdelphij    window = BN_window_bits_for_exponent_size(bits);
450296341Sdelphij    if (window > 1) {
451296341Sdelphij        if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
452296341Sdelphij            goto err;           /* 2 */
453296341Sdelphij        j = 1 << (window - 1);
454296341Sdelphij        for (i = 1; i < j; i++) {
455296341Sdelphij            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
456296341Sdelphij                !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
457296341Sdelphij                goto err;
458296341Sdelphij        }
459296341Sdelphij    }
46055714Skris
461296341Sdelphij    start = 1;                  /* This is used to avoid multiplication etc
462296341Sdelphij                                 * when there is only the value '1' in the
463296341Sdelphij                                 * buffer. */
464296341Sdelphij    wvalue = 0;                 /* The 'value' of the window */
465296341Sdelphij    wstart = bits - 1;          /* The top bit of the window */
466296341Sdelphij    wend = 0;                   /* The bottom bit of the window */
46755714Skris
468296341Sdelphij    if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
469296341Sdelphij        goto err;
470296341Sdelphij    for (;;) {
471296341Sdelphij        if (BN_is_bit_set(p, wstart) == 0) {
472296341Sdelphij            if (!start) {
473296341Sdelphij                if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
474296341Sdelphij                    goto err;
475296341Sdelphij            }
476296341Sdelphij            if (wstart == 0)
477296341Sdelphij                break;
478296341Sdelphij            wstart--;
479296341Sdelphij            continue;
480296341Sdelphij        }
481296341Sdelphij        /*
482296341Sdelphij         * We now have wstart on a 'set' bit, we now need to work out how bit
483296341Sdelphij         * a window to do.  To do this we need to scan forward until the last
484296341Sdelphij         * set bit before the end of the window
485296341Sdelphij         */
486296341Sdelphij        j = wstart;
487296341Sdelphij        wvalue = 1;
488296341Sdelphij        wend = 0;
489296341Sdelphij        for (i = 1; i < window; i++) {
490296341Sdelphij            if (wstart - i < 0)
491296341Sdelphij                break;
492296341Sdelphij            if (BN_is_bit_set(p, wstart - i)) {
493296341Sdelphij                wvalue <<= (i - wend);
494296341Sdelphij                wvalue |= 1;
495296341Sdelphij                wend = i;
496296341Sdelphij            }
497296341Sdelphij        }
49855714Skris
499296341Sdelphij        /* wend is the size of the current window */
500296341Sdelphij        j = wend + 1;
501296341Sdelphij        /* add the 'bytes above' */
502296341Sdelphij        if (!start)
503296341Sdelphij            for (i = 0; i < j; i++) {
504296341Sdelphij                if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
505296341Sdelphij                    goto err;
506296341Sdelphij            }
50755714Skris
508296341Sdelphij        /* wvalue will be an odd number < 2^window */
509296341Sdelphij        if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
510296341Sdelphij            goto err;
51155714Skris
512296341Sdelphij        /* move the 'window' down further */
513296341Sdelphij        wstart -= wend + 1;
514296341Sdelphij        wvalue = 0;
515296341Sdelphij        start = 0;
516296341Sdelphij        if (wstart < 0)
517296341Sdelphij            break;
518296341Sdelphij    }
519296341Sdelphij    if (!BN_from_montgomery(rr, r, mont, ctx))
520296341Sdelphij        goto err;
521296341Sdelphij    ret = 1;
522296341Sdelphij err:
523296341Sdelphij    if ((in_mont == NULL) && (mont != NULL))
524296341Sdelphij        BN_MONT_CTX_free(mont);
525296341Sdelphij    BN_CTX_end(ctx);
526296341Sdelphij    bn_check_top(rr);
527296341Sdelphij    return (ret);
528296341Sdelphij}
529160814Ssimon
530296341Sdelphij/*
531296341Sdelphij * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
532296341Sdelphij * layout so that accessing any of these table values shows the same access
533296341Sdelphij * pattern as far as cache lines are concerned.  The following functions are
534296341Sdelphij * used to transfer a BIGNUM from/to that table.
535296341Sdelphij */
536160814Ssimon
537296341Sdelphijstatic int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
538296341Sdelphij                                        unsigned char *buf, int idx,
539296341Sdelphij                                        int window)
540296341Sdelphij{
541296341Sdelphij    int i, j;
542296341Sdelphij    int width = 1 << window;
543296341Sdelphij    BN_ULONG *table = (BN_ULONG *)buf;
544160814Ssimon
545296341Sdelphij    if (top > b->top)
546296341Sdelphij        top = b->top;           /* this works because 'buf' is explicitly
547296341Sdelphij                                 * zeroed */
548296341Sdelphij    for (i = 0, j = idx; i < top; i++, j += width) {
549296341Sdelphij        table[j] = b->d[i];
550296341Sdelphij    }
551160814Ssimon
552296341Sdelphij    return 1;
553296341Sdelphij}
554160814Ssimon
555296341Sdelphijstatic int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
556296341Sdelphij                                          unsigned char *buf, int idx,
557296341Sdelphij                                          int window)
558296341Sdelphij{
559296341Sdelphij    int i, j;
560296341Sdelphij    int width = 1 << window;
561296341Sdelphij    volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
562160814Ssimon
563296341Sdelphij    if (bn_wexpand(b, top) == NULL)
564296341Sdelphij        return 0;
565160814Ssimon
566296341Sdelphij    if (window <= 3) {
567296341Sdelphij        for (i = 0; i < top; i++, table += width) {
568296341Sdelphij            BN_ULONG acc = 0;
569160814Ssimon
570296341Sdelphij            for (j = 0; j < width; j++) {
571296341Sdelphij                acc |= table[j] &
572296341Sdelphij                       ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
573296341Sdelphij            }
574160814Ssimon
575296341Sdelphij            b->d[i] = acc;
576296341Sdelphij        }
577296341Sdelphij    } else {
578296341Sdelphij        int xstride = 1 << (window - 2);
579296341Sdelphij        BN_ULONG y0, y1, y2, y3;
580296341Sdelphij
581296341Sdelphij        i = idx >> (window - 2);        /* equivalent of idx / xstride */
582296341Sdelphij        idx &= xstride - 1;             /* equivalent of idx % xstride */
583296341Sdelphij
584296341Sdelphij        y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
585296341Sdelphij        y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
586296341Sdelphij        y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
587296341Sdelphij        y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
588296341Sdelphij
589296341Sdelphij        for (i = 0; i < top; i++, table += width) {
590296341Sdelphij            BN_ULONG acc = 0;
591296341Sdelphij
592296341Sdelphij            for (j = 0; j < xstride; j++) {
593296341Sdelphij                acc |= ( (table[j + 0 * xstride] & y0) |
594296341Sdelphij                         (table[j + 1 * xstride] & y1) |
595296341Sdelphij                         (table[j + 2 * xstride] & y2) |
596296341Sdelphij                         (table[j + 3 * xstride] & y3) )
597296341Sdelphij                       & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
598296341Sdelphij            }
599296341Sdelphij
600296341Sdelphij            b->d[i] = acc;
601296341Sdelphij        }
602296341Sdelphij    }
603296341Sdelphij
604296341Sdelphij    b->top = top;
605296341Sdelphij    bn_correct_top(b);
606296341Sdelphij    return 1;
607296341Sdelphij}
608296341Sdelphij
609296341Sdelphij/*
610296341Sdelphij * Given a pointer value, compute the next address that is a cache line
611296341Sdelphij * multiple.
612296341Sdelphij */
613160814Ssimon#define MOD_EXP_CTIME_ALIGN(x_) \
614296341Sdelphij        ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
615160814Ssimon
616296341Sdelphij/*
617296341Sdelphij * This variant of BN_mod_exp_mont() uses fixed windows and the special
618296341Sdelphij * precomputation memory layout to limit data-dependency to a minimum to
619296341Sdelphij * protect secret exponents (cf. the hyper-threading timing attacks pointed
620296341Sdelphij * out by Colin Percival,
621296341Sdelphij * http://www.daemong-consideredperthreading-considered-harmful/)
622160814Ssimon */
623160814Ssimonint BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
624296341Sdelphij                              const BIGNUM *m, BN_CTX *ctx,
625296341Sdelphij                              BN_MONT_CTX *in_mont)
626296341Sdelphij{
627296341Sdelphij    int i, bits, ret = 0, window, wvalue;
628296341Sdelphij    int top;
629296341Sdelphij    BN_MONT_CTX *mont = NULL;
630160814Ssimon
631296341Sdelphij    int numPowers;
632296341Sdelphij    unsigned char *powerbufFree = NULL;
633296341Sdelphij    int powerbufLen = 0;
634296341Sdelphij    unsigned char *powerbuf = NULL;
635296341Sdelphij    BIGNUM tmp, am;
636160814Ssimon
637296341Sdelphij    bn_check_top(a);
638296341Sdelphij    bn_check_top(p);
639296341Sdelphij    bn_check_top(m);
640160814Ssimon
641296341Sdelphij    top = m->top;
642160814Ssimon
643296341Sdelphij    if (!(m->d[0] & 1)) {
644296341Sdelphij        BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
645296341Sdelphij        return (0);
646296341Sdelphij    }
647296341Sdelphij    bits = BN_num_bits(p);
648296341Sdelphij    if (bits == 0) {
649296341Sdelphij        ret = BN_one(rr);
650296341Sdelphij        return ret;
651296341Sdelphij    }
652160814Ssimon
653296341Sdelphij    BN_CTX_start(ctx);
654160814Ssimon
655296341Sdelphij    /*
656296341Sdelphij     * Allocate a montgomery context if it was not supplied by the caller. If
657296341Sdelphij     * this is not done, things will break in the montgomery part.
658296341Sdelphij     */
659296341Sdelphij    if (in_mont != NULL)
660296341Sdelphij        mont = in_mont;
661296341Sdelphij    else {
662296341Sdelphij        if ((mont = BN_MONT_CTX_new()) == NULL)
663296341Sdelphij            goto err;
664296341Sdelphij        if (!BN_MONT_CTX_set(mont, m, ctx))
665296341Sdelphij            goto err;
666296341Sdelphij    }
667160814Ssimon
668296341Sdelphij    /* Get the window size to use with size of p. */
669296341Sdelphij    window = BN_window_bits_for_ctime_exponent_size(bits);
670238405Sjkim#if defined(OPENSSL_BN_ASM_MONT5)
671296341Sdelphij    if (window == 6 && bits <= 1024)
672296341Sdelphij        window = 5;             /* ~5% improvement of 2048-bit RSA sign */
673238405Sjkim#endif
674160814Ssimon
675296341Sdelphij    /*
676296341Sdelphij     * Allocate a buffer large enough to hold all of the pre-computed powers
677296341Sdelphij     * of am, am itself and tmp.
678296341Sdelphij     */
679296341Sdelphij    numPowers = 1 << window;
680296341Sdelphij    powerbufLen = sizeof(m->d[0]) * (top * numPowers +
681296341Sdelphij                                     ((2 * top) >
682296341Sdelphij                                      numPowers ? (2 * top) : numPowers));
683238405Sjkim#ifdef alloca
684296341Sdelphij    if (powerbufLen < 3072)
685296341Sdelphij        powerbufFree =
686296341Sdelphij            alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
687296341Sdelphij    else
688238405Sjkim#endif
689296341Sdelphij        if ((powerbufFree =
690296341Sdelphij             (unsigned char *)OPENSSL_malloc(powerbufLen +
691296341Sdelphij                                             MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
692296341Sdelphij            == NULL)
693296341Sdelphij        goto err;
694160814Ssimon
695296341Sdelphij    powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
696296341Sdelphij    memset(powerbuf, 0, powerbufLen);
697296341Sdelphij
698238405Sjkim#ifdef alloca
699296341Sdelphij    if (powerbufLen < 3072)
700296341Sdelphij        powerbufFree = NULL;
701238405Sjkim#endif
702160814Ssimon
703296341Sdelphij    /* lay down tmp and am right after powers table */
704296341Sdelphij    tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
705296341Sdelphij    am.d = tmp.d + top;
706296341Sdelphij    tmp.top = am.top = 0;
707296341Sdelphij    tmp.dmax = am.dmax = top;
708296341Sdelphij    tmp.neg = am.neg = 0;
709296341Sdelphij    tmp.flags = am.flags = BN_FLG_STATIC_DATA;
710160814Ssimon
711296341Sdelphij    /* prepare a^0 in Montgomery domain */
712238405Sjkim#if 1
713296341Sdelphij    if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
714296341Sdelphij        goto err;
715238405Sjkim#else
716296341Sdelphij    tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */
717296341Sdelphij    for (i = 1; i < top; i++)
718296341Sdelphij        tmp.d[i] = (~m->d[i]) & BN_MASK2;
719296341Sdelphij    tmp.top = top;
720238405Sjkim#endif
721238405Sjkim
722296341Sdelphij    /* prepare a^1 in Montgomery domain */
723296341Sdelphij    if (a->neg || BN_ucmp(a, m) >= 0) {
724296341Sdelphij        if (!BN_mod(&am, a, m, ctx))
725296341Sdelphij            goto err;
726296341Sdelphij        if (!BN_to_montgomery(&am, &am, mont, ctx))
727296341Sdelphij            goto err;
728296341Sdelphij    } else if (!BN_to_montgomery(&am, a, mont, ctx))
729296341Sdelphij        goto err;
730160814Ssimon
731238405Sjkim#if defined(OPENSSL_BN_ASM_MONT5)
732296341Sdelphij    if (window == 5 && top > 1) {
733296341Sdelphij        /*
734296341Sdelphij         * This optimization uses ideas from http://eprint.iacr.org/2011/239,
735296341Sdelphij         * specifically optimization of cache-timing attack countermeasures
736296341Sdelphij         * and pre-computation optimization.
737296341Sdelphij         */
738238405Sjkim
739296341Sdelphij        /*
740296341Sdelphij         * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
741296341Sdelphij         * 512-bit RSA is hardly relevant, we omit it to spare size...
742296341Sdelphij         */
743296341Sdelphij        void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
744296341Sdelphij                                 const void *table, const BN_ULONG *np,
745296341Sdelphij                                 const BN_ULONG *n0, int num, int power);
746296341Sdelphij        void bn_scatter5(const BN_ULONG *inp, size_t num,
747296341Sdelphij                         void *table, size_t power);
748296341Sdelphij        void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
749238405Sjkim
750296341Sdelphij        BN_ULONG *np = mont->N.d, *n0 = mont->n0;
751238405Sjkim
752296341Sdelphij        /*
753296341Sdelphij         * BN_to_montgomery can contaminate words above .top [in
754296341Sdelphij         * BN_DEBUG[_DEBUG] build]...
755296341Sdelphij         */
756296341Sdelphij        for (i = am.top; i < top; i++)
757296341Sdelphij            am.d[i] = 0;
758296341Sdelphij        for (i = tmp.top; i < top; i++)
759296341Sdelphij            tmp.d[i] = 0;
760238405Sjkim
761296341Sdelphij        bn_scatter5(tmp.d, top, powerbuf, 0);
762296341Sdelphij        bn_scatter5(am.d, am.top, powerbuf, 1);
763296341Sdelphij        bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
764296341Sdelphij        bn_scatter5(tmp.d, top, powerbuf, 2);
765238405Sjkim
766296341Sdelphij# if 0
767296341Sdelphij        for (i = 3; i < 32; i++) {
768296341Sdelphij            /* Calculate a^i = a^(i-1) * a */
769296341Sdelphij            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
770296341Sdelphij            bn_scatter5(tmp.d, top, powerbuf, i);
771296341Sdelphij        }
772296341Sdelphij# else
773296341Sdelphij        /* same as above, but uses squaring for 1/2 of operations */
774296341Sdelphij        for (i = 4; i < 32; i *= 2) {
775296341Sdelphij            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
776296341Sdelphij            bn_scatter5(tmp.d, top, powerbuf, i);
777296341Sdelphij        }
778296341Sdelphij        for (i = 3; i < 8; i += 2) {
779296341Sdelphij            int j;
780296341Sdelphij            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
781296341Sdelphij            bn_scatter5(tmp.d, top, powerbuf, i);
782296341Sdelphij            for (j = 2 * i; j < 32; j *= 2) {
783296341Sdelphij                bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
784296341Sdelphij                bn_scatter5(tmp.d, top, powerbuf, j);
785296341Sdelphij            }
786296341Sdelphij        }
787296341Sdelphij        for (; i < 16; i += 2) {
788296341Sdelphij            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
789296341Sdelphij            bn_scatter5(tmp.d, top, powerbuf, i);
790296341Sdelphij            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
791296341Sdelphij            bn_scatter5(tmp.d, top, powerbuf, 2 * i);
792296341Sdelphij        }
793296341Sdelphij        for (; i < 32; i += 2) {
794296341Sdelphij            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
795296341Sdelphij            bn_scatter5(tmp.d, top, powerbuf, i);
796296341Sdelphij        }
797296341Sdelphij# endif
798296341Sdelphij        bits--;
799296341Sdelphij        for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
800296341Sdelphij            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
801296341Sdelphij        bn_gather5(tmp.d, top, powerbuf, wvalue);
802238405Sjkim
803296341Sdelphij        /*
804296341Sdelphij         * Scan the exponent one window at a time starting from the most
805296341Sdelphij         * significant bits.
806296341Sdelphij         */
807296341Sdelphij        while (bits >= 0) {
808296341Sdelphij            for (wvalue = 0, i = 0; i < 5; i++, bits--)
809296341Sdelphij                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
810238405Sjkim
811296341Sdelphij            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
812296341Sdelphij            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
813296341Sdelphij            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
814296341Sdelphij            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
815296341Sdelphij            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
816296341Sdelphij            bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
817296341Sdelphij        }
818238405Sjkim
819296341Sdelphij        tmp.top = top;
820296341Sdelphij        bn_correct_top(&tmp);
821296341Sdelphij    } else
822238405Sjkim#endif
823296341Sdelphij    {
824296341Sdelphij        if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window))
825296341Sdelphij            goto err;
826296341Sdelphij        if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window))
827296341Sdelphij            goto err;
828238405Sjkim
829296341Sdelphij        /*
830296341Sdelphij         * If the window size is greater than 1, then calculate
831296341Sdelphij         * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even
832296341Sdelphij         * powers could instead be computed as (a^(i/2))^2 to use the slight
833296341Sdelphij         * performance advantage of sqr over mul).
834296341Sdelphij         */
835296341Sdelphij        if (window > 1) {
836296341Sdelphij            if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
837296341Sdelphij                goto err;
838296341Sdelphij            if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2,
839296341Sdelphij                                              window))
840296341Sdelphij                goto err;
841296341Sdelphij            for (i = 3; i < numPowers; i++) {
842296341Sdelphij                /* Calculate a^i = a^(i-1) * a */
843296341Sdelphij                if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
844296341Sdelphij                    goto err;
845296341Sdelphij                if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i,
846296341Sdelphij                                                  window))
847296341Sdelphij                    goto err;
848296341Sdelphij            }
849296341Sdelphij        }
850160814Ssimon
851296341Sdelphij        bits--;
852296341Sdelphij        for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
853296341Sdelphij            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
854296341Sdelphij        if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue,
855296341Sdelphij                                            window))
856296341Sdelphij            goto err;
857160814Ssimon
858296341Sdelphij        /*
859296341Sdelphij         * Scan the exponent one window at a time starting from the most
860296341Sdelphij         * significant bits.
861296341Sdelphij         */
862296341Sdelphij        while (bits >= 0) {
863296341Sdelphij            wvalue = 0;         /* The 'value' of the window */
864160814Ssimon
865296341Sdelphij            /* Scan the window, squaring the result as we go */
866296341Sdelphij            for (i = 0; i < window; i++, bits--) {
867296341Sdelphij                if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
868296341Sdelphij                    goto err;
869296341Sdelphij                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
870296341Sdelphij            }
871160814Ssimon
872296341Sdelphij            /*
873296341Sdelphij             * Fetch the appropriate pre-computed value from the pre-buf
874296341Sdelphij             */
875296341Sdelphij            if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue,
876296341Sdelphij                                                window))
877296341Sdelphij                goto err;
878296341Sdelphij
879296341Sdelphij            /* Multiply the result into the intermediate result */
880296341Sdelphij            if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
881296341Sdelphij                goto err;
882296341Sdelphij        }
883296341Sdelphij    }
884296341Sdelphij
885296341Sdelphij    /* Convert the final result from montgomery to standard format */
886296341Sdelphij    if (!BN_from_montgomery(rr, &tmp, mont, ctx))
887296341Sdelphij        goto err;
888296341Sdelphij    ret = 1;
889296341Sdelphij err:
890296341Sdelphij    if ((in_mont == NULL) && (mont != NULL))
891296341Sdelphij        BN_MONT_CTX_free(mont);
892296341Sdelphij    if (powerbuf != NULL) {
893296341Sdelphij        OPENSSL_cleanse(powerbuf, powerbufLen);
894296341Sdelphij        if (powerbufFree)
895296341Sdelphij            OPENSSL_free(powerbufFree);
896296341Sdelphij    }
897296341Sdelphij    BN_CTX_end(ctx);
898296341Sdelphij    return (ret);
899296341Sdelphij}
900296341Sdelphij
90168651Skrisint BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
90268651Skris                         const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
903296341Sdelphij{
904296341Sdelphij    BN_MONT_CTX *mont = NULL;
905296341Sdelphij    int b, bits, ret = 0;
906296341Sdelphij    int r_is_one;
907296341Sdelphij    BN_ULONG w, next_w;
908296341Sdelphij    BIGNUM *d, *r, *t;
909296341Sdelphij    BIGNUM *swap_tmp;
91068651Skris#define BN_MOD_MUL_WORD(r, w, m) \
911296341Sdelphij                (BN_mul_word(r, (w)) && \
912296341Sdelphij                (/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
913296341Sdelphij                        (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
914296341Sdelphij    /*
915296341Sdelphij     * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
916296341Sdelphij     * probably more overhead than always using BN_mod (which uses BN_copy if
917296341Sdelphij     * a similar test returns true).
918296341Sdelphij     */
919296341Sdelphij    /*
920296341Sdelphij     * We can use BN_mod and do not need BN_nnmod because our accumulator is
921296341Sdelphij     * never negative (the result of BN_mod does not depend on the sign of
922296341Sdelphij     * the modulus).
923296341Sdelphij     */
92468651Skris#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
925296341Sdelphij                (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
92668651Skris
927296341Sdelphij    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
928296341Sdelphij        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
929296341Sdelphij        BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
930296341Sdelphij        return -1;
931296341Sdelphij    }
932160814Ssimon
933296341Sdelphij    bn_check_top(p);
934296341Sdelphij    bn_check_top(m);
93568651Skris
936296341Sdelphij    if (!BN_is_odd(m)) {
937296341Sdelphij        BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
938296341Sdelphij        return (0);
939296341Sdelphij    }
940296341Sdelphij    if (m->top == 1)
941296341Sdelphij        a %= m->d[0];           /* make sure that 'a' is reduced */
942109998Smarkm
943296341Sdelphij    bits = BN_num_bits(p);
944296341Sdelphij    if (bits == 0) {
945296341Sdelphij        /* x**0 mod 1 is still zero. */
946296341Sdelphij        if (BN_is_one(m)) {
947296341Sdelphij            ret = 1;
948296341Sdelphij            BN_zero(rr);
949296341Sdelphij        } else
950296341Sdelphij            ret = BN_one(rr);
951296341Sdelphij        return ret;
952296341Sdelphij    }
953296341Sdelphij    if (a == 0) {
954296341Sdelphij        BN_zero(rr);
955296341Sdelphij        ret = 1;
956296341Sdelphij        return ret;
957296341Sdelphij    }
958109998Smarkm
959296341Sdelphij    BN_CTX_start(ctx);
960296341Sdelphij    d = BN_CTX_get(ctx);
961296341Sdelphij    r = BN_CTX_get(ctx);
962296341Sdelphij    t = BN_CTX_get(ctx);
963296341Sdelphij    if (d == NULL || r == NULL || t == NULL)
964296341Sdelphij        goto err;
96568651Skris
966296341Sdelphij    if (in_mont != NULL)
967296341Sdelphij        mont = in_mont;
968296341Sdelphij    else {
969296341Sdelphij        if ((mont = BN_MONT_CTX_new()) == NULL)
970296341Sdelphij            goto err;
971296341Sdelphij        if (!BN_MONT_CTX_set(mont, m, ctx))
972296341Sdelphij            goto err;
973296341Sdelphij    }
97468651Skris
975296341Sdelphij    r_is_one = 1;               /* except for Montgomery factor */
97668651Skris
977296341Sdelphij    /* bits-1 >= 0 */
97868651Skris
979296341Sdelphij    /* The result is accumulated in the product r*w. */
980296341Sdelphij    w = a;                      /* bit 'bits-1' of 'p' is always set */
981296341Sdelphij    for (b = bits - 2; b >= 0; b--) {
982296341Sdelphij        /* First, square r*w. */
983296341Sdelphij        next_w = w * w;
984296341Sdelphij        if ((next_w / w) != w) { /* overflow */
985296341Sdelphij            if (r_is_one) {
986296341Sdelphij                if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
987296341Sdelphij                    goto err;
988296341Sdelphij                r_is_one = 0;
989296341Sdelphij            } else {
990296341Sdelphij                if (!BN_MOD_MUL_WORD(r, w, m))
991296341Sdelphij                    goto err;
992296341Sdelphij            }
993296341Sdelphij            next_w = 1;
994296341Sdelphij        }
995296341Sdelphij        w = next_w;
996296341Sdelphij        if (!r_is_one) {
997296341Sdelphij            if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
998296341Sdelphij                goto err;
999296341Sdelphij        }
100068651Skris
1001296341Sdelphij        /* Second, multiply r*w by 'a' if exponent bit is set. */
1002296341Sdelphij        if (BN_is_bit_set(p, b)) {
1003296341Sdelphij            next_w = w * a;
1004296341Sdelphij            if ((next_w / a) != w) { /* overflow */
1005296341Sdelphij                if (r_is_one) {
1006296341Sdelphij                    if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1007296341Sdelphij                        goto err;
1008296341Sdelphij                    r_is_one = 0;
1009296341Sdelphij                } else {
1010296341Sdelphij                    if (!BN_MOD_MUL_WORD(r, w, m))
1011296341Sdelphij                        goto err;
1012296341Sdelphij                }
1013296341Sdelphij                next_w = a;
1014296341Sdelphij            }
1015296341Sdelphij            w = next_w;
1016296341Sdelphij        }
1017296341Sdelphij    }
101868651Skris
1019296341Sdelphij    /* Finally, set r:=r*w. */
1020296341Sdelphij    if (w != 1) {
1021296341Sdelphij        if (r_is_one) {
1022296341Sdelphij            if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1023296341Sdelphij                goto err;
1024296341Sdelphij            r_is_one = 0;
1025296341Sdelphij        } else {
1026296341Sdelphij            if (!BN_MOD_MUL_WORD(r, w, m))
1027296341Sdelphij                goto err;
1028296341Sdelphij        }
1029296341Sdelphij    }
103068651Skris
1031296341Sdelphij    if (r_is_one) {             /* can happen only if a == 1 */
1032296341Sdelphij        if (!BN_one(rr))
1033296341Sdelphij            goto err;
1034296341Sdelphij    } else {
1035296341Sdelphij        if (!BN_from_montgomery(rr, r, mont, ctx))
1036296341Sdelphij            goto err;
1037296341Sdelphij    }
1038296341Sdelphij    ret = 1;
1039296341Sdelphij err:
1040296341Sdelphij    if ((in_mont == NULL) && (mont != NULL))
1041296341Sdelphij        BN_MONT_CTX_free(mont);
1042296341Sdelphij    BN_CTX_end(ctx);
1043296341Sdelphij    bn_check_top(rr);
1044296341Sdelphij    return (ret);
1045296341Sdelphij}
104668651Skris
104755714Skris/* The old fallback, simple version :-) */
1048160814Ssimonint BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
1049296341Sdelphij                      const BIGNUM *m, BN_CTX *ctx)
1050296341Sdelphij{
1051296341Sdelphij    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
1052296341Sdelphij    int start = 1;
1053296341Sdelphij    BIGNUM *d;
1054296341Sdelphij    /* Table of variables obtained from 'ctx' */
1055296341Sdelphij    BIGNUM *val[TABLE_SIZE];
105655714Skris
1057296341Sdelphij    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1058296341Sdelphij        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1059296341Sdelphij        BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1060296341Sdelphij        return -1;
1061296341Sdelphij    }
1062160814Ssimon
1063296341Sdelphij    bits = BN_num_bits(p);
106455714Skris
1065296341Sdelphij    if (bits == 0) {
1066296341Sdelphij        ret = BN_one(r);
1067296341Sdelphij        return ret;
1068296341Sdelphij    }
106955714Skris
1070296341Sdelphij    BN_CTX_start(ctx);
1071296341Sdelphij    d = BN_CTX_get(ctx);
1072296341Sdelphij    val[0] = BN_CTX_get(ctx);
1073296341Sdelphij    if (!d || !val[0])
1074296341Sdelphij        goto err;
107559191Skris
1076296341Sdelphij    if (!BN_nnmod(val[0], a, m, ctx))
1077296341Sdelphij        goto err;               /* 1 */
1078296341Sdelphij    if (BN_is_zero(val[0])) {
1079296341Sdelphij        BN_zero(r);
1080296341Sdelphij        ret = 1;
1081296341Sdelphij        goto err;
1082296341Sdelphij    }
108355714Skris
1084296341Sdelphij    window = BN_window_bits_for_exponent_size(bits);
1085296341Sdelphij    if (window > 1) {
1086296341Sdelphij        if (!BN_mod_mul(d, val[0], val[0], m, ctx))
1087296341Sdelphij            goto err;           /* 2 */
1088296341Sdelphij        j = 1 << (window - 1);
1089296341Sdelphij        for (i = 1; i < j; i++) {
1090296341Sdelphij            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1091296341Sdelphij                !BN_mod_mul(val[i], val[i - 1], d, m, ctx))
1092296341Sdelphij                goto err;
1093296341Sdelphij        }
1094296341Sdelphij    }
109555714Skris
1096296341Sdelphij    start = 1;                  /* This is used to avoid multiplication etc
1097296341Sdelphij                                 * when there is only the value '1' in the
1098296341Sdelphij                                 * buffer. */
1099296341Sdelphij    wvalue = 0;                 /* The 'value' of the window */
1100296341Sdelphij    wstart = bits - 1;          /* The top bit of the window */
1101296341Sdelphij    wend = 0;                   /* The bottom bit of the window */
110255714Skris
1103296341Sdelphij    if (!BN_one(r))
1104296341Sdelphij        goto err;
110555714Skris
1106296341Sdelphij    for (;;) {
1107296341Sdelphij        if (BN_is_bit_set(p, wstart) == 0) {
1108296341Sdelphij            if (!start)
1109296341Sdelphij                if (!BN_mod_mul(r, r, r, m, ctx))
1110296341Sdelphij                    goto err;
1111296341Sdelphij            if (wstart == 0)
1112296341Sdelphij                break;
1113296341Sdelphij            wstart--;
1114296341Sdelphij            continue;
1115296341Sdelphij        }
1116296341Sdelphij        /*
1117296341Sdelphij         * We now have wstart on a 'set' bit, we now need to work out how bit
1118296341Sdelphij         * a window to do.  To do this we need to scan forward until the last
1119296341Sdelphij         * set bit before the end of the window
1120296341Sdelphij         */
1121296341Sdelphij        j = wstart;
1122296341Sdelphij        wvalue = 1;
1123296341Sdelphij        wend = 0;
1124296341Sdelphij        for (i = 1; i < window; i++) {
1125296341Sdelphij            if (wstart - i < 0)
1126296341Sdelphij                break;
1127296341Sdelphij            if (BN_is_bit_set(p, wstart - i)) {
1128296341Sdelphij                wvalue <<= (i - wend);
1129296341Sdelphij                wvalue |= 1;
1130296341Sdelphij                wend = i;
1131296341Sdelphij            }
1132296341Sdelphij        }
113355714Skris
1134296341Sdelphij        /* wend is the size of the current window */
1135296341Sdelphij        j = wend + 1;
1136296341Sdelphij        /* add the 'bytes above' */
1137296341Sdelphij        if (!start)
1138296341Sdelphij            for (i = 0; i < j; i++) {
1139296341Sdelphij                if (!BN_mod_mul(r, r, r, m, ctx))
1140296341Sdelphij                    goto err;
1141296341Sdelphij            }
114255714Skris
1143296341Sdelphij        /* wvalue will be an odd number < 2^window */
1144296341Sdelphij        if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1145296341Sdelphij            goto err;
1146296341Sdelphij
1147296341Sdelphij        /* move the 'window' down further */
1148296341Sdelphij        wstart -= wend + 1;
1149296341Sdelphij        wvalue = 0;
1150296341Sdelphij        start = 0;
1151296341Sdelphij        if (wstart < 0)
1152296341Sdelphij            break;
1153296341Sdelphij    }
1154296341Sdelphij    ret = 1;
1155296341Sdelphij err:
1156296341Sdelphij    BN_CTX_end(ctx);
1157296341Sdelphij    bn_check_top(r);
1158296341Sdelphij    return (ret);
1159296341Sdelphij}
1160