bn_exp.c revision 296317
1139825Simp/* crypto/bn/bn_exp.c */
299657Sbenno/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
399657Sbenno * All rights reserved.
478342Sbenno *
578342Sbenno * This package is an SSL implementation written
678342Sbenno * by Eric Young (eay@cryptsoft.com).
778342Sbenno * The implementation was written so as to conform with Netscapes SSL.
878342Sbenno *
978342Sbenno * This library is free for commercial and non-commercial use as long as
1099657Sbenno * the following conditions are aheared to.  The following conditions
1199657Sbenno * apply to all code found in this distribution, be it the RC4, RSA,
1299657Sbenno * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
1399657Sbenno * included with this distribution is covered by the same copyright terms
1478342Sbenno * except that the holder is Tim Hudson (tjh@cryptsoft.com).
1599657Sbenno *
1699657Sbenno * Copyright remains Eric Young's, and as such any Copyright notices in
1799657Sbenno * the code are not to be removed.
1899657Sbenno * If this package is used in a product, Eric Young should be given attribution
1999657Sbenno * as the author of the parts of the library used.
2099657Sbenno * This can be in the form of a textual message at program startup or
2199657Sbenno * in documentation (online or textual) provided with the package.
2299657Sbenno *
2399657Sbenno * Redistribution and use in source and binary forms, with or without
2499657Sbenno * modification, are permitted provided that the following conditions
2599657Sbenno * are met:
2699657Sbenno * 1. Redistributions of source code must retain the copyright
2799657Sbenno *    notice, this list of conditions and the following disclaimer.
2878342Sbenno * 2. Redistributions in binary form must reproduce the above copyright
2978342Sbenno *    notice, this list of conditions and the following disclaimer in the
30113038Sobrien *    documentation and/or other materials provided with the distribution.
31113038Sobrien * 3. All advertising materials mentioning features or use of this software
3278342Sbenno *    must display the following acknowledgement:
3399657Sbenno *    "This product includes cryptographic software written by
3499657Sbenno *     Eric Young (eay@cryptsoft.com)"
3599657Sbenno *    The word 'cryptographic' can be left out if the rouines from the library
3678342Sbenno *    being used are not cryptographic related :-).
3799657Sbenno * 4. If you include any Windows specific code (or a derivative thereof) from
3899657Sbenno *    the apps directory (application code) you must include an acknowledgement:
3999657Sbenno *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
4099657Sbenno *
4199657Sbenno * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
4299657Sbenno * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4399657Sbenno * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4499657Sbenno * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45108939Sgrehan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46108939Sgrehan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4799657Sbenno * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4899657Sbenno * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4999657Sbenno * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50108939Sgrehan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5199657Sbenno * SUCH DAMAGE.
52112436Smux *
5399657Sbenno * The licence and distribution terms for any publically available version or
54109919Sbenno * derivative of this code cannot be changed.  i.e. this code cannot simply be
5599657Sbenno * copied and put under another distribution licence
5699657Sbenno * [including the GNU Public Licence.]
5799657Sbenno */
5899657Sbenno/* ====================================================================
5999657Sbenno * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
6099657Sbenno *
6199657Sbenno * Redistribution and use in source and binary forms, with or without
6299657Sbenno * modification, are permitted provided that the following conditions
6399657Sbenno * are met:
6499657Sbenno *
6599657Sbenno * 1. Redistributions of source code must retain the above copyright
6699657Sbenno *    notice, this list of conditions and the following disclaimer.
6799657Sbenno *
6899657Sbenno * 2. Redistributions in binary form must reproduce the above copyright
6999657Sbenno *    notice, this list of conditions and the following disclaimer in
70117126Sscottl *    the documentation and/or other materials provided with the
71117126Sscottl *    distribution.
7299657Sbenno *
7399657Sbenno * 3. All advertising materials mentioning features or use of this
7499657Sbenno *    software must display the following acknowledgment:
7599657Sbenno *    "This product includes software developed by the OpenSSL Project
7699657Sbenno *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
7799657Sbenno *
7899657Sbenno * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
7999657Sbenno *    endorse or promote products derived from this software without
8099657Sbenno *    prior written permission. For written permission, please contact
8199657Sbenno *    openssl-core@openssl.org.
8299657Sbenno *
83117126Sscottl * 5. Products derived from this software may not be called "OpenSSL"
84117126Sscottl *    nor may "OpenSSL" appear in their names without prior written
85117126Sscottl *    permission of the OpenSSL Project.
86117126Sscottl *
87117126Sscottl * 6. Redistributions of any form whatsoever must retain the following
88117126Sscottl *    acknowledgment:
89117126Sscottl *    "This product includes software developed by the OpenSSL Project
90117126Sscottl *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91117126Sscottl *
92117126Sscottl * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93117126Sscottl * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94117126Sscottl * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95117126Sscottl * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96117126Sscottl * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97117126Sscottl * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98117126Sscottl * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99117126Sscottl * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100117126Sscottl * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101117126Sscottl * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102117126Sscottl * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103117126Sscottl * OF THE POSSIBILITY OF SUCH DAMAGE.
104117126Sscottl * ====================================================================
105117126Sscottl *
106117126Sscottl * This product includes cryptographic software written by Eric Young
107117126Sscottl * (eay@cryptsoft.com).  This product includes software written by Tim
108117126Sscottl * Hudson (tjh@cryptsoft.com).
109117126Sscottl *
110117126Sscottl */
111117126Sscottl
112117126Sscottl#include "cryptlib.h"
113117126Sscottl#include "constant_time_locl.h"
114117126Sscottl#include "bn_lcl.h"
115117126Sscottl
116117126Sscottl#include <stdlib.h>
117117126Sscottl#ifdef _WIN32
118117126Sscottl# include <malloc.h>
119117126Sscottl# ifndef alloca
120117126Sscottl#  define alloca _alloca
121117126Sscottl# endif
122117126Sscottl#elif defined(__GNUC__)
12399657Sbenno# ifndef alloca
12499657Sbenno#  define alloca(s) __builtin_alloca((s))
12599657Sbenno# endif
12699657Sbenno#endif
12799657Sbenno
12899657Sbenno/* maximum precomputation table size for *variable* sliding windows */
12999657Sbenno#define TABLE_SIZE      32
130117126Sscottl
131117126Sscottl/* this one works - simple but works */
13299657Sbennoint BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
13399657Sbenno{
13499657Sbenno    int i, bits, ret = 0;
13599657Sbenno    BIGNUM *v, *rr;
13699657Sbenno
13799657Sbenno    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
13899657Sbenno        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
13999657Sbenno        BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
14099657Sbenno        return -1;
14199657Sbenno    }
14299657Sbenno
14399657Sbenno    BN_CTX_start(ctx);
14499657Sbenno    if ((r == a) || (r == p))
14599657Sbenno        rr = BN_CTX_get(ctx);
14699657Sbenno    else
14799657Sbenno        rr = r;
14899657Sbenno    v = BN_CTX_get(ctx);
14999657Sbenno    if (rr == NULL || v == NULL)
15099657Sbenno        goto err;
15199657Sbenno
15299657Sbenno    if (BN_copy(v, a) == NULL)
15399657Sbenno        goto err;
15499657Sbenno    bits = BN_num_bits(p);
15599657Sbenno
156117126Sscottl    if (BN_is_odd(p)) {
157117126Sscottl        if (BN_copy(rr, a) == NULL)
158117126Sscottl            goto err;
159117126Sscottl    } else {
160117126Sscottl        if (!BN_one(rr))
161117126Sscottl            goto err;
162117126Sscottl    }
16399657Sbenno
16499657Sbenno    for (i = 1; i < bits; i++) {
16599657Sbenno        if (!BN_sqr(v, v, ctx))
16699657Sbenno            goto err;
16799657Sbenno        if (BN_is_bit_set(p, i)) {
16899657Sbenno            if (!BN_mul(rr, rr, v, ctx))
16999657Sbenno                goto err;
170134934Sscottl        }
171134934Sscottl    }
172134934Sscottl    if (r != rr)
173134934Sscottl        BN_copy(r, rr);
174134934Sscottl    ret = 1;
17599657Sbenno err:
17699657Sbenno    BN_CTX_end(ctx);
17799657Sbenno    bn_check_top(r);
17899657Sbenno    return (ret);
17999657Sbenno}
18099657Sbenno
18199657Sbennoint BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
18299657Sbenno               BN_CTX *ctx)
18399657Sbenno{
184112436Smux    int ret;
185112436Smux
18699657Sbenno    bn_check_top(a);
18799657Sbenno    bn_check_top(p);
18899657Sbenno    bn_check_top(m);
18999657Sbenno
19099657Sbenno    /*-
19199657Sbenno     * For even modulus  m = 2^k*m_odd,  it might make sense to compute
19299657Sbenno     * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
19399657Sbenno     * exponentiation for the odd part), using appropriate exponent
19499657Sbenno     * reductions, and combine the results using the CRT.
19599657Sbenno     *
19699657Sbenno     * For now, we use Montgomery only if the modulus is odd; otherwise,
19799657Sbenno     * exponentiation using the reciprocal-based quick remaindering
19899657Sbenno     * algorithm is used.
19999657Sbenno     *
20099657Sbenno     * (Timing obtained with expspeed.c [computations  a^p mod m
20199657Sbenno     * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
20299657Sbenno     * 4096, 8192 bits], compared to the running time of the
20399657Sbenno     * standard algorithm:
204112436Smux     *
20599657Sbenno     *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
20699657Sbenno     *                     55 .. 77 %  [UltraSparc processor, but
20799657Sbenno     *                                  debug-solaris-sparcv8-gcc conf.]
20899657Sbenno     *
20999657Sbenno     *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
21099657Sbenno     *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
21199657Sbenno     *
21299657Sbenno     * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
21399657Sbenno     * at 2048 and more bits, but at 512 and 1024 bits, it was
21499657Sbenno     * slower even than the standard algorithm!
21599657Sbenno     *
21699657Sbenno     * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
21799657Sbenno     * should be obtained when the new Montgomery reduction code
21899657Sbenno     * has been integrated into OpenSSL.)
21999657Sbenno     */
22099657Sbenno
22199657Sbenno#define MONT_MUL_MOD
22299657Sbenno#define MONT_EXP_WORD
22399657Sbenno#define RECP_MUL_MOD
22499657Sbenno
22599657Sbenno#ifdef MONT_MUL_MOD
22699657Sbenno    /*
22799657Sbenno     * I have finally been able to take out this pre-condition of the top bit
22899657Sbenno     * being set.  It was caused by an error in BN_div with negatives.  There
22999657Sbenno     * was also another problem when for a^b%m a >= m.  eay 07-May-97
23099657Sbenno     */
23199657Sbenno    /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
23299657Sbenno
23399657Sbenno    if (BN_is_odd(m)) {
23499657Sbenno# ifdef MONT_EXP_WORD
23599657Sbenno        if (a->top == 1 && !a->neg
23699657Sbenno            && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
23799657Sbenno            BN_ULONG A = a->d[0];
23899657Sbenno            ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
23999657Sbenno        } else
24099657Sbenno# endif
24199657Sbenno            ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
24299657Sbenno    } else
24399657Sbenno#endif
24499657Sbenno#ifdef RECP_MUL_MOD
24599657Sbenno    {
24699657Sbenno        ret = BN_mod_exp_recp(r, a, p, m, ctx);
24799657Sbenno    }
24899657Sbenno#else
24999657Sbenno    {
25099657Sbenno        ret = BN_mod_exp_simple(r, a, p, m, ctx);
25199657Sbenno    }
25299657Sbenno#endif
25399657Sbenno
25499657Sbenno    bn_check_top(r);
25599657Sbenno    return (ret);
256118081Smux}
257118081Smux
258118081Smuxint BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
259118081Smux                    const BIGNUM *m, BN_CTX *ctx)
260118081Smux{
261118081Smux    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
262118081Smux    int start = 1;
263118081Smux    BIGNUM *aa;
264118081Smux    /* Table of variables obtained from 'ctx' */
26599657Sbenno    BIGNUM *val[TABLE_SIZE];
266170421Smarcel    BN_RECP_CTX recp;
267170421Smarcel
268170421Smarcel    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
269170421Smarcel        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
270170421Smarcel        BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
271170421Smarcel        return -1;
272170421Smarcel    }
273170421Smarcel
274170421Smarcel    bits = BN_num_bits(p);
275170421Smarcel    if (bits == 0) {
276170421Smarcel        /* x**0 mod 1 is still zero. */
277170421Smarcel        if (BN_is_one(m)) {
27899657Sbenno            ret = 1;
27999657Sbenno            BN_zero(r);
28099657Sbenno        } else {
28199657Sbenno            ret = BN_one(r);
28299657Sbenno        }
28399657Sbenno        return ret;
284118081Smux    }
28599657Sbenno
28699657Sbenno    BN_CTX_start(ctx);
28799657Sbenno    aa = BN_CTX_get(ctx);
28899657Sbenno    val[0] = BN_CTX_get(ctx);
28999657Sbenno    if (!aa || !val[0])
29099657Sbenno        goto err;
29199657Sbenno
292170421Smarcel    BN_RECP_CTX_init(&recp);
293170421Smarcel    if (m->neg) {
294170421Smarcel        /* ignore sign of 'm' */
29599657Sbenno        if (!BN_copy(aa, m))
29699657Sbenno            goto err;
29799657Sbenno        aa->neg = 0;
29899657Sbenno        if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
299108939Sgrehan            goto err;
30099657Sbenno    } else {
30199657Sbenno        if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
30278342Sbenno            goto err;
30399657Sbenno    }
30478342Sbenno
30599657Sbenno    if (!BN_nnmod(val[0], a, m, ctx))
30699657Sbenno        goto err;               /* 1 */
307170421Smarcel    if (BN_is_zero(val[0])) {
308170421Smarcel        BN_zero(r);
30999657Sbenno        ret = 1;
310170421Smarcel        goto err;
31199657Sbenno    }
31299657Sbenno
31378342Sbenno    window = BN_window_bits_for_exponent_size(bits);
31499657Sbenno    if (window > 1) {
31599657Sbenno        if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
31699657Sbenno            goto err;           /* 2 */
31799657Sbenno        j = 1 << (window - 1);
31899657Sbenno        for (i = 1; i < j; i++) {
31999657Sbenno            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
32099657Sbenno                !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
32199657Sbenno                goto err;
32299657Sbenno        }
32399657Sbenno    }
324143063Sjoerg
32599657Sbenno    start = 1;                  /* This is used to avoid multiplication etc
32699657Sbenno                                 * when there is only the value '1' in the
32799657Sbenno                                 * buffer. */
32899657Sbenno    wvalue = 0;                 /* The 'value' of the window */
32999657Sbenno    wstart = bits - 1;          /* The top bit of the window */
33099657Sbenno    wend = 0;                   /* The bottom bit of the window */
33199657Sbenno
33299657Sbenno    if (!BN_one(r))
33399657Sbenno        goto err;
33499657Sbenno
33599657Sbenno    for (;;) {
33699657Sbenno        if (BN_is_bit_set(p, wstart) == 0) {
33799657Sbenno            if (!start)
33899657Sbenno                if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
33999657Sbenno                    goto err;
34099657Sbenno            if (wstart == 0)
34199657Sbenno                break;
34299657Sbenno            wstart--;
34399657Sbenno            continue;
34499657Sbenno        }
34599657Sbenno        /*
34699657Sbenno         * We now have wstart on a 'set' bit, we now need to work out how bit
34799657Sbenno         * a window to do.  To do this we need to scan forward until the last
34899657Sbenno         * set bit before the end of the window
34999657Sbenno         */
35099657Sbenno        j = wstart;
35199657Sbenno        wvalue = 1;
35299657Sbenno        wend = 0;
35399657Sbenno        for (i = 1; i < window; i++) {
35499657Sbenno            if (wstart - i < 0)
35599657Sbenno                break;
35699657Sbenno            if (BN_is_bit_set(p, wstart - i)) {
35799657Sbenno                wvalue <<= (i - wend);
35899657Sbenno                wvalue |= 1;
35999657Sbenno                wend = i;
36099657Sbenno            }
36199657Sbenno        }
36299657Sbenno
36399657Sbenno        /* wend is the size of the current window */
36499657Sbenno        j = wend + 1;
36599657Sbenno        /* add the 'bytes above' */
36699657Sbenno        if (!start)
36799657Sbenno            for (i = 0; i < j; i++) {
368108939Sgrehan                if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
369108939Sgrehan                    goto err;
37099657Sbenno            }
371108939Sgrehan
372108939Sgrehan        /* wvalue will be an odd number < 2^window */
373108939Sgrehan        if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
374108939Sgrehan            goto err;
375108939Sgrehan
37699657Sbenno        /* move the 'window' down further */
377108939Sgrehan        wstart -= wend + 1;
37899657Sbenno        wvalue = 0;
379108939Sgrehan        start = 0;
38078342Sbenno        if (wstart < 0)
38199657Sbenno            break;
38299657Sbenno    }
383108939Sgrehan    ret = 1;
384108939Sgrehan err:
385108939Sgrehan    BN_CTX_end(ctx);
386108939Sgrehan    BN_RECP_CTX_free(&recp);
38799657Sbenno    bn_check_top(r);
388108939Sgrehan    return (ret);
389108939Sgrehan}
390108939Sgrehan
391108939Sgrehanint BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
392108939Sgrehan                    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
393108939Sgrehan{
394108939Sgrehan    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
395108939Sgrehan    int start = 1;
396108939Sgrehan    BIGNUM *d, *r;
397108939Sgrehan    const BIGNUM *aa;
398108939Sgrehan    /* Table of variables obtained from 'ctx' */
399108939Sgrehan    BIGNUM *val[TABLE_SIZE];
400108939Sgrehan    BN_MONT_CTX *mont = NULL;
401108939Sgrehan
402108939Sgrehan    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
403108939Sgrehan        return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
404108939Sgrehan    }
405108939Sgrehan
406108939Sgrehan    bn_check_top(a);
407108939Sgrehan    bn_check_top(p);
408108939Sgrehan    bn_check_top(m);
409108939Sgrehan
410108939Sgrehan    if (!BN_is_odd(m)) {
411108939Sgrehan        BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
412108939Sgrehan        return (0);
413108939Sgrehan    }
414108939Sgrehan    bits = BN_num_bits(p);
415108939Sgrehan    if (bits == 0) {
416108939Sgrehan        /* x**0 mod 1 is still zero. */
417108939Sgrehan        if (BN_is_one(m)) {
418108939Sgrehan            ret = 1;
419108939Sgrehan            BN_zero(rr);
420108939Sgrehan        } else {
421108939Sgrehan            ret = BN_one(rr);
422108939Sgrehan        }
423108939Sgrehan        return ret;
424108939Sgrehan    }
425108939Sgrehan
426108939Sgrehan    BN_CTX_start(ctx);
427108939Sgrehan    d = BN_CTX_get(ctx);
428108939Sgrehan    r = BN_CTX_get(ctx);
429108939Sgrehan    val[0] = BN_CTX_get(ctx);
430108939Sgrehan    if (!d || !r || !val[0])
431108939Sgrehan        goto err;
432108939Sgrehan
433108939Sgrehan    /*
434108939Sgrehan     * If this is not done, things will break in the montgomery part
435108939Sgrehan     */
436108939Sgrehan
437108939Sgrehan    if (in_mont != NULL)
438108939Sgrehan        mont = in_mont;
439108939Sgrehan    else {
440108939Sgrehan        if ((mont = BN_MONT_CTX_new()) == NULL)
441108939Sgrehan            goto err;
442108939Sgrehan        if (!BN_MONT_CTX_set(mont, m, ctx))
443108939Sgrehan            goto err;
444108939Sgrehan    }
445108939Sgrehan
446108939Sgrehan    if (a->neg || BN_ucmp(a, m) >= 0) {
447108939Sgrehan        if (!BN_nnmod(val[0], a, m, ctx))
448108939Sgrehan            goto err;
449108939Sgrehan        aa = val[0];
450108939Sgrehan    } else
451108939Sgrehan        aa = a;
452108939Sgrehan    if (BN_is_zero(aa)) {
453108939Sgrehan        BN_zero(rr);
454108939Sgrehan        ret = 1;
455108939Sgrehan        goto err;
456108939Sgrehan    }
457108939Sgrehan    if (!BN_to_montgomery(val[0], aa, mont, ctx))
458108939Sgrehan        goto err;               /* 1 */
459108939Sgrehan
460108939Sgrehan    window = BN_window_bits_for_exponent_size(bits);
461108939Sgrehan    if (window > 1) {
462108939Sgrehan        if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
463108939Sgrehan            goto err;           /* 2 */
464108939Sgrehan        j = 1 << (window - 1);
465108939Sgrehan        for (i = 1; i < j; i++) {
466108939Sgrehan            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
467108939Sgrehan                !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
468108939Sgrehan                goto err;
469108939Sgrehan        }
470108939Sgrehan    }
471108939Sgrehan
472108939Sgrehan    start = 1;                  /* This is used to avoid multiplication etc
473108939Sgrehan                                 * when there is only the value '1' in the
474108939Sgrehan                                 * buffer. */
475108939Sgrehan    wvalue = 0;                 /* The 'value' of the window */
476108939Sgrehan    wstart = bits - 1;          /* The top bit of the window */
477143063Sjoerg    wend = 0;                   /* The bottom bit of the window */
478108939Sgrehan
479108939Sgrehan    if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
480108939Sgrehan        goto err;
481108939Sgrehan    for (;;) {
482108939Sgrehan        if (BN_is_bit_set(p, wstart) == 0) {
483108939Sgrehan            if (!start) {
484113255Sdes                if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
485108939Sgrehan                    goto err;
486108939Sgrehan            }
487108939Sgrehan            if (wstart == 0)
488108939Sgrehan                break;
489108939Sgrehan            wstart--;
490108939Sgrehan            continue;
491108939Sgrehan        }
492110335Sharti        /*
493110335Sharti         * We now have wstart on a 'set' bit, we now need to work out how bit
494110335Sharti         * a window to do.  To do this we need to scan forward until the last
495110335Sharti         * set bit before the end of the window
496110335Sharti         */
497110335Sharti        j = wstart;
498108939Sgrehan        wvalue = 1;
499108939Sgrehan        wend = 0;
500108939Sgrehan        for (i = 1; i < window; i++) {
501108939Sgrehan            if (wstart - i < 0)
502108939Sgrehan                break;
503108939Sgrehan            if (BN_is_bit_set(p, wstart - i)) {
504108939Sgrehan                wvalue <<= (i - wend);
505108939Sgrehan                wvalue |= 1;
506108939Sgrehan                wend = i;
507108939Sgrehan            }
508108939Sgrehan        }
509108939Sgrehan
510108939Sgrehan        /* wend is the size of the current window */
511108939Sgrehan        j = wend + 1;
512108939Sgrehan        /* add the 'bytes above' */
513108939Sgrehan        if (!start)
514108939Sgrehan            for (i = 0; i < j; i++) {
515140314Sscottl                if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
516140314Sscottl                    goto err;
517140314Sscottl            }
518140314Sscottl
519140314Sscottl        /* wvalue will be an odd number < 2^window */
520140314Sscottl        if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
521140314Sscottl            goto err;
522140314Sscottl
523147851Sgrehan        /* move the 'window' down further */
524147851Sgrehan        wstart -= wend + 1;
525140314Sscottl        wvalue = 0;
526140314Sscottl        start = 0;
527140314Sscottl        if (wstart < 0)
528140314Sscottl            break;
529140314Sscottl    }
530140314Sscottl    if (!BN_from_montgomery(rr, r, mont, ctx))
531140314Sscottl        goto err;
532140314Sscottl    ret = 1;
533140314Sscottl err:
534140314Sscottl    if ((in_mont == NULL) && (mont != NULL))
535140314Sscottl        BN_MONT_CTX_free(mont);
536140314Sscottl    BN_CTX_end(ctx);
537140314Sscottl    bn_check_top(rr);
538140314Sscottl    return (ret);
539140314Sscottl}
540140314Sscottl
541140314Sscottl/*
542140314Sscottl * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
543140314Sscottl * layout so that accessing any of these table values shows the same access
544140314Sscottl * pattern as far as cache lines are concerned.  The following functions are
545140314Sscottl * used to transfer a BIGNUM from/to that table.
546108939Sgrehan */
547108939Sgrehan
548108939Sgrehanstatic int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
549108939Sgrehan                                        unsigned char *buf, int idx,
550108939Sgrehan                                        int window)
551108939Sgrehan{
552108939Sgrehan    int i, j;
553108939Sgrehan    int width = 1 << window;
554108939Sgrehan    BN_ULONG *table = (BN_ULONG *)buf;
555143063Sjoerg
556108939Sgrehan    if (top > b->top)
557108939Sgrehan        top = b->top;           /* this works because 'buf' is explicitly
558108939Sgrehan                                 * zeroed */
559108939Sgrehan    for (i = 0, j = idx; i < top; i++, j += width) {
560108939Sgrehan        table[j] = b->d[i];
561108939Sgrehan    }
562108939Sgrehan
563113703Sgrehan    return 1;
564108939Sgrehan}
565108939Sgrehan
566108939Sgrehanstatic int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
567108939Sgrehan                                          unsigned char *buf, int idx,
568108939Sgrehan                                          int window)
569108939Sgrehan{
570108939Sgrehan    int i, j;
571108939Sgrehan    int width = 1 << window;
572108939Sgrehan    volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
573108939Sgrehan
574108939Sgrehan    if (bn_wexpand(b, top) == NULL)
575108939Sgrehan        return 0;
576108939Sgrehan
577108939Sgrehan    if (window <= 3) {
578108939Sgrehan        for (i = 0; i < top; i++, table += width) {
579108939Sgrehan            BN_ULONG acc = 0;
580108939Sgrehan
581108939Sgrehan            for (j = 0; j < width; j++) {
582108939Sgrehan                acc |= table[j] &
583108939Sgrehan                       ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
584108939Sgrehan            }
585110335Sharti
586110335Sharti            b->d[i] = acc;
587110335Sharti        }
588108939Sgrehan    } else {
589110335Sharti        int xstride = 1 << (window - 2);
590108939Sgrehan        BN_ULONG y0, y1, y2, y3;
591110335Sharti
592110335Sharti        i = idx >> (window - 2);        /* equivalent of idx / xstride */
593108939Sgrehan        idx &= xstride - 1;             /* equivalent of idx % xstride */
594108939Sgrehan
595108939Sgrehan        y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
596108939Sgrehan        y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
597108939Sgrehan        y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
598108939Sgrehan        y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
599108939Sgrehan
600108939Sgrehan        for (i = 0; i < top; i++, table += width) {
601108939Sgrehan            BN_ULONG acc = 0;
602108939Sgrehan
603108939Sgrehan            for (j = 0; j < xstride; j++) {
604108939Sgrehan                acc |= ( (table[j + 0 * xstride] & y0) |
605108939Sgrehan                         (table[j + 1 * xstride] & y1) |
606108939Sgrehan                         (table[j + 2 * xstride] & y2) |
607108939Sgrehan                         (table[j + 3 * xstride] & y3) )
608108939Sgrehan                       & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
609108939Sgrehan            }
610108939Sgrehan
61199657Sbenno            b->d[i] = acc;
612143634Sgrehan        }
613109935Sbenno    }
61499657Sbenno
615109935Sbenno    b->top = top;
616109935Sbenno    bn_correct_top(b);
617109935Sbenno    return 1;
61899657Sbenno}
619143634Sgrehan
620109919Sbenno/*
621109919Sbenno * Given a pointer value, compute the next address that is a cache line
622109935Sbenno * multiple.
623109919Sbenno */
624#define MOD_EXP_CTIME_ALIGN(x_) \
625        ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
626
627/*
628 * This variant of BN_mod_exp_mont() uses fixed windows and the special
629 * precomputation memory layout to limit data-dependency to a minimum to
630 * protect secret exponents (cf. the hyper-threading timing attacks pointed
631 * out by Colin Percival,
632 * http://www.daemonology.net/hyperthreading-considered-harmful/)
633 */
634int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
635                              const BIGNUM *m, BN_CTX *ctx,
636                              BN_MONT_CTX *in_mont)
637{
638    int i, bits, ret = 0, window, wvalue;
639    int top;
640    BN_MONT_CTX *mont = NULL;
641
642    int numPowers;
643    unsigned char *powerbufFree = NULL;
644    int powerbufLen = 0;
645    unsigned char *powerbuf = NULL;
646    BIGNUM tmp, am;
647
648    bn_check_top(a);
649    bn_check_top(p);
650    bn_check_top(m);
651
652    if (!BN_is_odd(m)) {
653        BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
654        return (0);
655    }
656
657    top = m->top;
658
659    bits = BN_num_bits(p);
660    if (bits == 0) {
661        /* x**0 mod 1 is still zero. */
662        if (BN_is_one(m)) {
663            ret = 1;
664            BN_zero(rr);
665        } else {
666            ret = BN_one(rr);
667        }
668        return ret;
669    }
670
671    BN_CTX_start(ctx);
672
673    /*
674     * Allocate a montgomery context if it was not supplied by the caller. If
675     * this is not done, things will break in the montgomery part.
676     */
677    if (in_mont != NULL)
678        mont = in_mont;
679    else {
680        if ((mont = BN_MONT_CTX_new()) == NULL)
681            goto err;
682        if (!BN_MONT_CTX_set(mont, m, ctx))
683            goto err;
684    }
685
686    /* Get the window size to use with size of p. */
687    window = BN_window_bits_for_ctime_exponent_size(bits);
688#if defined(OPENSSL_BN_ASM_MONT5)
689    if (window == 6 && bits <= 1024)
690        window = 5;             /* ~5% improvement of 2048-bit RSA sign */
691#endif
692
693    /*
694     * Allocate a buffer large enough to hold all of the pre-computed powers
695     * of am, am itself and tmp.
696     */
697    numPowers = 1 << window;
698    powerbufLen = sizeof(m->d[0]) * (top * numPowers +
699                                     ((2 * top) >
700                                      numPowers ? (2 * top) : numPowers));
701#ifdef alloca
702    if (powerbufLen < 3072)
703        powerbufFree =
704            alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
705    else
706#endif
707        if ((powerbufFree =
708             (unsigned char *)OPENSSL_malloc(powerbufLen +
709                                             MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
710            == NULL)
711        goto err;
712
713    powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
714    memset(powerbuf, 0, powerbufLen);
715
716#ifdef alloca
717    if (powerbufLen < 3072)
718        powerbufFree = NULL;
719#endif
720
721    /* lay down tmp and am right after powers table */
722    tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
723    am.d = tmp.d + top;
724    tmp.top = am.top = 0;
725    tmp.dmax = am.dmax = top;
726    tmp.neg = am.neg = 0;
727    tmp.flags = am.flags = BN_FLG_STATIC_DATA;
728
729    /* prepare a^0 in Montgomery domain */
730#if 1
731    if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
732        goto err;
733#else
734    tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */
735    for (i = 1; i < top; i++)
736        tmp.d[i] = (~m->d[i]) & BN_MASK2;
737    tmp.top = top;
738#endif
739
740    /* prepare a^1 in Montgomery domain */
741    if (a->neg || BN_ucmp(a, m) >= 0) {
742        if (!BN_mod(&am, a, m, ctx))
743            goto err;
744        if (!BN_to_montgomery(&am, &am, mont, ctx))
745            goto err;
746    } else if (!BN_to_montgomery(&am, a, mont, ctx))
747        goto err;
748
749#if defined(OPENSSL_BN_ASM_MONT5)
750    if (window == 5 && top > 1) {
751        /*
752         * This optimization uses ideas from http://eprint.iacr.org/2011/239,
753         * specifically optimization of cache-timing attack countermeasures
754         * and pre-computation optimization.
755         */
756
757        /*
758         * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
759         * 512-bit RSA is hardly relevant, we omit it to spare size...
760         */
761        void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
762                                 const void *table, const BN_ULONG *np,
763                                 const BN_ULONG *n0, int num, int power);
764        void bn_scatter5(const BN_ULONG *inp, size_t num,
765                         void *table, size_t power);
766        void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
767
768        BN_ULONG *np = mont->N.d, *n0 = mont->n0;
769
770        /*
771         * BN_to_montgomery can contaminate words above .top [in
772         * BN_DEBUG[_DEBUG] build]...
773         */
774        for (i = am.top; i < top; i++)
775            am.d[i] = 0;
776        for (i = tmp.top; i < top; i++)
777            tmp.d[i] = 0;
778
779        bn_scatter5(tmp.d, top, powerbuf, 0);
780        bn_scatter5(am.d, am.top, powerbuf, 1);
781        bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
782        bn_scatter5(tmp.d, top, powerbuf, 2);
783
784# if 0
785        for (i = 3; i < 32; i++) {
786            /* Calculate a^i = a^(i-1) * a */
787            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
788            bn_scatter5(tmp.d, top, powerbuf, i);
789        }
790# else
791        /* same as above, but uses squaring for 1/2 of operations */
792        for (i = 4; i < 32; i *= 2) {
793            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
794            bn_scatter5(tmp.d, top, powerbuf, i);
795        }
796        for (i = 3; i < 8; i += 2) {
797            int j;
798            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
799            bn_scatter5(tmp.d, top, powerbuf, i);
800            for (j = 2 * i; j < 32; j *= 2) {
801                bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
802                bn_scatter5(tmp.d, top, powerbuf, j);
803            }
804        }
805        for (; i < 16; i += 2) {
806            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
807            bn_scatter5(tmp.d, top, powerbuf, i);
808            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
809            bn_scatter5(tmp.d, top, powerbuf, 2 * i);
810        }
811        for (; i < 32; i += 2) {
812            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
813            bn_scatter5(tmp.d, top, powerbuf, i);
814        }
815# endif
816        bits--;
817        for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
818            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
819        bn_gather5(tmp.d, top, powerbuf, wvalue);
820
821        /*
822         * Scan the exponent one window at a time starting from the most
823         * significant bits.
824         */
825        while (bits >= 0) {
826            for (wvalue = 0, i = 0; i < 5; i++, bits--)
827                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
828
829            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
830            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
831            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
832            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
833            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
834            bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
835        }
836
837        tmp.top = top;
838        bn_correct_top(&tmp);
839    } else
840#endif
841    {
842        if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window))
843            goto err;
844        if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window))
845            goto err;
846
847        /*
848         * If the window size is greater than 1, then calculate
849         * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even
850         * powers could instead be computed as (a^(i/2))^2 to use the slight
851         * performance advantage of sqr over mul).
852         */
853        if (window > 1) {
854            if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
855                goto err;
856            if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2,
857                                              window))
858                goto err;
859            for (i = 3; i < numPowers; i++) {
860                /* Calculate a^i = a^(i-1) * a */
861                if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
862                    goto err;
863                if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i,
864                                                  window))
865                    goto err;
866            }
867        }
868
869        bits--;
870        for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
871            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
872        if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue,
873                                            window))
874            goto err;
875
876        /*
877         * Scan the exponent one window at a time starting from the most
878         * significant bits.
879         */
880        while (bits >= 0) {
881            wvalue = 0;         /* The 'value' of the window */
882
883            /* Scan the window, squaring the result as we go */
884            for (i = 0; i < window; i++, bits--) {
885                if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
886                    goto err;
887                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
888            }
889
890            /*
891             * Fetch the appropriate pre-computed value from the pre-buf
892             */
893            if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue,
894                                                window))
895                goto err;
896
897            /* Multiply the result into the intermediate result */
898            if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
899                goto err;
900        }
901    }
902
903    /* Convert the final result from montgomery to standard format */
904    if (!BN_from_montgomery(rr, &tmp, mont, ctx))
905        goto err;
906    ret = 1;
907 err:
908    if ((in_mont == NULL) && (mont != NULL))
909        BN_MONT_CTX_free(mont);
910    if (powerbuf != NULL) {
911        OPENSSL_cleanse(powerbuf, powerbufLen);
912        if (powerbufFree)
913            OPENSSL_free(powerbufFree);
914    }
915    BN_CTX_end(ctx);
916    return (ret);
917}
918
919int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
920                         const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
921{
922    BN_MONT_CTX *mont = NULL;
923    int b, bits, ret = 0;
924    int r_is_one;
925    BN_ULONG w, next_w;
926    BIGNUM *d, *r, *t;
927    BIGNUM *swap_tmp;
928#define BN_MOD_MUL_WORD(r, w, m) \
929                (BN_mul_word(r, (w)) && \
930                (/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
931                        (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
932    /*
933     * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
934     * probably more overhead than always using BN_mod (which uses BN_copy if
935     * a similar test returns true).
936     */
937    /*
938     * We can use BN_mod and do not need BN_nnmod because our accumulator is
939     * never negative (the result of BN_mod does not depend on the sign of
940     * the modulus).
941     */
942#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
943                (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
944
945    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
946        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
947        BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
948        return -1;
949    }
950
951    bn_check_top(p);
952    bn_check_top(m);
953
954    if (!BN_is_odd(m)) {
955        BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
956        return (0);
957    }
958    if (m->top == 1)
959        a %= m->d[0];           /* make sure that 'a' is reduced */
960
961    bits = BN_num_bits(p);
962    if (bits == 0) {
963        /* x**0 mod 1 is still zero. */
964        if (BN_is_one(m)) {
965            ret = 1;
966            BN_zero(rr);
967        } else {
968            ret = BN_one(rr);
969        }
970        return ret;
971    }
972    if (a == 0) {
973        BN_zero(rr);
974        ret = 1;
975        return ret;
976    }
977
978    BN_CTX_start(ctx);
979    d = BN_CTX_get(ctx);
980    r = BN_CTX_get(ctx);
981    t = BN_CTX_get(ctx);
982    if (d == NULL || r == NULL || t == NULL)
983        goto err;
984
985    if (in_mont != NULL)
986        mont = in_mont;
987    else {
988        if ((mont = BN_MONT_CTX_new()) == NULL)
989            goto err;
990        if (!BN_MONT_CTX_set(mont, m, ctx))
991            goto err;
992    }
993
994    r_is_one = 1;               /* except for Montgomery factor */
995
996    /* bits-1 >= 0 */
997
998    /* The result is accumulated in the product r*w. */
999    w = a;                      /* bit 'bits-1' of 'p' is always set */
1000    for (b = bits - 2; b >= 0; b--) {
1001        /* First, square r*w. */
1002        next_w = w * w;
1003        if ((next_w / w) != w) { /* overflow */
1004            if (r_is_one) {
1005                if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1006                    goto err;
1007                r_is_one = 0;
1008            } else {
1009                if (!BN_MOD_MUL_WORD(r, w, m))
1010                    goto err;
1011            }
1012            next_w = 1;
1013        }
1014        w = next_w;
1015        if (!r_is_one) {
1016            if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
1017                goto err;
1018        }
1019
1020        /* Second, multiply r*w by 'a' if exponent bit is set. */
1021        if (BN_is_bit_set(p, b)) {
1022            next_w = w * a;
1023            if ((next_w / a) != w) { /* overflow */
1024                if (r_is_one) {
1025                    if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1026                        goto err;
1027                    r_is_one = 0;
1028                } else {
1029                    if (!BN_MOD_MUL_WORD(r, w, m))
1030                        goto err;
1031                }
1032                next_w = a;
1033            }
1034            w = next_w;
1035        }
1036    }
1037
1038    /* Finally, set r:=r*w. */
1039    if (w != 1) {
1040        if (r_is_one) {
1041            if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1042                goto err;
1043            r_is_one = 0;
1044        } else {
1045            if (!BN_MOD_MUL_WORD(r, w, m))
1046                goto err;
1047        }
1048    }
1049
1050    if (r_is_one) {             /* can happen only if a == 1 */
1051        if (!BN_one(rr))
1052            goto err;
1053    } else {
1054        if (!BN_from_montgomery(rr, r, mont, ctx))
1055            goto err;
1056    }
1057    ret = 1;
1058 err:
1059    if ((in_mont == NULL) && (mont != NULL))
1060        BN_MONT_CTX_free(mont);
1061    BN_CTX_end(ctx);
1062    bn_check_top(rr);
1063    return (ret);
1064}
1065
1066/* The old fallback, simple version :-) */
1067int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
1068                      const BIGNUM *m, BN_CTX *ctx)
1069{
1070    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
1071    int start = 1;
1072    BIGNUM *d;
1073    /* Table of variables obtained from 'ctx' */
1074    BIGNUM *val[TABLE_SIZE];
1075
1076    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1077        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1078        BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1079        return -1;
1080    }
1081
1082    bits = BN_num_bits(p);
1083   if (bits == 0) {
1084        /* x**0 mod 1 is still zero. */
1085        if (BN_is_one(m)) {
1086            ret = 1;
1087            BN_zero(r);
1088        } else {
1089            ret = BN_one(r);
1090        }
1091        return ret;
1092    }
1093
1094    BN_CTX_start(ctx);
1095    d = BN_CTX_get(ctx);
1096    val[0] = BN_CTX_get(ctx);
1097    if (!d || !val[0])
1098        goto err;
1099
1100    if (!BN_nnmod(val[0], a, m, ctx))
1101        goto err;               /* 1 */
1102    if (BN_is_zero(val[0])) {
1103        BN_zero(r);
1104        ret = 1;
1105        goto err;
1106    }
1107
1108    window = BN_window_bits_for_exponent_size(bits);
1109    if (window > 1) {
1110        if (!BN_mod_mul(d, val[0], val[0], m, ctx))
1111            goto err;           /* 2 */
1112        j = 1 << (window - 1);
1113        for (i = 1; i < j; i++) {
1114            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1115                !BN_mod_mul(val[i], val[i - 1], d, m, ctx))
1116                goto err;
1117        }
1118    }
1119
1120    start = 1;                  /* This is used to avoid multiplication etc
1121                                 * when there is only the value '1' in the
1122                                 * buffer. */
1123    wvalue = 0;                 /* The 'value' of the window */
1124    wstart = bits - 1;          /* The top bit of the window */
1125    wend = 0;                   /* The bottom bit of the window */
1126
1127    if (!BN_one(r))
1128        goto err;
1129
1130    for (;;) {
1131        if (BN_is_bit_set(p, wstart) == 0) {
1132            if (!start)
1133                if (!BN_mod_mul(r, r, r, m, ctx))
1134                    goto err;
1135            if (wstart == 0)
1136                break;
1137            wstart--;
1138            continue;
1139        }
1140        /*
1141         * We now have wstart on a 'set' bit, we now need to work out how bit
1142         * a window to do.  To do this we need to scan forward until the last
1143         * set bit before the end of the window
1144         */
1145        j = wstart;
1146        wvalue = 1;
1147        wend = 0;
1148        for (i = 1; i < window; i++) {
1149            if (wstart - i < 0)
1150                break;
1151            if (BN_is_bit_set(p, wstart - i)) {
1152                wvalue <<= (i - wend);
1153                wvalue |= 1;
1154                wend = i;
1155            }
1156        }
1157
1158        /* wend is the size of the current window */
1159        j = wend + 1;
1160        /* add the 'bytes above' */
1161        if (!start)
1162            for (i = 0; i < j; i++) {
1163                if (!BN_mod_mul(r, r, r, m, ctx))
1164                    goto err;
1165            }
1166
1167        /* wvalue will be an odd number < 2^window */
1168        if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1169            goto err;
1170
1171        /* move the 'window' down further */
1172        wstart -= wend + 1;
1173        wvalue = 0;
1174        start = 0;
1175        if (wstart < 0)
1176            break;
1177    }
1178    ret = 1;
1179 err:
1180    BN_CTX_end(ctx);
1181    bn_check_top(r);
1182    return (ret);
1183}
1184