bn_exp.c revision 291721
1/* crypto/bn/bn_exp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 *    notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 *    notice, this list of conditions and the following disclaimer in
70 *    the documentation and/or other materials provided with the
71 *    distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 *    software must display the following acknowledgment:
75 *    "This product includes software developed by the OpenSSL Project
76 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 *    endorse or promote products derived from this software without
80 *    prior written permission. For written permission, please contact
81 *    openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 *    nor may "OpenSSL" appear in their names without prior written
85 *    permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 *    acknowledgment:
89 *    "This product includes software developed by the OpenSSL Project
90 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com).  This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include "cryptlib.h"
113#include "bn_lcl.h"
114
115#include <stdlib.h>
116#ifdef _WIN32
117# include <malloc.h>
118# ifndef alloca
119#  define alloca _alloca
120# endif
121#elif defined(__GNUC__)
122# ifndef alloca
123#  define alloca(s) __builtin_alloca((s))
124# endif
125#endif
126
127/* maximum precomputation table size for *variable* sliding windows */
128#define TABLE_SIZE      32
129
130/* this one works - simple but works */
131int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
132{
133    int i, bits, ret = 0;
134    BIGNUM *v, *rr;
135
136    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
137        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
138        BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
139        return -1;
140    }
141
142    BN_CTX_start(ctx);
143    if ((r == a) || (r == p))
144        rr = BN_CTX_get(ctx);
145    else
146        rr = r;
147    v = BN_CTX_get(ctx);
148    if (rr == NULL || v == NULL)
149        goto err;
150
151    if (BN_copy(v, a) == NULL)
152        goto err;
153    bits = BN_num_bits(p);
154
155    if (BN_is_odd(p)) {
156        if (BN_copy(rr, a) == NULL)
157            goto err;
158    } else {
159        if (!BN_one(rr))
160            goto err;
161    }
162
163    for (i = 1; i < bits; i++) {
164        if (!BN_sqr(v, v, ctx))
165            goto err;
166        if (BN_is_bit_set(p, i)) {
167            if (!BN_mul(rr, rr, v, ctx))
168                goto err;
169        }
170    }
171    if (r != rr)
172        BN_copy(r, rr);
173    ret = 1;
174 err:
175    BN_CTX_end(ctx);
176    bn_check_top(r);
177    return (ret);
178}
179
180int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
181               BN_CTX *ctx)
182{
183    int ret;
184
185    bn_check_top(a);
186    bn_check_top(p);
187    bn_check_top(m);
188
189    /*-
190     * For even modulus  m = 2^k*m_odd,  it might make sense to compute
191     * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
192     * exponentiation for the odd part), using appropriate exponent
193     * reductions, and combine the results using the CRT.
194     *
195     * For now, we use Montgomery only if the modulus is odd; otherwise,
196     * exponentiation using the reciprocal-based quick remaindering
197     * algorithm is used.
198     *
199     * (Timing obtained with expspeed.c [computations  a^p mod m
200     * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
201     * 4096, 8192 bits], compared to the running time of the
202     * standard algorithm:
203     *
204     *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
205     *                     55 .. 77 %  [UltraSparc processor, but
206     *                                  debug-solaris-sparcv8-gcc conf.]
207     *
208     *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
209     *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
210     *
211     * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
212     * at 2048 and more bits, but at 512 and 1024 bits, it was
213     * slower even than the standard algorithm!
214     *
215     * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
216     * should be obtained when the new Montgomery reduction code
217     * has been integrated into OpenSSL.)
218     */
219
220#define MONT_MUL_MOD
221#define MONT_EXP_WORD
222#define RECP_MUL_MOD
223
224#ifdef MONT_MUL_MOD
225    /*
226     * I have finally been able to take out this pre-condition of the top bit
227     * being set.  It was caused by an error in BN_div with negatives.  There
228     * was also another problem when for a^b%m a >= m.  eay 07-May-97
229     */
230    /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
231
232    if (BN_is_odd(m)) {
233# ifdef MONT_EXP_WORD
234        if (a->top == 1 && !a->neg
235            && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
236            BN_ULONG A = a->d[0];
237            ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
238        } else
239# endif
240            ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
241    } else
242#endif
243#ifdef RECP_MUL_MOD
244    {
245        ret = BN_mod_exp_recp(r, a, p, m, ctx);
246    }
247#else
248    {
249        ret = BN_mod_exp_simple(r, a, p, m, ctx);
250    }
251#endif
252
253    bn_check_top(r);
254    return (ret);
255}
256
257int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
258                    const BIGNUM *m, BN_CTX *ctx)
259{
260    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
261    int start = 1;
262    BIGNUM *aa;
263    /* Table of variables obtained from 'ctx' */
264    BIGNUM *val[TABLE_SIZE];
265    BN_RECP_CTX recp;
266
267    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
268        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
269        BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
270        return -1;
271    }
272
273    bits = BN_num_bits(p);
274
275    if (bits == 0) {
276        ret = BN_one(r);
277        return ret;
278    }
279
280    BN_CTX_start(ctx);
281    aa = BN_CTX_get(ctx);
282    val[0] = BN_CTX_get(ctx);
283    if (!aa || !val[0])
284        goto err;
285
286    BN_RECP_CTX_init(&recp);
287    if (m->neg) {
288        /* ignore sign of 'm' */
289        if (!BN_copy(aa, m))
290            goto err;
291        aa->neg = 0;
292        if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
293            goto err;
294    } else {
295        if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
296            goto err;
297    }
298
299    if (!BN_nnmod(val[0], a, m, ctx))
300        goto err;               /* 1 */
301    if (BN_is_zero(val[0])) {
302        BN_zero(r);
303        ret = 1;
304        goto err;
305    }
306
307    window = BN_window_bits_for_exponent_size(bits);
308    if (window > 1) {
309        if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
310            goto err;           /* 2 */
311        j = 1 << (window - 1);
312        for (i = 1; i < j; i++) {
313            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
314                !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
315                goto err;
316        }
317    }
318
319    start = 1;                  /* This is used to avoid multiplication etc
320                                 * when there is only the value '1' in the
321                                 * buffer. */
322    wvalue = 0;                 /* The 'value' of the window */
323    wstart = bits - 1;          /* The top bit of the window */
324    wend = 0;                   /* The bottom bit of the window */
325
326    if (!BN_one(r))
327        goto err;
328
329    for (;;) {
330        if (BN_is_bit_set(p, wstart) == 0) {
331            if (!start)
332                if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
333                    goto err;
334            if (wstart == 0)
335                break;
336            wstart--;
337            continue;
338        }
339        /*
340         * We now have wstart on a 'set' bit, we now need to work out how bit
341         * a window to do.  To do this we need to scan forward until the last
342         * set bit before the end of the window
343         */
344        j = wstart;
345        wvalue = 1;
346        wend = 0;
347        for (i = 1; i < window; i++) {
348            if (wstart - i < 0)
349                break;
350            if (BN_is_bit_set(p, wstart - i)) {
351                wvalue <<= (i - wend);
352                wvalue |= 1;
353                wend = i;
354            }
355        }
356
357        /* wend is the size of the current window */
358        j = wend + 1;
359        /* add the 'bytes above' */
360        if (!start)
361            for (i = 0; i < j; i++) {
362                if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
363                    goto err;
364            }
365
366        /* wvalue will be an odd number < 2^window */
367        if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
368            goto err;
369
370        /* move the 'window' down further */
371        wstart -= wend + 1;
372        wvalue = 0;
373        start = 0;
374        if (wstart < 0)
375            break;
376    }
377    ret = 1;
378 err:
379    BN_CTX_end(ctx);
380    BN_RECP_CTX_free(&recp);
381    bn_check_top(r);
382    return (ret);
383}
384
385int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
386                    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
387{
388    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
389    int start = 1;
390    BIGNUM *d, *r;
391    const BIGNUM *aa;
392    /* Table of variables obtained from 'ctx' */
393    BIGNUM *val[TABLE_SIZE];
394    BN_MONT_CTX *mont = NULL;
395
396    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
397        return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
398    }
399
400    bn_check_top(a);
401    bn_check_top(p);
402    bn_check_top(m);
403
404    if (!BN_is_odd(m)) {
405        BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
406        return (0);
407    }
408    bits = BN_num_bits(p);
409    if (bits == 0) {
410        ret = BN_one(rr);
411        return ret;
412    }
413
414    BN_CTX_start(ctx);
415    d = BN_CTX_get(ctx);
416    r = BN_CTX_get(ctx);
417    val[0] = BN_CTX_get(ctx);
418    if (!d || !r || !val[0])
419        goto err;
420
421    /*
422     * If this is not done, things will break in the montgomery part
423     */
424
425    if (in_mont != NULL)
426        mont = in_mont;
427    else {
428        if ((mont = BN_MONT_CTX_new()) == NULL)
429            goto err;
430        if (!BN_MONT_CTX_set(mont, m, ctx))
431            goto err;
432    }
433
434    if (a->neg || BN_ucmp(a, m) >= 0) {
435        if (!BN_nnmod(val[0], a, m, ctx))
436            goto err;
437        aa = val[0];
438    } else
439        aa = a;
440    if (BN_is_zero(aa)) {
441        BN_zero(rr);
442        ret = 1;
443        goto err;
444    }
445    if (!BN_to_montgomery(val[0], aa, mont, ctx))
446        goto err;               /* 1 */
447
448    window = BN_window_bits_for_exponent_size(bits);
449    if (window > 1) {
450        if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
451            goto err;           /* 2 */
452        j = 1 << (window - 1);
453        for (i = 1; i < j; i++) {
454            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
455                !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
456                goto err;
457        }
458    }
459
460    start = 1;                  /* This is used to avoid multiplication etc
461                                 * when there is only the value '1' in the
462                                 * buffer. */
463    wvalue = 0;                 /* The 'value' of the window */
464    wstart = bits - 1;          /* The top bit of the window */
465    wend = 0;                   /* The bottom bit of the window */
466
467    if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
468        goto err;
469    for (;;) {
470        if (BN_is_bit_set(p, wstart) == 0) {
471            if (!start) {
472                if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
473                    goto err;
474            }
475            if (wstart == 0)
476                break;
477            wstart--;
478            continue;
479        }
480        /*
481         * We now have wstart on a 'set' bit, we now need to work out how bit
482         * a window to do.  To do this we need to scan forward until the last
483         * set bit before the end of the window
484         */
485        j = wstart;
486        wvalue = 1;
487        wend = 0;
488        for (i = 1; i < window; i++) {
489            if (wstart - i < 0)
490                break;
491            if (BN_is_bit_set(p, wstart - i)) {
492                wvalue <<= (i - wend);
493                wvalue |= 1;
494                wend = i;
495            }
496        }
497
498        /* wend is the size of the current window */
499        j = wend + 1;
500        /* add the 'bytes above' */
501        if (!start)
502            for (i = 0; i < j; i++) {
503                if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
504                    goto err;
505            }
506
507        /* wvalue will be an odd number < 2^window */
508        if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
509            goto err;
510
511        /* move the 'window' down further */
512        wstart -= wend + 1;
513        wvalue = 0;
514        start = 0;
515        if (wstart < 0)
516            break;
517    }
518    if (!BN_from_montgomery(rr, r, mont, ctx))
519        goto err;
520    ret = 1;
521 err:
522    if ((in_mont == NULL) && (mont != NULL))
523        BN_MONT_CTX_free(mont);
524    BN_CTX_end(ctx);
525    bn_check_top(rr);
526    return (ret);
527}
528
529/*
530 * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
531 * layout so that accessing any of these table values shows the same access
532 * pattern as far as cache lines are concerned.  The following functions are
533 * used to transfer a BIGNUM from/to that table.
534 */
535
536static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
537                                        unsigned char *buf, int idx,
538                                        int width)
539{
540    size_t i, j;
541
542    if (top > b->top)
543        top = b->top;           /* this works because 'buf' is explicitly
544                                 * zeroed */
545    for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
546        buf[j] = ((unsigned char *)b->d)[i];
547    }
548
549    return 1;
550}
551
552static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
553                                          unsigned char *buf, int idx,
554                                          int width)
555{
556    size_t i, j;
557
558    if (bn_wexpand(b, top) == NULL)
559        return 0;
560
561    for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
562        ((unsigned char *)b->d)[i] = buf[j];
563    }
564
565    b->top = top;
566    bn_correct_top(b);
567    return 1;
568}
569
570/*
571 * Given a pointer value, compute the next address that is a cache line
572 * multiple.
573 */
574#define MOD_EXP_CTIME_ALIGN(x_) \
575        ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
576
577/*
578 * This variant of BN_mod_exp_mont() uses fixed windows and the special
579 * precomputation memory layout to limit data-dependency to a minimum to
580 * protect secret exponents (cf. the hyper-threading timing attacks pointed
581 * out by Colin Percival,
582 * http://www.daemong-consideredperthreading-considered-harmful/)
583 */
584int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
585                              const BIGNUM *m, BN_CTX *ctx,
586                              BN_MONT_CTX *in_mont)
587{
588    int i, bits, ret = 0, window, wvalue;
589    int top;
590    BN_MONT_CTX *mont = NULL;
591
592    int numPowers;
593    unsigned char *powerbufFree = NULL;
594    int powerbufLen = 0;
595    unsigned char *powerbuf = NULL;
596    BIGNUM tmp, am;
597
598    bn_check_top(a);
599    bn_check_top(p);
600    bn_check_top(m);
601
602    if (!BN_is_odd(m)) {
603        BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
604        return (0);
605    }
606
607    top = m->top;
608
609    bits = BN_num_bits(p);
610    if (bits == 0) {
611        ret = BN_one(rr);
612        return ret;
613    }
614
615    BN_CTX_start(ctx);
616
617    /*
618     * Allocate a montgomery context if it was not supplied by the caller. If
619     * this is not done, things will break in the montgomery part.
620     */
621    if (in_mont != NULL)
622        mont = in_mont;
623    else {
624        if ((mont = BN_MONT_CTX_new()) == NULL)
625            goto err;
626        if (!BN_MONT_CTX_set(mont, m, ctx))
627            goto err;
628    }
629
630    /* Get the window size to use with size of p. */
631    window = BN_window_bits_for_ctime_exponent_size(bits);
632#if defined(OPENSSL_BN_ASM_MONT5)
633    if (window == 6 && bits <= 1024)
634        window = 5;             /* ~5% improvement of 2048-bit RSA sign */
635#endif
636
637    /*
638     * Allocate a buffer large enough to hold all of the pre-computed powers
639     * of am, am itself and tmp.
640     */
641    numPowers = 1 << window;
642    powerbufLen = sizeof(m->d[0]) * (top * numPowers +
643                                     ((2 * top) >
644                                      numPowers ? (2 * top) : numPowers));
645#ifdef alloca
646    if (powerbufLen < 3072)
647        powerbufFree =
648            alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
649    else
650#endif
651        if ((powerbufFree =
652             (unsigned char *)OPENSSL_malloc(powerbufLen +
653                                             MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
654            == NULL)
655        goto err;
656
657    powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
658    memset(powerbuf, 0, powerbufLen);
659
660#ifdef alloca
661    if (powerbufLen < 3072)
662        powerbufFree = NULL;
663#endif
664
665    /* lay down tmp and am right after powers table */
666    tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
667    am.d = tmp.d + top;
668    tmp.top = am.top = 0;
669    tmp.dmax = am.dmax = top;
670    tmp.neg = am.neg = 0;
671    tmp.flags = am.flags = BN_FLG_STATIC_DATA;
672
673    /* prepare a^0 in Montgomery domain */
674#if 1
675    if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
676        goto err;
677#else
678    tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */
679    for (i = 1; i < top; i++)
680        tmp.d[i] = (~m->d[i]) & BN_MASK2;
681    tmp.top = top;
682#endif
683
684    /* prepare a^1 in Montgomery domain */
685    if (a->neg || BN_ucmp(a, m) >= 0) {
686        if (!BN_mod(&am, a, m, ctx))
687            goto err;
688        if (!BN_to_montgomery(&am, &am, mont, ctx))
689            goto err;
690    } else if (!BN_to_montgomery(&am, a, mont, ctx))
691        goto err;
692
693#if defined(OPENSSL_BN_ASM_MONT5)
694    if (window == 5 && top > 1) {
695        /*
696         * This optimization uses ideas from http://eprint.iacr.org/2011/239,
697         * specifically optimization of cache-timing attack countermeasures
698         * and pre-computation optimization.
699         */
700
701        /*
702         * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
703         * 512-bit RSA is hardly relevant, we omit it to spare size...
704         */
705        void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
706                                 const void *table, const BN_ULONG *np,
707                                 const BN_ULONG *n0, int num, int power);
708        void bn_scatter5(const BN_ULONG *inp, size_t num,
709                         void *table, size_t power);
710        void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
711
712        BN_ULONG *np = mont->N.d, *n0 = mont->n0;
713
714        /*
715         * BN_to_montgomery can contaminate words above .top [in
716         * BN_DEBUG[_DEBUG] build]...
717         */
718        for (i = am.top; i < top; i++)
719            am.d[i] = 0;
720        for (i = tmp.top; i < top; i++)
721            tmp.d[i] = 0;
722
723        bn_scatter5(tmp.d, top, powerbuf, 0);
724        bn_scatter5(am.d, am.top, powerbuf, 1);
725        bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
726        bn_scatter5(tmp.d, top, powerbuf, 2);
727
728# if 0
729        for (i = 3; i < 32; i++) {
730            /* Calculate a^i = a^(i-1) * a */
731            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
732            bn_scatter5(tmp.d, top, powerbuf, i);
733        }
734# else
735        /* same as above, but uses squaring for 1/2 of operations */
736        for (i = 4; i < 32; i *= 2) {
737            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
738            bn_scatter5(tmp.d, top, powerbuf, i);
739        }
740        for (i = 3; i < 8; i += 2) {
741            int j;
742            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
743            bn_scatter5(tmp.d, top, powerbuf, i);
744            for (j = 2 * i; j < 32; j *= 2) {
745                bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
746                bn_scatter5(tmp.d, top, powerbuf, j);
747            }
748        }
749        for (; i < 16; i += 2) {
750            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
751            bn_scatter5(tmp.d, top, powerbuf, i);
752            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
753            bn_scatter5(tmp.d, top, powerbuf, 2 * i);
754        }
755        for (; i < 32; i += 2) {
756            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
757            bn_scatter5(tmp.d, top, powerbuf, i);
758        }
759# endif
760        bits--;
761        for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
762            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
763        bn_gather5(tmp.d, top, powerbuf, wvalue);
764
765        /*
766         * Scan the exponent one window at a time starting from the most
767         * significant bits.
768         */
769        while (bits >= 0) {
770            for (wvalue = 0, i = 0; i < 5; i++, bits--)
771                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
772
773            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
774            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
775            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
776            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
777            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
778            bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
779        }
780
781        tmp.top = top;
782        bn_correct_top(&tmp);
783    } else
784#endif
785    {
786        if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers))
787            goto err;
788        if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers))
789            goto err;
790
791        /*
792         * If the window size is greater than 1, then calculate
793         * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even
794         * powers could instead be computed as (a^(i/2))^2 to use the slight
795         * performance advantage of sqr over mul).
796         */
797        if (window > 1) {
798            if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
799                goto err;
800            if (!MOD_EXP_CTIME_COPY_TO_PREBUF
801                (&tmp, top, powerbuf, 2, numPowers))
802                goto err;
803            for (i = 3; i < numPowers; i++) {
804                /* Calculate a^i = a^(i-1) * a */
805                if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
806                    goto err;
807                if (!MOD_EXP_CTIME_COPY_TO_PREBUF
808                    (&tmp, top, powerbuf, i, numPowers))
809                    goto err;
810            }
811        }
812
813        bits--;
814        for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
815            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
816        if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
817            (&tmp, top, powerbuf, wvalue, numPowers))
818            goto err;
819
820        /*
821         * Scan the exponent one window at a time starting from the most
822         * significant bits.
823         */
824        while (bits >= 0) {
825            wvalue = 0;         /* The 'value' of the window */
826
827            /* Scan the window, squaring the result as we go */
828            for (i = 0; i < window; i++, bits--) {
829                if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
830                    goto err;
831                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
832            }
833
834            /*
835             * Fetch the appropriate pre-computed value from the pre-buf
836             */
837            if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
838                (&am, top, powerbuf, wvalue, numPowers))
839                goto err;
840
841            /* Multiply the result into the intermediate result */
842            if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
843                goto err;
844        }
845    }
846
847    /* Convert the final result from montgomery to standard format */
848    if (!BN_from_montgomery(rr, &tmp, mont, ctx))
849        goto err;
850    ret = 1;
851 err:
852    if ((in_mont == NULL) && (mont != NULL))
853        BN_MONT_CTX_free(mont);
854    if (powerbuf != NULL) {
855        OPENSSL_cleanse(powerbuf, powerbufLen);
856        if (powerbufFree)
857            OPENSSL_free(powerbufFree);
858    }
859    BN_CTX_end(ctx);
860    return (ret);
861}
862
863int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
864                         const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
865{
866    BN_MONT_CTX *mont = NULL;
867    int b, bits, ret = 0;
868    int r_is_one;
869    BN_ULONG w, next_w;
870    BIGNUM *d, *r, *t;
871    BIGNUM *swap_tmp;
872#define BN_MOD_MUL_WORD(r, w, m) \
873                (BN_mul_word(r, (w)) && \
874                (/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
875                        (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
876    /*
877     * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
878     * probably more overhead than always using BN_mod (which uses BN_copy if
879     * a similar test returns true).
880     */
881    /*
882     * We can use BN_mod and do not need BN_nnmod because our accumulator is
883     * never negative (the result of BN_mod does not depend on the sign of
884     * the modulus).
885     */
886#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
887                (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
888
889    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
890        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
891        BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
892        return -1;
893    }
894
895    bn_check_top(p);
896    bn_check_top(m);
897
898    if (!BN_is_odd(m)) {
899        BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
900        return (0);
901    }
902    if (m->top == 1)
903        a %= m->d[0];           /* make sure that 'a' is reduced */
904
905    bits = BN_num_bits(p);
906    if (bits == 0) {
907        /* x**0 mod 1 is still zero. */
908        if (BN_is_one(m)) {
909            ret = 1;
910            BN_zero(rr);
911        } else
912            ret = BN_one(rr);
913        return ret;
914    }
915    if (a == 0) {
916        BN_zero(rr);
917        ret = 1;
918        return ret;
919    }
920
921    BN_CTX_start(ctx);
922    d = BN_CTX_get(ctx);
923    r = BN_CTX_get(ctx);
924    t = BN_CTX_get(ctx);
925    if (d == NULL || r == NULL || t == NULL)
926        goto err;
927
928    if (in_mont != NULL)
929        mont = in_mont;
930    else {
931        if ((mont = BN_MONT_CTX_new()) == NULL)
932            goto err;
933        if (!BN_MONT_CTX_set(mont, m, ctx))
934            goto err;
935    }
936
937    r_is_one = 1;               /* except for Montgomery factor */
938
939    /* bits-1 >= 0 */
940
941    /* The result is accumulated in the product r*w. */
942    w = a;                      /* bit 'bits-1' of 'p' is always set */
943    for (b = bits - 2; b >= 0; b--) {
944        /* First, square r*w. */
945        next_w = w * w;
946        if ((next_w / w) != w) { /* overflow */
947            if (r_is_one) {
948                if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
949                    goto err;
950                r_is_one = 0;
951            } else {
952                if (!BN_MOD_MUL_WORD(r, w, m))
953                    goto err;
954            }
955            next_w = 1;
956        }
957        w = next_w;
958        if (!r_is_one) {
959            if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
960                goto err;
961        }
962
963        /* Second, multiply r*w by 'a' if exponent bit is set. */
964        if (BN_is_bit_set(p, b)) {
965            next_w = w * a;
966            if ((next_w / a) != w) { /* overflow */
967                if (r_is_one) {
968                    if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
969                        goto err;
970                    r_is_one = 0;
971                } else {
972                    if (!BN_MOD_MUL_WORD(r, w, m))
973                        goto err;
974                }
975                next_w = a;
976            }
977            w = next_w;
978        }
979    }
980
981    /* Finally, set r:=r*w. */
982    if (w != 1) {
983        if (r_is_one) {
984            if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
985                goto err;
986            r_is_one = 0;
987        } else {
988            if (!BN_MOD_MUL_WORD(r, w, m))
989                goto err;
990        }
991    }
992
993    if (r_is_one) {             /* can happen only if a == 1 */
994        if (!BN_one(rr))
995            goto err;
996    } else {
997        if (!BN_from_montgomery(rr, r, mont, ctx))
998            goto err;
999    }
1000    ret = 1;
1001 err:
1002    if ((in_mont == NULL) && (mont != NULL))
1003        BN_MONT_CTX_free(mont);
1004    BN_CTX_end(ctx);
1005    bn_check_top(rr);
1006    return (ret);
1007}
1008
1009/* The old fallback, simple version :-) */
1010int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
1011                      const BIGNUM *m, BN_CTX *ctx)
1012{
1013    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
1014    int start = 1;
1015    BIGNUM *d;
1016    /* Table of variables obtained from 'ctx' */
1017    BIGNUM *val[TABLE_SIZE];
1018
1019    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1020        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1021        BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1022        return -1;
1023    }
1024
1025    bits = BN_num_bits(p);
1026
1027    if (bits == 0) {
1028        ret = BN_one(r);
1029        return ret;
1030    }
1031
1032    BN_CTX_start(ctx);
1033    d = BN_CTX_get(ctx);
1034    val[0] = BN_CTX_get(ctx);
1035    if (!d || !val[0])
1036        goto err;
1037
1038    if (!BN_nnmod(val[0], a, m, ctx))
1039        goto err;               /* 1 */
1040    if (BN_is_zero(val[0])) {
1041        BN_zero(r);
1042        ret = 1;
1043        goto err;
1044    }
1045
1046    window = BN_window_bits_for_exponent_size(bits);
1047    if (window > 1) {
1048        if (!BN_mod_mul(d, val[0], val[0], m, ctx))
1049            goto err;           /* 2 */
1050        j = 1 << (window - 1);
1051        for (i = 1; i < j; i++) {
1052            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1053                !BN_mod_mul(val[i], val[i - 1], d, m, ctx))
1054                goto err;
1055        }
1056    }
1057
1058    start = 1;                  /* This is used to avoid multiplication etc
1059                                 * when there is only the value '1' in the
1060                                 * buffer. */
1061    wvalue = 0;                 /* The 'value' of the window */
1062    wstart = bits - 1;          /* The top bit of the window */
1063    wend = 0;                   /* The bottom bit of the window */
1064
1065    if (!BN_one(r))
1066        goto err;
1067
1068    for (;;) {
1069        if (BN_is_bit_set(p, wstart) == 0) {
1070            if (!start)
1071                if (!BN_mod_mul(r, r, r, m, ctx))
1072                    goto err;
1073            if (wstart == 0)
1074                break;
1075            wstart--;
1076            continue;
1077        }
1078        /*
1079         * We now have wstart on a 'set' bit, we now need to work out how bit
1080         * a window to do.  To do this we need to scan forward until the last
1081         * set bit before the end of the window
1082         */
1083        j = wstart;
1084        wvalue = 1;
1085        wend = 0;
1086        for (i = 1; i < window; i++) {
1087            if (wstart - i < 0)
1088                break;
1089            if (BN_is_bit_set(p, wstart - i)) {
1090                wvalue <<= (i - wend);
1091                wvalue |= 1;
1092                wend = i;
1093            }
1094        }
1095
1096        /* wend is the size of the current window */
1097        j = wend + 1;
1098        /* add the 'bytes above' */
1099        if (!start)
1100            for (i = 0; i < j; i++) {
1101                if (!BN_mod_mul(r, r, r, m, ctx))
1102                    goto err;
1103            }
1104
1105        /* wvalue will be an odd number < 2^window */
1106        if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1107            goto err;
1108
1109        /* move the 'window' down further */
1110        wstart -= wend + 1;
1111        wvalue = 0;
1112        start = 0;
1113        if (wstart < 0)
1114            break;
1115    }
1116    ret = 1;
1117 err:
1118    BN_CTX_end(ctx);
1119    bn_check_top(r);
1120    return (ret);
1121}
1122