bn_exp.c revision 280304
1/* crypto/bn/bn_exp.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58/* ====================================================================
59 * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 *    notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 *    notice, this list of conditions and the following disclaimer in
70 *    the documentation and/or other materials provided with the
71 *    distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 *    software must display the following acknowledgment:
75 *    "This product includes software developed by the OpenSSL Project
76 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 *    endorse or promote products derived from this software without
80 *    prior written permission. For written permission, please contact
81 *    openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 *    nor may "OpenSSL" appear in their names without prior written
85 *    permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 *    acknowledgment:
89 *    "This product includes software developed by the OpenSSL Project
90 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com).  This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112#include "cryptlib.h"
113#include "bn_lcl.h"
114
115#include <stdlib.h>
116#ifdef _WIN32
117# include <malloc.h>
118# ifndef alloca
119#  define alloca _alloca
120# endif
121#elif defined(__GNUC__)
122# ifndef alloca
123#  define alloca(s) __builtin_alloca((s))
124# endif
125#endif
126
127/* maximum precomputation table size for *variable* sliding windows */
128#define TABLE_SIZE      32
129
130/* this one works - simple but works */
131int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
132{
133    int i, bits, ret = 0;
134    BIGNUM *v, *rr;
135
136    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
137        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
138        BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
139        return -1;
140    }
141
142    BN_CTX_start(ctx);
143    if ((r == a) || (r == p))
144        rr = BN_CTX_get(ctx);
145    else
146        rr = r;
147    v = BN_CTX_get(ctx);
148    if (rr == NULL || v == NULL)
149        goto err;
150
151    if (BN_copy(v, a) == NULL)
152        goto err;
153    bits = BN_num_bits(p);
154
155    if (BN_is_odd(p)) {
156        if (BN_copy(rr, a) == NULL)
157            goto err;
158    } else {
159        if (!BN_one(rr))
160            goto err;
161    }
162
163    for (i = 1; i < bits; i++) {
164        if (!BN_sqr(v, v, ctx))
165            goto err;
166        if (BN_is_bit_set(p, i)) {
167            if (!BN_mul(rr, rr, v, ctx))
168                goto err;
169        }
170    }
171    if (r != rr)
172        BN_copy(r, rr);
173    ret = 1;
174 err:
175    BN_CTX_end(ctx);
176    bn_check_top(r);
177    return (ret);
178}
179
180int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
181               BN_CTX *ctx)
182{
183    int ret;
184
185    bn_check_top(a);
186    bn_check_top(p);
187    bn_check_top(m);
188
189    /*-
190     * For even modulus  m = 2^k*m_odd,  it might make sense to compute
191     * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
192     * exponentiation for the odd part), using appropriate exponent
193     * reductions, and combine the results using the CRT.
194     *
195     * For now, we use Montgomery only if the modulus is odd; otherwise,
196     * exponentiation using the reciprocal-based quick remaindering
197     * algorithm is used.
198     *
199     * (Timing obtained with expspeed.c [computations  a^p mod m
200     * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
201     * 4096, 8192 bits], compared to the running time of the
202     * standard algorithm:
203     *
204     *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
205     *                     55 .. 77 %  [UltraSparc processor, but
206     *                                  debug-solaris-sparcv8-gcc conf.]
207     *
208     *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
209     *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
210     *
211     * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
212     * at 2048 and more bits, but at 512 and 1024 bits, it was
213     * slower even than the standard algorithm!
214     *
215     * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
216     * should be obtained when the new Montgomery reduction code
217     * has been integrated into OpenSSL.)
218     */
219
220#define MONT_MUL_MOD
221#define MONT_EXP_WORD
222#define RECP_MUL_MOD
223
224#ifdef MONT_MUL_MOD
225    /*
226     * I have finally been able to take out this pre-condition of the top bit
227     * being set.  It was caused by an error in BN_div with negatives.  There
228     * was also another problem when for a^b%m a >= m.  eay 07-May-97
229     */
230    /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
231
232    if (BN_is_odd(m)) {
233# ifdef MONT_EXP_WORD
234        if (a->top == 1 && !a->neg
235            && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
236            BN_ULONG A = a->d[0];
237            ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
238        } else
239# endif
240            ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
241    } else
242#endif
243#ifdef RECP_MUL_MOD
244    {
245        ret = BN_mod_exp_recp(r, a, p, m, ctx);
246    }
247#else
248    {
249        ret = BN_mod_exp_simple(r, a, p, m, ctx);
250    }
251#endif
252
253    bn_check_top(r);
254    return (ret);
255}
256
257int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
258                    const BIGNUM *m, BN_CTX *ctx)
259{
260    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
261    int start = 1;
262    BIGNUM *aa;
263    /* Table of variables obtained from 'ctx' */
264    BIGNUM *val[TABLE_SIZE];
265    BN_RECP_CTX recp;
266
267    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
268        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
269        BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
270        return -1;
271    }
272
273    bits = BN_num_bits(p);
274
275    if (bits == 0) {
276        ret = BN_one(r);
277        return ret;
278    }
279
280    BN_CTX_start(ctx);
281    aa = BN_CTX_get(ctx);
282    val[0] = BN_CTX_get(ctx);
283    if (!aa || !val[0])
284        goto err;
285
286    BN_RECP_CTX_init(&recp);
287    if (m->neg) {
288        /* ignore sign of 'm' */
289        if (!BN_copy(aa, m))
290            goto err;
291        aa->neg = 0;
292        if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
293            goto err;
294    } else {
295        if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
296            goto err;
297    }
298
299    if (!BN_nnmod(val[0], a, m, ctx))
300        goto err;               /* 1 */
301    if (BN_is_zero(val[0])) {
302        BN_zero(r);
303        ret = 1;
304        goto err;
305    }
306
307    window = BN_window_bits_for_exponent_size(bits);
308    if (window > 1) {
309        if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
310            goto err;           /* 2 */
311        j = 1 << (window - 1);
312        for (i = 1; i < j; i++) {
313            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
314                !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
315                goto err;
316        }
317    }
318
319    start = 1;                  /* This is used to avoid multiplication etc
320                                 * when there is only the value '1' in the
321                                 * buffer. */
322    wvalue = 0;                 /* The 'value' of the window */
323    wstart = bits - 1;          /* The top bit of the window */
324    wend = 0;                   /* The bottom bit of the window */
325
326    if (!BN_one(r))
327        goto err;
328
329    for (;;) {
330        if (BN_is_bit_set(p, wstart) == 0) {
331            if (!start)
332                if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
333                    goto err;
334            if (wstart == 0)
335                break;
336            wstart--;
337            continue;
338        }
339        /*
340         * We now have wstart on a 'set' bit, we now need to work out how bit
341         * a window to do.  To do this we need to scan forward until the last
342         * set bit before the end of the window
343         */
344        j = wstart;
345        wvalue = 1;
346        wend = 0;
347        for (i = 1; i < window; i++) {
348            if (wstart - i < 0)
349                break;
350            if (BN_is_bit_set(p, wstart - i)) {
351                wvalue <<= (i - wend);
352                wvalue |= 1;
353                wend = i;
354            }
355        }
356
357        /* wend is the size of the current window */
358        j = wend + 1;
359        /* add the 'bytes above' */
360        if (!start)
361            for (i = 0; i < j; i++) {
362                if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
363                    goto err;
364            }
365
366        /* wvalue will be an odd number < 2^window */
367        if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
368            goto err;
369
370        /* move the 'window' down further */
371        wstart -= wend + 1;
372        wvalue = 0;
373        start = 0;
374        if (wstart < 0)
375            break;
376    }
377    ret = 1;
378 err:
379    BN_CTX_end(ctx);
380    BN_RECP_CTX_free(&recp);
381    bn_check_top(r);
382    return (ret);
383}
384
385int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
386                    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
387{
388    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
389    int start = 1;
390    BIGNUM *d, *r;
391    const BIGNUM *aa;
392    /* Table of variables obtained from 'ctx' */
393    BIGNUM *val[TABLE_SIZE];
394    BN_MONT_CTX *mont = NULL;
395
396    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
397        return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
398    }
399
400    bn_check_top(a);
401    bn_check_top(p);
402    bn_check_top(m);
403
404    if (!BN_is_odd(m)) {
405        BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
406        return (0);
407    }
408    bits = BN_num_bits(p);
409    if (bits == 0) {
410        ret = BN_one(rr);
411        return ret;
412    }
413
414    BN_CTX_start(ctx);
415    d = BN_CTX_get(ctx);
416    r = BN_CTX_get(ctx);
417    val[0] = BN_CTX_get(ctx);
418    if (!d || !r || !val[0])
419        goto err;
420
421    /*
422     * If this is not done, things will break in the montgomery part
423     */
424
425    if (in_mont != NULL)
426        mont = in_mont;
427    else {
428        if ((mont = BN_MONT_CTX_new()) == NULL)
429            goto err;
430        if (!BN_MONT_CTX_set(mont, m, ctx))
431            goto err;
432    }
433
434    if (a->neg || BN_ucmp(a, m) >= 0) {
435        if (!BN_nnmod(val[0], a, m, ctx))
436            goto err;
437        aa = val[0];
438    } else
439        aa = a;
440    if (BN_is_zero(aa)) {
441        BN_zero(rr);
442        ret = 1;
443        goto err;
444    }
445    if (!BN_to_montgomery(val[0], aa, mont, ctx))
446        goto err;               /* 1 */
447
448    window = BN_window_bits_for_exponent_size(bits);
449    if (window > 1) {
450        if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
451            goto err;           /* 2 */
452        j = 1 << (window - 1);
453        for (i = 1; i < j; i++) {
454            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
455                !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
456                goto err;
457        }
458    }
459
460    start = 1;                  /* This is used to avoid multiplication etc
461                                 * when there is only the value '1' in the
462                                 * buffer. */
463    wvalue = 0;                 /* The 'value' of the window */
464    wstart = bits - 1;          /* The top bit of the window */
465    wend = 0;                   /* The bottom bit of the window */
466
467    if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
468        goto err;
469    for (;;) {
470        if (BN_is_bit_set(p, wstart) == 0) {
471            if (!start) {
472                if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
473                    goto err;
474            }
475            if (wstart == 0)
476                break;
477            wstart--;
478            continue;
479        }
480        /*
481         * We now have wstart on a 'set' bit, we now need to work out how bit
482         * a window to do.  To do this we need to scan forward until the last
483         * set bit before the end of the window
484         */
485        j = wstart;
486        wvalue = 1;
487        wend = 0;
488        for (i = 1; i < window; i++) {
489            if (wstart - i < 0)
490                break;
491            if (BN_is_bit_set(p, wstart - i)) {
492                wvalue <<= (i - wend);
493                wvalue |= 1;
494                wend = i;
495            }
496        }
497
498        /* wend is the size of the current window */
499        j = wend + 1;
500        /* add the 'bytes above' */
501        if (!start)
502            for (i = 0; i < j; i++) {
503                if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
504                    goto err;
505            }
506
507        /* wvalue will be an odd number < 2^window */
508        if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
509            goto err;
510
511        /* move the 'window' down further */
512        wstart -= wend + 1;
513        wvalue = 0;
514        start = 0;
515        if (wstart < 0)
516            break;
517    }
518    if (!BN_from_montgomery(rr, r, mont, ctx))
519        goto err;
520    ret = 1;
521 err:
522    if ((in_mont == NULL) && (mont != NULL))
523        BN_MONT_CTX_free(mont);
524    BN_CTX_end(ctx);
525    bn_check_top(rr);
526    return (ret);
527}
528
529/*
530 * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
531 * layout so that accessing any of these table values shows the same access
532 * pattern as far as cache lines are concerned.  The following functions are
533 * used to transfer a BIGNUM from/to that table.
534 */
535
536static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
537                                        unsigned char *buf, int idx,
538                                        int width)
539{
540    size_t i, j;
541
542    if (top > b->top)
543        top = b->top;           /* this works because 'buf' is explicitly
544                                 * zeroed */
545    for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
546        buf[j] = ((unsigned char *)b->d)[i];
547    }
548
549    return 1;
550}
551
552static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
553                                          unsigned char *buf, int idx,
554                                          int width)
555{
556    size_t i, j;
557
558    if (bn_wexpand(b, top) == NULL)
559        return 0;
560
561    for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
562        ((unsigned char *)b->d)[i] = buf[j];
563    }
564
565    b->top = top;
566    bn_correct_top(b);
567    return 1;
568}
569
570/*
571 * Given a pointer value, compute the next address that is a cache line
572 * multiple.
573 */
574#define MOD_EXP_CTIME_ALIGN(x_) \
575        ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
576
577/*
578 * This variant of BN_mod_exp_mont() uses fixed windows and the special
579 * precomputation memory layout to limit data-dependency to a minimum to
580 * protect secret exponents (cf. the hyper-threading timing attacks pointed
581 * out by Colin Percival,
582 * http://www.daemong-consideredperthreading-considered-harmful/)
583 */
584int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
585                              const BIGNUM *m, BN_CTX *ctx,
586                              BN_MONT_CTX *in_mont)
587{
588    int i, bits, ret = 0, window, wvalue;
589    int top;
590    BN_MONT_CTX *mont = NULL;
591
592    int numPowers;
593    unsigned char *powerbufFree = NULL;
594    int powerbufLen = 0;
595    unsigned char *powerbuf = NULL;
596    BIGNUM tmp, am;
597
598    bn_check_top(a);
599    bn_check_top(p);
600    bn_check_top(m);
601
602    top = m->top;
603
604    if (!(m->d[0] & 1)) {
605        BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
606        return (0);
607    }
608    bits = BN_num_bits(p);
609    if (bits == 0) {
610        ret = BN_one(rr);
611        return ret;
612    }
613
614    BN_CTX_start(ctx);
615
616    /*
617     * Allocate a montgomery context if it was not supplied by the caller. If
618     * this is not done, things will break in the montgomery part.
619     */
620    if (in_mont != NULL)
621        mont = in_mont;
622    else {
623        if ((mont = BN_MONT_CTX_new()) == NULL)
624            goto err;
625        if (!BN_MONT_CTX_set(mont, m, ctx))
626            goto err;
627    }
628
629    /* Get the window size to use with size of p. */
630    window = BN_window_bits_for_ctime_exponent_size(bits);
631#if defined(OPENSSL_BN_ASM_MONT5)
632    if (window == 6 && bits <= 1024)
633        window = 5;             /* ~5% improvement of 2048-bit RSA sign */
634#endif
635
636    /*
637     * Allocate a buffer large enough to hold all of the pre-computed powers
638     * of am, am itself and tmp.
639     */
640    numPowers = 1 << window;
641    powerbufLen = sizeof(m->d[0]) * (top * numPowers +
642                                     ((2 * top) >
643                                      numPowers ? (2 * top) : numPowers));
644#ifdef alloca
645    if (powerbufLen < 3072)
646        powerbufFree =
647            alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
648    else
649#endif
650        if ((powerbufFree =
651             (unsigned char *)OPENSSL_malloc(powerbufLen +
652                                             MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
653            == NULL)
654        goto err;
655
656    powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
657    memset(powerbuf, 0, powerbufLen);
658
659#ifdef alloca
660    if (powerbufLen < 3072)
661        powerbufFree = NULL;
662#endif
663
664    /* lay down tmp and am right after powers table */
665    tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
666    am.d = tmp.d + top;
667    tmp.top = am.top = 0;
668    tmp.dmax = am.dmax = top;
669    tmp.neg = am.neg = 0;
670    tmp.flags = am.flags = BN_FLG_STATIC_DATA;
671
672    /* prepare a^0 in Montgomery domain */
673#if 1
674    if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
675        goto err;
676#else
677    tmp.d[0] = (0 - m->d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */
678    for (i = 1; i < top; i++)
679        tmp.d[i] = (~m->d[i]) & BN_MASK2;
680    tmp.top = top;
681#endif
682
683    /* prepare a^1 in Montgomery domain */
684    if (a->neg || BN_ucmp(a, m) >= 0) {
685        if (!BN_mod(&am, a, m, ctx))
686            goto err;
687        if (!BN_to_montgomery(&am, &am, mont, ctx))
688            goto err;
689    } else if (!BN_to_montgomery(&am, a, mont, ctx))
690        goto err;
691
692#if defined(OPENSSL_BN_ASM_MONT5)
693    if (window == 5 && top > 1) {
694        /*
695         * This optimization uses ideas from http://eprint.iacr.org/2011/239,
696         * specifically optimization of cache-timing attack countermeasures
697         * and pre-computation optimization.
698         */
699
700        /*
701         * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
702         * 512-bit RSA is hardly relevant, we omit it to spare size...
703         */
704        void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
705                                 const void *table, const BN_ULONG *np,
706                                 const BN_ULONG *n0, int num, int power);
707        void bn_scatter5(const BN_ULONG *inp, size_t num,
708                         void *table, size_t power);
709        void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
710
711        BN_ULONG *np = mont->N.d, *n0 = mont->n0;
712
713        /*
714         * BN_to_montgomery can contaminate words above .top [in
715         * BN_DEBUG[_DEBUG] build]...
716         */
717        for (i = am.top; i < top; i++)
718            am.d[i] = 0;
719        for (i = tmp.top; i < top; i++)
720            tmp.d[i] = 0;
721
722        bn_scatter5(tmp.d, top, powerbuf, 0);
723        bn_scatter5(am.d, am.top, powerbuf, 1);
724        bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
725        bn_scatter5(tmp.d, top, powerbuf, 2);
726
727# if 0
728        for (i = 3; i < 32; i++) {
729            /* Calculate a^i = a^(i-1) * a */
730            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
731            bn_scatter5(tmp.d, top, powerbuf, i);
732        }
733# else
734        /* same as above, but uses squaring for 1/2 of operations */
735        for (i = 4; i < 32; i *= 2) {
736            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
737            bn_scatter5(tmp.d, top, powerbuf, i);
738        }
739        for (i = 3; i < 8; i += 2) {
740            int j;
741            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
742            bn_scatter5(tmp.d, top, powerbuf, i);
743            for (j = 2 * i; j < 32; j *= 2) {
744                bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
745                bn_scatter5(tmp.d, top, powerbuf, j);
746            }
747        }
748        for (; i < 16; i += 2) {
749            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
750            bn_scatter5(tmp.d, top, powerbuf, i);
751            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
752            bn_scatter5(tmp.d, top, powerbuf, 2 * i);
753        }
754        for (; i < 32; i += 2) {
755            bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
756            bn_scatter5(tmp.d, top, powerbuf, i);
757        }
758# endif
759        bits--;
760        for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
761            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
762        bn_gather5(tmp.d, top, powerbuf, wvalue);
763
764        /*
765         * Scan the exponent one window at a time starting from the most
766         * significant bits.
767         */
768        while (bits >= 0) {
769            for (wvalue = 0, i = 0; i < 5; i++, bits--)
770                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
771
772            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
773            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
774            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
775            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
776            bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
777            bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
778        }
779
780        tmp.top = top;
781        bn_correct_top(&tmp);
782    } else
783#endif
784    {
785        if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers))
786            goto err;
787        if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers))
788            goto err;
789
790        /*
791         * If the window size is greater than 1, then calculate
792         * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even
793         * powers could instead be computed as (a^(i/2))^2 to use the slight
794         * performance advantage of sqr over mul).
795         */
796        if (window > 1) {
797            if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
798                goto err;
799            if (!MOD_EXP_CTIME_COPY_TO_PREBUF
800                (&tmp, top, powerbuf, 2, numPowers))
801                goto err;
802            for (i = 3; i < numPowers; i++) {
803                /* Calculate a^i = a^(i-1) * a */
804                if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
805                    goto err;
806                if (!MOD_EXP_CTIME_COPY_TO_PREBUF
807                    (&tmp, top, powerbuf, i, numPowers))
808                    goto err;
809            }
810        }
811
812        bits--;
813        for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
814            wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
815        if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
816            (&tmp, top, powerbuf, wvalue, numPowers))
817            goto err;
818
819        /*
820         * Scan the exponent one window at a time starting from the most
821         * significant bits.
822         */
823        while (bits >= 0) {
824            wvalue = 0;         /* The 'value' of the window */
825
826            /* Scan the window, squaring the result as we go */
827            for (i = 0; i < window; i++, bits--) {
828                if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
829                    goto err;
830                wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
831            }
832
833            /*
834             * Fetch the appropriate pre-computed value from the pre-buf
835             */
836            if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
837                (&am, top, powerbuf, wvalue, numPowers))
838                goto err;
839
840            /* Multiply the result into the intermediate result */
841            if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
842                goto err;
843        }
844    }
845
846    /* Convert the final result from montgomery to standard format */
847    if (!BN_from_montgomery(rr, &tmp, mont, ctx))
848        goto err;
849    ret = 1;
850 err:
851    if ((in_mont == NULL) && (mont != NULL))
852        BN_MONT_CTX_free(mont);
853    if (powerbuf != NULL) {
854        OPENSSL_cleanse(powerbuf, powerbufLen);
855        if (powerbufFree)
856            OPENSSL_free(powerbufFree);
857    }
858    BN_CTX_end(ctx);
859    return (ret);
860}
861
862int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
863                         const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
864{
865    BN_MONT_CTX *mont = NULL;
866    int b, bits, ret = 0;
867    int r_is_one;
868    BN_ULONG w, next_w;
869    BIGNUM *d, *r, *t;
870    BIGNUM *swap_tmp;
871#define BN_MOD_MUL_WORD(r, w, m) \
872                (BN_mul_word(r, (w)) && \
873                (/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
874                        (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
875    /*
876     * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
877     * probably more overhead than always using BN_mod (which uses BN_copy if
878     * a similar test returns true).
879     */
880    /*
881     * We can use BN_mod and do not need BN_nnmod because our accumulator is
882     * never negative (the result of BN_mod does not depend on the sign of
883     * the modulus).
884     */
885#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
886                (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
887
888    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
889        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
890        BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
891        return -1;
892    }
893
894    bn_check_top(p);
895    bn_check_top(m);
896
897    if (!BN_is_odd(m)) {
898        BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
899        return (0);
900    }
901    if (m->top == 1)
902        a %= m->d[0];           /* make sure that 'a' is reduced */
903
904    bits = BN_num_bits(p);
905    if (bits == 0) {
906        /* x**0 mod 1 is still zero. */
907        if (BN_is_one(m)) {
908            ret = 1;
909            BN_zero(rr);
910        } else
911            ret = BN_one(rr);
912        return ret;
913    }
914    if (a == 0) {
915        BN_zero(rr);
916        ret = 1;
917        return ret;
918    }
919
920    BN_CTX_start(ctx);
921    d = BN_CTX_get(ctx);
922    r = BN_CTX_get(ctx);
923    t = BN_CTX_get(ctx);
924    if (d == NULL || r == NULL || t == NULL)
925        goto err;
926
927    if (in_mont != NULL)
928        mont = in_mont;
929    else {
930        if ((mont = BN_MONT_CTX_new()) == NULL)
931            goto err;
932        if (!BN_MONT_CTX_set(mont, m, ctx))
933            goto err;
934    }
935
936    r_is_one = 1;               /* except for Montgomery factor */
937
938    /* bits-1 >= 0 */
939
940    /* The result is accumulated in the product r*w. */
941    w = a;                      /* bit 'bits-1' of 'p' is always set */
942    for (b = bits - 2; b >= 0; b--) {
943        /* First, square r*w. */
944        next_w = w * w;
945        if ((next_w / w) != w) { /* overflow */
946            if (r_is_one) {
947                if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
948                    goto err;
949                r_is_one = 0;
950            } else {
951                if (!BN_MOD_MUL_WORD(r, w, m))
952                    goto err;
953            }
954            next_w = 1;
955        }
956        w = next_w;
957        if (!r_is_one) {
958            if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
959                goto err;
960        }
961
962        /* Second, multiply r*w by 'a' if exponent bit is set. */
963        if (BN_is_bit_set(p, b)) {
964            next_w = w * a;
965            if ((next_w / a) != w) { /* overflow */
966                if (r_is_one) {
967                    if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
968                        goto err;
969                    r_is_one = 0;
970                } else {
971                    if (!BN_MOD_MUL_WORD(r, w, m))
972                        goto err;
973                }
974                next_w = a;
975            }
976            w = next_w;
977        }
978    }
979
980    /* Finally, set r:=r*w. */
981    if (w != 1) {
982        if (r_is_one) {
983            if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
984                goto err;
985            r_is_one = 0;
986        } else {
987            if (!BN_MOD_MUL_WORD(r, w, m))
988                goto err;
989        }
990    }
991
992    if (r_is_one) {             /* can happen only if a == 1 */
993        if (!BN_one(rr))
994            goto err;
995    } else {
996        if (!BN_from_montgomery(rr, r, mont, ctx))
997            goto err;
998    }
999    ret = 1;
1000 err:
1001    if ((in_mont == NULL) && (mont != NULL))
1002        BN_MONT_CTX_free(mont);
1003    BN_CTX_end(ctx);
1004    bn_check_top(rr);
1005    return (ret);
1006}
1007
1008/* The old fallback, simple version :-) */
1009int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
1010                      const BIGNUM *m, BN_CTX *ctx)
1011{
1012    int i, j, bits, ret = 0, wstart, wend, window, wvalue;
1013    int start = 1;
1014    BIGNUM *d;
1015    /* Table of variables obtained from 'ctx' */
1016    BIGNUM *val[TABLE_SIZE];
1017
1018    if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1019        /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1020        BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1021        return -1;
1022    }
1023
1024    bits = BN_num_bits(p);
1025
1026    if (bits == 0) {
1027        ret = BN_one(r);
1028        return ret;
1029    }
1030
1031    BN_CTX_start(ctx);
1032    d = BN_CTX_get(ctx);
1033    val[0] = BN_CTX_get(ctx);
1034    if (!d || !val[0])
1035        goto err;
1036
1037    if (!BN_nnmod(val[0], a, m, ctx))
1038        goto err;               /* 1 */
1039    if (BN_is_zero(val[0])) {
1040        BN_zero(r);
1041        ret = 1;
1042        goto err;
1043    }
1044
1045    window = BN_window_bits_for_exponent_size(bits);
1046    if (window > 1) {
1047        if (!BN_mod_mul(d, val[0], val[0], m, ctx))
1048            goto err;           /* 2 */
1049        j = 1 << (window - 1);
1050        for (i = 1; i < j; i++) {
1051            if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1052                !BN_mod_mul(val[i], val[i - 1], d, m, ctx))
1053                goto err;
1054        }
1055    }
1056
1057    start = 1;                  /* This is used to avoid multiplication etc
1058                                 * when there is only the value '1' in the
1059                                 * buffer. */
1060    wvalue = 0;                 /* The 'value' of the window */
1061    wstart = bits - 1;          /* The top bit of the window */
1062    wend = 0;                   /* The bottom bit of the window */
1063
1064    if (!BN_one(r))
1065        goto err;
1066
1067    for (;;) {
1068        if (BN_is_bit_set(p, wstart) == 0) {
1069            if (!start)
1070                if (!BN_mod_mul(r, r, r, m, ctx))
1071                    goto err;
1072            if (wstart == 0)
1073                break;
1074            wstart--;
1075            continue;
1076        }
1077        /*
1078         * We now have wstart on a 'set' bit, we now need to work out how bit
1079         * a window to do.  To do this we need to scan forward until the last
1080         * set bit before the end of the window
1081         */
1082        j = wstart;
1083        wvalue = 1;
1084        wend = 0;
1085        for (i = 1; i < window; i++) {
1086            if (wstart - i < 0)
1087                break;
1088            if (BN_is_bit_set(p, wstart - i)) {
1089                wvalue <<= (i - wend);
1090                wvalue |= 1;
1091                wend = i;
1092            }
1093        }
1094
1095        /* wend is the size of the current window */
1096        j = wend + 1;
1097        /* add the 'bytes above' */
1098        if (!start)
1099            for (i = 0; i < j; i++) {
1100                if (!BN_mod_mul(r, r, r, m, ctx))
1101                    goto err;
1102            }
1103
1104        /* wvalue will be an odd number < 2^window */
1105        if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1106            goto err;
1107
1108        /* move the 'window' down further */
1109        wstart -= wend + 1;
1110        wvalue = 0;
1111        start = 0;
1112        if (wstart < 0)
1113            break;
1114    }
1115    ret = 1;
1116 err:
1117    BN_CTX_end(ctx);
1118    bn_check_top(r);
1119    return (ret);
1120}
1121