1/*
2 * Copyright 2002-2023 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License").  You may not use
5 * this file except in compliance with the License.  You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10#include "bn_local.h"
11#include "internal/cryptlib.h"
12
/*
 * Number of BN_ULONG words needed to hold a value of the given bit length,
 * i.e. the bit length rounded up to a whole number of BN_BITS2-bit words.
 * Each expansion is fully parenthesized so that it behaves as a single
 * operand inside larger expressions (e.g. "x % BN_NIST_192_TOP").
 */
#define BN_NIST_192_TOP ((192 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_224_TOP ((224 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_256_TOP ((256 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_384_TOP ((384 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_521_TOP ((521 + BN_BITS2 - 1) / BN_BITS2)
18
/*
 * Pre-computed tables are "carry-less" values of modulus*(i+1): row i of
 * each _nist_p_NNN table holds (i + 1) * p, truncated to the width of the
 * modulus.  Words are stored least significant first.  Row 0 is the modulus
 * itself; the BN_nist_mod_* routines pick a row with the carry they
 * accumulated during reduction.
 *
 * Each _nist_p_NNN_sqr table holds the square of the modulus.  The
 * BN_nist_mod_* routines hand inputs at or above it to BN_nnmod().
 */
#if BN_BITS2 == 64
/* P-192 = 2^192 - 2^64 - 1, and its multiples by 2 and 3 */
static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
    {0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL},
    {0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL},
    {0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFFULL}
};

static const BN_ULONG _nist_p_192_sqr[] = {
    0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000001ULL,
    0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL
};

/*
 * P-224 = 2^224 - 2^96 + 1.  224 bits do not fill four 64-bit words, so the
 * second row keeps its overflow in the upper half of the top word.
 */
static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
    {0x0000000000000001ULL, 0xFFFFFFFF00000000ULL,
     0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL},
    {0x0000000000000002ULL, 0xFFFFFFFE00000000ULL,
     0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFFULL} /* this one is
                                                    * "carry-full" */
};

static const BN_ULONG _nist_p_224_sqr[] = {
    0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
    0xFFFFFFFFFFFFFFFFULL, 0x0000000200000000ULL,
    0x0000000000000000ULL, 0xFFFFFFFFFFFFFFFEULL,
    0xFFFFFFFFFFFFFFFFULL
};

/* P-256 = 2^256 - 2^224 + 2^192 + 2^96 - 1, and its multiples up to 5 */
static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
    {0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL,
     0x0000000000000000ULL, 0xFFFFFFFF00000001ULL},
    {0xFFFFFFFFFFFFFFFEULL, 0x00000001FFFFFFFFULL,
     0x0000000000000000ULL, 0xFFFFFFFE00000002ULL},
    {0xFFFFFFFFFFFFFFFDULL, 0x00000002FFFFFFFFULL,
     0x0000000000000000ULL, 0xFFFFFFFD00000003ULL},
    {0xFFFFFFFFFFFFFFFCULL, 0x00000003FFFFFFFFULL,
     0x0000000000000000ULL, 0xFFFFFFFC00000004ULL},
    {0xFFFFFFFFFFFFFFFBULL, 0x00000004FFFFFFFFULL,
     0x0000000000000000ULL, 0xFFFFFFFB00000005ULL},
};

static const BN_ULONG _nist_p_256_sqr[] = {
    0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
    0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFEULL,
    0x00000001FFFFFFFEULL, 0x00000001FFFFFFFEULL,
    0xFFFFFFFE00000001ULL, 0xFFFFFFFE00000002ULL
};

/* P-384 = 2^384 - 2^128 - 2^96 + 2^32 - 1, and its multiples up to 5 */
static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
    {0x00000000FFFFFFFFULL, 0xFFFFFFFF00000000ULL, 0xFFFFFFFFFFFFFFFEULL,
     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
    {0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
    {0x00000002FFFFFFFDULL, 0xFFFFFFFD00000000ULL, 0xFFFFFFFFFFFFFFFCULL,
     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
    {0x00000003FFFFFFFCULL, 0xFFFFFFFC00000000ULL, 0xFFFFFFFFFFFFFFFBULL,
     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
    {0x00000004FFFFFFFBULL, 0xFFFFFFFB00000000ULL, 0xFFFFFFFFFFFFFFFAULL,
     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
};

static const BN_ULONG _nist_p_384_sqr[] = {
    0xFFFFFFFE00000001ULL, 0x0000000200000000ULL, 0xFFFFFFFE00000000ULL,
    0x0000000200000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL,
    0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
};

/* P-521 = 2^521 - 1; only the modulus itself is tabulated */
static const BN_ULONG _nist_p_521[] =
    { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
    0x00000000000001FFULL
};

static const BN_ULONG _nist_p_521_sqr[] = {
    0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
    0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
    0x0000000000000000ULL, 0x0000000000000000ULL, 0xFFFFFFFFFFFFFC00ULL,
    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
    0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
    0xFFFFFFFFFFFFFFFFULL, 0x000000000003FFFFULL
};
#elif BN_BITS2 == 32
/*
 * 32-bit word layout of the same tables: row i of each _nist_p_NNN table is
 * (i + 1) * p truncated to the width of the modulus, least significant word
 * first, and each _nist_p_NNN_sqr table is the square of the modulus.
 */
/* P-192 = 2^192 - 2^64 - 1, and its multiples by 2 and 3 */
static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
    {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
    {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
    {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
};

static const BN_ULONG _nist_p_192_sqr[] = {
    0x00000001, 0x00000000, 0x00000002, 0x00000000, 0x00000001, 0x00000000,
    0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
};

/* P-224 = 2^224 - 2^96 + 1, and twice that value */
static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
    {0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFF,
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
    {0x00000002, 0x00000000, 0x00000000, 0xFFFFFFFE,
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
};

static const BN_ULONG _nist_p_224_sqr[] = {
    0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
    0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000002,
    0x00000000, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFF,
    0xFFFFFFFF, 0xFFFFFFFF
};

/* P-256 = 2^256 - 2^224 + 2^192 + 2^96 - 1, and its multiples up to 5 */
static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
    {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
     0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF},
    {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000001,
     0x00000000, 0x00000000, 0x00000002, 0xFFFFFFFE},
    {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000002,
     0x00000000, 0x00000000, 0x00000003, 0xFFFFFFFD},
    {0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000003,
     0x00000000, 0x00000000, 0x00000004, 0xFFFFFFFC},
    {0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000004,
     0x00000000, 0x00000000, 0x00000005, 0xFFFFFFFB},
};

static const BN_ULONG _nist_p_256_sqr[] = {
    0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000001,
    0xFFFFFFFE, 0x00000001, 0xFFFFFFFE, 0x00000001,
    0x00000001, 0xFFFFFFFE, 0x00000002, 0xFFFFFFFE
};

/* P-384 = 2^384 - 2^128 - 2^96 + 2^32 - 1, and its multiples up to 5 */
static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
    {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
    {0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
    {0xFFFFFFFD, 0x00000002, 0x00000000, 0xFFFFFFFD, 0xFFFFFFFC, 0xFFFFFFFF,
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
    {0xFFFFFFFC, 0x00000003, 0x00000000, 0xFFFFFFFC, 0xFFFFFFFB, 0xFFFFFFFF,
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
    {0xFFFFFFFB, 0x00000004, 0x00000000, 0xFFFFFFFB, 0xFFFFFFFA, 0xFFFFFFFF,
     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
};

static const BN_ULONG _nist_p_384_sqr[] = {
    0x00000001, 0xFFFFFFFE, 0x00000000, 0x00000002, 0x00000000, 0xFFFFFFFE,
    0x00000000, 0x00000002, 0x00000001, 0x00000000, 0x00000000, 0x00000000,
    0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
};

/* P-521 = 2^521 - 1; only the modulus itself is tabulated */
static const BN_ULONG _nist_p_521[] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
    0xFFFFFFFF, 0x000001FF
};

static const BN_ULONG _nist_p_521_sqr[] = {
    0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFC00, 0xFFFFFFFF,
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
    0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFFF
};
#else
/* The tables above only exist for 64-bit and 32-bit BN_ULONG words. */
# error "unsupported BN_BITS2"
#endif
186
/*
 * Read-only BIGNUM wrappers around row 0 (the modulus itself) of each table.
 * The initializers are positional: the word array, the word count given
 * twice, a zero, and BN_FLG_STATIC_DATA.
 * NOTE(review): presumably these map to the d, top, dmax, neg and flags
 * members of struct bignum_st - confirm against bn_local.h.
 * The casts drop the const qualifier of the tables because the first member
 * is initialized with a plain BN_ULONG pointer.
 */
static const BIGNUM ossl_bignum_nist_p_192 = {
    (BN_ULONG *)_nist_p_192[0],
    BN_NIST_192_TOP,
    BN_NIST_192_TOP,
    0,
    BN_FLG_STATIC_DATA
};

static const BIGNUM ossl_bignum_nist_p_224 = {
    (BN_ULONG *)_nist_p_224[0],
    BN_NIST_224_TOP,
    BN_NIST_224_TOP,
    0,
    BN_FLG_STATIC_DATA
};

static const BIGNUM ossl_bignum_nist_p_256 = {
    (BN_ULONG *)_nist_p_256[0],
    BN_NIST_256_TOP,
    BN_NIST_256_TOP,
    0,
    BN_FLG_STATIC_DATA
};

static const BIGNUM ossl_bignum_nist_p_384 = {
    (BN_ULONG *)_nist_p_384[0],
    BN_NIST_384_TOP,
    BN_NIST_384_TOP,
    0,
    BN_FLG_STATIC_DATA
};

/* _nist_p_521 is a flat array (no multiples), so it is used directly. */
static const BIGNUM ossl_bignum_nist_p_521 = {
    (BN_ULONG *)_nist_p_521,
    BN_NIST_521_TOP,
    BN_NIST_521_TOP,
    0,
    BN_FLG_STATIC_DATA
};
226
/*
 * Accessors for the built-in NIST prime moduli.  Each returns a pointer to
 * a file-scope constant BIGNUM, so the caller must neither modify nor free
 * the result.
 */

/* Return the built-in P-192 modulus. */
const BIGNUM *BN_get0_nist_prime_192(void)
{
    return &ossl_bignum_nist_p_192;
}

/* Return the built-in P-224 modulus. */
const BIGNUM *BN_get0_nist_prime_224(void)
{
    return &ossl_bignum_nist_p_224;
}

/* Return the built-in P-256 modulus. */
const BIGNUM *BN_get0_nist_prime_256(void)
{
    return &ossl_bignum_nist_p_256;
}

/* Return the built-in P-384 modulus. */
const BIGNUM *BN_get0_nist_prime_384(void)
{
    return &ossl_bignum_nist_p_384;
}

/* Return the built-in P-521 modulus. */
const BIGNUM *BN_get0_nist_prime_521(void)
{
    return &ossl_bignum_nist_p_521;
}
251
252/*
253 * To avoid more recent compilers (specifically clang-14) from treating this
254 * code as a violation of the strict aliasing conditions and omiting it, this
255 * cannot be declared as a function.  Moreover, the dst parameter cannot be
256 * cached in a local since this no longer references the union and again falls
257 * foul of the strict aliasing criteria.  Refer to #18225 for the initial
258 * diagnostics and llvm/llvm-project#55255 for the later discussions with the
259 * LLVM developers.  The problem boils down to if an array in the union is
260 * converted to a pointer or if it is used directly.
261 *
262 * This function was inlined regardless, so there is no space cost to be
263 * paid for making it a macro.
264 */
265#define nist_cp_bn_0(dst, src_in, top, max) \
266{                                           \
267    int ii;                                 \
268    const BN_ULONG *src = src_in;           \
269                                            \
270    for (ii = 0; ii < top; ii++)            \
271        (dst)[ii] = src[ii];                \
272    for (; ii < max; ii++)                  \
273        (dst)[ii] = 0;                      \
274}
275
276static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
277{
278    int i;
279
280    for (i = 0; i < top; i++)
281        dst[i] = src[i];
282}
283
/*
 * Word-copy helpers used by the nist_set_* macros.
 *
 * bn_cp_64 / bn_cp_32 store the 64-bit / 32-bit word number m of 'from' as
 * word number n of 'to'; a negative m stores zero instead.  bn_64_set_0 /
 * bn_32_set_0 store zero unconditionally.
 *
 * The statement-like macros deliberately end in ';' (or expand to a brace
 * block): the nist_set_* macros invoke them back to back without
 * separators.  All arguments are parenthesized at their point of use so
 * that arbitrary expressions can be passed as indices or operands.
 */
#if BN_BITS2 == 64
# define bn_cp_64(to, n, from, m)        (to)[(n)] = ((m)>=0)?((from)[(m)]):0;
# define bn_64_set_0(to, n)              (to)[(n)] = (BN_ULONG)0;
/*
 * two following macros are implemented under assumption that they
 * are called in a sequence with *ascending* n, i.e. as they are...
 * (an even n overwrites the whole 64-bit word with the new low half, an odd
 * n then ORs the high half into it)
 */
# define bn_cp_32_naked(to, n, from, m)  (((n)&1)?((to)[(n)/2]|=((m)&1)?((from)[(m)/2]&BN_MASK2h):((from)[(m)/2]<<32))\
                                                :((to)[(n)/2] =((m)&1)?((from)[(m)/2]>>32):((from)[(m)/2]&BN_MASK2l)))
# define bn_32_set_0(to, n)              (((n)&1)?((to)[(n)/2]&=BN_MASK2l):((to)[(n)/2]=0));
# define bn_cp_32(to,n,from,m)           ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
/*
 * NIST_INT64 selects the signed 64-bit accumulator used by the word-by-word
 * code paths.  With 64-bit BN_ULONG it is only defined for little-endian
 * targets.
 */
# if defined(L_ENDIAN)
#  if defined(__arch64__)
#   define NIST_INT64 long
#  else
#   define NIST_INT64 long long
#  endif
# endif
#else
/* With 32-bit BN_ULONG a 64-bit copy is two consecutive 32-bit copies. */
# define bn_cp_64(to, n, from, m) \
        { \
        bn_cp_32(to, (n)*2, from, (m)*2); \
        bn_cp_32(to, (n)*2+1, from, (m)*2+1); \
        }
# define bn_64_set_0(to, n) \
        { \
        bn_32_set_0(to, (n)*2); \
        bn_32_set_0(to, (n)*2+1); \
        }
# define bn_cp_32(to, n, from, m)        (to)[(n)] = ((m)>=0)?((from)[(m)]):0;
# define bn_32_set_0(to, n)              (to)[(n)] = (BN_ULONG)0;
# if defined(_WIN32) && !defined(__GNUC__)
#  define NIST_INT64 __int64
# elif defined(BN_LLONG)
#  define NIST_INT64 long long
# endif
#endif                          /* BN_BITS2 != 64 */
321
#ifdef NIST_INT64
/*
 * Byte-wise accessors for single 32-bit words.  Going through memcpy()
 * sidesteps potential aliasing problems when the underlying storage is
 * declared with a different word type.
 */

/* Fetch the 32-bit word stored at 'ptr'. */
static ossl_inline uint32_t load_u32(const void *ptr)
{
    uint32_t word;

    memcpy(&word, ptr, sizeof(uint32_t));
    return word;
}

/* Write the low 32 bits of 'val' to 'ptr'. */
static ossl_inline void store_lo32(void *ptr, NIST_INT64 val)
{
    /*
     * The explicit narrowing matters on big-endian systems: a 32-bit BE
     * build may still define NIST_INT64 when the compiler supports a 64-bit
     * long long, and copying the first four bytes of 'val' itself would then
     * pick up the wrong half.
     */
    const uint32_t low = (uint32_t)val;

    memcpy(ptr, &low, sizeof(uint32_t));
}
#endif /* NIST_INT64 */
343
/*
 * Assemble a 192-bit value in 'to' from 64-bit words of 'from'.
 * a1 selects the most significant output word (to[2]) and a3 the least
 * significant (to[0]).  Each selector has 3 subtracted before it indexes
 * 'from', so a selector of 0 becomes a negative index, for which bn_cp_64
 * stores a zero word.  The bn_cp_64 invocations terminate themselves, hence
 * no semicolons between them.
 */
#define nist_set_192(to, from, a1, a2, a3) \
        { \
        bn_cp_64(to, 0, from, (a3) - 3) \
        bn_cp_64(to, 1, from, (a2) - 3) \
        bn_cp_64(to, 2, from, (a1) - 3) \
        }
350
351int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
352                    BN_CTX *ctx)
353{
354    int top = a->top, i;
355    int carry;
356    register BN_ULONG *r_d, *a_d = a->d;
357    union {
358        BN_ULONG bn[BN_NIST_192_TOP];
359        unsigned int ui[BN_NIST_192_TOP * sizeof(BN_ULONG) /
360                        sizeof(unsigned int)];
361    } buf;
362    BN_ULONG c_d[BN_NIST_192_TOP], *res;
363    static const BIGNUM ossl_bignum_nist_p_192_sqr = {
364        (BN_ULONG *)_nist_p_192_sqr,
365        OSSL_NELEM(_nist_p_192_sqr),
366        OSSL_NELEM(_nist_p_192_sqr),
367        0, BN_FLG_STATIC_DATA
368    };
369
370    field = &ossl_bignum_nist_p_192; /* just to make sure */
371
372    if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_192_sqr) >= 0)
373        return BN_nnmod(r, a, field, ctx);
374
375    i = BN_ucmp(field, a);
376    if (i == 0) {
377        BN_zero(r);
378        return 1;
379    } else if (i > 0)
380        return (r == a) ? 1 : (BN_copy(r, a) != NULL);
381
382    if (r != a) {
383        if (!bn_wexpand(r, BN_NIST_192_TOP))
384            return 0;
385        r_d = r->d;
386        nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
387    } else
388        r_d = a_d;
389
390    nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP,
391                 BN_NIST_192_TOP);
392
393#if defined(NIST_INT64)
394    {
395        NIST_INT64 acc;         /* accumulator */
396        unsigned int *rp = (unsigned int *)r_d;
397        const unsigned int *bp = (const unsigned int *)buf.ui;
398
399        acc = load_u32(&rp[0]);
400        acc += bp[3 * 2 - 6];
401        acc += bp[5 * 2 - 6];
402        store_lo32(&rp[0], acc);
403        acc >>= 32;
404
405        acc += load_u32(&rp[1]);
406        acc += bp[3 * 2 - 5];
407        acc += bp[5 * 2 - 5];
408        store_lo32(&rp[1], acc);
409        acc >>= 32;
410
411        acc += load_u32(&rp[2]);
412        acc += bp[3 * 2 - 6];
413        acc += bp[4 * 2 - 6];
414        acc += bp[5 * 2 - 6];
415        store_lo32(&rp[2], acc);
416        acc >>= 32;
417
418        acc += load_u32(&rp[3]);
419        acc += bp[3 * 2 - 5];
420        acc += bp[4 * 2 - 5];
421        acc += bp[5 * 2 - 5];
422        store_lo32(&rp[3], acc);
423        acc >>= 32;
424
425        acc += load_u32(&rp[4]);
426        acc += bp[4 * 2 - 6];
427        acc += bp[5 * 2 - 6];
428        store_lo32(&rp[4], acc);
429        acc >>= 32;
430
431        acc += load_u32(&rp[5]);
432        acc += bp[4 * 2 - 5];
433        acc += bp[5 * 2 - 5];
434        store_lo32(&rp[5], acc);
435
436        carry = (int)(acc >> 32);
437    }
438#else
439    {
440        BN_ULONG t_d[BN_NIST_192_TOP];
441
442        nist_set_192(t_d, buf.bn, 0, 3, 3);
443        carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
444        nist_set_192(t_d, buf.bn, 4, 4, 0);
445        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
446        nist_set_192(t_d, buf.bn, 5, 5, 5)
447            carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
448    }
449#endif
450    if (carry > 0)
451        carry =
452            (int)bn_sub_words(r_d, r_d, _nist_p_192[carry - 1],
453                              BN_NIST_192_TOP);
454    else
455        carry = 1;
456
457    /*
458     * we need 'if (carry==0 || result>=modulus) result-=modulus;'
459     * as comparison implies subtraction, we can write
460     * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
461     * this is what happens below, but without explicit if:-) a.
462     */
463    res = (bn_sub_words(c_d, r_d, _nist_p_192[0], BN_NIST_192_TOP) && carry)
464        ? r_d
465        : c_d;
466    nist_cp_bn(r_d, res, BN_NIST_192_TOP);
467    r->top = BN_NIST_192_TOP;
468    bn_correct_top(r);
469
470    return 1;
471}
472
/*
 * Common signature of bn_add_words() and bn_sub_words().  The reduction
 * routines use it to choose at run time whether the final correction step
 * adds or subtracts the modulus.
 */
typedef BN_ULONG (*bn_addsub_f) (BN_ULONG *, const BN_ULONG *,
                                 const BN_ULONG *, int);
475
/*
 * Assemble a 224-bit value in 'to' from 32-bit words of 'from'.
 * a1 selects the most significant output word (32-bit word 6) and a7 the
 * least significant (word 0).  Each selector has 7 subtracted before it
 * indexes 'from', so a selector of 0 becomes a negative index, for which
 * bn_cp_32 stores a zero word.  The bn_cp_32 invocations terminate
 * themselves, hence no semicolons between them.
 */
#define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
        { \
        bn_cp_32(to, 0, from, (a7) - 7) \
        bn_cp_32(to, 1, from, (a6) - 7) \
        bn_cp_32(to, 2, from, (a5) - 7) \
        bn_cp_32(to, 3, from, (a4) - 7) \
        bn_cp_32(to, 4, from, (a3) - 7) \
        bn_cp_32(to, 5, from, (a2) - 7) \
        bn_cp_32(to, 6, from, (a1) - 7) \
        }
486
487int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
488                    BN_CTX *ctx)
489{
490    int top = a->top, i;
491    int carry;
492    BN_ULONG *r_d, *a_d = a->d;
493    union {
494        BN_ULONG bn[BN_NIST_224_TOP];
495        unsigned int ui[BN_NIST_224_TOP * sizeof(BN_ULONG) /
496                        sizeof(unsigned int)];
497    } buf;
498    BN_ULONG c_d[BN_NIST_224_TOP], *res;
499    bn_addsub_f adjust;
500    static const BIGNUM ossl_bignum_nist_p_224_sqr = {
501        (BN_ULONG *)_nist_p_224_sqr,
502        OSSL_NELEM(_nist_p_224_sqr),
503        OSSL_NELEM(_nist_p_224_sqr),
504        0, BN_FLG_STATIC_DATA
505    };
506
507    field = &ossl_bignum_nist_p_224; /* just to make sure */
508
509    if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_224_sqr) >= 0)
510        return BN_nnmod(r, a, field, ctx);
511
512    i = BN_ucmp(field, a);
513    if (i == 0) {
514        BN_zero(r);
515        return 1;
516    } else if (i > 0)
517        return (r == a) ? 1 : (BN_copy(r, a) != NULL);
518
519    if (r != a) {
520        if (!bn_wexpand(r, BN_NIST_224_TOP))
521            return 0;
522        r_d = r->d;
523        nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
524    } else
525        r_d = a_d;
526
527#if BN_BITS2==64
528    /* copy upper 256 bits of 448 bit number ... */
529    nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP - 1),
530                 top - (BN_NIST_224_TOP - 1), BN_NIST_224_TOP);
531    /* ... and right shift by 32 to obtain upper 224 bits */
532    nist_set_224(buf.bn, c_d, 14, 13, 12, 11, 10, 9, 8);
533    /* truncate lower part to 224 bits too */
534    r_d[BN_NIST_224_TOP - 1] &= BN_MASK2l;
535#else
536    nist_cp_bn_0(buf.bn, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP,
537                 BN_NIST_224_TOP);
538#endif
539
540#if defined(NIST_INT64) && BN_BITS2!=64
541    {
542        NIST_INT64 acc;         /* accumulator */
543        unsigned int *rp = (unsigned int *)r_d;
544        const unsigned int *bp = (const unsigned int *)buf.ui;
545
546        acc = rp[0];
547        acc -= bp[7 - 7];
548        acc -= bp[11 - 7];
549        rp[0] = (unsigned int)acc;
550        acc >>= 32;
551
552        acc += rp[1];
553        acc -= bp[8 - 7];
554        acc -= bp[12 - 7];
555        rp[1] = (unsigned int)acc;
556        acc >>= 32;
557
558        acc += rp[2];
559        acc -= bp[9 - 7];
560        acc -= bp[13 - 7];
561        rp[2] = (unsigned int)acc;
562        acc >>= 32;
563
564        acc += rp[3];
565        acc += bp[7 - 7];
566        acc += bp[11 - 7];
567        acc -= bp[10 - 7];
568        rp[3] = (unsigned int)acc;
569        acc >>= 32;
570
571        acc += rp[4];
572        acc += bp[8 - 7];
573        acc += bp[12 - 7];
574        acc -= bp[11 - 7];
575        rp[4] = (unsigned int)acc;
576        acc >>= 32;
577
578        acc += rp[5];
579        acc += bp[9 - 7];
580        acc += bp[13 - 7];
581        acc -= bp[12 - 7];
582        rp[5] = (unsigned int)acc;
583        acc >>= 32;
584
585        acc += rp[6];
586        acc += bp[10 - 7];
587        acc -= bp[13 - 7];
588        rp[6] = (unsigned int)acc;
589
590        carry = (int)(acc >> 32);
591# if BN_BITS2==64
592        rp[7] = carry;
593# endif
594    }
595#else
596    {
597        BN_ULONG t_d[BN_NIST_224_TOP];
598
599        nist_set_224(t_d, buf.bn, 10, 9, 8, 7, 0, 0, 0);
600        carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
601        nist_set_224(t_d, buf.bn, 0, 13, 12, 11, 0, 0, 0);
602        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
603        nist_set_224(t_d, buf.bn, 13, 12, 11, 10, 9, 8, 7);
604        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
605        nist_set_224(t_d, buf.bn, 0, 0, 0, 0, 13, 12, 11);
606        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
607
608# if BN_BITS2==64
609        carry = (int)(r_d[BN_NIST_224_TOP - 1] >> 32);
610# endif
611    }
612#endif
613    adjust = bn_sub_words;
614    if (carry > 0) {
615        carry =
616            (int)bn_sub_words(r_d, r_d, _nist_p_224[carry - 1],
617                              BN_NIST_224_TOP);
618#if BN_BITS2==64
619        carry = (int)(~(r_d[BN_NIST_224_TOP - 1] >> 32)) & 1;
620#endif
621    } else if (carry < 0) {
622        /*
623         * it's a bit more complicated logic in this case. if bn_add_words
624         * yields no carry, then result has to be adjusted by unconditionally
625         * *adding* the modulus. but if it does, then result has to be
626         * compared to the modulus and conditionally adjusted by
627         * *subtracting* the latter.
628         */
629        carry =
630            (int)bn_add_words(r_d, r_d, _nist_p_224[-carry - 1],
631                              BN_NIST_224_TOP);
632        adjust = carry ? bn_sub_words : bn_add_words;
633    } else
634        carry = 1;
635
636    /* otherwise it's effectively same as in BN_nist_mod_192... */
637    res = ((*adjust) (c_d, r_d, _nist_p_224[0], BN_NIST_224_TOP) && carry)
638        ? r_d
639        : c_d;
640    nist_cp_bn(r_d, res, BN_NIST_224_TOP);
641    r->top = BN_NIST_224_TOP;
642    bn_correct_top(r);
643
644    return 1;
645}
646
/*
 * Assemble a 256-bit value in 'to' from 32-bit words of 'from'.
 * a1 selects the most significant output word (32-bit word 7) and a8 the
 * least significant (word 0).  Each selector has 8 subtracted before it
 * indexes 'from', so a selector of 0 becomes a negative index, for which
 * bn_cp_32 stores a zero word.  The bn_cp_32 invocations terminate
 * themselves, hence no semicolons between them.
 */
#define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
        { \
        bn_cp_32(to, 0, from, (a8) - 8) \
        bn_cp_32(to, 1, from, (a7) - 8) \
        bn_cp_32(to, 2, from, (a6) - 8) \
        bn_cp_32(to, 3, from, (a5) - 8) \
        bn_cp_32(to, 4, from, (a4) - 8) \
        bn_cp_32(to, 5, from, (a3) - 8) \
        bn_cp_32(to, 6, from, (a2) - 8) \
        bn_cp_32(to, 7, from, (a1) - 8) \
        }
658
/*
 * Reduce 'a' modulo the NIST prime P-256 and store the result in 'r'.
 *
 * The 'field' argument is not consulted: it is overwritten with the built-in
 * P-256 modulus before use.  Negative inputs and inputs at or above the
 * square of the modulus are handed to BN_nnmod().  An input equal to the
 * modulus yields zero, and an input below it is copied (or left in place
 * when r == a).  Every other input goes through the fast reduction, which
 * folds the words above bit 256 back into the low 256 bits by a mix of
 * additions and subtractions.
 *
 * 'r' may be the same BIGNUM as 'a'; the reduction then works directly on
 * a->d.
 *
 * Returns 1 on success, 0 if 'r' cannot be expanded, and otherwise whatever
 * BN_nnmod() or BN_copy() report on the slow paths.
 */
int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
                    BN_CTX *ctx)
{
    int i, top = a->top;
    int carry = 0;
    register BN_ULONG *a_d = a->d, *r_d;
    /* upper half of 'a', viewable either as BN_ULONG or as 32-bit words */
    union {
        BN_ULONG bn[BN_NIST_256_TOP];
        unsigned int ui[BN_NIST_256_TOP * sizeof(BN_ULONG) /
                        sizeof(unsigned int)];
    } buf;
    BN_ULONG c_d[BN_NIST_256_TOP], *res;
    bn_addsub_f adjust;
    static const BIGNUM ossl_bignum_nist_p_256_sqr = {
        (BN_ULONG *)_nist_p_256_sqr,
        OSSL_NELEM(_nist_p_256_sqr),
        OSSL_NELEM(_nist_p_256_sqr),
        0, BN_FLG_STATIC_DATA
    };

    field = &ossl_bignum_nist_p_256; /* just to make sure */

    /* the fast path only handles 0 <= a < p^2 */
    if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_256_sqr) >= 0)
        return BN_nnmod(r, a, field, ctx);

    i = BN_ucmp(field, a);
    if (i == 0) {
        BN_zero(r);
        return 1;
    } else if (i > 0)
        return (r == a) ? 1 : (BN_copy(r, a) != NULL);

    /* r_d receives the low 256 bits of 'a' (in place when r == a) */
    if (r != a) {
        if (!bn_wexpand(r, BN_NIST_256_TOP))
            return 0;
        r_d = r->d;
        nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
    } else
        r_d = a_d;

    /* buf receives the words above bit 256, zero-padded to 256 bits */
    nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP,
                 BN_NIST_256_TOP);

#if defined(NIST_INT64)
    {
        /*
         * Produce the result one 32-bit word at a time.  bp[k - 8] is
         * 32-bit word k of the input; each step adds and subtracts the
         * selected words, stores the low 32 bits and keeps the rest
         * (possibly negative) as the carry into the next word.
         */
        NIST_INT64 acc;         /* accumulator */
        unsigned int *rp = (unsigned int *)r_d;
        const unsigned int *bp = (const unsigned int *)buf.ui;

        acc = load_u32(&rp[0]);
        acc += bp[8 - 8];
        acc += bp[9 - 8];
        acc -= bp[11 - 8];
        acc -= bp[12 - 8];
        acc -= bp[13 - 8];
        acc -= bp[14 - 8];
        store_lo32(&rp[0], acc);
        acc >>= 32;

        acc += load_u32(&rp[1]);
        acc += bp[9 - 8];
        acc += bp[10 - 8];
        acc -= bp[12 - 8];
        acc -= bp[13 - 8];
        acc -= bp[14 - 8];
        acc -= bp[15 - 8];
        store_lo32(&rp[1], acc);
        acc >>= 32;

        acc += load_u32(&rp[2]);
        acc += bp[10 - 8];
        acc += bp[11 - 8];
        acc -= bp[13 - 8];
        acc -= bp[14 - 8];
        acc -= bp[15 - 8];
        store_lo32(&rp[2], acc);
        acc >>= 32;

        acc += load_u32(&rp[3]);
        acc += bp[11 - 8];
        acc += bp[11 - 8];
        acc += bp[12 - 8];
        acc += bp[12 - 8];
        acc += bp[13 - 8];
        acc -= bp[15 - 8];
        acc -= bp[8 - 8];
        acc -= bp[9 - 8];
        store_lo32(&rp[3], acc);
        acc >>= 32;

        acc += load_u32(&rp[4]);
        acc += bp[12 - 8];
        acc += bp[12 - 8];
        acc += bp[13 - 8];
        acc += bp[13 - 8];
        acc += bp[14 - 8];
        acc -= bp[9 - 8];
        acc -= bp[10 - 8];
        store_lo32(&rp[4], acc);
        acc >>= 32;

        acc += load_u32(&rp[5]);
        acc += bp[13 - 8];
        acc += bp[13 - 8];
        acc += bp[14 - 8];
        acc += bp[14 - 8];
        acc += bp[15 - 8];
        acc -= bp[10 - 8];
        acc -= bp[11 - 8];
        store_lo32(&rp[5], acc);
        acc >>= 32;

        acc += load_u32(&rp[6]);
        acc += bp[14 - 8];
        acc += bp[14 - 8];
        acc += bp[15 - 8];
        acc += bp[15 - 8];
        acc += bp[14 - 8];
        acc += bp[13 - 8];
        acc -= bp[8 - 8];
        acc -= bp[9 - 8];
        store_lo32(&rp[6], acc);
        acc >>= 32;

        acc += load_u32(&rp[7]);
        acc += bp[15 - 8];
        acc += bp[15 - 8];
        acc += bp[15 - 8];
        acc += bp[8 - 8];
        acc -= bp[10 - 8];
        acc -= bp[11 - 8];
        acc -= bp[12 - 8];
        acc -= bp[13 - 8];
        store_lo32(&rp[7], acc);

        carry = (int)(acc >> 32);
    }
#else
    {
        /*
         * Word-array variant: the terms S1..S4 are added and D1..D4 are
         * subtracted.  c_d is borrowed as scratch space for S2 here; it is
         * overwritten again by the final correction step.
         */
        BN_ULONG t_d[BN_NIST_256_TOP];

        /*
         * S1
         */
        nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
        /*
         * S2
         */
        nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
        carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
        /* left shift */
        {
            /* doubles S1 + S2 in place; 'c' carries the top bit upwards */
            register BN_ULONG *ap, t, c;
            ap = t_d;
            c = 0;
            for (i = BN_NIST_256_TOP; i != 0; --i) {
                t = *ap;
                *(ap++) = ((t << 1) | c) & BN_MASK2;
                c = (t & BN_TBIT) ? 1 : 0;
            }
            carry <<= 1;
            carry |= c;
        }
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /*
         * S3
         */
        nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /*
         * S4
         */
        nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /*
         * D1
         */
        nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /*
         * D2
         */
        nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /*
         * D3
         */
        nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
        /*
         * D4
         */
        nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);

    }
#endif
    /* see BN_nist_mod_224 for explanation */
    adjust = bn_sub_words;
    if (carry > 0)
        carry =
            (int)bn_sub_words(r_d, r_d, _nist_p_256[carry - 1],
                              BN_NIST_256_TOP);
    else if (carry < 0) {
        carry =
            (int)bn_add_words(r_d, r_d, _nist_p_256[-carry - 1],
                              BN_NIST_256_TOP);
        adjust = carry ? bn_sub_words : bn_add_words;
    } else
        carry = 1;

    /*
     * c_d = r_d adjusted by one modulus; r_d is kept only when that
     * adjustment reports a carry/borrow and 'carry' is non-zero.
     */
    res = ((*adjust) (c_d, r_d, _nist_p_256[0], BN_NIST_256_TOP) && carry)
        ? r_d
        : c_d;
    nist_cp_bn(r_d, res, BN_NIST_256_TOP);
    r->top = BN_NIST_256_TOP;
    bn_correct_top(r);

    return 1;
}
879
/*
 * Assemble a 384-bit value in 'to' from 32-bit words of 'from'.
 * a1 selects the most significant output word (32-bit word 11) and a12 the
 * least significant (word 0).  Each selector has 12 subtracted before it
 * indexes 'from', so a selector of 0 becomes a negative index, for which
 * bn_cp_32 stores a zero word.  The bn_cp_32 invocations terminate
 * themselves, hence no semicolons between them.
 */
#define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
        { \
        bn_cp_32(to, 0, from,  (a12) - 12) \
        bn_cp_32(to, 1, from,  (a11) - 12) \
        bn_cp_32(to, 2, from,  (a10) - 12) \
        bn_cp_32(to, 3, from,  (a9) - 12)  \
        bn_cp_32(to, 4, from,  (a8) - 12)  \
        bn_cp_32(to, 5, from,  (a7) - 12)  \
        bn_cp_32(to, 6, from,  (a6) - 12)  \
        bn_cp_32(to, 7, from,  (a5) - 12)  \
        bn_cp_32(to, 8, from,  (a4) - 12)  \
        bn_cp_32(to, 9, from,  (a3) - 12)  \
        bn_cp_32(to, 10, from, (a2) - 12)  \
        bn_cp_32(to, 11, from, (a1) - 12)  \
        }
895
896int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
897                    BN_CTX *ctx)
898{
899    int i, top = a->top;
900    int carry = 0;
901    register BN_ULONG *r_d, *a_d = a->d;
902    union {
903        BN_ULONG bn[BN_NIST_384_TOP];
904        unsigned int ui[BN_NIST_384_TOP * sizeof(BN_ULONG) /
905                        sizeof(unsigned int)];
906    } buf;
907    BN_ULONG c_d[BN_NIST_384_TOP], *res;
908    bn_addsub_f adjust;
909    static const BIGNUM ossl_bignum_nist_p_384_sqr = {
910        (BN_ULONG *)_nist_p_384_sqr,
911        OSSL_NELEM(_nist_p_384_sqr),
912        OSSL_NELEM(_nist_p_384_sqr),
913        0, BN_FLG_STATIC_DATA
914    };
915
916    field = &ossl_bignum_nist_p_384; /* just to make sure */
917
918    if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_384_sqr) >= 0)
919        return BN_nnmod(r, a, field, ctx);
920
921    i = BN_ucmp(field, a);
922    if (i == 0) {
923        BN_zero(r);
924        return 1;
925    } else if (i > 0)
926        return (r == a) ? 1 : (BN_copy(r, a) != NULL);
927
928    if (r != a) {
929        if (!bn_wexpand(r, BN_NIST_384_TOP))
930            return 0;
931        r_d = r->d;
932        nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
933    } else
934        r_d = a_d;
935
936    nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP,
937                 BN_NIST_384_TOP);
938
939#if defined(NIST_INT64)
940    {
941        NIST_INT64 acc;         /* accumulator */
942        unsigned int *rp = (unsigned int *)r_d;
943        const unsigned int *bp = (const unsigned int *)buf.ui;
944
945        acc = load_u32(&rp[0]);
946        acc += bp[12 - 12];
947        acc += bp[21 - 12];
948        acc += bp[20 - 12];
949        acc -= bp[23 - 12];
950        store_lo32(&rp[0], acc);
951        acc >>= 32;
952
953        acc += load_u32(&rp[1]);
954        acc += bp[13 - 12];
955        acc += bp[22 - 12];
956        acc += bp[23 - 12];
957        acc -= bp[12 - 12];
958        acc -= bp[20 - 12];
959        store_lo32(&rp[1], acc);
960        acc >>= 32;
961
962        acc += load_u32(&rp[2]);
963        acc += bp[14 - 12];
964        acc += bp[23 - 12];
965        acc -= bp[13 - 12];
966        acc -= bp[21 - 12];
967        store_lo32(&rp[2], acc);
968        acc >>= 32;
969
970        acc += load_u32(&rp[3]);
971        acc += bp[15 - 12];
972        acc += bp[12 - 12];
973        acc += bp[20 - 12];
974        acc += bp[21 - 12];
975        acc -= bp[14 - 12];
976        acc -= bp[22 - 12];
977        acc -= bp[23 - 12];
978        store_lo32(&rp[3], acc);
979        acc >>= 32;
980
981        acc += load_u32(&rp[4]);
982        acc += bp[21 - 12];
983        acc += bp[21 - 12];
984        acc += bp[16 - 12];
985        acc += bp[13 - 12];
986        acc += bp[12 - 12];
987        acc += bp[20 - 12];
988        acc += bp[22 - 12];
989        acc -= bp[15 - 12];
990        acc -= bp[23 - 12];
991        acc -= bp[23 - 12];
992        store_lo32(&rp[4], acc);
993        acc >>= 32;
994
995        acc += load_u32(&rp[5]);
996        acc += bp[22 - 12];
997        acc += bp[22 - 12];
998        acc += bp[17 - 12];
999        acc += bp[14 - 12];
1000        acc += bp[13 - 12];
1001        acc += bp[21 - 12];
1002        acc += bp[23 - 12];
1003        acc -= bp[16 - 12];
1004        store_lo32(&rp[5], acc);
1005        acc >>= 32;
1006
1007        acc += load_u32(&rp[6]);
1008        acc += bp[23 - 12];
1009        acc += bp[23 - 12];
1010        acc += bp[18 - 12];
1011        acc += bp[15 - 12];
1012        acc += bp[14 - 12];
1013        acc += bp[22 - 12];
1014        acc -= bp[17 - 12];
1015        store_lo32(&rp[6], acc);
1016        acc >>= 32;
1017
1018        acc += load_u32(&rp[7]);
1019        acc += bp[19 - 12];
1020        acc += bp[16 - 12];
1021        acc += bp[15 - 12];
1022        acc += bp[23 - 12];
1023        acc -= bp[18 - 12];
1024        store_lo32(&rp[7], acc);
1025        acc >>= 32;
1026
1027        acc += load_u32(&rp[8]);
1028        acc += bp[20 - 12];
1029        acc += bp[17 - 12];
1030        acc += bp[16 - 12];
1031        acc -= bp[19 - 12];
1032        store_lo32(&rp[8], acc);
1033        acc >>= 32;
1034
1035        acc += load_u32(&rp[9]);
1036        acc += bp[21 - 12];
1037        acc += bp[18 - 12];
1038        acc += bp[17 - 12];
1039        acc -= bp[20 - 12];
1040        store_lo32(&rp[9], acc);
1041        acc >>= 32;
1042
1043        acc += load_u32(&rp[10]);
1044        acc += bp[22 - 12];
1045        acc += bp[19 - 12];
1046        acc += bp[18 - 12];
1047        acc -= bp[21 - 12];
1048        store_lo32(&rp[10], acc);
1049        acc >>= 32;
1050
1051        acc += load_u32(&rp[11]);
1052        acc += bp[23 - 12];
1053        acc += bp[20 - 12];
1054        acc += bp[19 - 12];
1055        acc -= bp[22 - 12];
1056        store_lo32(&rp[11], acc);
1057
1058        carry = (int)(acc >> 32);
1059    }
1060#else
1061    {
1062        BN_ULONG t_d[BN_NIST_384_TOP];
1063
1064        /*
1065         * S1
1066         */
1067        nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23 - 4, 22 - 4, 21 - 4);
1068        /* left shift */
1069        {
1070            register BN_ULONG *ap, t, c;
1071            ap = t_d;
1072            c = 0;
1073            for (i = 3; i != 0; --i) {
1074                t = *ap;
1075                *(ap++) = ((t << 1) | c) & BN_MASK2;
1076                c = (t & BN_TBIT) ? 1 : 0;
1077            }
1078            *ap = c;
1079        }
1080        carry =
1081            (int)bn_add_words(r_d + (128 / BN_BITS2), r_d + (128 / BN_BITS2),
1082                              t_d, BN_NIST_256_TOP);
1083        /*
1084         * S2
1085         */
1086        carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
1087        /*
1088         * S3
1089         */
1090        nist_set_384(t_d, buf.bn, 20, 19, 18, 17, 16, 15, 14, 13, 12, 23, 22,
1091                     21);
1092        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1093        /*
1094         * S4
1095         */
1096        nist_set_384(t_d, buf.bn, 19, 18, 17, 16, 15, 14, 13, 12, 20, 0, 23,
1097                     0);
1098        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1099        /*
1100         * S5
1101         */
1102        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 23, 22, 21, 20, 0, 0, 0, 0);
1103        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1104        /*
1105         * S6
1106         */
1107        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 23, 22, 21, 0, 0, 20);
1108        carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1109        /*
1110         * D1
1111         */
1112        nist_set_384(t_d, buf.bn, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12,
1113                     23);
1114        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1115        /*
1116         * D2
1117         */
1118        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 0, 23, 22, 21, 20, 0);
1119        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1120        /*
1121         * D3
1122         */
1123        nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 0, 0);
1124        carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1125
1126    }
1127#endif
1128    /* see BN_nist_mod_224 for explanation */
1129    adjust = bn_sub_words;
1130    if (carry > 0)
1131        carry =
1132            (int)bn_sub_words(r_d, r_d, _nist_p_384[carry - 1],
1133                              BN_NIST_384_TOP);
1134    else if (carry < 0) {
1135        carry =
1136            (int)bn_add_words(r_d, r_d, _nist_p_384[-carry - 1],
1137                              BN_NIST_384_TOP);
1138        adjust = carry ? bn_sub_words : bn_add_words;
1139    } else
1140        carry = 1;
1141
1142    res = ((*adjust) (c_d, r_d, _nist_p_384[0], BN_NIST_384_TOP) && carry)
1143        ? r_d
1144        : c_d;
1145    nist_cp_bn(r_d, res, BN_NIST_384_TOP);
1146    r->top = BN_NIST_384_TOP;
1147    bn_correct_top(r);
1148
1149    return 1;
1150}
1151
/* Number of bits of a 521-bit value that fall into its topmost word. */
#define BN_NIST_521_RSHIFT      (521 % BN_BITS2)
/* Complement of the above within one word. */
#define BN_NIST_521_LSHIFT      (BN_BITS2 - BN_NIST_521_RSHIFT)
/* Mask keeping the low BN_NIST_521_RSHIFT bits of a word. */
#define BN_NIST_521_TOP_MASK    ((BN_ULONG)BN_MASK2 >> BN_NIST_521_LSHIFT)
1155
1156int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
1157                    BN_CTX *ctx)
1158{
1159    int top = a->top, i;
1160    BN_ULONG *r_d, *a_d = a->d, t_d[BN_NIST_521_TOP], val, tmp, *res;
1161    static const BIGNUM ossl_bignum_nist_p_521_sqr = {
1162        (BN_ULONG *)_nist_p_521_sqr,
1163        OSSL_NELEM(_nist_p_521_sqr),
1164        OSSL_NELEM(_nist_p_521_sqr),
1165        0, BN_FLG_STATIC_DATA
1166    };
1167
1168    field = &ossl_bignum_nist_p_521; /* just to make sure */
1169
1170    if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_521_sqr) >= 0)
1171        return BN_nnmod(r, a, field, ctx);
1172
1173    i = BN_ucmp(field, a);
1174    if (i == 0) {
1175        BN_zero(r);
1176        return 1;
1177    } else if (i > 0)
1178        return (r == a) ? 1 : (BN_copy(r, a) != NULL);
1179
1180    if (r != a) {
1181        if (!bn_wexpand(r, BN_NIST_521_TOP))
1182            return 0;
1183        r_d = r->d;
1184        nist_cp_bn(r_d, a_d, BN_NIST_521_TOP);
1185    } else
1186        r_d = a_d;
1187
1188    /* upper 521 bits, copy ... */
1189    nist_cp_bn_0(t_d, a_d + (BN_NIST_521_TOP - 1),
1190                 top - (BN_NIST_521_TOP - 1), BN_NIST_521_TOP);
1191    /* ... and right shift */
1192    for (val = t_d[0], i = 0; i < BN_NIST_521_TOP - 1; i++) {
1193#if 0
1194        /*
1195         * MSC ARM compiler [version 2013, presumably even earlier,
1196         * much earlier] miscompiles this code, but not one in
1197         * #else section. See RT#3541.
1198         */
1199        tmp = val >> BN_NIST_521_RSHIFT;
1200        val = t_d[i + 1];
1201        t_d[i] = (tmp | val << BN_NIST_521_LSHIFT) & BN_MASK2;
1202#else
1203        t_d[i] = (val >> BN_NIST_521_RSHIFT |
1204                  (tmp = t_d[i + 1]) << BN_NIST_521_LSHIFT) & BN_MASK2;
1205        val = tmp;
1206#endif
1207    }
1208    t_d[i] = val >> BN_NIST_521_RSHIFT;
1209    /* lower 521 bits */
1210    r_d[i] &= BN_NIST_521_TOP_MASK;
1211
1212    bn_add_words(r_d, r_d, t_d, BN_NIST_521_TOP);
1213    res = bn_sub_words(t_d, r_d, _nist_p_521,
1214                       BN_NIST_521_TOP)
1215        ? r_d
1216        : t_d;
1217    nist_cp_bn(r_d, res, BN_NIST_521_TOP);
1218    r->top = BN_NIST_521_TOP;
1219    bn_correct_top(r);
1220
1221    return 1;
1222}
1223
1224int (*BN_nist_mod_func(const BIGNUM *p)) (BIGNUM *r, const BIGNUM *a,
1225                                          const BIGNUM *field, BN_CTX *ctx) {
1226    if (BN_ucmp(&ossl_bignum_nist_p_192, p) == 0)
1227        return BN_nist_mod_192;
1228    if (BN_ucmp(&ossl_bignum_nist_p_224, p) == 0)
1229        return BN_nist_mod_224;
1230    if (BN_ucmp(&ossl_bignum_nist_p_256, p) == 0)
1231        return BN_nist_mod_256;
1232    if (BN_ucmp(&ossl_bignum_nist_p_384, p) == 0)
1233        return BN_nist_mod_384;
1234    if (BN_ucmp(&ossl_bignum_nist_p_521, p) == 0)
1235        return BN_nist_mod_521;
1236    return 0;
1237}
1238