1238384Sjkim/* crypto/ec/ecp_nistp256.c */ 2238384Sjkim/* 3238384Sjkim * Written by Adam Langley (Google) for the OpenSSL project 4238384Sjkim */ 5238384Sjkim/* Copyright 2011 Google Inc. 6238384Sjkim * 7238384Sjkim * Licensed under the Apache License, Version 2.0 (the "License"); 8238384Sjkim * 9238384Sjkim * you may not use this file except in compliance with the License. 10238384Sjkim * You may obtain a copy of the License at 11238384Sjkim * 12238384Sjkim * http://www.apache.org/licenses/LICENSE-2.0 13238384Sjkim * 14238384Sjkim * Unless required by applicable law or agreed to in writing, software 15238384Sjkim * distributed under the License is distributed on an "AS IS" BASIS, 16238384Sjkim * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17238384Sjkim * See the License for the specific language governing permissions and 18238384Sjkim * limitations under the License. 19238384Sjkim */ 20238384Sjkim 21238384Sjkim/* 22238384Sjkim * A 64-bit implementation of the NIST P-256 elliptic curve point multiplication 23238384Sjkim * 24238384Sjkim * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c. 25238384Sjkim * Otherwise based on Emilia's P224 work, which was inspired by my curve25519 26238384Sjkim * work which got its smarts from Daniel J. Bernstein's work on the same. 27238384Sjkim */ 28238384Sjkim 29238384Sjkim#include <openssl/opensslconf.h> 30238384Sjkim#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 31238384Sjkim 32238384Sjkim#ifndef OPENSSL_SYS_VMS 33238384Sjkim#include <stdint.h> 34238384Sjkim#else 35238384Sjkim#include <inttypes.h> 36238384Sjkim#endif 37238384Sjkim 38238384Sjkim#include <string.h> 39238384Sjkim#include <openssl/err.h> 40238384Sjkim#include "ec_lcl.h" 41238384Sjkim 42238384Sjkim#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 43238384Sjkim /* even with gcc, the typedef won't work for 32-bit platforms */ 44238384Sjkim typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */ 45238384Sjkim typedef __int128_t int128_t; 46238384Sjkim#else 47238384Sjkim #error "Need GCC 3.1 or later to define type uint128_t" 48238384Sjkim#endif 49238384Sjkim 50238384Sjkimtypedef uint8_t u8; 51238384Sjkimtypedef uint32_t u32; 52238384Sjkimtypedef uint64_t u64; 53238384Sjkimtypedef int64_t s64; 54238384Sjkim 55238384Sjkim/* The underlying field. 56238384Sjkim * 57238384Sjkim * P256 operates over GF(2^256-2^224+2^192+2^96-1). We can serialise an element 58238384Sjkim * of this field into 32 bytes. We call this an felem_bytearray. */ 59238384Sjkim 60238384Sjkimtypedef u8 felem_bytearray[32]; 61238384Sjkim 62238384Sjkim/* These are the parameters of P256, taken from FIPS 186-3, page 86. These 63238384Sjkim * values are big-endian. */ 64238384Sjkimstatic const felem_bytearray nistp256_curve_params[5] = { 65238384Sjkim {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* p */ 66238384Sjkim 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 67238384Sjkim 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 68238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 69238384Sjkim {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, /* a = -3 */ 70238384Sjkim 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 71238384Sjkim 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 72238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc}, /* b */ 73238384Sjkim {0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, 74238384Sjkim 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc, 75238384Sjkim 0x65, 0x1d, 0x06, 0xb0, 0xcc, 0x53, 0xb0, 0xf6, 76238384Sjkim 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b}, 77238384Sjkim {0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, /* x */ 78238384Sjkim 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, 79238384Sjkim 0x77, 0x03, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, 80238384Sjkim 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96}, 81238384Sjkim {0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, /* y */ 82238384Sjkim 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0x0f, 0x9e, 0x16, 83238384Sjkim 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, 84238384Sjkim 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5} 85238384Sjkim}; 86238384Sjkim 87238384Sjkim/* The representation of field elements. 88238384Sjkim * ------------------------------------ 89238384Sjkim * 90238384Sjkim * We represent field elements with either four 128-bit values, eight 128-bit 91238384Sjkim * values, or four 64-bit values. The field element represented is: 92238384Sjkim * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + v[3]*2^192 (mod p) 93238384Sjkim * or: 94238384Sjkim * v[0]*2^0 + v[1]*2^64 + v[2]*2^128 + ... + v[8]*2^512 (mod p) 95238384Sjkim * 96238384Sjkim * 128-bit values are called 'limbs'. Since the limbs are spaced only 64 bits 97238384Sjkim * apart, but are 128-bits wide, the most significant bits of each limb overlap 98238384Sjkim * with the least significant bits of the next. 99238384Sjkim * 100238384Sjkim * A field element with four limbs is an 'felem'. One with eight limbs is a 101238384Sjkim * 'longfelem' 102238384Sjkim * 103238384Sjkim * A field element with four, 64-bit values is called a 'smallfelem'. Small 104238384Sjkim * values are used as intermediate values before multiplication. 105238384Sjkim */ 106238384Sjkim 107238384Sjkim#define NLIMBS 4 108238384Sjkim 109238384Sjkimtypedef uint128_t limb; 110238384Sjkimtypedef limb felem[NLIMBS]; 111238384Sjkimtypedef limb longfelem[NLIMBS * 2]; 112238384Sjkimtypedef u64 smallfelem[NLIMBS]; 113238384Sjkim 114238384Sjkim/* This is the value of the prime as four 64-bit words, little-endian. */ 115238384Sjkimstatic const u64 kPrime[4] = { 0xfffffffffffffffful, 0xffffffff, 0, 0xffffffff00000001ul }; 116238384Sjkimstatic const u64 bottom63bits = 0x7ffffffffffffffful; 117238384Sjkim 118238384Sjkim/* bin32_to_felem takes a little-endian byte array and converts it into felem 119238384Sjkim * form. This assumes that the CPU is little-endian. */ 120238384Sjkimstatic void bin32_to_felem(felem out, const u8 in[32]) 121238384Sjkim { 122238384Sjkim out[0] = *((u64*) &in[0]); 123238384Sjkim out[1] = *((u64*) &in[8]); 124238384Sjkim out[2] = *((u64*) &in[16]); 125238384Sjkim out[3] = *((u64*) &in[24]); 126238384Sjkim } 127238384Sjkim 128238384Sjkim/* smallfelem_to_bin32 takes a smallfelem and serialises into a little endian, 129238384Sjkim * 32 byte array. This assumes that the CPU is little-endian. */ 130238384Sjkimstatic void smallfelem_to_bin32(u8 out[32], const smallfelem in) 131238384Sjkim { 132238384Sjkim *((u64*) &out[0]) = in[0]; 133238384Sjkim *((u64*) &out[8]) = in[1]; 134238384Sjkim *((u64*) &out[16]) = in[2]; 135238384Sjkim *((u64*) &out[24]) = in[3]; 136238384Sjkim } 137238384Sjkim 138238384Sjkim/* To preserve endianness when using BN_bn2bin and BN_bin2bn */ 139238384Sjkimstatic void flip_endian(u8 *out, const u8 *in, unsigned len) 140238384Sjkim { 141238384Sjkim unsigned i; 142238384Sjkim for (i = 0; i < len; ++i) 143238384Sjkim out[i] = in[len-1-i]; 144238384Sjkim } 145238384Sjkim 146238384Sjkim/* BN_to_felem converts an OpenSSL BIGNUM into an felem */ 147238384Sjkimstatic int BN_to_felem(felem out, const BIGNUM *bn) 148238384Sjkim { 149238384Sjkim felem_bytearray b_in; 150238384Sjkim felem_bytearray b_out; 151238384Sjkim unsigned num_bytes; 152238384Sjkim 153238384Sjkim /* BN_bn2bin eats leading zeroes */ 154238384Sjkim memset(b_out, 0, sizeof b_out); 155238384Sjkim num_bytes = BN_num_bytes(bn); 156238384Sjkim if (num_bytes > sizeof b_out) 157238384Sjkim { 158238384Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 159238384Sjkim return 0; 160238384Sjkim } 161238384Sjkim if (BN_is_negative(bn)) 162238384Sjkim { 163238384Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 164238384Sjkim return 0; 165238384Sjkim } 166238384Sjkim num_bytes = BN_bn2bin(bn, b_in); 167238384Sjkim flip_endian(b_out, b_in, num_bytes); 168238384Sjkim bin32_to_felem(out, b_out); 169238384Sjkim return 1; 170238384Sjkim } 171238384Sjkim 172238384Sjkim/* felem_to_BN converts an felem into an OpenSSL BIGNUM */ 173238384Sjkimstatic BIGNUM *smallfelem_to_BN(BIGNUM *out, const smallfelem in) 174238384Sjkim { 175238384Sjkim felem_bytearray b_in, b_out; 176238384Sjkim smallfelem_to_bin32(b_in, in); 177238384Sjkim flip_endian(b_out, b_in, sizeof b_out); 178238384Sjkim return BN_bin2bn(b_out, sizeof b_out, out); 179238384Sjkim } 180238384Sjkim 181238384Sjkim 182238384Sjkim/* Field operations 183238384Sjkim * ---------------- */ 184238384Sjkim 185238384Sjkimstatic void smallfelem_one(smallfelem out) 186238384Sjkim { 187238384Sjkim out[0] = 1; 188238384Sjkim out[1] = 0; 189238384Sjkim out[2] = 0; 190238384Sjkim out[3] = 0; 191238384Sjkim } 192238384Sjkim 193238384Sjkimstatic void smallfelem_assign(smallfelem out, const smallfelem in) 194238384Sjkim { 195238384Sjkim out[0] = in[0]; 196238384Sjkim out[1] = in[1]; 197238384Sjkim out[2] = in[2]; 198238384Sjkim out[3] = in[3]; 199238384Sjkim } 200238384Sjkim 201238384Sjkimstatic void felem_assign(felem out, const felem in) 202238384Sjkim { 203238384Sjkim out[0] = in[0]; 204238384Sjkim out[1] = in[1]; 205238384Sjkim out[2] = in[2]; 206238384Sjkim out[3] = in[3]; 207238384Sjkim } 208238384Sjkim 209238384Sjkim/* felem_sum sets out = out + in. */ 210238384Sjkimstatic void felem_sum(felem out, const felem in) 211238384Sjkim { 212238384Sjkim out[0] += in[0]; 213238384Sjkim out[1] += in[1]; 214238384Sjkim out[2] += in[2]; 215238384Sjkim out[3] += in[3]; 216238384Sjkim } 217238384Sjkim 218238384Sjkim/* felem_small_sum sets out = out + in. */ 219238384Sjkimstatic void felem_small_sum(felem out, const smallfelem in) 220238384Sjkim { 221238384Sjkim out[0] += in[0]; 222238384Sjkim out[1] += in[1]; 223238384Sjkim out[2] += in[2]; 224238384Sjkim out[3] += in[3]; 225238384Sjkim } 226238384Sjkim 227238384Sjkim/* felem_scalar sets out = out * scalar */ 228238384Sjkimstatic void felem_scalar(felem out, const u64 scalar) 229238384Sjkim { 230238384Sjkim out[0] *= scalar; 231238384Sjkim out[1] *= scalar; 232238384Sjkim out[2] *= scalar; 233238384Sjkim out[3] *= scalar; 234238384Sjkim } 235238384Sjkim 236238384Sjkim/* longfelem_scalar sets out = out * scalar */ 237238384Sjkimstatic void longfelem_scalar(longfelem out, const u64 scalar) 238238384Sjkim { 239238384Sjkim out[0] *= scalar; 240238384Sjkim out[1] *= scalar; 241238384Sjkim out[2] *= scalar; 242238384Sjkim out[3] *= scalar; 243238384Sjkim out[4] *= scalar; 244238384Sjkim out[5] *= scalar; 245238384Sjkim out[6] *= scalar; 246238384Sjkim out[7] *= scalar; 247238384Sjkim } 248238384Sjkim 249238384Sjkim#define two105m41m9 (((limb)1) << 105) - (((limb)1) << 41) - (((limb)1) << 9) 250238384Sjkim#define two105 (((limb)1) << 105) 251238384Sjkim#define two105m41p9 (((limb)1) << 105) - (((limb)1) << 41) + (((limb)1) << 9) 252238384Sjkim 253238384Sjkim/* zero105 is 0 mod p */ 254238384Sjkimstatic const felem zero105 = { two105m41m9, two105, two105m41p9, two105m41p9 }; 255238384Sjkim 256238384Sjkim/* smallfelem_neg sets |out| to |-small| 257238384Sjkim * On exit: 258238384Sjkim * out[i] < out[i] + 2^105 259238384Sjkim */ 260238384Sjkimstatic void smallfelem_neg(felem out, const smallfelem small) 261238384Sjkim { 262238384Sjkim /* In order to prevent underflow, we subtract from 0 mod p. */ 263238384Sjkim out[0] = zero105[0] - small[0]; 264238384Sjkim out[1] = zero105[1] - small[1]; 265238384Sjkim out[2] = zero105[2] - small[2]; 266238384Sjkim out[3] = zero105[3] - small[3]; 267238384Sjkim } 268238384Sjkim 269238384Sjkim/* felem_diff subtracts |in| from |out| 270238384Sjkim * On entry: 271238384Sjkim * in[i] < 2^104 272238384Sjkim * On exit: 273238384Sjkim * out[i] < out[i] + 2^105 274238384Sjkim */ 275238384Sjkimstatic void felem_diff(felem out, const felem in) 276238384Sjkim { 277238384Sjkim /* In order to prevent underflow, we add 0 mod p before subtracting. */ 278238384Sjkim out[0] += zero105[0]; 279238384Sjkim out[1] += zero105[1]; 280238384Sjkim out[2] += zero105[2]; 281238384Sjkim out[3] += zero105[3]; 282238384Sjkim 283238384Sjkim out[0] -= in[0]; 284238384Sjkim out[1] -= in[1]; 285238384Sjkim out[2] -= in[2]; 286238384Sjkim out[3] -= in[3]; 287238384Sjkim } 288238384Sjkim 289238384Sjkim#define two107m43m11 (((limb)1) << 107) - (((limb)1) << 43) - (((limb)1) << 11) 290238384Sjkim#define two107 (((limb)1) << 107) 291238384Sjkim#define two107m43p11 (((limb)1) << 107) - (((limb)1) << 43) + (((limb)1) << 11) 292238384Sjkim 293238384Sjkim/* zero107 is 0 mod p */ 294238384Sjkimstatic const felem zero107 = { two107m43m11, two107, two107m43p11, two107m43p11 }; 295238384Sjkim 296238384Sjkim/* An alternative felem_diff for larger inputs |in| 297238384Sjkim * felem_diff_zero107 subtracts |in| from |out| 298238384Sjkim * On entry: 299238384Sjkim * in[i] < 2^106 300238384Sjkim * On exit: 301238384Sjkim * out[i] < out[i] + 2^107 302238384Sjkim */ 303238384Sjkimstatic void felem_diff_zero107(felem out, const felem in) 304238384Sjkim { 305238384Sjkim /* In order to prevent underflow, we add 0 mod p before subtracting. */ 306238384Sjkim out[0] += zero107[0]; 307238384Sjkim out[1] += zero107[1]; 308238384Sjkim out[2] += zero107[2]; 309238384Sjkim out[3] += zero107[3]; 310238384Sjkim 311238384Sjkim out[0] -= in[0]; 312238384Sjkim out[1] -= in[1]; 313238384Sjkim out[2] -= in[2]; 314238384Sjkim out[3] -= in[3]; 315238384Sjkim } 316238384Sjkim 317238384Sjkim/* longfelem_diff subtracts |in| from |out| 318238384Sjkim * On entry: 319238384Sjkim * in[i] < 7*2^67 320238384Sjkim * On exit: 321238384Sjkim * out[i] < out[i] + 2^70 + 2^40 322238384Sjkim */ 323238384Sjkimstatic void longfelem_diff(longfelem out, const longfelem in) 324238384Sjkim { 325238384Sjkim static const limb two70m8p6 = (((limb)1) << 70) - (((limb)1) << 8) + (((limb)1) << 6); 326238384Sjkim static const limb two70p40 = (((limb)1) << 70) + (((limb)1) << 40); 327238384Sjkim static const limb two70 = (((limb)1) << 70); 328238384Sjkim static const limb two70m40m38p6 = (((limb)1) << 70) - (((limb)1) << 40) - (((limb)1) << 38) + (((limb)1) << 6); 329238384Sjkim static const limb two70m6 = (((limb)1) << 70) - (((limb)1) << 6); 330238384Sjkim 331238384Sjkim /* add 0 mod p to avoid underflow */ 332238384Sjkim out[0] += two70m8p6; 333238384Sjkim out[1] += two70p40; 334238384Sjkim out[2] += two70; 335238384Sjkim out[3] += two70m40m38p6; 336238384Sjkim out[4] += two70m6; 337238384Sjkim out[5] += two70m6; 338238384Sjkim out[6] += two70m6; 339238384Sjkim out[7] += two70m6; 340238384Sjkim 341238384Sjkim /* in[i] < 7*2^67 < 2^70 - 2^40 - 2^38 + 2^6 */ 342238384Sjkim out[0] -= in[0]; 343238384Sjkim out[1] -= in[1]; 344238384Sjkim out[2] -= in[2]; 345238384Sjkim out[3] -= in[3]; 346238384Sjkim out[4] -= in[4]; 347238384Sjkim out[5] -= in[5]; 348238384Sjkim out[6] -= in[6]; 349238384Sjkim out[7] -= in[7]; 350238384Sjkim } 351238384Sjkim 352238384Sjkim#define two64m0 (((limb)1) << 64) - 1 353238384Sjkim#define two110p32m0 (((limb)1) << 110) + (((limb)1) << 32) - 1 354238384Sjkim#define two64m46 (((limb)1) << 64) - (((limb)1) << 46) 355238384Sjkim#define two64m32 (((limb)1) << 64) - (((limb)1) << 32) 356238384Sjkim 357238384Sjkim/* zero110 is 0 mod p */ 358238384Sjkimstatic const felem zero110 = { two64m0, two110p32m0, two64m46, two64m32 }; 359238384Sjkim 360238384Sjkim/* felem_shrink converts an felem into a smallfelem. The result isn't quite 361238384Sjkim * minimal as the value may be greater than p. 362238384Sjkim * 363238384Sjkim * On entry: 364238384Sjkim * in[i] < 2^109 365238384Sjkim * On exit: 366238384Sjkim * out[i] < 2^64 367238384Sjkim */ 368238384Sjkimstatic void felem_shrink(smallfelem out, const felem in) 369238384Sjkim { 370238384Sjkim felem tmp; 371238384Sjkim u64 a, b, mask; 372238384Sjkim s64 high, low; 373238384Sjkim static const u64 kPrime3Test = 0x7fffffff00000001ul; /* 2^63 - 2^32 + 1 */ 374238384Sjkim 375238384Sjkim /* Carry 2->3 */ 376238384Sjkim tmp[3] = zero110[3] + in[3] + ((u64) (in[2] >> 64)); 377238384Sjkim /* tmp[3] < 2^110 */ 378238384Sjkim 379238384Sjkim tmp[2] = zero110[2] + (u64) in[2]; 380238384Sjkim tmp[0] = zero110[0] + in[0]; 381238384Sjkim tmp[1] = zero110[1] + in[1]; 382238384Sjkim /* tmp[0] < 2**110, tmp[1] < 2^111, tmp[2] < 2**65 */ 383238384Sjkim 384238384Sjkim /* We perform two partial reductions where we eliminate the 385238384Sjkim * high-word of tmp[3]. We don't update the other words till the end. 386238384Sjkim */ 387238384Sjkim a = tmp[3] >> 64; /* a < 2^46 */ 388238384Sjkim tmp[3] = (u64) tmp[3]; 389238384Sjkim tmp[3] -= a; 390238384Sjkim tmp[3] += ((limb)a) << 32; 391238384Sjkim /* tmp[3] < 2^79 */ 392238384Sjkim 393238384Sjkim b = a; 394238384Sjkim a = tmp[3] >> 64; /* a < 2^15 */ 395238384Sjkim b += a; /* b < 2^46 + 2^15 < 2^47 */ 396238384Sjkim tmp[3] = (u64) tmp[3]; 397238384Sjkim tmp[3] -= a; 398238384Sjkim tmp[3] += ((limb)a) << 32; 399238384Sjkim /* tmp[3] < 2^64 + 2^47 */ 400238384Sjkim 401238384Sjkim /* This adjusts the other two words to complete the two partial 402238384Sjkim * reductions. */ 403238384Sjkim tmp[0] += b; 404238384Sjkim tmp[1] -= (((limb)b) << 32); 405238384Sjkim 406238384Sjkim /* In order to make space in tmp[3] for the carry from 2 -> 3, we 407238384Sjkim * conditionally subtract kPrime if tmp[3] is large enough. */ 408238384Sjkim high = tmp[3] >> 64; 409238384Sjkim /* As tmp[3] < 2^65, high is either 1 or 0 */ 410238384Sjkim high <<= 63; 411238384Sjkim high >>= 63; 412238384Sjkim /* high is: 413238384Sjkim * all ones if the high word of tmp[3] is 1 414238384Sjkim * all zeros if the high word of tmp[3] if 0 */ 415238384Sjkim low = tmp[3]; 416238384Sjkim mask = low >> 63; 417238384Sjkim /* mask is: 418238384Sjkim * all ones if the MSB of low is 1 419238384Sjkim * all zeros if the MSB of low if 0 */ 420238384Sjkim low &= bottom63bits; 421238384Sjkim low -= kPrime3Test; 422238384Sjkim /* if low was greater than kPrime3Test then the MSB is zero */ 423238384Sjkim low = ~low; 424238384Sjkim low >>= 63; 425238384Sjkim /* low is: 426238384Sjkim * all ones if low was > kPrime3Test 427238384Sjkim * all zeros if low was <= kPrime3Test */ 428238384Sjkim mask = (mask & low) | high; 429238384Sjkim tmp[0] -= mask & kPrime[0]; 430238384Sjkim tmp[1] -= mask & kPrime[1]; 431238384Sjkim /* kPrime[2] is zero, so omitted */ 432238384Sjkim tmp[3] -= mask & kPrime[3]; 433238384Sjkim /* tmp[3] < 2**64 - 2**32 + 1 */ 434238384Sjkim 435238384Sjkim tmp[1] += ((u64) (tmp[0] >> 64)); tmp[0] = (u64) tmp[0]; 436238384Sjkim tmp[2] += ((u64) (tmp[1] >> 64)); tmp[1] = (u64) tmp[1]; 437238384Sjkim tmp[3] += ((u64) (tmp[2] >> 64)); tmp[2] = (u64) tmp[2]; 438238384Sjkim /* tmp[i] < 2^64 */ 439238384Sjkim 440238384Sjkim out[0] = tmp[0]; 441238384Sjkim out[1] = tmp[1]; 442238384Sjkim out[2] = tmp[2]; 443238384Sjkim out[3] = tmp[3]; 444238384Sjkim } 445238384Sjkim 446238384Sjkim/* smallfelem_expand converts a smallfelem to an felem */ 447238384Sjkimstatic void smallfelem_expand(felem out, const smallfelem in) 448238384Sjkim { 449238384Sjkim out[0] = in[0]; 450238384Sjkim out[1] = in[1]; 451238384Sjkim out[2] = in[2]; 452238384Sjkim out[3] = in[3]; 453238384Sjkim } 454238384Sjkim 455238384Sjkim/* smallfelem_square sets |out| = |small|^2 456238384Sjkim * On entry: 457238384Sjkim * small[i] < 2^64 458238384Sjkim * On exit: 459238384Sjkim * out[i] < 7 * 2^64 < 2^67 460238384Sjkim */ 461238384Sjkimstatic void smallfelem_square(longfelem out, const smallfelem small) 462238384Sjkim { 463238384Sjkim limb a; 464238384Sjkim u64 high, low; 465238384Sjkim 466238384Sjkim a = ((uint128_t) small[0]) * small[0]; 467238384Sjkim low = a; 468238384Sjkim high = a >> 64; 469238384Sjkim out[0] = low; 470238384Sjkim out[1] = high; 471238384Sjkim 472238384Sjkim a = ((uint128_t) small[0]) * small[1]; 473238384Sjkim low = a; 474238384Sjkim high = a >> 64; 475238384Sjkim out[1] += low; 476238384Sjkim out[1] += low; 477238384Sjkim out[2] = high; 478238384Sjkim 479238384Sjkim a = ((uint128_t) small[0]) * small[2]; 480238384Sjkim low = a; 481238384Sjkim high = a >> 64; 482238384Sjkim out[2] += low; 483238384Sjkim out[2] *= 2; 484238384Sjkim out[3] = high; 485238384Sjkim 486238384Sjkim a = ((uint128_t) small[0]) * small[3]; 487238384Sjkim low = a; 488238384Sjkim high = a >> 64; 489238384Sjkim out[3] += low; 490238384Sjkim out[4] = high; 491238384Sjkim 492238384Sjkim a = ((uint128_t) small[1]) * small[2]; 493238384Sjkim low = a; 494238384Sjkim high = a >> 64; 495238384Sjkim out[3] += low; 496238384Sjkim out[3] *= 2; 497238384Sjkim out[4] += high; 498238384Sjkim 499238384Sjkim a = ((uint128_t) small[1]) * small[1]; 500238384Sjkim low = a; 501238384Sjkim high = a >> 64; 502238384Sjkim out[2] += low; 503238384Sjkim out[3] += high; 504238384Sjkim 505238384Sjkim a = ((uint128_t) small[1]) * small[3]; 506238384Sjkim low = a; 507238384Sjkim high = a >> 64; 508238384Sjkim out[4] += low; 509238384Sjkim out[4] *= 2; 510238384Sjkim out[5] = high; 511238384Sjkim 512238384Sjkim a = ((uint128_t) small[2]) * small[3]; 513238384Sjkim low = a; 514238384Sjkim high = a >> 64; 515238384Sjkim out[5] += low; 516238384Sjkim out[5] *= 2; 517238384Sjkim out[6] = high; 518238384Sjkim out[6] += high; 519238384Sjkim 520238384Sjkim a = ((uint128_t) small[2]) * small[2]; 521238384Sjkim low = a; 522238384Sjkim high = a >> 64; 523238384Sjkim out[4] += low; 524238384Sjkim out[5] += high; 525238384Sjkim 526238384Sjkim a = ((uint128_t) small[3]) * small[3]; 527238384Sjkim low = a; 528238384Sjkim high = a >> 64; 529238384Sjkim out[6] += low; 530238384Sjkim out[7] = high; 531238384Sjkim } 532238384Sjkim 533238384Sjkim/* felem_square sets |out| = |in|^2 534238384Sjkim * On entry: 535238384Sjkim * in[i] < 2^109 536238384Sjkim * On exit: 537238384Sjkim * out[i] < 7 * 2^64 < 2^67 538238384Sjkim */ 539238384Sjkimstatic void felem_square(longfelem out, const felem in) 540238384Sjkim { 541238384Sjkim u64 small[4]; 542238384Sjkim felem_shrink(small, in); 543238384Sjkim smallfelem_square(out, small); 544238384Sjkim } 545238384Sjkim 546238384Sjkim/* smallfelem_mul sets |out| = |small1| * |small2| 547238384Sjkim * On entry: 548238384Sjkim * small1[i] < 2^64 549238384Sjkim * small2[i] < 2^64 550238384Sjkim * On exit: 551238384Sjkim * out[i] < 7 * 2^64 < 2^67 552238384Sjkim */ 553238384Sjkimstatic void smallfelem_mul(longfelem out, const smallfelem small1, const smallfelem small2) 554238384Sjkim { 555238384Sjkim limb a; 556238384Sjkim u64 high, low; 557238384Sjkim 558238384Sjkim a = ((uint128_t) small1[0]) * small2[0]; 559238384Sjkim low = a; 560238384Sjkim high = a >> 64; 561238384Sjkim out[0] = low; 562238384Sjkim out[1] = high; 563238384Sjkim 564238384Sjkim 565238384Sjkim a = ((uint128_t) small1[0]) * small2[1]; 566238384Sjkim low = a; 567238384Sjkim high = a >> 64; 568238384Sjkim out[1] += low; 569238384Sjkim out[2] = high; 570238384Sjkim 571238384Sjkim a = ((uint128_t) small1[1]) * small2[0]; 572238384Sjkim low = a; 573238384Sjkim high = a >> 64; 574238384Sjkim out[1] += low; 575238384Sjkim out[2] += high; 576238384Sjkim 577238384Sjkim 578238384Sjkim a = ((uint128_t) small1[0]) * small2[2]; 579238384Sjkim low = a; 580238384Sjkim high = a >> 64; 581238384Sjkim out[2] += low; 582238384Sjkim out[3] = high; 583238384Sjkim 584238384Sjkim a = ((uint128_t) small1[1]) * small2[1]; 585238384Sjkim low = a; 586238384Sjkim high = a >> 64; 587238384Sjkim out[2] += low; 588238384Sjkim out[3] += high; 589238384Sjkim 590238384Sjkim a = ((uint128_t) small1[2]) * small2[0]; 591238384Sjkim low = a; 592238384Sjkim high = a >> 64; 593238384Sjkim out[2] += low; 594238384Sjkim out[3] += high; 595238384Sjkim 596238384Sjkim 597238384Sjkim a = ((uint128_t) small1[0]) * small2[3]; 598238384Sjkim low = a; 599238384Sjkim high = a >> 64; 600238384Sjkim out[3] += low; 601238384Sjkim out[4] = high; 602238384Sjkim 603238384Sjkim a = ((uint128_t) small1[1]) * small2[2]; 604238384Sjkim low = a; 605238384Sjkim high = a >> 64; 606238384Sjkim out[3] += low; 607238384Sjkim out[4] += high; 608238384Sjkim 609238384Sjkim a = ((uint128_t) small1[2]) * small2[1]; 610238384Sjkim low = a; 611238384Sjkim high = a >> 64; 612238384Sjkim out[3] += low; 613238384Sjkim out[4] += high; 614238384Sjkim 615238384Sjkim a = ((uint128_t) small1[3]) * small2[0]; 616238384Sjkim low = a; 617238384Sjkim high = a >> 64; 618238384Sjkim out[3] += low; 619238384Sjkim out[4] += high; 620238384Sjkim 621238384Sjkim 622238384Sjkim a = ((uint128_t) small1[1]) * small2[3]; 623238384Sjkim low = a; 624238384Sjkim high = a >> 64; 625238384Sjkim out[4] += low; 626238384Sjkim out[5] = high; 627238384Sjkim 628238384Sjkim a = ((uint128_t) small1[2]) * small2[2]; 629238384Sjkim low = a; 630238384Sjkim high = a >> 64; 631238384Sjkim out[4] += low; 632238384Sjkim out[5] += high; 633238384Sjkim 634238384Sjkim a = ((uint128_t) small1[3]) * small2[1]; 635238384Sjkim low = a; 636238384Sjkim high = a >> 64; 637238384Sjkim out[4] += low; 638238384Sjkim out[5] += high; 639238384Sjkim 640238384Sjkim 641238384Sjkim a = ((uint128_t) small1[2]) * small2[3]; 642238384Sjkim low = a; 643238384Sjkim high = a >> 64; 644238384Sjkim out[5] += low; 645238384Sjkim out[6] = high; 646238384Sjkim 647238384Sjkim a = ((uint128_t) small1[3]) * small2[2]; 648238384Sjkim low = a; 649238384Sjkim high = a >> 64; 650238384Sjkim out[5] += low; 651238384Sjkim out[6] += high; 652238384Sjkim 653238384Sjkim 654238384Sjkim a = ((uint128_t) small1[3]) * small2[3]; 655238384Sjkim low = a; 656238384Sjkim high = a >> 64; 657238384Sjkim out[6] += low; 658238384Sjkim out[7] = high; 659238384Sjkim } 660238384Sjkim 661238384Sjkim/* felem_mul sets |out| = |in1| * |in2| 662238384Sjkim * On entry: 663238384Sjkim * in1[i] < 2^109 664238384Sjkim * in2[i] < 2^109 665238384Sjkim * On exit: 666238384Sjkim * out[i] < 7 * 2^64 < 2^67 667238384Sjkim */ 668238384Sjkimstatic void felem_mul(longfelem out, const felem in1, const felem in2) 669238384Sjkim { 670238384Sjkim smallfelem small1, small2; 671238384Sjkim felem_shrink(small1, in1); 672238384Sjkim felem_shrink(small2, in2); 673238384Sjkim smallfelem_mul(out, small1, small2); 674238384Sjkim } 675238384Sjkim 676238384Sjkim/* felem_small_mul sets |out| = |small1| * |in2| 677238384Sjkim * On entry: 678238384Sjkim * small1[i] < 2^64 679238384Sjkim * in2[i] < 2^109 680238384Sjkim * On exit: 681238384Sjkim * out[i] < 7 * 2^64 < 2^67 682238384Sjkim */ 683238384Sjkimstatic void felem_small_mul(longfelem out, const smallfelem small1, const felem in2) 684238384Sjkim { 685238384Sjkim smallfelem small2; 686238384Sjkim felem_shrink(small2, in2); 687238384Sjkim smallfelem_mul(out, small1, small2); 688238384Sjkim } 689238384Sjkim 690238384Sjkim#define two100m36m4 (((limb)1) << 100) - (((limb)1) << 36) - (((limb)1) << 4) 691238384Sjkim#define two100 (((limb)1) << 100) 692238384Sjkim#define two100m36p4 (((limb)1) << 100) - (((limb)1) << 36) + (((limb)1) << 4) 693238384Sjkim/* zero100 is 0 mod p */ 694238384Sjkimstatic const felem zero100 = { two100m36m4, two100, two100m36p4, two100m36p4 }; 695238384Sjkim 696238384Sjkim/* Internal function for the different flavours of felem_reduce. 697238384Sjkim * felem_reduce_ reduces the higher coefficients in[4]-in[7]. 698238384Sjkim * On entry: 699238384Sjkim * out[0] >= in[6] + 2^32*in[6] + in[7] + 2^32*in[7] 700238384Sjkim * out[1] >= in[7] + 2^32*in[4] 701238384Sjkim * out[2] >= in[5] + 2^32*in[5] 702238384Sjkim * out[3] >= in[4] + 2^32*in[5] + 2^32*in[6] 703238384Sjkim * On exit: 704238384Sjkim * out[0] <= out[0] + in[4] + 2^32*in[5] 705238384Sjkim * out[1] <= out[1] + in[5] + 2^33*in[6] 706238384Sjkim * out[2] <= out[2] + in[7] + 2*in[6] + 2^33*in[7] 707238384Sjkim * out[3] <= out[3] + 2^32*in[4] + 3*in[7] 708238384Sjkim */ 709238384Sjkimstatic void felem_reduce_(felem out, const longfelem in) 710238384Sjkim { 711238384Sjkim int128_t c; 712238384Sjkim /* combine common terms from below */ 713238384Sjkim c = in[4] + (in[5] << 32); 714238384Sjkim out[0] += c; 715238384Sjkim out[3] -= c; 716238384Sjkim 717238384Sjkim c = in[5] - in[7]; 718238384Sjkim out[1] += c; 719238384Sjkim out[2] -= c; 720238384Sjkim 721238384Sjkim /* the remaining terms */ 722238384Sjkim /* 256: [(0,1),(96,-1),(192,-1),(224,1)] */ 723238384Sjkim out[1] -= (in[4] << 32); 724238384Sjkim out[3] += (in[4] << 32); 725238384Sjkim 726238384Sjkim /* 320: [(32,1),(64,1),(128,-1),(160,-1),(224,-1)] */ 727238384Sjkim out[2] -= (in[5] << 32); 728238384Sjkim 729238384Sjkim /* 384: [(0,-1),(32,-1),(96,2),(128,2),(224,-1)] */ 730238384Sjkim out[0] -= in[6]; 731238384Sjkim out[0] -= (in[6] << 32); 732238384Sjkim out[1] += (in[6] << 33); 733238384Sjkim out[2] += (in[6] * 2); 734238384Sjkim out[3] -= (in[6] << 32); 735238384Sjkim 736238384Sjkim /* 448: [(0,-1),(32,-1),(64,-1),(128,1),(160,2),(192,3)] */ 737238384Sjkim out[0] -= in[7]; 738238384Sjkim out[0] -= (in[7] << 32); 739238384Sjkim out[2] += (in[7] << 33); 740238384Sjkim out[3] += (in[7] * 3); 741238384Sjkim } 742238384Sjkim 743238384Sjkim/* felem_reduce converts a longfelem into an felem. 744238384Sjkim * To be called directly after felem_square or felem_mul. 745238384Sjkim * On entry: 746238384Sjkim * in[0] < 2^64, in[1] < 3*2^64, in[2] < 5*2^64, in[3] < 7*2^64 747238384Sjkim * in[4] < 7*2^64, in[5] < 5*2^64, in[6] < 3*2^64, in[7] < 2*64 748238384Sjkim * On exit: 749238384Sjkim * out[i] < 2^101 750238384Sjkim */ 751238384Sjkimstatic void felem_reduce(felem out, const longfelem in) 752238384Sjkim { 753238384Sjkim out[0] = zero100[0] + in[0]; 754238384Sjkim out[1] = zero100[1] + in[1]; 755238384Sjkim out[2] = zero100[2] + in[2]; 756238384Sjkim out[3] = zero100[3] + in[3]; 757238384Sjkim 758238384Sjkim felem_reduce_(out, in); 759238384Sjkim 760238384Sjkim /* out[0] > 2^100 - 2^36 - 2^4 - 3*2^64 - 3*2^96 - 2^64 - 2^96 > 0 761238384Sjkim * out[1] > 2^100 - 2^64 - 7*2^96 > 0 762238384Sjkim * out[2] > 2^100 - 2^36 + 2^4 - 5*2^64 - 5*2^96 > 0 763238384Sjkim * out[3] > 2^100 - 2^36 + 2^4 - 7*2^64 - 5*2^96 - 3*2^96 > 0 764238384Sjkim * 765238384Sjkim * out[0] < 2^100 + 2^64 + 7*2^64 + 5*2^96 < 2^101 766238384Sjkim * out[1] < 2^100 + 3*2^64 + 5*2^64 + 3*2^97 < 2^101 767238384Sjkim * out[2] < 2^100 + 5*2^64 + 2^64 + 3*2^65 + 2^97 < 2^101 768238384Sjkim * out[3] < 2^100 + 7*2^64 + 7*2^96 + 3*2^64 < 2^101 769238384Sjkim */ 770238384Sjkim } 771238384Sjkim 772238384Sjkim/* felem_reduce_zero105 converts a larger longfelem into an felem. 773238384Sjkim * On entry: 774238384Sjkim * in[0] < 2^71 775238384Sjkim * On exit: 776238384Sjkim * out[i] < 2^106 777238384Sjkim */ 778238384Sjkimstatic void felem_reduce_zero105(felem out, const longfelem in) 779238384Sjkim { 780238384Sjkim out[0] = zero105[0] + in[0]; 781238384Sjkim out[1] = zero105[1] + in[1]; 782238384Sjkim out[2] = zero105[2] + in[2]; 783238384Sjkim out[3] = zero105[3] + in[3]; 784238384Sjkim 785238384Sjkim felem_reduce_(out, in); 786238384Sjkim 787238384Sjkim /* out[0] > 2^105 - 2^41 - 2^9 - 2^71 - 2^103 - 2^71 - 2^103 > 0 788238384Sjkim * out[1] > 2^105 - 2^71 - 2^103 > 0 789238384Sjkim * out[2] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 > 0 790238384Sjkim * out[3] > 2^105 - 2^41 + 2^9 - 2^71 - 2^103 - 2^103 > 0 791238384Sjkim * 792238384Sjkim * out[0] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106 793238384Sjkim * out[1] < 2^105 + 2^71 + 2^71 + 2^103 < 2^106 794238384Sjkim * out[2] < 2^105 + 2^71 + 2^71 + 2^71 + 2^103 < 2^106 795238384Sjkim * out[3] < 2^105 + 2^71 + 2^103 + 2^71 < 2^106 796238384Sjkim */ 797238384Sjkim } 798238384Sjkim 799238384Sjkim/* subtract_u64 sets *result = *result - v and *carry to one if the subtraction 800238384Sjkim * underflowed. */ 801238384Sjkimstatic void subtract_u64(u64* result, u64* carry, u64 v) 802238384Sjkim { 803238384Sjkim uint128_t r = *result; 804238384Sjkim r -= v; 805238384Sjkim *carry = (r >> 64) & 1; 806238384Sjkim *result = (u64) r; 807238384Sjkim } 808238384Sjkim 809238384Sjkim/* felem_contract converts |in| to its unique, minimal representation. 810238384Sjkim * On entry: 811238384Sjkim * in[i] < 2^109 812238384Sjkim */ 813238384Sjkimstatic void felem_contract(smallfelem out, const felem in) 814238384Sjkim { 815238384Sjkim unsigned i; 816238384Sjkim u64 all_equal_so_far = 0, result = 0, carry; 817238384Sjkim 818238384Sjkim felem_shrink(out, in); 819238384Sjkim /* small is minimal except that the value might be > p */ 820238384Sjkim 821238384Sjkim all_equal_so_far--; 822238384Sjkim /* We are doing a constant time test if out >= kPrime. We need to 823238384Sjkim * compare each u64, from most-significant to least significant. For 824238384Sjkim * each one, if all words so far have been equal (m is all ones) then a 825238384Sjkim * non-equal result is the answer. Otherwise we continue. */ 826238384Sjkim for (i = 3; i < 4; i--) 827238384Sjkim { 828238384Sjkim u64 equal; 829238384Sjkim uint128_t a = ((uint128_t) kPrime[i]) - out[i]; 830238384Sjkim /* if out[i] > kPrime[i] then a will underflow and the high 831238384Sjkim * 64-bits will all be set. */ 832238384Sjkim result |= all_equal_so_far & ((u64) (a >> 64)); 833238384Sjkim 834238384Sjkim /* if kPrime[i] == out[i] then |equal| will be all zeros and 835238384Sjkim * the decrement will make it all ones. */ 836238384Sjkim equal = kPrime[i] ^ out[i]; 837238384Sjkim equal--; 838238384Sjkim equal &= equal << 32; 839238384Sjkim equal &= equal << 16; 840238384Sjkim equal &= equal << 8; 841238384Sjkim equal &= equal << 4; 842238384Sjkim equal &= equal << 2; 843238384Sjkim equal &= equal << 1; 844238384Sjkim equal = ((s64) equal) >> 63; 845238384Sjkim 846238384Sjkim all_equal_so_far &= equal; 847238384Sjkim } 848238384Sjkim 849238384Sjkim /* if all_equal_so_far is still all ones then the two values are equal 850238384Sjkim * and so out >= kPrime is true. */ 851238384Sjkim result |= all_equal_so_far; 852238384Sjkim 853238384Sjkim /* if out >= kPrime then we subtract kPrime. */ 854238384Sjkim subtract_u64(&out[0], &carry, result & kPrime[0]); 855238384Sjkim subtract_u64(&out[1], &carry, carry); 856238384Sjkim subtract_u64(&out[2], &carry, carry); 857238384Sjkim subtract_u64(&out[3], &carry, carry); 858238384Sjkim 859238384Sjkim subtract_u64(&out[1], &carry, result & kPrime[1]); 860238384Sjkim subtract_u64(&out[2], &carry, carry); 861238384Sjkim subtract_u64(&out[3], &carry, carry); 862238384Sjkim 863238384Sjkim subtract_u64(&out[2], &carry, result & kPrime[2]); 864238384Sjkim subtract_u64(&out[3], &carry, carry); 865238384Sjkim 866238384Sjkim subtract_u64(&out[3], &carry, result & kPrime[3]); 867238384Sjkim } 868238384Sjkim 869238384Sjkimstatic void smallfelem_square_contract(smallfelem out, const smallfelem in) 870238384Sjkim { 871238384Sjkim longfelem longtmp; 872238384Sjkim felem tmp; 873238384Sjkim 874238384Sjkim smallfelem_square(longtmp, in); 875238384Sjkim felem_reduce(tmp, longtmp); 876238384Sjkim felem_contract(out, tmp); 877238384Sjkim } 878238384Sjkim 879238384Sjkimstatic void smallfelem_mul_contract(smallfelem out, const smallfelem in1, const smallfelem in2) 880238384Sjkim { 881238384Sjkim longfelem longtmp; 882238384Sjkim felem tmp; 883238384Sjkim 884238384Sjkim smallfelem_mul(longtmp, in1, in2); 885238384Sjkim felem_reduce(tmp, longtmp); 886238384Sjkim felem_contract(out, tmp); 887238384Sjkim } 888238384Sjkim 889238384Sjkim/* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0 890238384Sjkim * otherwise. 891238384Sjkim * On entry: 892238384Sjkim * small[i] < 2^64 893238384Sjkim */ 894238384Sjkimstatic limb smallfelem_is_zero(const smallfelem small) 895238384Sjkim { 896238384Sjkim limb result; 897238384Sjkim u64 is_p; 898238384Sjkim 899238384Sjkim u64 is_zero = small[0] | small[1] | small[2] | small[3]; 900238384Sjkim is_zero--; 901238384Sjkim is_zero &= is_zero << 32; 902238384Sjkim is_zero &= is_zero << 16; 903238384Sjkim is_zero &= is_zero << 8; 904238384Sjkim is_zero &= is_zero << 4; 905238384Sjkim is_zero &= is_zero << 2; 906238384Sjkim is_zero &= is_zero << 1; 907238384Sjkim is_zero = ((s64) is_zero) >> 63; 908238384Sjkim 909238384Sjkim is_p = (small[0] ^ kPrime[0]) | 910238384Sjkim (small[1] ^ kPrime[1]) | 911238384Sjkim (small[2] ^ kPrime[2]) | 912238384Sjkim (small[3] ^ kPrime[3]); 913238384Sjkim is_p--; 914238384Sjkim is_p &= is_p << 32; 915238384Sjkim is_p &= is_p << 16; 916238384Sjkim is_p &= is_p << 8; 917238384Sjkim is_p &= is_p << 4; 918238384Sjkim is_p &= is_p << 2; 919238384Sjkim is_p &= is_p << 1; 920238384Sjkim is_p = ((s64) is_p) >> 63; 921238384Sjkim 922238384Sjkim is_zero |= is_p; 923238384Sjkim 924238384Sjkim result = is_zero; 925238384Sjkim result |= ((limb) is_zero) << 64; 926238384Sjkim return result; 927238384Sjkim } 928238384Sjkim 929238384Sjkimstatic int smallfelem_is_zero_int(const smallfelem small) 930238384Sjkim { 931238384Sjkim return (int) (smallfelem_is_zero(small) & ((limb)1)); 932238384Sjkim } 933238384Sjkim 934238384Sjkim/* felem_inv calculates |out| = |in|^{-1} 935238384Sjkim * 936238384Sjkim * Based on Fermat's Little Theorem: 937238384Sjkim * a^p = a (mod p) 938238384Sjkim * a^{p-1} = 1 (mod p) 939238384Sjkim * a^{p-2} = a^{-1} (mod p) 940238384Sjkim */ 941238384Sjkimstatic void felem_inv(felem out, const felem in) 942238384Sjkim { 943238384Sjkim felem ftmp, ftmp2; 944238384Sjkim /* each e_I will hold |in|^{2^I - 1} */ 945238384Sjkim felem e2, e4, e8, e16, e32, e64; 946238384Sjkim longfelem tmp; 947238384Sjkim unsigned i; 948238384Sjkim 949238384Sjkim felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */ 950238384Sjkim felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */ 951238384Sjkim felem_assign(e2, ftmp); 952238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */ 953238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^2 */ 954238384Sjkim felem_mul(tmp, ftmp, e2); felem_reduce(ftmp, tmp); /* 2^4 - 2^0 */ 955238384Sjkim felem_assign(e4, ftmp); 956238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^5 - 2^1 */ 957238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^6 - 2^2 */ 958238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^7 - 2^3 */ 959238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^8 - 2^4 */ 960238384Sjkim felem_mul(tmp, ftmp, e4); felem_reduce(ftmp, tmp); /* 2^8 - 2^0 */ 961238384Sjkim felem_assign(e8, ftmp); 962238384Sjkim for (i = 0; i < 8; i++) { 963238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); 964238384Sjkim } /* 2^16 - 2^8 */ 965238384Sjkim felem_mul(tmp, ftmp, e8); felem_reduce(ftmp, tmp); /* 2^16 - 2^0 */ 966238384Sjkim felem_assign(e16, ftmp); 967238384Sjkim for (i = 0; i < 16; i++) { 968238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); 969238384Sjkim } /* 2^32 - 2^16 */ 970238384Sjkim felem_mul(tmp, ftmp, e16); felem_reduce(ftmp, tmp); /* 2^32 - 2^0 */ 971238384Sjkim felem_assign(e32, ftmp); 972238384Sjkim for (i = 0; i < 32; i++) { 973238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); 974238384Sjkim } /* 2^64 - 2^32 */ 975238384Sjkim felem_assign(e64, ftmp); 976238384Sjkim felem_mul(tmp, ftmp, in); felem_reduce(ftmp, tmp); /* 2^64 - 2^32 + 2^0 */ 977238384Sjkim for (i = 0; i < 192; i++) { 978238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); 979238384Sjkim } /* 2^256 - 2^224 + 2^192 */ 980238384Sjkim 981238384Sjkim felem_mul(tmp, e64, e32); felem_reduce(ftmp2, tmp); /* 2^64 - 2^0 */ 982238384Sjkim for (i = 0; i < 16; i++) { 983238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); 984238384Sjkim } /* 2^80 - 2^16 */ 985238384Sjkim felem_mul(tmp, ftmp2, e16); felem_reduce(ftmp2, tmp); /* 2^80 - 2^0 */ 986238384Sjkim for (i = 0; i < 8; i++) { 987238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); 988238384Sjkim } /* 2^88 - 2^8 */ 989238384Sjkim felem_mul(tmp, ftmp2, e8); felem_reduce(ftmp2, tmp); /* 2^88 - 2^0 */ 990238384Sjkim for (i = 0; i < 4; i++) { 991238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); 992238384Sjkim } /* 2^92 - 2^4 */ 993238384Sjkim felem_mul(tmp, ftmp2, e4); felem_reduce(ftmp2, tmp); /* 2^92 - 2^0 */ 994238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^93 - 2^1 */ 995238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^2 */ 996238384Sjkim felem_mul(tmp, ftmp2, e2); felem_reduce(ftmp2, tmp); /* 2^94 - 2^0 */ 997238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^95 - 2^1 */ 998238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp2, tmp); /* 2^96 - 2^2 */ 999238384Sjkim felem_mul(tmp, ftmp2, in); felem_reduce(ftmp2, tmp); /* 2^96 - 3 */ 1000238384Sjkim 1001238384Sjkim felem_mul(tmp, ftmp2, ftmp); felem_reduce(out, tmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */ 1002238384Sjkim } 1003238384Sjkim 1004238384Sjkimstatic void smallfelem_inv_contract(smallfelem out, const smallfelem in) 1005238384Sjkim { 1006238384Sjkim felem tmp; 1007238384Sjkim 1008238384Sjkim smallfelem_expand(tmp, in); 1009238384Sjkim felem_inv(tmp, tmp); 1010238384Sjkim felem_contract(out, tmp); 1011238384Sjkim } 1012238384Sjkim 1013238384Sjkim/* Group operations 1014238384Sjkim * ---------------- 1015238384Sjkim * 1016238384Sjkim * Building on top of the field operations we have the operations on the 1017238384Sjkim * elliptic curve group itself. Points on the curve are represented in Jacobian 1018238384Sjkim * coordinates */ 1019238384Sjkim 1020238384Sjkim/* point_double calculates 2*(x_in, y_in, z_in) 1021238384Sjkim * 1022238384Sjkim * The method is taken from: 1023238384Sjkim * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b 1024238384Sjkim * 1025238384Sjkim * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed. 1026238384Sjkim * while x_out == y_in is not (maybe this works, but it's not tested). */ 1027238384Sjkimstatic void 1028238384Sjkimpoint_double(felem x_out, felem y_out, felem z_out, 1029238384Sjkim const felem x_in, const felem y_in, const felem z_in) 1030238384Sjkim { 1031238384Sjkim longfelem tmp, tmp2; 1032238384Sjkim felem delta, gamma, beta, alpha, ftmp, ftmp2; 1033238384Sjkim smallfelem small1, small2; 1034238384Sjkim 1035238384Sjkim felem_assign(ftmp, x_in); 1036238384Sjkim /* ftmp[i] < 2^106 */ 1037238384Sjkim felem_assign(ftmp2, x_in); 1038238384Sjkim /* ftmp2[i] < 2^106 */ 1039238384Sjkim 1040238384Sjkim /* delta = z^2 */ 1041238384Sjkim felem_square(tmp, z_in); 1042238384Sjkim felem_reduce(delta, tmp); 1043238384Sjkim /* delta[i] < 2^101 */ 1044238384Sjkim 1045238384Sjkim /* gamma = y^2 */ 1046238384Sjkim felem_square(tmp, y_in); 1047238384Sjkim felem_reduce(gamma, tmp); 1048238384Sjkim /* gamma[i] < 2^101 */ 1049238384Sjkim felem_shrink(small1, gamma); 1050238384Sjkim 1051238384Sjkim /* beta = x*gamma */ 1052238384Sjkim felem_small_mul(tmp, small1, x_in); 1053238384Sjkim felem_reduce(beta, tmp); 1054238384Sjkim /* beta[i] < 2^101 */ 1055238384Sjkim 1056238384Sjkim /* alpha = 3*(x-delta)*(x+delta) */ 1057238384Sjkim felem_diff(ftmp, delta); 1058238384Sjkim /* ftmp[i] < 2^105 + 2^106 < 2^107 */ 1059238384Sjkim felem_sum(ftmp2, delta); 1060238384Sjkim /* ftmp2[i] < 2^105 + 2^106 < 2^107 */ 1061238384Sjkim felem_scalar(ftmp2, 3); 1062238384Sjkim /* ftmp2[i] < 3 * 2^107 < 2^109 */ 1063238384Sjkim felem_mul(tmp, ftmp, ftmp2); 1064238384Sjkim felem_reduce(alpha, tmp); 1065238384Sjkim /* alpha[i] < 2^101 */ 1066238384Sjkim felem_shrink(small2, alpha); 1067238384Sjkim 1068238384Sjkim /* x' = alpha^2 - 8*beta */ 1069238384Sjkim smallfelem_square(tmp, small2); 1070238384Sjkim felem_reduce(x_out, tmp); 1071238384Sjkim felem_assign(ftmp, beta); 1072238384Sjkim felem_scalar(ftmp, 8); 1073238384Sjkim /* ftmp[i] < 8 * 2^101 = 2^104 */ 1074238384Sjkim felem_diff(x_out, ftmp); 1075238384Sjkim /* x_out[i] < 2^105 + 2^101 < 2^106 */ 1076238384Sjkim 1077238384Sjkim /* z' = (y + z)^2 - gamma - delta */ 1078238384Sjkim felem_sum(delta, gamma); 1079238384Sjkim /* delta[i] < 2^101 + 2^101 = 2^102 */ 1080238384Sjkim felem_assign(ftmp, y_in); 1081238384Sjkim felem_sum(ftmp, z_in); 1082238384Sjkim /* ftmp[i] < 2^106 + 2^106 = 2^107 */ 1083238384Sjkim felem_square(tmp, ftmp); 1084238384Sjkim felem_reduce(z_out, tmp); 1085238384Sjkim felem_diff(z_out, delta); 1086238384Sjkim /* z_out[i] < 2^105 + 2^101 < 2^106 */ 1087238384Sjkim 1088238384Sjkim /* y' = alpha*(4*beta - x') - 8*gamma^2 */ 1089238384Sjkim felem_scalar(beta, 4); 1090238384Sjkim /* beta[i] < 4 * 2^101 = 2^103 */ 1091238384Sjkim felem_diff_zero107(beta, x_out); 1092238384Sjkim /* beta[i] < 2^107 + 2^103 < 2^108 */ 1093238384Sjkim felem_small_mul(tmp, small2, beta); 1094238384Sjkim /* tmp[i] < 7 * 2^64 < 2^67 */ 1095238384Sjkim smallfelem_square(tmp2, small1); 1096238384Sjkim /* tmp2[i] < 7 * 2^64 */ 1097238384Sjkim longfelem_scalar(tmp2, 8); 1098238384Sjkim /* tmp2[i] < 8 * 7 * 2^64 = 7 * 2^67 */ 1099238384Sjkim longfelem_diff(tmp, tmp2); 1100238384Sjkim /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */ 1101238384Sjkim felem_reduce_zero105(y_out, tmp); 1102238384Sjkim /* y_out[i] < 2^106 */ 1103238384Sjkim } 1104238384Sjkim 1105238384Sjkim/* point_double_small is the same as point_double, except that it operates on 1106238384Sjkim * smallfelems */ 1107238384Sjkimstatic void 1108238384Sjkimpoint_double_small(smallfelem x_out, smallfelem y_out, smallfelem z_out, 1109238384Sjkim const smallfelem x_in, const smallfelem y_in, const smallfelem z_in) 1110238384Sjkim { 1111238384Sjkim felem felem_x_out, felem_y_out, felem_z_out; 1112238384Sjkim felem felem_x_in, felem_y_in, felem_z_in; 1113238384Sjkim 1114238384Sjkim smallfelem_expand(felem_x_in, x_in); 1115238384Sjkim smallfelem_expand(felem_y_in, y_in); 1116238384Sjkim smallfelem_expand(felem_z_in, z_in); 1117238384Sjkim point_double(felem_x_out, felem_y_out, felem_z_out, 1118238384Sjkim felem_x_in, felem_y_in, felem_z_in); 1119238384Sjkim felem_shrink(x_out, felem_x_out); 1120238384Sjkim felem_shrink(y_out, felem_y_out); 1121238384Sjkim felem_shrink(z_out, felem_z_out); 1122238384Sjkim } 1123238384Sjkim 1124238384Sjkim/* copy_conditional copies in to out iff mask is all ones. */ 1125238384Sjkimstatic void 1126238384Sjkimcopy_conditional(felem out, const felem in, limb mask) 1127238384Sjkim { 1128238384Sjkim unsigned i; 1129238384Sjkim for (i = 0; i < NLIMBS; ++i) 1130238384Sjkim { 1131238384Sjkim const limb tmp = mask & (in[i] ^ out[i]); 1132238384Sjkim out[i] ^= tmp; 1133238384Sjkim } 1134238384Sjkim } 1135238384Sjkim 1136238384Sjkim/* copy_small_conditional copies in to out iff mask is all ones. */ 1137238384Sjkimstatic void 1138238384Sjkimcopy_small_conditional(felem out, const smallfelem in, limb mask) 1139238384Sjkim { 1140238384Sjkim unsigned i; 1141238384Sjkim const u64 mask64 = mask; 1142238384Sjkim for (i = 0; i < NLIMBS; ++i) 1143238384Sjkim { 1144238384Sjkim out[i] = ((limb) (in[i] & mask64)) | (out[i] & ~mask); 1145238384Sjkim } 1146238384Sjkim } 1147238384Sjkim 1148238384Sjkim/* point_add calcuates (x1, y1, z1) + (x2, y2, z2) 1149238384Sjkim * 1150238384Sjkim * The method is taken from: 1151238384Sjkim * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl, 1152238384Sjkim * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity). 1153238384Sjkim * 1154238384Sjkim * This function includes a branch for checking whether the two input points 1155238384Sjkim * are equal, (while not equal to the point at infinity). This case never 1156238384Sjkim * happens during single point multiplication, so there is no timing leak for 1157238384Sjkim * ECDH or ECDSA signing. */ 1158238384Sjkimstatic void point_add(felem x3, felem y3, felem z3, 1159238384Sjkim const felem x1, const felem y1, const felem z1, 1160238384Sjkim const int mixed, const smallfelem x2, const smallfelem y2, const smallfelem z2) 1161238384Sjkim { 1162238384Sjkim felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out; 1163238384Sjkim longfelem tmp, tmp2; 1164238384Sjkim smallfelem small1, small2, small3, small4, small5; 1165238384Sjkim limb x_equal, y_equal, z1_is_zero, z2_is_zero; 1166238384Sjkim 1167238384Sjkim felem_shrink(small3, z1); 1168238384Sjkim 1169238384Sjkim z1_is_zero = smallfelem_is_zero(small3); 1170238384Sjkim z2_is_zero = smallfelem_is_zero(z2); 1171238384Sjkim 1172238384Sjkim /* ftmp = z1z1 = z1**2 */ 1173238384Sjkim smallfelem_square(tmp, small3); 1174238384Sjkim felem_reduce(ftmp, tmp); 1175238384Sjkim /* ftmp[i] < 2^101 */ 1176238384Sjkim felem_shrink(small1, ftmp); 1177238384Sjkim 1178238384Sjkim if(!mixed) 1179238384Sjkim { 1180238384Sjkim /* ftmp2 = z2z2 = z2**2 */ 1181238384Sjkim smallfelem_square(tmp, z2); 1182238384Sjkim felem_reduce(ftmp2, tmp); 1183238384Sjkim /* ftmp2[i] < 2^101 */ 1184238384Sjkim felem_shrink(small2, ftmp2); 1185238384Sjkim 1186238384Sjkim felem_shrink(small5, x1); 1187238384Sjkim 1188238384Sjkim /* u1 = ftmp3 = x1*z2z2 */ 1189238384Sjkim smallfelem_mul(tmp, small5, small2); 1190238384Sjkim felem_reduce(ftmp3, tmp); 1191238384Sjkim /* ftmp3[i] < 2^101 */ 1192238384Sjkim 1193238384Sjkim /* ftmp5 = z1 + z2 */ 1194238384Sjkim felem_assign(ftmp5, z1); 1195238384Sjkim felem_small_sum(ftmp5, z2); 1196238384Sjkim /* ftmp5[i] < 2^107 */ 1197238384Sjkim 1198238384Sjkim /* ftmp5 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2 */ 1199238384Sjkim felem_square(tmp, ftmp5); 1200238384Sjkim felem_reduce(ftmp5, tmp); 1201238384Sjkim /* ftmp2 = z2z2 + z1z1 */ 1202238384Sjkim felem_sum(ftmp2, ftmp); 1203238384Sjkim /* ftmp2[i] < 2^101 + 2^101 = 2^102 */ 1204238384Sjkim felem_diff(ftmp5, ftmp2); 1205238384Sjkim /* ftmp5[i] < 2^105 + 2^101 < 2^106 */ 1206238384Sjkim 1207238384Sjkim /* ftmp2 = z2 * z2z2 */ 1208238384Sjkim smallfelem_mul(tmp, small2, z2); 1209238384Sjkim felem_reduce(ftmp2, tmp); 1210238384Sjkim 1211238384Sjkim /* s1 = ftmp2 = y1 * z2**3 */ 1212238384Sjkim felem_mul(tmp, y1, ftmp2); 1213238384Sjkim felem_reduce(ftmp6, tmp); 1214238384Sjkim /* ftmp6[i] < 2^101 */ 1215238384Sjkim } 1216238384Sjkim else 1217238384Sjkim { 1218238384Sjkim /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */ 1219238384Sjkim 1220238384Sjkim /* u1 = ftmp3 = x1*z2z2 */ 1221238384Sjkim felem_assign(ftmp3, x1); 1222238384Sjkim /* ftmp3[i] < 2^106 */ 1223238384Sjkim 1224238384Sjkim /* ftmp5 = 2z1z2 */ 1225238384Sjkim felem_assign(ftmp5, z1); 1226238384Sjkim felem_scalar(ftmp5, 2); 1227238384Sjkim /* ftmp5[i] < 2*2^106 = 2^107 */ 1228238384Sjkim 1229238384Sjkim /* s1 = ftmp2 = y1 * z2**3 */ 1230238384Sjkim felem_assign(ftmp6, y1); 1231238384Sjkim /* ftmp6[i] < 2^106 */ 1232238384Sjkim } 1233238384Sjkim 1234238384Sjkim /* u2 = x2*z1z1 */ 1235238384Sjkim smallfelem_mul(tmp, x2, small1); 1236238384Sjkim felem_reduce(ftmp4, tmp); 1237238384Sjkim 1238238384Sjkim /* h = ftmp4 = u2 - u1 */ 1239238384Sjkim felem_diff_zero107(ftmp4, ftmp3); 1240238384Sjkim /* ftmp4[i] < 2^107 + 2^101 < 2^108 */ 1241238384Sjkim felem_shrink(small4, ftmp4); 1242238384Sjkim 1243238384Sjkim x_equal = smallfelem_is_zero(small4); 1244238384Sjkim 1245238384Sjkim /* z_out = ftmp5 * h */ 1246238384Sjkim felem_small_mul(tmp, small4, ftmp5); 1247238384Sjkim felem_reduce(z_out, tmp); 1248238384Sjkim /* z_out[i] < 2^101 */ 1249238384Sjkim 1250238384Sjkim /* ftmp = z1 * z1z1 */ 1251238384Sjkim smallfelem_mul(tmp, small1, small3); 1252238384Sjkim felem_reduce(ftmp, tmp); 1253238384Sjkim 1254238384Sjkim /* s2 = tmp = y2 * z1**3 */ 1255238384Sjkim felem_small_mul(tmp, y2, ftmp); 1256238384Sjkim felem_reduce(ftmp5, tmp); 1257238384Sjkim 1258238384Sjkim /* r = ftmp5 = (s2 - s1)*2 */ 1259238384Sjkim felem_diff_zero107(ftmp5, ftmp6); 1260238384Sjkim /* ftmp5[i] < 2^107 + 2^107 = 2^108*/ 1261238384Sjkim felem_scalar(ftmp5, 2); 1262238384Sjkim /* ftmp5[i] < 2^109 */ 1263238384Sjkim felem_shrink(small1, ftmp5); 1264238384Sjkim y_equal = smallfelem_is_zero(small1); 1265238384Sjkim 1266238384Sjkim if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) 1267238384Sjkim { 1268238384Sjkim point_double(x3, y3, z3, x1, y1, z1); 1269238384Sjkim return; 1270238384Sjkim } 1271238384Sjkim 1272238384Sjkim /* I = ftmp = (2h)**2 */ 1273238384Sjkim felem_assign(ftmp, ftmp4); 1274238384Sjkim felem_scalar(ftmp, 2); 1275238384Sjkim /* ftmp[i] < 2*2^108 = 2^109 */ 1276238384Sjkim felem_square(tmp, ftmp); 1277238384Sjkim felem_reduce(ftmp, tmp); 1278238384Sjkim 1279238384Sjkim /* J = ftmp2 = h * I */ 1280238384Sjkim felem_mul(tmp, ftmp4, ftmp); 1281238384Sjkim felem_reduce(ftmp2, tmp); 1282238384Sjkim 1283238384Sjkim /* V = ftmp4 = U1 * I */ 1284238384Sjkim felem_mul(tmp, ftmp3, ftmp); 1285238384Sjkim felem_reduce(ftmp4, tmp); 1286238384Sjkim 1287238384Sjkim /* x_out = r**2 - J - 2V */ 1288238384Sjkim smallfelem_square(tmp, small1); 1289238384Sjkim felem_reduce(x_out, tmp); 1290238384Sjkim felem_assign(ftmp3, ftmp4); 1291238384Sjkim felem_scalar(ftmp4, 2); 1292238384Sjkim felem_sum(ftmp4, ftmp2); 1293238384Sjkim /* ftmp4[i] < 2*2^101 + 2^101 < 2^103 */ 1294238384Sjkim felem_diff(x_out, ftmp4); 1295238384Sjkim /* x_out[i] < 2^105 + 2^101 */ 1296238384Sjkim 1297238384Sjkim /* y_out = r(V-x_out) - 2 * s1 * J */ 1298238384Sjkim felem_diff_zero107(ftmp3, x_out); 1299238384Sjkim /* ftmp3[i] < 2^107 + 2^101 < 2^108 */ 1300238384Sjkim felem_small_mul(tmp, small1, ftmp3); 1301238384Sjkim felem_mul(tmp2, ftmp6, ftmp2); 1302238384Sjkim longfelem_scalar(tmp2, 2); 1303238384Sjkim /* tmp2[i] < 2*2^67 = 2^68 */ 1304238384Sjkim longfelem_diff(tmp, tmp2); 1305238384Sjkim /* tmp[i] < 2^67 + 2^70 + 2^40 < 2^71 */ 1306238384Sjkim felem_reduce_zero105(y_out, tmp); 1307238384Sjkim /* y_out[i] < 2^106 */ 1308238384Sjkim 1309238384Sjkim copy_small_conditional(x_out, x2, z1_is_zero); 1310238384Sjkim copy_conditional(x_out, x1, z2_is_zero); 1311238384Sjkim copy_small_conditional(y_out, y2, z1_is_zero); 1312238384Sjkim copy_conditional(y_out, y1, z2_is_zero); 1313238384Sjkim copy_small_conditional(z_out, z2, z1_is_zero); 1314238384Sjkim copy_conditional(z_out, z1, z2_is_zero); 1315238384Sjkim felem_assign(x3, x_out); 1316238384Sjkim felem_assign(y3, y_out); 1317238384Sjkim felem_assign(z3, z_out); 1318238384Sjkim } 1319238384Sjkim 1320238384Sjkim/* point_add_small is the same as point_add, except that it operates on 1321238384Sjkim * smallfelems */ 1322238384Sjkimstatic void point_add_small(smallfelem x3, smallfelem y3, smallfelem z3, 1323238384Sjkim smallfelem x1, smallfelem y1, smallfelem z1, 1324238384Sjkim smallfelem x2, smallfelem y2, smallfelem z2) 1325238384Sjkim { 1326238384Sjkim felem felem_x3, felem_y3, felem_z3; 1327238384Sjkim felem felem_x1, felem_y1, felem_z1; 1328238384Sjkim smallfelem_expand(felem_x1, x1); 1329238384Sjkim smallfelem_expand(felem_y1, y1); 1330238384Sjkim smallfelem_expand(felem_z1, z1); 1331238384Sjkim point_add(felem_x3, felem_y3, felem_z3, felem_x1, felem_y1, felem_z1, 0, x2, y2, z2); 1332238384Sjkim felem_shrink(x3, felem_x3); 1333238384Sjkim felem_shrink(y3, felem_y3); 1334238384Sjkim felem_shrink(z3, felem_z3); 1335238384Sjkim } 1336238384Sjkim 1337238384Sjkim/* Base point pre computation 1338238384Sjkim * -------------------------- 1339238384Sjkim * 1340238384Sjkim * Two different sorts of precomputed tables are used in the following code. 1341238384Sjkim * Each contain various points on the curve, where each point is three field 1342238384Sjkim * elements (x, y, z). 1343238384Sjkim * 1344238384Sjkim * For the base point table, z is usually 1 (0 for the point at infinity). 1345238384Sjkim * This table has 2 * 16 elements, starting with the following: 1346238384Sjkim * index | bits | point 1347238384Sjkim * ------+---------+------------------------------ 1348238384Sjkim * 0 | 0 0 0 0 | 0G 1349238384Sjkim * 1 | 0 0 0 1 | 1G 1350238384Sjkim * 2 | 0 0 1 0 | 2^64G 1351238384Sjkim * 3 | 0 0 1 1 | (2^64 + 1)G 1352238384Sjkim * 4 | 0 1 0 0 | 2^128G 1353238384Sjkim * 5 | 0 1 0 1 | (2^128 + 1)G 1354238384Sjkim * 6 | 0 1 1 0 | (2^128 + 2^64)G 1355238384Sjkim * 7 | 0 1 1 1 | (2^128 + 2^64 + 1)G 1356238384Sjkim * 8 | 1 0 0 0 | 2^192G 1357238384Sjkim * 9 | 1 0 0 1 | (2^192 + 1)G 1358238384Sjkim * 10 | 1 0 1 0 | (2^192 + 2^64)G 1359238384Sjkim * 11 | 1 0 1 1 | (2^192 + 2^64 + 1)G 1360238384Sjkim * 12 | 1 1 0 0 | (2^192 + 2^128)G 1361238384Sjkim * 13 | 1 1 0 1 | (2^192 + 2^128 + 1)G 1362238384Sjkim * 14 | 1 1 1 0 | (2^192 + 2^128 + 2^64)G 1363238384Sjkim * 15 | 1 1 1 1 | (2^192 + 2^128 + 2^64 + 1)G 1364238384Sjkim * followed by a copy of this with each element multiplied by 2^32. 1365238384Sjkim * 1366238384Sjkim * The reason for this is so that we can clock bits into four different 1367238384Sjkim * locations when doing simple scalar multiplies against the base point, 1368238384Sjkim * and then another four locations using the second 16 elements. 1369238384Sjkim * 1370238384Sjkim * Tables for other points have table[i] = iG for i in 0 .. 16. */ 1371238384Sjkim 1372238384Sjkim/* gmul is the table of precomputed base points */ 1373238384Sjkimstatic const smallfelem gmul[2][16][3] = 1374238384Sjkim{{{{0, 0, 0, 0}, 1375238384Sjkim {0, 0, 0, 0}, 1376238384Sjkim {0, 0, 0, 0}}, 1377238384Sjkim {{0xf4a13945d898c296, 0x77037d812deb33a0, 0xf8bce6e563a440f2, 0x6b17d1f2e12c4247}, 1378238384Sjkim {0xcbb6406837bf51f5, 0x2bce33576b315ece, 0x8ee7eb4a7c0f9e16, 0x4fe342e2fe1a7f9b}, 1379238384Sjkim {1, 0, 0, 0}}, 1380238384Sjkim {{0x90e75cb48e14db63, 0x29493baaad651f7e, 0x8492592e326e25de, 0x0fa822bc2811aaa5}, 1381238384Sjkim {0xe41124545f462ee7, 0x34b1a65050fe82f5, 0x6f4ad4bcb3df188b, 0xbff44ae8f5dba80d}, 1382238384Sjkim {1, 0, 0, 0}}, 1383238384Sjkim {{0x93391ce2097992af, 0xe96c98fd0d35f1fa, 0xb257c0de95e02789, 0x300a4bbc89d6726f}, 1384238384Sjkim {0xaa54a291c08127a0, 0x5bb1eeada9d806a5, 0x7f1ddb25ff1e3c6f, 0x72aac7e0d09b4644}, 1385238384Sjkim {1, 0, 0, 0}}, 1386238384Sjkim {{0x57c84fc9d789bd85, 0xfc35ff7dc297eac3, 0xfb982fd588c6766e, 0x447d739beedb5e67}, 1387238384Sjkim {0x0c7e33c972e25b32, 0x3d349b95a7fae500, 0xe12e9d953a4aaff7, 0x2d4825ab834131ee}, 1388238384Sjkim {1, 0, 0, 0}}, 1389238384Sjkim {{0x13949c932a1d367f, 0xef7fbd2b1a0a11b7, 0xddc6068bb91dfc60, 0xef9519328a9c72ff}, 1390238384Sjkim {0x196035a77376d8a8, 0x23183b0895ca1740, 0xc1ee9807022c219c, 0x611e9fc37dbb2c9b}, 1391238384Sjkim {1, 0, 0, 0}}, 1392238384Sjkim {{0xcae2b1920b57f4bc, 0x2936df5ec6c9bc36, 0x7dea6482e11238bf, 0x550663797b51f5d8}, 1393238384Sjkim {0x44ffe216348a964c, 0x9fb3d576dbdefbe1, 0x0afa40018d9d50e5, 0x157164848aecb851}, 1394238384Sjkim {1, 0, 0, 0}}, 1395238384Sjkim {{0xe48ecafffc5cde01, 0x7ccd84e70d715f26, 0xa2e8f483f43e4391, 0xeb5d7745b21141ea}, 1396238384Sjkim {0xcac917e2731a3479, 0x85f22cfe2844b645, 0x0990e6a158006cee, 0xeafd72ebdbecc17b}, 1397238384Sjkim {1, 0, 0, 0}}, 1398238384Sjkim {{0x6cf20ffb313728be, 0x96439591a3c6b94a, 0x2736ff8344315fc5, 0xa6d39677a7849276}, 1399238384Sjkim {0xf2bab833c357f5f4, 0x824a920c2284059b, 0x66b8babd2d27ecdf, 0x674f84749b0b8816}, 1400238384Sjkim {1, 0, 0, 0}}, 1401238384Sjkim {{0x2df48c04677c8a3e, 0x74e02f080203a56b, 0x31855f7db8c7fedb, 0x4e769e7672c9ddad}, 1402238384Sjkim {0xa4c36165b824bbb0, 0xfb9ae16f3b9122a5, 0x1ec0057206947281, 0x42b99082de830663}, 1403238384Sjkim {1, 0, 0, 0}}, 1404238384Sjkim {{0x6ef95150dda868b9, 0xd1f89e799c0ce131, 0x7fdc1ca008a1c478, 0x78878ef61c6ce04d}, 1405238384Sjkim {0x9c62b9121fe0d976, 0x6ace570ebde08d4f, 0xde53142c12309def, 0xb6cb3f5d7b72c321}, 1406238384Sjkim {1, 0, 0, 0}}, 1407238384Sjkim {{0x7f991ed2c31a3573, 0x5b82dd5bd54fb496, 0x595c5220812ffcae, 0x0c88bc4d716b1287}, 1408238384Sjkim {0x3a57bf635f48aca8, 0x7c8181f4df2564f3, 0x18d1b5b39c04e6aa, 0xdd5ddea3f3901dc6}, 1409238384Sjkim {1, 0, 0, 0}}, 1410238384Sjkim {{0xe96a79fb3e72ad0c, 0x43a0a28c42ba792f, 0xefe0a423083e49f3, 0x68f344af6b317466}, 1411238384Sjkim {0xcdfe17db3fb24d4a, 0x668bfc2271f5c626, 0x604ed93c24d67ff3, 0x31b9c405f8540a20}, 1412238384Sjkim {1, 0, 0, 0}}, 1413238384Sjkim {{0xd36b4789a2582e7f, 0x0d1a10144ec39c28, 0x663c62c3edbad7a0, 0x4052bf4b6f461db9}, 1414238384Sjkim {0x235a27c3188d25eb, 0xe724f33999bfcc5b, 0x862be6bd71d70cc8, 0xfecf4d5190b0fc61}, 1415238384Sjkim {1, 0, 0, 0}}, 1416238384Sjkim {{0x74346c10a1d4cfac, 0xafdf5cc08526a7a4, 0x123202a8f62bff7a, 0x1eddbae2c802e41a}, 1417238384Sjkim {0x8fa0af2dd603f844, 0x36e06b7e4c701917, 0x0c45f45273db33a0, 0x43104d86560ebcfc}, 1418238384Sjkim {1, 0, 0, 0}}, 1419238384Sjkim {{0x9615b5110d1d78e5, 0x66b0de3225c4744b, 0x0a4a46fb6aaf363a, 0xb48e26b484f7a21c}, 1420238384Sjkim {0x06ebb0f621a01b2d, 0xc004e4048b7b0f98, 0x64131bcdfed6f668, 0xfac015404d4d3dab}, 1421238384Sjkim {1, 0, 0, 0}}}, 1422238384Sjkim {{{0, 0, 0, 0}, 1423238384Sjkim {0, 0, 0, 0}, 1424238384Sjkim {0, 0, 0, 0}}, 1425238384Sjkim {{0x3a5a9e22185a5943, 0x1ab919365c65dfb6, 0x21656b32262c71da, 0x7fe36b40af22af89}, 1426238384Sjkim {0xd50d152c699ca101, 0x74b3d5867b8af212, 0x9f09f40407dca6f1, 0xe697d45825b63624}, 1427238384Sjkim {1, 0, 0, 0}}, 1428238384Sjkim {{0xa84aa9397512218e, 0xe9a521b074ca0141, 0x57880b3a18a2e902, 0x4a5b506612a677a6}, 1429238384Sjkim {0x0beada7a4c4f3840, 0x626db15419e26d9d, 0xc42604fbe1627d40, 0xeb13461ceac089f1}, 1430238384Sjkim {1, 0, 0, 0}}, 1431238384Sjkim {{0xf9faed0927a43281, 0x5e52c4144103ecbc, 0xc342967aa815c857, 0x0781b8291c6a220a}, 1432238384Sjkim {0x5a8343ceeac55f80, 0x88f80eeee54a05e3, 0x97b2a14f12916434, 0x690cde8df0151593}, 1433238384Sjkim {1, 0, 0, 0}}, 1434238384Sjkim {{0xaee9c75df7f82f2a, 0x9e4c35874afdf43a, 0xf5622df437371326, 0x8a535f566ec73617}, 1435238384Sjkim {0xc5f9a0ac223094b7, 0xcde533864c8c7669, 0x37e02819085a92bf, 0x0455c08468b08bd7}, 1436238384Sjkim {1, 0, 0, 0}}, 1437238384Sjkim {{0x0c0a6e2c9477b5d9, 0xf9a4bf62876dc444, 0x5050a949b6cdc279, 0x06bada7ab77f8276}, 1438238384Sjkim {0xc8b4aed1ea48dac9, 0xdebd8a4b7ea1070f, 0x427d49101366eb70, 0x5b476dfd0e6cb18a}, 1439238384Sjkim {1, 0, 0, 0}}, 1440238384Sjkim {{0x7c5c3e44278c340a, 0x4d54606812d66f3b, 0x29a751b1ae23c5d8, 0x3e29864e8a2ec908}, 1441238384Sjkim {0x142d2a6626dbb850, 0xad1744c4765bd780, 0x1f150e68e322d1ed, 0x239b90ea3dc31e7e}, 1442238384Sjkim {1, 0, 0, 0}}, 1443238384Sjkim {{0x78c416527a53322a, 0x305dde6709776f8e, 0xdbcab759f8862ed4, 0x820f4dd949f72ff7}, 1444238384Sjkim {0x6cc544a62b5debd4, 0x75be5d937b4e8cc4, 0x1b481b1b215c14d3, 0x140406ec783a05ec}, 1445238384Sjkim {1, 0, 0, 0}}, 1446238384Sjkim {{0x6a703f10e895df07, 0xfd75f3fa01876bd8, 0xeb5b06e70ce08ffe, 0x68f6b8542783dfee}, 1447238384Sjkim {0x90c76f8a78712655, 0xcf5293d2f310bf7f, 0xfbc8044dfda45028, 0xcbe1feba92e40ce6}, 1448238384Sjkim {1, 0, 0, 0}}, 1449238384Sjkim {{0xe998ceea4396e4c1, 0xfc82ef0b6acea274, 0x230f729f2250e927, 0xd0b2f94d2f420109}, 1450238384Sjkim {0x4305adddb38d4966, 0x10b838f8624c3b45, 0x7db2636658954e7a, 0x971459828b0719e5}, 1451238384Sjkim {1, 0, 0, 0}}, 1452238384Sjkim {{0x4bd6b72623369fc9, 0x57f2929e53d0b876, 0xc2d5cba4f2340687, 0x961610004a866aba}, 1453238384Sjkim {0x49997bcd2e407a5e, 0x69ab197d92ddcb24, 0x2cf1f2438fe5131c, 0x7acb9fadcee75e44}, 1454238384Sjkim {1, 0, 0, 0}}, 1455238384Sjkim {{0x254e839423d2d4c0, 0xf57f0c917aea685b, 0xa60d880f6f75aaea, 0x24eb9acca333bf5b}, 1456238384Sjkim {0xe3de4ccb1cda5dea, 0xfeef9341c51a6b4f, 0x743125f88bac4c4d, 0x69f891c5acd079cc}, 1457238384Sjkim {1, 0, 0, 0}}, 1458238384Sjkim {{0xeee44b35702476b5, 0x7ed031a0e45c2258, 0xb422d1e7bd6f8514, 0xe51f547c5972a107}, 1459238384Sjkim {0xa25bcd6fc9cf343d, 0x8ca922ee097c184e, 0xa62f98b3a9fe9a06, 0x1c309a2b25bb1387}, 1460238384Sjkim {1, 0, 0, 0}}, 1461238384Sjkim {{0x9295dbeb1967c459, 0xb00148833472c98e, 0xc504977708011828, 0x20b87b8aa2c4e503}, 1462238384Sjkim {0x3063175de057c277, 0x1bd539338fe582dd, 0x0d11adef5f69a044, 0xf5c6fa49919776be}, 1463238384Sjkim {1, 0, 0, 0}}, 1464238384Sjkim {{0x8c944e760fd59e11, 0x3876cba1102fad5f, 0xa454c3fad83faa56, 0x1ed7d1b9332010b9}, 1465238384Sjkim {0xa1011a270024b889, 0x05e4d0dcac0cd344, 0x52b520f0eb6a2a24, 0x3a2b03f03217257a}, 1466238384Sjkim {1, 0, 0, 0}}, 1467238384Sjkim {{0xf20fc2afdf1d043d, 0xf330240db58d5a62, 0xfc7d229ca0058c3b, 0x15fee545c78dd9f6}, 1468238384Sjkim {0x501e82885bc98cda, 0x41ef80e5d046ac04, 0x557d9f49461210fb, 0x4ab5b6b2b8753f81}, 1469238384Sjkim {1, 0, 0, 0}}}}; 1470238384Sjkim 1471238384Sjkim/* select_point selects the |idx|th point from a precomputation table and 1472238384Sjkim * copies it to out. */ 1473238384Sjkimstatic void select_point(const u64 idx, unsigned int size, const smallfelem pre_comp[16][3], smallfelem out[3]) 1474238384Sjkim { 1475238384Sjkim unsigned i, j; 1476238384Sjkim u64 *outlimbs = &out[0][0]; 1477238384Sjkim memset(outlimbs, 0, 3 * sizeof(smallfelem)); 1478238384Sjkim 1479238384Sjkim for (i = 0; i < size; i++) 1480238384Sjkim { 1481238384Sjkim const u64 *inlimbs = (u64*) &pre_comp[i][0][0]; 1482238384Sjkim u64 mask = i ^ idx; 1483238384Sjkim mask |= mask >> 4; 1484238384Sjkim mask |= mask >> 2; 1485238384Sjkim mask |= mask >> 1; 1486238384Sjkim mask &= 1; 1487238384Sjkim mask--; 1488238384Sjkim for (j = 0; j < NLIMBS * 3; j++) 1489238384Sjkim outlimbs[j] |= inlimbs[j] & mask; 1490238384Sjkim } 1491238384Sjkim } 1492238384Sjkim 1493238384Sjkim/* get_bit returns the |i|th bit in |in| */ 1494238384Sjkimstatic char get_bit(const felem_bytearray in, int i) 1495238384Sjkim { 1496238384Sjkim if ((i < 0) || (i >= 256)) 1497238384Sjkim return 0; 1498238384Sjkim return (in[i >> 3] >> (i & 7)) & 1; 1499238384Sjkim } 1500238384Sjkim 1501238384Sjkim/* Interleaved point multiplication using precomputed point multiples: 1502238384Sjkim * The small point multiples 0*P, 1*P, ..., 17*P are in pre_comp[], 1503238384Sjkim * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple 1504238384Sjkim * of the generator, using certain (large) precomputed multiples in g_pre_comp. 1505238384Sjkim * Output point (X, Y, Z) is stored in x_out, y_out, z_out */ 1506238384Sjkimstatic void batch_mul(felem x_out, felem y_out, felem z_out, 1507238384Sjkim const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar, 1508238384Sjkim const int mixed, const smallfelem pre_comp[][17][3], const smallfelem g_pre_comp[2][16][3]) 1509238384Sjkim { 1510238384Sjkim int i, skip; 1511238384Sjkim unsigned num, gen_mul = (g_scalar != NULL); 1512238384Sjkim felem nq[3], ftmp; 1513238384Sjkim smallfelem tmp[3]; 1514238384Sjkim u64 bits; 1515238384Sjkim u8 sign, digit; 1516238384Sjkim 1517238384Sjkim /* set nq to the point at infinity */ 1518238384Sjkim memset(nq, 0, 3 * sizeof(felem)); 1519238384Sjkim 1520238384Sjkim /* Loop over all scalars msb-to-lsb, interleaving additions 1521238384Sjkim * of multiples of the generator (two in each of the last 32 rounds) 1522238384Sjkim * and additions of other points multiples (every 5th round). 1523238384Sjkim */ 1524238384Sjkim skip = 1; /* save two point operations in the first round */ 1525238384Sjkim for (i = (num_points ? 255 : 31); i >= 0; --i) 1526238384Sjkim { 1527238384Sjkim /* double */ 1528238384Sjkim if (!skip) 1529238384Sjkim point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1530238384Sjkim 1531238384Sjkim /* add multiples of the generator */ 1532238384Sjkim if (gen_mul && (i <= 31)) 1533238384Sjkim { 1534238384Sjkim /* first, look 32 bits upwards */ 1535238384Sjkim bits = get_bit(g_scalar, i + 224) << 3; 1536238384Sjkim bits |= get_bit(g_scalar, i + 160) << 2; 1537238384Sjkim bits |= get_bit(g_scalar, i + 96) << 1; 1538238384Sjkim bits |= get_bit(g_scalar, i + 32); 1539238384Sjkim /* select the point to add, in constant time */ 1540238384Sjkim select_point(bits, 16, g_pre_comp[1], tmp); 1541238384Sjkim 1542238384Sjkim if (!skip) 1543238384Sjkim { 1544238384Sjkim point_add(nq[0], nq[1], nq[2], 1545238384Sjkim nq[0], nq[1], nq[2], 1546238384Sjkim 1 /* mixed */, tmp[0], tmp[1], tmp[2]); 1547238384Sjkim } 1548238384Sjkim else 1549238384Sjkim { 1550238384Sjkim smallfelem_expand(nq[0], tmp[0]); 1551238384Sjkim smallfelem_expand(nq[1], tmp[1]); 1552238384Sjkim smallfelem_expand(nq[2], tmp[2]); 1553238384Sjkim skip = 0; 1554238384Sjkim } 1555238384Sjkim 1556238384Sjkim /* second, look at the current position */ 1557238384Sjkim bits = get_bit(g_scalar, i + 192) << 3; 1558238384Sjkim bits |= get_bit(g_scalar, i + 128) << 2; 1559238384Sjkim bits |= get_bit(g_scalar, i + 64) << 1; 1560238384Sjkim bits |= get_bit(g_scalar, i); 1561238384Sjkim /* select the point to add, in constant time */ 1562238384Sjkim select_point(bits, 16, g_pre_comp[0], tmp); 1563238384Sjkim point_add(nq[0], nq[1], nq[2], 1564238384Sjkim nq[0], nq[1], nq[2], 1565238384Sjkim 1 /* mixed */, tmp[0], tmp[1], tmp[2]); 1566238384Sjkim } 1567238384Sjkim 1568238384Sjkim /* do other additions every 5 doublings */ 1569238384Sjkim if (num_points && (i % 5 == 0)) 1570238384Sjkim { 1571238384Sjkim /* loop over all scalars */ 1572238384Sjkim for (num = 0; num < num_points; ++num) 1573238384Sjkim { 1574238384Sjkim bits = get_bit(scalars[num], i + 4) << 5; 1575238384Sjkim bits |= get_bit(scalars[num], i + 3) << 4; 1576238384Sjkim bits |= get_bit(scalars[num], i + 2) << 3; 1577238384Sjkim bits |= get_bit(scalars[num], i + 1) << 2; 1578238384Sjkim bits |= get_bit(scalars[num], i) << 1; 1579238384Sjkim bits |= get_bit(scalars[num], i - 1); 1580238384Sjkim ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1581238384Sjkim 1582238384Sjkim /* select the point to add or subtract, in constant time */ 1583238384Sjkim select_point(digit, 17, pre_comp[num], tmp); 1584238384Sjkim smallfelem_neg(ftmp, tmp[1]); /* (X, -Y, Z) is the negative point */ 1585238384Sjkim copy_small_conditional(ftmp, tmp[1], (((limb) sign) - 1)); 1586238384Sjkim felem_contract(tmp[1], ftmp); 1587238384Sjkim 1588238384Sjkim if (!skip) 1589238384Sjkim { 1590238384Sjkim point_add(nq[0], nq[1], nq[2], 1591238384Sjkim nq[0], nq[1], nq[2], 1592238384Sjkim mixed, tmp[0], tmp[1], tmp[2]); 1593238384Sjkim } 1594238384Sjkim else 1595238384Sjkim { 1596238384Sjkim smallfelem_expand(nq[0], tmp[0]); 1597238384Sjkim smallfelem_expand(nq[1], tmp[1]); 1598238384Sjkim smallfelem_expand(nq[2], tmp[2]); 1599238384Sjkim skip = 0; 1600238384Sjkim } 1601238384Sjkim } 1602238384Sjkim } 1603238384Sjkim } 1604238384Sjkim felem_assign(x_out, nq[0]); 1605238384Sjkim felem_assign(y_out, nq[1]); 1606238384Sjkim felem_assign(z_out, nq[2]); 1607238384Sjkim } 1608238384Sjkim 1609238384Sjkim/* Precomputation for the group generator. */ 1610238384Sjkimtypedef struct { 1611238384Sjkim smallfelem g_pre_comp[2][16][3]; 1612238384Sjkim int references; 1613238384Sjkim} NISTP256_PRE_COMP; 1614238384Sjkim 1615238384Sjkimconst EC_METHOD *EC_GFp_nistp256_method(void) 1616238384Sjkim { 1617238384Sjkim static const EC_METHOD ret = { 1618238384Sjkim EC_FLAGS_DEFAULT_OCT, 1619238384Sjkim NID_X9_62_prime_field, 1620238384Sjkim ec_GFp_nistp256_group_init, 1621238384Sjkim ec_GFp_simple_group_finish, 1622238384Sjkim ec_GFp_simple_group_clear_finish, 1623238384Sjkim ec_GFp_nist_group_copy, 1624238384Sjkim ec_GFp_nistp256_group_set_curve, 1625238384Sjkim ec_GFp_simple_group_get_curve, 1626238384Sjkim ec_GFp_simple_group_get_degree, 1627238384Sjkim ec_GFp_simple_group_check_discriminant, 1628238384Sjkim ec_GFp_simple_point_init, 1629238384Sjkim ec_GFp_simple_point_finish, 1630238384Sjkim ec_GFp_simple_point_clear_finish, 1631238384Sjkim ec_GFp_simple_point_copy, 1632238384Sjkim ec_GFp_simple_point_set_to_infinity, 1633238384Sjkim ec_GFp_simple_set_Jprojective_coordinates_GFp, 1634238384Sjkim ec_GFp_simple_get_Jprojective_coordinates_GFp, 1635238384Sjkim ec_GFp_simple_point_set_affine_coordinates, 1636238384Sjkim ec_GFp_nistp256_point_get_affine_coordinates, 1637238384Sjkim 0 /* point_set_compressed_coordinates */, 1638238384Sjkim 0 /* point2oct */, 1639238384Sjkim 0 /* oct2point */, 1640238384Sjkim ec_GFp_simple_add, 1641238384Sjkim ec_GFp_simple_dbl, 1642238384Sjkim ec_GFp_simple_invert, 1643238384Sjkim ec_GFp_simple_is_at_infinity, 1644238384Sjkim ec_GFp_simple_is_on_curve, 1645238384Sjkim ec_GFp_simple_cmp, 1646238384Sjkim ec_GFp_simple_make_affine, 1647238384Sjkim ec_GFp_simple_points_make_affine, 1648238384Sjkim ec_GFp_nistp256_points_mul, 1649238384Sjkim ec_GFp_nistp256_precompute_mult, 1650238384Sjkim ec_GFp_nistp256_have_precompute_mult, 1651238384Sjkim ec_GFp_nist_field_mul, 1652238384Sjkim ec_GFp_nist_field_sqr, 1653238384Sjkim 0 /* field_div */, 1654238384Sjkim 0 /* field_encode */, 1655238384Sjkim 0 /* field_decode */, 1656238384Sjkim 0 /* field_set_to_one */ }; 1657238384Sjkim 1658238384Sjkim return &ret; 1659238384Sjkim } 1660238384Sjkim 1661238384Sjkim/******************************************************************************/ 1662238384Sjkim/* FUNCTIONS TO MANAGE PRECOMPUTATION 1663238384Sjkim */ 1664238384Sjkim 1665238384Sjkimstatic NISTP256_PRE_COMP *nistp256_pre_comp_new() 1666238384Sjkim { 1667238384Sjkim NISTP256_PRE_COMP *ret = NULL; 1668238384Sjkim ret = (NISTP256_PRE_COMP *) OPENSSL_malloc(sizeof *ret); 1669238384Sjkim if (!ret) 1670238384Sjkim { 1671238384Sjkim ECerr(EC_F_NISTP256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1672238384Sjkim return ret; 1673238384Sjkim } 1674238384Sjkim memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1675238384Sjkim ret->references = 1; 1676238384Sjkim return ret; 1677238384Sjkim } 1678238384Sjkim 1679238384Sjkimstatic void *nistp256_pre_comp_dup(void *src_) 1680238384Sjkim { 1681238384Sjkim NISTP256_PRE_COMP *src = src_; 1682238384Sjkim 1683238384Sjkim /* no need to actually copy, these objects never change! */ 1684238384Sjkim CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1685238384Sjkim 1686238384Sjkim return src_; 1687238384Sjkim } 1688238384Sjkim 1689238384Sjkimstatic void nistp256_pre_comp_free(void *pre_) 1690238384Sjkim { 1691238384Sjkim int i; 1692238384Sjkim NISTP256_PRE_COMP *pre = pre_; 1693238384Sjkim 1694238384Sjkim if (!pre) 1695238384Sjkim return; 1696238384Sjkim 1697238384Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1698238384Sjkim if (i > 0) 1699238384Sjkim return; 1700238384Sjkim 1701238384Sjkim OPENSSL_free(pre); 1702238384Sjkim } 1703238384Sjkim 1704238384Sjkimstatic void nistp256_pre_comp_clear_free(void *pre_) 1705238384Sjkim { 1706238384Sjkim int i; 1707238384Sjkim NISTP256_PRE_COMP *pre = pre_; 1708238384Sjkim 1709238384Sjkim if (!pre) 1710238384Sjkim return; 1711238384Sjkim 1712238384Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1713238384Sjkim if (i > 0) 1714238384Sjkim return; 1715238384Sjkim 1716238384Sjkim OPENSSL_cleanse(pre, sizeof *pre); 1717238384Sjkim OPENSSL_free(pre); 1718238384Sjkim } 1719238384Sjkim 1720238384Sjkim/******************************************************************************/ 1721238384Sjkim/* OPENSSL EC_METHOD FUNCTIONS 1722238384Sjkim */ 1723238384Sjkim 1724238384Sjkimint ec_GFp_nistp256_group_init(EC_GROUP *group) 1725238384Sjkim { 1726238384Sjkim int ret; 1727238384Sjkim ret = ec_GFp_simple_group_init(group); 1728238384Sjkim group->a_is_minus3 = 1; 1729238384Sjkim return ret; 1730238384Sjkim } 1731238384Sjkim 1732238384Sjkimint ec_GFp_nistp256_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1733238384Sjkim const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 1734238384Sjkim { 1735238384Sjkim int ret = 0; 1736238384Sjkim BN_CTX *new_ctx = NULL; 1737238384Sjkim BIGNUM *curve_p, *curve_a, *curve_b; 1738238384Sjkim 1739238384Sjkim if (ctx == NULL) 1740238384Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 1741238384Sjkim BN_CTX_start(ctx); 1742238384Sjkim if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1743238384Sjkim ((curve_a = BN_CTX_get(ctx)) == NULL) || 1744238384Sjkim ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err; 1745238384Sjkim BN_bin2bn(nistp256_curve_params[0], sizeof(felem_bytearray), curve_p); 1746238384Sjkim BN_bin2bn(nistp256_curve_params[1], sizeof(felem_bytearray), curve_a); 1747238384Sjkim BN_bin2bn(nistp256_curve_params[2], sizeof(felem_bytearray), curve_b); 1748238384Sjkim if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || 1749238384Sjkim (BN_cmp(curve_b, b))) 1750238384Sjkim { 1751238384Sjkim ECerr(EC_F_EC_GFP_NISTP256_GROUP_SET_CURVE, 1752238384Sjkim EC_R_WRONG_CURVE_PARAMETERS); 1753238384Sjkim goto err; 1754238384Sjkim } 1755238384Sjkim group->field_mod_func = BN_nist_mod_256; 1756238384Sjkim ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1757238384Sjkimerr: 1758238384Sjkim BN_CTX_end(ctx); 1759238384Sjkim if (new_ctx != NULL) 1760238384Sjkim BN_CTX_free(new_ctx); 1761238384Sjkim return ret; 1762238384Sjkim } 1763238384Sjkim 1764238384Sjkim/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns 1765238384Sjkim * (X', Y') = (X/Z^2, Y/Z^3) */ 1766238384Sjkimint ec_GFp_nistp256_point_get_affine_coordinates(const EC_GROUP *group, 1767238384Sjkim const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx) 1768238384Sjkim { 1769238384Sjkim felem z1, z2, x_in, y_in; 1770238384Sjkim smallfelem x_out, y_out; 1771238384Sjkim longfelem tmp; 1772238384Sjkim 1773238384Sjkim if (EC_POINT_is_at_infinity(group, point)) 1774238384Sjkim { 1775238384Sjkim ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES, 1776238384Sjkim EC_R_POINT_AT_INFINITY); 1777238384Sjkim return 0; 1778238384Sjkim } 1779238384Sjkim if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1780238384Sjkim (!BN_to_felem(z1, &point->Z))) return 0; 1781238384Sjkim felem_inv(z2, z1); 1782238384Sjkim felem_square(tmp, z2); felem_reduce(z1, tmp); 1783238384Sjkim felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp); 1784238384Sjkim felem_contract(x_out, x_in); 1785238384Sjkim if (x != NULL) 1786238384Sjkim { 1787238384Sjkim if (!smallfelem_to_BN(x, x_out)) { 1788238384Sjkim ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES, 1789238384Sjkim ERR_R_BN_LIB); 1790238384Sjkim return 0; 1791238384Sjkim } 1792238384Sjkim } 1793238384Sjkim felem_mul(tmp, z1, z2); felem_reduce(z1, tmp); 1794238384Sjkim felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp); 1795238384Sjkim felem_contract(y_out, y_in); 1796238384Sjkim if (y != NULL) 1797238384Sjkim { 1798238384Sjkim if (!smallfelem_to_BN(y, y_out)) 1799238384Sjkim { 1800238384Sjkim ECerr(EC_F_EC_GFP_NISTP256_POINT_GET_AFFINE_COORDINATES, 1801238384Sjkim ERR_R_BN_LIB); 1802238384Sjkim return 0; 1803238384Sjkim } 1804238384Sjkim } 1805238384Sjkim return 1; 1806238384Sjkim } 1807238384Sjkim 1808238384Sjkimstatic void make_points_affine(size_t num, smallfelem points[/* num */][3], smallfelem tmp_smallfelems[/* num+1 */]) 1809238384Sjkim { 1810238384Sjkim /* Runs in constant time, unless an input is the point at infinity 1811238384Sjkim * (which normally shouldn't happen). */ 1812238384Sjkim ec_GFp_nistp_points_make_affine_internal( 1813238384Sjkim num, 1814238384Sjkim points, 1815238384Sjkim sizeof(smallfelem), 1816238384Sjkim tmp_smallfelems, 1817238384Sjkim (void (*)(void *)) smallfelem_one, 1818238384Sjkim (int (*)(const void *)) smallfelem_is_zero_int, 1819238384Sjkim (void (*)(void *, const void *)) smallfelem_assign, 1820238384Sjkim (void (*)(void *, const void *)) smallfelem_square_contract, 1821238384Sjkim (void (*)(void *, const void *, const void *)) smallfelem_mul_contract, 1822238384Sjkim (void (*)(void *, const void *)) smallfelem_inv_contract, 1823238384Sjkim (void (*)(void *, const void *)) smallfelem_assign /* nothing to contract */); 1824238384Sjkim } 1825238384Sjkim 1826238384Sjkim/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values 1827238384Sjkim * Result is stored in r (r can equal one of the inputs). */ 1828238384Sjkimint ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r, 1829238384Sjkim const BIGNUM *scalar, size_t num, const EC_POINT *points[], 1830238384Sjkim const BIGNUM *scalars[], BN_CTX *ctx) 1831238384Sjkim { 1832238384Sjkim int ret = 0; 1833238384Sjkim int j; 1834238384Sjkim int mixed = 0; 1835238384Sjkim BN_CTX *new_ctx = NULL; 1836238384Sjkim BIGNUM *x, *y, *z, *tmp_scalar; 1837238384Sjkim felem_bytearray g_secret; 1838238384Sjkim felem_bytearray *secrets = NULL; 1839238384Sjkim smallfelem (*pre_comp)[17][3] = NULL; 1840238384Sjkim smallfelem *tmp_smallfelems = NULL; 1841238384Sjkim felem_bytearray tmp; 1842238384Sjkim unsigned i, num_bytes; 1843238384Sjkim int have_pre_comp = 0; 1844238384Sjkim size_t num_points = num; 1845238384Sjkim smallfelem x_in, y_in, z_in; 1846238384Sjkim felem x_out, y_out, z_out; 1847238384Sjkim NISTP256_PRE_COMP *pre = NULL; 1848238384Sjkim const smallfelem (*g_pre_comp)[16][3] = NULL; 1849238384Sjkim EC_POINT *generator = NULL; 1850238384Sjkim const EC_POINT *p = NULL; 1851238384Sjkim const BIGNUM *p_scalar = NULL; 1852238384Sjkim 1853238384Sjkim if (ctx == NULL) 1854238384Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 1855238384Sjkim BN_CTX_start(ctx); 1856238384Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || 1857238384Sjkim ((y = BN_CTX_get(ctx)) == NULL) || 1858238384Sjkim ((z = BN_CTX_get(ctx)) == NULL) || 1859238384Sjkim ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 1860238384Sjkim goto err; 1861238384Sjkim 1862238384Sjkim if (scalar != NULL) 1863238384Sjkim { 1864238384Sjkim pre = EC_EX_DATA_get_data(group->extra_data, 1865238384Sjkim nistp256_pre_comp_dup, nistp256_pre_comp_free, 1866238384Sjkim nistp256_pre_comp_clear_free); 1867238384Sjkim if (pre) 1868238384Sjkim /* we have precomputation, try to use it */ 1869238384Sjkim g_pre_comp = (const smallfelem (*)[16][3]) pre->g_pre_comp; 1870238384Sjkim else 1871238384Sjkim /* try to use the standard precomputation */ 1872238384Sjkim g_pre_comp = &gmul[0]; 1873238384Sjkim generator = EC_POINT_new(group); 1874238384Sjkim if (generator == NULL) 1875238384Sjkim goto err; 1876238384Sjkim /* get the generator from precomputation */ 1877238384Sjkim if (!smallfelem_to_BN(x, g_pre_comp[0][1][0]) || 1878238384Sjkim !smallfelem_to_BN(y, g_pre_comp[0][1][1]) || 1879238384Sjkim !smallfelem_to_BN(z, g_pre_comp[0][1][2])) 1880238384Sjkim { 1881238384Sjkim ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 1882238384Sjkim goto err; 1883238384Sjkim } 1884238384Sjkim if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 1885238384Sjkim generator, x, y, z, ctx)) 1886238384Sjkim goto err; 1887238384Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1888238384Sjkim /* precomputation matches generator */ 1889238384Sjkim have_pre_comp = 1; 1890238384Sjkim else 1891238384Sjkim /* we don't have valid precomputation: 1892238384Sjkim * treat the generator as a random point */ 1893238384Sjkim num_points++; 1894238384Sjkim } 1895238384Sjkim if (num_points > 0) 1896238384Sjkim { 1897238384Sjkim if (num_points >= 3) 1898238384Sjkim { 1899238384Sjkim /* unless we precompute multiples for just one or two points, 1900238384Sjkim * converting those into affine form is time well spent */ 1901238384Sjkim mixed = 1; 1902238384Sjkim } 1903238384Sjkim secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray)); 1904238384Sjkim pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(smallfelem)); 1905238384Sjkim if (mixed) 1906238384Sjkim tmp_smallfelems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(smallfelem)); 1907238384Sjkim if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_smallfelems == NULL))) 1908238384Sjkim { 1909238384Sjkim ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_MALLOC_FAILURE); 1910238384Sjkim goto err; 1911238384Sjkim } 1912238384Sjkim 1913238384Sjkim /* we treat NULL scalars as 0, and NULL points as points at infinity, 1914238384Sjkim * i.e., they contribute nothing to the linear combination */ 1915238384Sjkim memset(secrets, 0, num_points * sizeof(felem_bytearray)); 1916238384Sjkim memset(pre_comp, 0, num_points * 17 * 3 * sizeof(smallfelem)); 1917238384Sjkim for (i = 0; i < num_points; ++i) 1918238384Sjkim { 1919238384Sjkim if (i == num) 1920238384Sjkim /* we didn't have a valid precomputation, so we pick 1921238384Sjkim * the generator */ 1922238384Sjkim { 1923238384Sjkim p = EC_GROUP_get0_generator(group); 1924238384Sjkim p_scalar = scalar; 1925238384Sjkim } 1926238384Sjkim else 1927238384Sjkim /* the i^th point */ 1928238384Sjkim { 1929238384Sjkim p = points[i]; 1930238384Sjkim p_scalar = scalars[i]; 1931238384Sjkim } 1932238384Sjkim if ((p_scalar != NULL) && (p != NULL)) 1933238384Sjkim { 1934238384Sjkim /* reduce scalar to 0 <= scalar < 2^256 */ 1935238384Sjkim if ((BN_num_bits(p_scalar) > 256) || (BN_is_negative(p_scalar))) 1936238384Sjkim { 1937238384Sjkim /* this is an unusual input, and we don't guarantee 1938238384Sjkim * constant-timeness */ 1939238384Sjkim if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) 1940238384Sjkim { 1941238384Sjkim ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 1942238384Sjkim goto err; 1943238384Sjkim } 1944238384Sjkim num_bytes = BN_bn2bin(tmp_scalar, tmp); 1945238384Sjkim } 1946238384Sjkim else 1947238384Sjkim num_bytes = BN_bn2bin(p_scalar, tmp); 1948238384Sjkim flip_endian(secrets[i], tmp, num_bytes); 1949238384Sjkim /* precompute multiples */ 1950238384Sjkim if ((!BN_to_felem(x_out, &p->X)) || 1951238384Sjkim (!BN_to_felem(y_out, &p->Y)) || 1952238384Sjkim (!BN_to_felem(z_out, &p->Z))) goto err; 1953238384Sjkim felem_shrink(pre_comp[i][1][0], x_out); 1954238384Sjkim felem_shrink(pre_comp[i][1][1], y_out); 1955238384Sjkim felem_shrink(pre_comp[i][1][2], z_out); 1956238384Sjkim for (j = 2; j <= 16; ++j) 1957238384Sjkim { 1958238384Sjkim if (j & 1) 1959238384Sjkim { 1960238384Sjkim point_add_small( 1961238384Sjkim pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2], 1962238384Sjkim pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2], 1963238384Sjkim pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]); 1964238384Sjkim } 1965238384Sjkim else 1966238384Sjkim { 1967238384Sjkim point_double_small( 1968238384Sjkim pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2], 1969238384Sjkim pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]); 1970238384Sjkim } 1971238384Sjkim } 1972238384Sjkim } 1973238384Sjkim } 1974238384Sjkim if (mixed) 1975238384Sjkim make_points_affine(num_points * 17, pre_comp[0], tmp_smallfelems); 1976238384Sjkim } 1977238384Sjkim 1978238384Sjkim /* the scalar for the generator */ 1979238384Sjkim if ((scalar != NULL) && (have_pre_comp)) 1980238384Sjkim { 1981238384Sjkim memset(g_secret, 0, sizeof(g_secret)); 1982238384Sjkim /* reduce scalar to 0 <= scalar < 2^256 */ 1983238384Sjkim if ((BN_num_bits(scalar) > 256) || (BN_is_negative(scalar))) 1984238384Sjkim { 1985238384Sjkim /* this is an unusual input, and we don't guarantee 1986238384Sjkim * constant-timeness */ 1987238384Sjkim if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) 1988238384Sjkim { 1989238384Sjkim ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 1990238384Sjkim goto err; 1991238384Sjkim } 1992238384Sjkim num_bytes = BN_bn2bin(tmp_scalar, tmp); 1993238384Sjkim } 1994238384Sjkim else 1995238384Sjkim num_bytes = BN_bn2bin(scalar, tmp); 1996238384Sjkim flip_endian(g_secret, tmp, num_bytes); 1997238384Sjkim /* do the multiplication with generator precomputation*/ 1998238384Sjkim batch_mul(x_out, y_out, z_out, 1999238384Sjkim (const felem_bytearray (*)) secrets, num_points, 2000238384Sjkim g_secret, 2001238384Sjkim mixed, (const smallfelem (*)[17][3]) pre_comp, 2002238384Sjkim g_pre_comp); 2003238384Sjkim } 2004238384Sjkim else 2005238384Sjkim /* do the multiplication without generator precomputation */ 2006238384Sjkim batch_mul(x_out, y_out, z_out, 2007238384Sjkim (const felem_bytearray (*)) secrets, num_points, 2008238384Sjkim NULL, mixed, (const smallfelem (*)[17][3]) pre_comp, NULL); 2009238384Sjkim /* reduce the output to its unique minimal representation */ 2010238384Sjkim felem_contract(x_in, x_out); 2011238384Sjkim felem_contract(y_in, y_out); 2012238384Sjkim felem_contract(z_in, z_out); 2013238384Sjkim if ((!smallfelem_to_BN(x, x_in)) || (!smallfelem_to_BN(y, y_in)) || 2014238384Sjkim (!smallfelem_to_BN(z, z_in))) 2015238384Sjkim { 2016238384Sjkim ECerr(EC_F_EC_GFP_NISTP256_POINTS_MUL, ERR_R_BN_LIB); 2017238384Sjkim goto err; 2018238384Sjkim } 2019238384Sjkim ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx); 2020238384Sjkim 2021238384Sjkimerr: 2022238384Sjkim BN_CTX_end(ctx); 2023238384Sjkim if (generator != NULL) 2024238384Sjkim EC_POINT_free(generator); 2025238384Sjkim if (new_ctx != NULL) 2026238384Sjkim BN_CTX_free(new_ctx); 2027238384Sjkim if (secrets != NULL) 2028238384Sjkim OPENSSL_free(secrets); 2029238384Sjkim if (pre_comp != NULL) 2030238384Sjkim OPENSSL_free(pre_comp); 2031238384Sjkim if (tmp_smallfelems != NULL) 2032238384Sjkim OPENSSL_free(tmp_smallfelems); 2033238384Sjkim return ret; 2034238384Sjkim } 2035238384Sjkim 2036238384Sjkimint ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 2037238384Sjkim { 2038238384Sjkim int ret = 0; 2039238384Sjkim NISTP256_PRE_COMP *pre = NULL; 2040238384Sjkim int i, j; 2041238384Sjkim BN_CTX *new_ctx = NULL; 2042238384Sjkim BIGNUM *x, *y; 2043238384Sjkim EC_POINT *generator = NULL; 2044238384Sjkim smallfelem tmp_smallfelems[32]; 2045238384Sjkim felem x_tmp, y_tmp, z_tmp; 2046238384Sjkim 2047238384Sjkim /* throw away old precomputation */ 2048238384Sjkim EC_EX_DATA_free_data(&group->extra_data, nistp256_pre_comp_dup, 2049238384Sjkim nistp256_pre_comp_free, nistp256_pre_comp_clear_free); 2050238384Sjkim if (ctx == NULL) 2051238384Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 2052238384Sjkim BN_CTX_start(ctx); 2053238384Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || 2054238384Sjkim ((y = BN_CTX_get(ctx)) == NULL)) 2055238384Sjkim goto err; 2056238384Sjkim /* get the generator */ 2057238384Sjkim if (group->generator == NULL) goto err; 2058238384Sjkim generator = EC_POINT_new(group); 2059238384Sjkim if (generator == NULL) 2060238384Sjkim goto err; 2061238384Sjkim BN_bin2bn(nistp256_curve_params[3], sizeof (felem_bytearray), x); 2062238384Sjkim BN_bin2bn(nistp256_curve_params[4], sizeof (felem_bytearray), y); 2063238384Sjkim if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx)) 2064238384Sjkim goto err; 2065238384Sjkim if ((pre = nistp256_pre_comp_new()) == NULL) 2066238384Sjkim goto err; 2067238384Sjkim /* if the generator is the standard one, use built-in precomputation */ 2068238384Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 2069238384Sjkim { 2070238384Sjkim memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); 2071238384Sjkim ret = 1; 2072238384Sjkim goto err; 2073238384Sjkim } 2074238384Sjkim if ((!BN_to_felem(x_tmp, &group->generator->X)) || 2075238384Sjkim (!BN_to_felem(y_tmp, &group->generator->Y)) || 2076238384Sjkim (!BN_to_felem(z_tmp, &group->generator->Z))) 2077238384Sjkim goto err; 2078238384Sjkim felem_shrink(pre->g_pre_comp[0][1][0], x_tmp); 2079238384Sjkim felem_shrink(pre->g_pre_comp[0][1][1], y_tmp); 2080238384Sjkim felem_shrink(pre->g_pre_comp[0][1][2], z_tmp); 2081238384Sjkim /* compute 2^64*G, 2^128*G, 2^192*G for the first table, 2082238384Sjkim * 2^32*G, 2^96*G, 2^160*G, 2^224*G for the second one 2083238384Sjkim */ 2084238384Sjkim for (i = 1; i <= 8; i <<= 1) 2085238384Sjkim { 2086238384Sjkim point_double_small( 2087238384Sjkim pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2], 2088238384Sjkim pre->g_pre_comp[0][i][0], pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]); 2089238384Sjkim for (j = 0; j < 31; ++j) 2090238384Sjkim { 2091238384Sjkim point_double_small( 2092238384Sjkim pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2], 2093238384Sjkim pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 2094238384Sjkim } 2095238384Sjkim if (i == 8) 2096238384Sjkim break; 2097238384Sjkim point_double_small( 2098238384Sjkim pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2], 2099238384Sjkim pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]); 2100238384Sjkim for (j = 0; j < 31; ++j) 2101238384Sjkim { 2102238384Sjkim point_double_small( 2103238384Sjkim pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2], 2104238384Sjkim pre->g_pre_comp[0][2*i][0], pre->g_pre_comp[0][2*i][1], pre->g_pre_comp[0][2*i][2]); 2105238384Sjkim } 2106238384Sjkim } 2107238384Sjkim for (i = 0; i < 2; i++) 2108238384Sjkim { 2109238384Sjkim /* g_pre_comp[i][0] is the point at infinity */ 2110238384Sjkim memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0])); 2111238384Sjkim /* the remaining multiples */ 2112238384Sjkim /* 2^64*G + 2^128*G resp. 2^96*G + 2^160*G */ 2113238384Sjkim point_add_small( 2114238384Sjkim pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1], pre->g_pre_comp[i][6][2], 2115238384Sjkim pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2], 2116238384Sjkim pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]); 2117238384Sjkim /* 2^64*G + 2^192*G resp. 2^96*G + 2^224*G */ 2118238384Sjkim point_add_small( 2119238384Sjkim pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1], pre->g_pre_comp[i][10][2], 2120238384Sjkim pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 2121238384Sjkim pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]); 2122238384Sjkim /* 2^128*G + 2^192*G resp. 2^160*G + 2^224*G */ 2123238384Sjkim point_add_small( 2124238384Sjkim pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2], 2125238384Sjkim pre->g_pre_comp[i][8][0], pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2], 2126238384Sjkim pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2]); 2127238384Sjkim /* 2^64*G + 2^128*G + 2^192*G resp. 2^96*G + 2^160*G + 2^224*G */ 2128238384Sjkim point_add_small( 2129238384Sjkim pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1], pre->g_pre_comp[i][14][2], 2130238384Sjkim pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2], 2131238384Sjkim pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1], pre->g_pre_comp[i][2][2]); 2132238384Sjkim for (j = 1; j < 8; ++j) 2133238384Sjkim { 2134238384Sjkim /* odd multiples: add G resp. 2^32*G */ 2135238384Sjkim point_add_small( 2136238384Sjkim pre->g_pre_comp[i][2*j+1][0], pre->g_pre_comp[i][2*j+1][1], pre->g_pre_comp[i][2*j+1][2], 2137238384Sjkim pre->g_pre_comp[i][2*j][0], pre->g_pre_comp[i][2*j][1], pre->g_pre_comp[i][2*j][2], 2138238384Sjkim pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1], pre->g_pre_comp[i][1][2]); 2139238384Sjkim } 2140238384Sjkim } 2141238384Sjkim make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems); 2142238384Sjkim 2143238384Sjkim if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup, 2144238384Sjkim nistp256_pre_comp_free, nistp256_pre_comp_clear_free)) 2145238384Sjkim goto err; 2146238384Sjkim ret = 1; 2147238384Sjkim pre = NULL; 2148238384Sjkim err: 2149238384Sjkim BN_CTX_end(ctx); 2150238384Sjkim if (generator != NULL) 2151238384Sjkim EC_POINT_free(generator); 2152238384Sjkim if (new_ctx != NULL) 2153238384Sjkim BN_CTX_free(new_ctx); 2154238384Sjkim if (pre) 2155238384Sjkim nistp256_pre_comp_free(pre); 2156238384Sjkim return ret; 2157238384Sjkim } 2158238384Sjkim 2159238384Sjkimint ec_GFp_nistp256_have_precompute_mult(const EC_GROUP *group) 2160238384Sjkim { 2161238384Sjkim if (EC_EX_DATA_get_data(group->extra_data, nistp256_pre_comp_dup, 2162238384Sjkim nistp256_pre_comp_free, nistp256_pre_comp_clear_free) 2163238384Sjkim != NULL) 2164238384Sjkim return 1; 2165238384Sjkim else 2166238384Sjkim return 0; 2167238384Sjkim } 2168238384Sjkim#else 2169238384Sjkimstatic void *dummy=&dummy; 2170238384Sjkim#endif 2171