1238384Sjkim/* crypto/ec/ecp_nistp521.c */ 2238384Sjkim/* 3238384Sjkim * Written by Adam Langley (Google) for the OpenSSL project 4238384Sjkim */ 5238384Sjkim/* Copyright 2011 Google Inc. 6238384Sjkim * 7238384Sjkim * Licensed under the Apache License, Version 2.0 (the "License"); 8238384Sjkim * 9238384Sjkim * you may not use this file except in compliance with the License. 10238384Sjkim * You may obtain a copy of the License at 11238384Sjkim * 12238384Sjkim * http://www.apache.org/licenses/LICENSE-2.0 13238384Sjkim * 14238384Sjkim * Unless required by applicable law or agreed to in writing, software 15238384Sjkim * distributed under the License is distributed on an "AS IS" BASIS, 16238384Sjkim * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17238384Sjkim * See the License for the specific language governing permissions and 18238384Sjkim * limitations under the License. 19238384Sjkim */ 20238384Sjkim 21238384Sjkim/* 22238384Sjkim * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication 23238384Sjkim * 24238384Sjkim * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c. 25238384Sjkim * Otherwise based on Emilia's P224 work, which was inspired by my curve25519 26238384Sjkim * work which got its smarts from Daniel J. Bernstein's work on the same. 27238384Sjkim */ 28238384Sjkim 29238384Sjkim#include <openssl/opensslconf.h> 30238384Sjkim#ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 31238384Sjkim 32238384Sjkim#ifndef OPENSSL_SYS_VMS 33238384Sjkim#include <stdint.h> 34238384Sjkim#else 35238384Sjkim#include <inttypes.h> 36238384Sjkim#endif 37238384Sjkim 38238384Sjkim#include <string.h> 39238384Sjkim#include <openssl/err.h> 40238384Sjkim#include "ec_lcl.h" 41238384Sjkim 42238384Sjkim#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) 43238384Sjkim /* even with gcc, the typedef won't work for 32-bit platforms */ 44238384Sjkim typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit platforms */ 45238384Sjkim#else 46238384Sjkim #error "Need GCC 3.1 or later to define type uint128_t" 47238384Sjkim#endif 48238384Sjkim 49238384Sjkimtypedef uint8_t u8; 50238384Sjkimtypedef uint64_t u64; 51238384Sjkimtypedef int64_t s64; 52238384Sjkim 53238384Sjkim/* The underlying field. 54238384Sjkim * 55238384Sjkim * P521 operates over GF(2^521-1). We can serialise an element of this field 56238384Sjkim * into 66 bytes where the most significant byte contains only a single bit. We 57238384Sjkim * call this an felem_bytearray. */ 58238384Sjkim 59238384Sjkimtypedef u8 felem_bytearray[66]; 60238384Sjkim 61238384Sjkim/* These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5. 62238384Sjkim * These values are big-endian. */ 63238384Sjkimstatic const felem_bytearray nistp521_curve_params[5] = 64238384Sjkim { 65238384Sjkim {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */ 66238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 67238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 68238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 69238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 70238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 71238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 72238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 73238384Sjkim 0xff, 0xff}, 74238384Sjkim {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */ 75238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 76238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 77238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 78238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 79238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 80238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 81238384Sjkim 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 82238384Sjkim 0xff, 0xfc}, 83238384Sjkim {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */ 84238384Sjkim 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85, 85238384Sjkim 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3, 86238384Sjkim 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1, 87238384Sjkim 0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e, 88238384Sjkim 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1, 89238384Sjkim 0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c, 90238384Sjkim 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50, 91238384Sjkim 0x3f, 0x00}, 92238384Sjkim {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */ 93238384Sjkim 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95, 94238384Sjkim 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f, 95238384Sjkim 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d, 96238384Sjkim 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7, 97238384Sjkim 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff, 98238384Sjkim 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a, 99238384Sjkim 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5, 100238384Sjkim 0xbd, 0x66}, 101238384Sjkim {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */ 102238384Sjkim 0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d, 103238384Sjkim 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b, 104238384Sjkim 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e, 105238384Sjkim 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4, 106238384Sjkim 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad, 107238384Sjkim 0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72, 108238384Sjkim 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1, 109238384Sjkim 0x66, 0x50} 110238384Sjkim }; 111238384Sjkim 112238384Sjkim/* The representation of field elements. 113238384Sjkim * ------------------------------------ 114238384Sjkim * 115238384Sjkim * We represent field elements with nine values. These values are either 64 or 116238384Sjkim * 128 bits and the field element represented is: 117238384Sjkim * v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464 (mod p) 118238384Sjkim * Each of the nine values is called a 'limb'. Since the limbs are spaced only 119238384Sjkim * 58 bits apart, but are greater than 58 bits in length, the most significant 120238384Sjkim * bits of each limb overlap with the least significant bits of the next. 121238384Sjkim * 122238384Sjkim * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a 123238384Sjkim * 'largefelem' */ 124238384Sjkim 125238384Sjkim#define NLIMBS 9 126238384Sjkim 127238384Sjkimtypedef uint64_t limb; 128238384Sjkimtypedef limb felem[NLIMBS]; 129238384Sjkimtypedef uint128_t largefelem[NLIMBS]; 130238384Sjkim 131238384Sjkimstatic const limb bottom57bits = 0x1ffffffffffffff; 132238384Sjkimstatic const limb bottom58bits = 0x3ffffffffffffff; 133238384Sjkim 134238384Sjkim/* bin66_to_felem takes a little-endian byte array and converts it into felem 135238384Sjkim * form. This assumes that the CPU is little-endian. */ 136238384Sjkimstatic void bin66_to_felem(felem out, const u8 in[66]) 137238384Sjkim { 138238384Sjkim out[0] = (*((limb*) &in[0])) & bottom58bits; 139238384Sjkim out[1] = (*((limb*) &in[7]) >> 2) & bottom58bits; 140238384Sjkim out[2] = (*((limb*) &in[14]) >> 4) & bottom58bits; 141238384Sjkim out[3] = (*((limb*) &in[21]) >> 6) & bottom58bits; 142238384Sjkim out[4] = (*((limb*) &in[29])) & bottom58bits; 143238384Sjkim out[5] = (*((limb*) &in[36]) >> 2) & bottom58bits; 144238384Sjkim out[6] = (*((limb*) &in[43]) >> 4) & bottom58bits; 145238384Sjkim out[7] = (*((limb*) &in[50]) >> 6) & bottom58bits; 146238384Sjkim out[8] = (*((limb*) &in[58])) & bottom57bits; 147238384Sjkim } 148238384Sjkim 149238384Sjkim/* felem_to_bin66 takes an felem and serialises into a little endian, 66 byte 150238384Sjkim * array. This assumes that the CPU is little-endian. */ 151238384Sjkimstatic void felem_to_bin66(u8 out[66], const felem in) 152238384Sjkim { 153238384Sjkim memset(out, 0, 66); 154238384Sjkim (*((limb*) &out[0])) = in[0]; 155238384Sjkim (*((limb*) &out[7])) |= in[1] << 2; 156238384Sjkim (*((limb*) &out[14])) |= in[2] << 4; 157238384Sjkim (*((limb*) &out[21])) |= in[3] << 6; 158238384Sjkim (*((limb*) &out[29])) = in[4]; 159238384Sjkim (*((limb*) &out[36])) |= in[5] << 2; 160238384Sjkim (*((limb*) &out[43])) |= in[6] << 4; 161238384Sjkim (*((limb*) &out[50])) |= in[7] << 6; 162238384Sjkim (*((limb*) &out[58])) = in[8]; 163238384Sjkim } 164238384Sjkim 165238384Sjkim/* To preserve endianness when using BN_bn2bin and BN_bin2bn */ 166238384Sjkimstatic void flip_endian(u8 *out, const u8 *in, unsigned len) 167238384Sjkim { 168238384Sjkim unsigned i; 169238384Sjkim for (i = 0; i < len; ++i) 170238384Sjkim out[i] = in[len-1-i]; 171238384Sjkim } 172238384Sjkim 173238384Sjkim/* BN_to_felem converts an OpenSSL BIGNUM into an felem */ 174238384Sjkimstatic int BN_to_felem(felem out, const BIGNUM *bn) 175238384Sjkim { 176238384Sjkim felem_bytearray b_in; 177238384Sjkim felem_bytearray b_out; 178238384Sjkim unsigned num_bytes; 179238384Sjkim 180238384Sjkim /* BN_bn2bin eats leading zeroes */ 181238384Sjkim memset(b_out, 0, sizeof b_out); 182238384Sjkim num_bytes = BN_num_bytes(bn); 183238384Sjkim if (num_bytes > sizeof b_out) 184238384Sjkim { 185238384Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 186238384Sjkim return 0; 187238384Sjkim } 188238384Sjkim if (BN_is_negative(bn)) 189238384Sjkim { 190238384Sjkim ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE); 191238384Sjkim return 0; 192238384Sjkim } 193238384Sjkim num_bytes = BN_bn2bin(bn, b_in); 194238384Sjkim flip_endian(b_out, b_in, num_bytes); 195238384Sjkim bin66_to_felem(out, b_out); 196238384Sjkim return 1; 197238384Sjkim } 198238384Sjkim 199238384Sjkim/* felem_to_BN converts an felem into an OpenSSL BIGNUM */ 200238384Sjkimstatic BIGNUM *felem_to_BN(BIGNUM *out, const felem in) 201238384Sjkim { 202238384Sjkim felem_bytearray b_in, b_out; 203238384Sjkim felem_to_bin66(b_in, in); 204238384Sjkim flip_endian(b_out, b_in, sizeof b_out); 205238384Sjkim return BN_bin2bn(b_out, sizeof b_out, out); 206238384Sjkim } 207238384Sjkim 208238384Sjkim 209238384Sjkim/* Field operations 210238384Sjkim * ---------------- */ 211238384Sjkim 212238384Sjkimstatic void felem_one(felem out) 213238384Sjkim { 214238384Sjkim out[0] = 1; 215238384Sjkim out[1] = 0; 216238384Sjkim out[2] = 0; 217238384Sjkim out[3] = 0; 218238384Sjkim out[4] = 0; 219238384Sjkim out[5] = 0; 220238384Sjkim out[6] = 0; 221238384Sjkim out[7] = 0; 222238384Sjkim out[8] = 0; 223238384Sjkim } 224238384Sjkim 225238384Sjkimstatic void felem_assign(felem out, const felem in) 226238384Sjkim { 227238384Sjkim out[0] = in[0]; 228238384Sjkim out[1] = in[1]; 229238384Sjkim out[2] = in[2]; 230238384Sjkim out[3] = in[3]; 231238384Sjkim out[4] = in[4]; 232238384Sjkim out[5] = in[5]; 233238384Sjkim out[6] = in[6]; 234238384Sjkim out[7] = in[7]; 235238384Sjkim out[8] = in[8]; 236238384Sjkim } 237238384Sjkim 238238384Sjkim/* felem_sum64 sets out = out + in. */ 239238384Sjkimstatic void felem_sum64(felem out, const felem in) 240238384Sjkim { 241238384Sjkim out[0] += in[0]; 242238384Sjkim out[1] += in[1]; 243238384Sjkim out[2] += in[2]; 244238384Sjkim out[3] += in[3]; 245238384Sjkim out[4] += in[4]; 246238384Sjkim out[5] += in[5]; 247238384Sjkim out[6] += in[6]; 248238384Sjkim out[7] += in[7]; 249238384Sjkim out[8] += in[8]; 250238384Sjkim } 251238384Sjkim 252238384Sjkim/* felem_scalar sets out = in * scalar */ 253238384Sjkimstatic void felem_scalar(felem out, const felem in, limb scalar) 254238384Sjkim { 255238384Sjkim out[0] = in[0] * scalar; 256238384Sjkim out[1] = in[1] * scalar; 257238384Sjkim out[2] = in[2] * scalar; 258238384Sjkim out[3] = in[3] * scalar; 259238384Sjkim out[4] = in[4] * scalar; 260238384Sjkim out[5] = in[5] * scalar; 261238384Sjkim out[6] = in[6] * scalar; 262238384Sjkim out[7] = in[7] * scalar; 263238384Sjkim out[8] = in[8] * scalar; 264238384Sjkim } 265238384Sjkim 266238384Sjkim/* felem_scalar64 sets out = out * scalar */ 267238384Sjkimstatic void felem_scalar64(felem out, limb scalar) 268238384Sjkim { 269238384Sjkim out[0] *= scalar; 270238384Sjkim out[1] *= scalar; 271238384Sjkim out[2] *= scalar; 272238384Sjkim out[3] *= scalar; 273238384Sjkim out[4] *= scalar; 274238384Sjkim out[5] *= scalar; 275238384Sjkim out[6] *= scalar; 276238384Sjkim out[7] *= scalar; 277238384Sjkim out[8] *= scalar; 278238384Sjkim } 279238384Sjkim 280238384Sjkim/* felem_scalar128 sets out = out * scalar */ 281238384Sjkimstatic void felem_scalar128(largefelem out, limb scalar) 282238384Sjkim { 283238384Sjkim out[0] *= scalar; 284238384Sjkim out[1] *= scalar; 285238384Sjkim out[2] *= scalar; 286238384Sjkim out[3] *= scalar; 287238384Sjkim out[4] *= scalar; 288238384Sjkim out[5] *= scalar; 289238384Sjkim out[6] *= scalar; 290238384Sjkim out[7] *= scalar; 291238384Sjkim out[8] *= scalar; 292238384Sjkim } 293238384Sjkim 294238384Sjkim/* felem_neg sets |out| to |-in| 295238384Sjkim * On entry: 296238384Sjkim * in[i] < 2^59 + 2^14 297238384Sjkim * On exit: 298238384Sjkim * out[i] < 2^62 299238384Sjkim */ 300238384Sjkimstatic void felem_neg(felem out, const felem in) 301238384Sjkim { 302238384Sjkim /* In order to prevent underflow, we subtract from 0 mod p. */ 303238384Sjkim static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5); 304238384Sjkim static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4); 305238384Sjkim 306238384Sjkim out[0] = two62m3 - in[0]; 307238384Sjkim out[1] = two62m2 - in[1]; 308238384Sjkim out[2] = two62m2 - in[2]; 309238384Sjkim out[3] = two62m2 - in[3]; 310238384Sjkim out[4] = two62m2 - in[4]; 311238384Sjkim out[5] = two62m2 - in[5]; 312238384Sjkim out[6] = two62m2 - in[6]; 313238384Sjkim out[7] = two62m2 - in[7]; 314238384Sjkim out[8] = two62m2 - in[8]; 315238384Sjkim } 316238384Sjkim 317238384Sjkim/* felem_diff64 subtracts |in| from |out| 318238384Sjkim * On entry: 319238384Sjkim * in[i] < 2^59 + 2^14 320238384Sjkim * On exit: 321238384Sjkim * out[i] < out[i] + 2^62 322238384Sjkim */ 323238384Sjkimstatic void felem_diff64(felem out, const felem in) 324238384Sjkim { 325238384Sjkim /* In order to prevent underflow, we add 0 mod p before subtracting. */ 326238384Sjkim static const limb two62m3 = (((limb)1) << 62) - (((limb)1) << 5); 327238384Sjkim static const limb two62m2 = (((limb)1) << 62) - (((limb)1) << 4); 328238384Sjkim 329238384Sjkim out[0] += two62m3 - in[0]; 330238384Sjkim out[1] += two62m2 - in[1]; 331238384Sjkim out[2] += two62m2 - in[2]; 332238384Sjkim out[3] += two62m2 - in[3]; 333238384Sjkim out[4] += two62m2 - in[4]; 334238384Sjkim out[5] += two62m2 - in[5]; 335238384Sjkim out[6] += two62m2 - in[6]; 336238384Sjkim out[7] += two62m2 - in[7]; 337238384Sjkim out[8] += two62m2 - in[8]; 338238384Sjkim } 339238384Sjkim 340238384Sjkim/* felem_diff_128_64 subtracts |in| from |out| 341238384Sjkim * On entry: 342238384Sjkim * in[i] < 2^62 + 2^17 343238384Sjkim * On exit: 344238384Sjkim * out[i] < out[i] + 2^63 345238384Sjkim */ 346238384Sjkimstatic void felem_diff_128_64(largefelem out, const felem in) 347238384Sjkim { 348238384Sjkim /* In order to prevent underflow, we add 0 mod p before subtracting. */ 349238384Sjkim static const limb two63m6 = (((limb)1) << 62) - (((limb)1) << 5); 350238384Sjkim static const limb two63m5 = (((limb)1) << 62) - (((limb)1) << 4); 351238384Sjkim 352238384Sjkim out[0] += two63m6 - in[0]; 353238384Sjkim out[1] += two63m5 - in[1]; 354238384Sjkim out[2] += two63m5 - in[2]; 355238384Sjkim out[3] += two63m5 - in[3]; 356238384Sjkim out[4] += two63m5 - in[4]; 357238384Sjkim out[5] += two63m5 - in[5]; 358238384Sjkim out[6] += two63m5 - in[6]; 359238384Sjkim out[7] += two63m5 - in[7]; 360238384Sjkim out[8] += two63m5 - in[8]; 361238384Sjkim } 362238384Sjkim 363238384Sjkim/* felem_diff_128_64 subtracts |in| from |out| 364238384Sjkim * On entry: 365238384Sjkim * in[i] < 2^126 366238384Sjkim * On exit: 367238384Sjkim * out[i] < out[i] + 2^127 - 2^69 368238384Sjkim */ 369238384Sjkimstatic void felem_diff128(largefelem out, const largefelem in) 370238384Sjkim { 371238384Sjkim /* In order to prevent underflow, we add 0 mod p before subtracting. */ 372238384Sjkim static const uint128_t two127m70 = (((uint128_t)1) << 127) - (((uint128_t)1) << 70); 373238384Sjkim static const uint128_t two127m69 = (((uint128_t)1) << 127) - (((uint128_t)1) << 69); 374238384Sjkim 375238384Sjkim out[0] += (two127m70 - in[0]); 376238384Sjkim out[1] += (two127m69 - in[1]); 377238384Sjkim out[2] += (two127m69 - in[2]); 378238384Sjkim out[3] += (two127m69 - in[3]); 379238384Sjkim out[4] += (two127m69 - in[4]); 380238384Sjkim out[5] += (two127m69 - in[5]); 381238384Sjkim out[6] += (two127m69 - in[6]); 382238384Sjkim out[7] += (two127m69 - in[7]); 383238384Sjkim out[8] += (two127m69 - in[8]); 384238384Sjkim } 385238384Sjkim 386238384Sjkim/* felem_square sets |out| = |in|^2 387238384Sjkim * On entry: 388238384Sjkim * in[i] < 2^62 389238384Sjkim * On exit: 390238384Sjkim * out[i] < 17 * max(in[i]) * max(in[i]) 391238384Sjkim */ 392238384Sjkimstatic void felem_square(largefelem out, const felem in) 393238384Sjkim { 394238384Sjkim felem inx2, inx4; 395238384Sjkim felem_scalar(inx2, in, 2); 396238384Sjkim felem_scalar(inx4, in, 4); 397238384Sjkim 398238384Sjkim /* We have many cases were we want to do 399238384Sjkim * in[x] * in[y] + 400238384Sjkim * in[y] * in[x] 401238384Sjkim * This is obviously just 402238384Sjkim * 2 * in[x] * in[y] 403238384Sjkim * However, rather than do the doubling on the 128 bit result, we 404238384Sjkim * double one of the inputs to the multiplication by reading from 405238384Sjkim * |inx2| */ 406238384Sjkim 407238384Sjkim out[0] = ((uint128_t) in[0]) * in[0]; 408238384Sjkim out[1] = ((uint128_t) in[0]) * inx2[1]; 409238384Sjkim out[2] = ((uint128_t) in[0]) * inx2[2] + 410238384Sjkim ((uint128_t) in[1]) * in[1]; 411238384Sjkim out[3] = ((uint128_t) in[0]) * inx2[3] + 412238384Sjkim ((uint128_t) in[1]) * inx2[2]; 413238384Sjkim out[4] = ((uint128_t) in[0]) * inx2[4] + 414238384Sjkim ((uint128_t) in[1]) * inx2[3] + 415238384Sjkim ((uint128_t) in[2]) * in[2]; 416238384Sjkim out[5] = ((uint128_t) in[0]) * inx2[5] + 417238384Sjkim ((uint128_t) in[1]) * inx2[4] + 418238384Sjkim ((uint128_t) in[2]) * inx2[3]; 419238384Sjkim out[6] = ((uint128_t) in[0]) * inx2[6] + 420238384Sjkim ((uint128_t) in[1]) * inx2[5] + 421238384Sjkim ((uint128_t) in[2]) * inx2[4] + 422238384Sjkim ((uint128_t) in[3]) * in[3]; 423238384Sjkim out[7] = ((uint128_t) in[0]) * inx2[7] + 424238384Sjkim ((uint128_t) in[1]) * inx2[6] + 425238384Sjkim ((uint128_t) in[2]) * inx2[5] + 426238384Sjkim ((uint128_t) in[3]) * inx2[4]; 427238384Sjkim out[8] = ((uint128_t) in[0]) * inx2[8] + 428238384Sjkim ((uint128_t) in[1]) * inx2[7] + 429238384Sjkim ((uint128_t) in[2]) * inx2[6] + 430238384Sjkim ((uint128_t) in[3]) * inx2[5] + 431238384Sjkim ((uint128_t) in[4]) * in[4]; 432238384Sjkim 433238384Sjkim /* The remaining limbs fall above 2^521, with the first falling at 434238384Sjkim * 2^522. They correspond to locations one bit up from the limbs 435238384Sjkim * produced above so we would have to multiply by two to align them. 436238384Sjkim * Again, rather than operate on the 128-bit result, we double one of 437238384Sjkim * the inputs to the multiplication. If we want to double for both this 438238384Sjkim * reason, and the reason above, then we end up multiplying by four. */ 439238384Sjkim 440238384Sjkim /* 9 */ 441238384Sjkim out[0] += ((uint128_t) in[1]) * inx4[8] + 442238384Sjkim ((uint128_t) in[2]) * inx4[7] + 443238384Sjkim ((uint128_t) in[3]) * inx4[6] + 444238384Sjkim ((uint128_t) in[4]) * inx4[5]; 445238384Sjkim 446238384Sjkim /* 10 */ 447238384Sjkim out[1] += ((uint128_t) in[2]) * inx4[8] + 448238384Sjkim ((uint128_t) in[3]) * inx4[7] + 449238384Sjkim ((uint128_t) in[4]) * inx4[6] + 450238384Sjkim ((uint128_t) in[5]) * inx2[5]; 451238384Sjkim 452238384Sjkim /* 11 */ 453238384Sjkim out[2] += ((uint128_t) in[3]) * inx4[8] + 454238384Sjkim ((uint128_t) in[4]) * inx4[7] + 455238384Sjkim ((uint128_t) in[5]) * inx4[6]; 456238384Sjkim 457238384Sjkim /* 12 */ 458238384Sjkim out[3] += ((uint128_t) in[4]) * inx4[8] + 459238384Sjkim ((uint128_t) in[5]) * inx4[7] + 460238384Sjkim ((uint128_t) in[6]) * inx2[6]; 461238384Sjkim 462238384Sjkim /* 13 */ 463238384Sjkim out[4] += ((uint128_t) in[5]) * inx4[8] + 464238384Sjkim ((uint128_t) in[6]) * inx4[7]; 465238384Sjkim 466238384Sjkim /* 14 */ 467238384Sjkim out[5] += ((uint128_t) in[6]) * inx4[8] + 468238384Sjkim ((uint128_t) in[7]) * inx2[7]; 469238384Sjkim 470238384Sjkim /* 15 */ 471238384Sjkim out[6] += ((uint128_t) in[7]) * inx4[8]; 472238384Sjkim 473238384Sjkim /* 16 */ 474238384Sjkim out[7] += ((uint128_t) in[8]) * inx2[8]; 475238384Sjkim } 476238384Sjkim 477238384Sjkim/* felem_mul sets |out| = |in1| * |in2| 478238384Sjkim * On entry: 479238384Sjkim * in1[i] < 2^64 480238384Sjkim * in2[i] < 2^63 481238384Sjkim * On exit: 482238384Sjkim * out[i] < 17 * max(in1[i]) * max(in2[i]) 483238384Sjkim */ 484238384Sjkimstatic void felem_mul(largefelem out, const felem in1, const felem in2) 485238384Sjkim { 486238384Sjkim felem in2x2; 487238384Sjkim felem_scalar(in2x2, in2, 2); 488238384Sjkim 489238384Sjkim out[0] = ((uint128_t) in1[0]) * in2[0]; 490238384Sjkim 491238384Sjkim out[1] = ((uint128_t) in1[0]) * in2[1] + 492238384Sjkim ((uint128_t) in1[1]) * in2[0]; 493238384Sjkim 494238384Sjkim out[2] = ((uint128_t) in1[0]) * in2[2] + 495238384Sjkim ((uint128_t) in1[1]) * in2[1] + 496238384Sjkim ((uint128_t) in1[2]) * in2[0]; 497238384Sjkim 498238384Sjkim out[3] = ((uint128_t) in1[0]) * in2[3] + 499238384Sjkim ((uint128_t) in1[1]) * in2[2] + 500238384Sjkim ((uint128_t) in1[2]) * in2[1] + 501238384Sjkim ((uint128_t) in1[3]) * in2[0]; 502238384Sjkim 503238384Sjkim out[4] = ((uint128_t) in1[0]) * in2[4] + 504238384Sjkim ((uint128_t) in1[1]) * in2[3] + 505238384Sjkim ((uint128_t) in1[2]) * in2[2] + 506238384Sjkim ((uint128_t) in1[3]) * in2[1] + 507238384Sjkim ((uint128_t) in1[4]) * in2[0]; 508238384Sjkim 509238384Sjkim out[5] = ((uint128_t) in1[0]) * in2[5] + 510238384Sjkim ((uint128_t) in1[1]) * in2[4] + 511238384Sjkim ((uint128_t) in1[2]) * in2[3] + 512238384Sjkim ((uint128_t) in1[3]) * in2[2] + 513238384Sjkim ((uint128_t) in1[4]) * in2[1] + 514238384Sjkim ((uint128_t) in1[5]) * in2[0]; 515238384Sjkim 516238384Sjkim out[6] = ((uint128_t) in1[0]) * in2[6] + 517238384Sjkim ((uint128_t) in1[1]) * in2[5] + 518238384Sjkim ((uint128_t) in1[2]) * in2[4] + 519238384Sjkim ((uint128_t) in1[3]) * in2[3] + 520238384Sjkim ((uint128_t) in1[4]) * in2[2] + 521238384Sjkim ((uint128_t) in1[5]) * in2[1] + 522238384Sjkim ((uint128_t) in1[6]) * in2[0]; 523238384Sjkim 524238384Sjkim out[7] = ((uint128_t) in1[0]) * in2[7] + 525238384Sjkim ((uint128_t) in1[1]) * in2[6] + 526238384Sjkim ((uint128_t) in1[2]) * in2[5] + 527238384Sjkim ((uint128_t) in1[3]) * in2[4] + 528238384Sjkim ((uint128_t) in1[4]) * in2[3] + 529238384Sjkim ((uint128_t) in1[5]) * in2[2] + 530238384Sjkim ((uint128_t) in1[6]) * in2[1] + 531238384Sjkim ((uint128_t) in1[7]) * in2[0]; 532238384Sjkim 533238384Sjkim out[8] = ((uint128_t) in1[0]) * in2[8] + 534238384Sjkim ((uint128_t) in1[1]) * in2[7] + 535238384Sjkim ((uint128_t) in1[2]) * in2[6] + 536238384Sjkim ((uint128_t) in1[3]) * in2[5] + 537238384Sjkim ((uint128_t) in1[4]) * in2[4] + 538238384Sjkim ((uint128_t) in1[5]) * in2[3] + 539238384Sjkim ((uint128_t) in1[6]) * in2[2] + 540238384Sjkim ((uint128_t) in1[7]) * in2[1] + 541238384Sjkim ((uint128_t) in1[8]) * in2[0]; 542238384Sjkim 543238384Sjkim /* See comment in felem_square about the use of in2x2 here */ 544238384Sjkim 545238384Sjkim out[0] += ((uint128_t) in1[1]) * in2x2[8] + 546238384Sjkim ((uint128_t) in1[2]) * in2x2[7] + 547238384Sjkim ((uint128_t) in1[3]) * in2x2[6] + 548238384Sjkim ((uint128_t) in1[4]) * in2x2[5] + 549238384Sjkim ((uint128_t) in1[5]) * in2x2[4] + 550238384Sjkim ((uint128_t) in1[6]) * in2x2[3] + 551238384Sjkim ((uint128_t) in1[7]) * in2x2[2] + 552238384Sjkim ((uint128_t) in1[8]) * in2x2[1]; 553238384Sjkim 554238384Sjkim out[1] += ((uint128_t) in1[2]) * in2x2[8] + 555238384Sjkim ((uint128_t) in1[3]) * in2x2[7] + 556238384Sjkim ((uint128_t) in1[4]) * in2x2[6] + 557238384Sjkim ((uint128_t) in1[5]) * in2x2[5] + 558238384Sjkim ((uint128_t) in1[6]) * in2x2[4] + 559238384Sjkim ((uint128_t) in1[7]) * in2x2[3] + 560238384Sjkim ((uint128_t) in1[8]) * in2x2[2]; 561238384Sjkim 562238384Sjkim out[2] += ((uint128_t) in1[3]) * in2x2[8] + 563238384Sjkim ((uint128_t) in1[4]) * in2x2[7] + 564238384Sjkim ((uint128_t) in1[5]) * in2x2[6] + 565238384Sjkim ((uint128_t) in1[6]) * in2x2[5] + 566238384Sjkim ((uint128_t) in1[7]) * in2x2[4] + 567238384Sjkim ((uint128_t) in1[8]) * in2x2[3]; 568238384Sjkim 569238384Sjkim out[3] += ((uint128_t) in1[4]) * in2x2[8] + 570238384Sjkim ((uint128_t) in1[5]) * in2x2[7] + 571238384Sjkim ((uint128_t) in1[6]) * in2x2[6] + 572238384Sjkim ((uint128_t) in1[7]) * in2x2[5] + 573238384Sjkim ((uint128_t) in1[8]) * in2x2[4]; 574238384Sjkim 575238384Sjkim out[4] += ((uint128_t) in1[5]) * in2x2[8] + 576238384Sjkim ((uint128_t) in1[6]) * in2x2[7] + 577238384Sjkim ((uint128_t) in1[7]) * in2x2[6] + 578238384Sjkim ((uint128_t) in1[8]) * in2x2[5]; 579238384Sjkim 580238384Sjkim out[5] += ((uint128_t) in1[6]) * in2x2[8] + 581238384Sjkim ((uint128_t) in1[7]) * in2x2[7] + 582238384Sjkim ((uint128_t) in1[8]) * in2x2[6]; 583238384Sjkim 584238384Sjkim out[6] += ((uint128_t) in1[7]) * in2x2[8] + 585238384Sjkim ((uint128_t) in1[8]) * in2x2[7]; 586238384Sjkim 587238384Sjkim out[7] += ((uint128_t) in1[8]) * in2x2[8]; 588238384Sjkim } 589238384Sjkim 590238384Sjkimstatic const limb bottom52bits = 0xfffffffffffff; 591238384Sjkim 592238384Sjkim/* felem_reduce converts a largefelem to an felem. 593238384Sjkim * On entry: 594238384Sjkim * in[i] < 2^128 595238384Sjkim * On exit: 596238384Sjkim * out[i] < 2^59 + 2^14 597238384Sjkim */ 598238384Sjkimstatic void felem_reduce(felem out, const largefelem in) 599238384Sjkim { 600238384Sjkim u64 overflow1, overflow2; 601238384Sjkim 602238384Sjkim out[0] = ((limb) in[0]) & bottom58bits; 603238384Sjkim out[1] = ((limb) in[1]) & bottom58bits; 604238384Sjkim out[2] = ((limb) in[2]) & bottom58bits; 605238384Sjkim out[3] = ((limb) in[3]) & bottom58bits; 606238384Sjkim out[4] = ((limb) in[4]) & bottom58bits; 607238384Sjkim out[5] = ((limb) in[5]) & bottom58bits; 608238384Sjkim out[6] = ((limb) in[6]) & bottom58bits; 609238384Sjkim out[7] = ((limb) in[7]) & bottom58bits; 610238384Sjkim out[8] = ((limb) in[8]) & bottom58bits; 611238384Sjkim 612238384Sjkim /* out[i] < 2^58 */ 613238384Sjkim 614238384Sjkim out[1] += ((limb) in[0]) >> 58; 615238384Sjkim out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6; 616238384Sjkim /* out[1] < 2^58 + 2^6 + 2^58 617238384Sjkim * = 2^59 + 2^6 */ 618238384Sjkim out[2] += ((limb) (in[0] >> 64)) >> 52; 619238384Sjkim 620238384Sjkim out[2] += ((limb) in[1]) >> 58; 621238384Sjkim out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6; 622238384Sjkim out[3] += ((limb) (in[1] >> 64)) >> 52; 623238384Sjkim 624238384Sjkim out[3] += ((limb) in[2]) >> 58; 625238384Sjkim out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6; 626238384Sjkim out[4] += ((limb) (in[2] >> 64)) >> 52; 627238384Sjkim 628238384Sjkim out[4] += ((limb) in[3]) >> 58; 629238384Sjkim out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6; 630238384Sjkim out[5] += ((limb) (in[3] >> 64)) >> 52; 631238384Sjkim 632238384Sjkim out[5] += ((limb) in[4]) >> 58; 633238384Sjkim out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6; 634238384Sjkim out[6] += ((limb) (in[4] >> 64)) >> 52; 635238384Sjkim 636238384Sjkim out[6] += ((limb) in[5]) >> 58; 637238384Sjkim out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6; 638238384Sjkim out[7] += ((limb) (in[5] >> 64)) >> 52; 639238384Sjkim 640238384Sjkim out[7] += ((limb) in[6]) >> 58; 641238384Sjkim out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6; 642238384Sjkim out[8] += ((limb) (in[6] >> 64)) >> 52; 643238384Sjkim 644238384Sjkim out[8] += ((limb) in[7]) >> 58; 645238384Sjkim out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6; 646238384Sjkim /* out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12 647238384Sjkim * < 2^59 + 2^13 */ 648238384Sjkim overflow1 = ((limb) (in[7] >> 64)) >> 52; 649238384Sjkim 650238384Sjkim overflow1 += ((limb) in[8]) >> 58; 651238384Sjkim overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6; 652238384Sjkim overflow2 = ((limb) (in[8] >> 64)) >> 52; 653238384Sjkim 654238384Sjkim overflow1 <<= 1; /* overflow1 < 2^13 + 2^7 + 2^59 */ 655238384Sjkim overflow2 <<= 1; /* overflow2 < 2^13 */ 656238384Sjkim 657238384Sjkim out[0] += overflow1; /* out[0] < 2^60 */ 658238384Sjkim out[1] += overflow2; /* out[1] < 2^59 + 2^6 + 2^13 */ 659238384Sjkim 660238384Sjkim out[1] += out[0] >> 58; out[0] &= bottom58bits; 661238384Sjkim /* out[0] < 2^58 662238384Sjkim * out[1] < 2^59 + 2^6 + 2^13 + 2^2 663238384Sjkim * < 2^59 + 2^14 */ 664238384Sjkim } 665238384Sjkim 666238384Sjkimstatic void felem_square_reduce(felem out, const felem in) 667238384Sjkim { 668238384Sjkim largefelem tmp; 669238384Sjkim felem_square(tmp, in); 670238384Sjkim felem_reduce(out, tmp); 671238384Sjkim } 672238384Sjkim 673238384Sjkimstatic void felem_mul_reduce(felem out, const felem in1, const felem in2) 674238384Sjkim { 675238384Sjkim largefelem tmp; 676238384Sjkim felem_mul(tmp, in1, in2); 677238384Sjkim felem_reduce(out, tmp); 678238384Sjkim } 679238384Sjkim 680238384Sjkim/* felem_inv calculates |out| = |in|^{-1} 681238384Sjkim * 682238384Sjkim * Based on Fermat's Little Theorem: 683238384Sjkim * a^p = a (mod p) 684238384Sjkim * a^{p-1} = 1 (mod p) 685238384Sjkim * a^{p-2} = a^{-1} (mod p) 686238384Sjkim */ 687238384Sjkimstatic void felem_inv(felem out, const felem in) 688238384Sjkim { 689238384Sjkim felem ftmp, ftmp2, ftmp3, ftmp4; 690238384Sjkim largefelem tmp; 691238384Sjkim unsigned i; 692238384Sjkim 693238384Sjkim felem_square(tmp, in); felem_reduce(ftmp, tmp); /* 2^1 */ 694238384Sjkim felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^2 - 2^0 */ 695238384Sjkim felem_assign(ftmp2, ftmp); 696238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^1 */ 697238384Sjkim felem_mul(tmp, in, ftmp); felem_reduce(ftmp, tmp); /* 2^3 - 2^0 */ 698238384Sjkim felem_square(tmp, ftmp); felem_reduce(ftmp, tmp); /* 2^4 - 2^1 */ 699238384Sjkim 700238384Sjkim felem_square(tmp, ftmp2); felem_reduce(ftmp3, tmp); /* 2^3 - 2^1 */ 701238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^4 - 2^2 */ 702238384Sjkim felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^4 - 2^0 */ 703238384Sjkim 704238384Sjkim felem_assign(ftmp2, ftmp3); 705238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^5 - 2^1 */ 706238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^6 - 2^2 */ 707238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^7 - 2^3 */ 708238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^8 - 2^4 */ 709238384Sjkim felem_assign(ftmp4, ftmp3); 710238384Sjkim felem_mul(tmp, ftmp3, ftmp); felem_reduce(ftmp4, tmp); /* 2^8 - 2^1 */ 711238384Sjkim felem_square(tmp, ftmp4); felem_reduce(ftmp4, tmp); /* 2^9 - 2^2 */ 712238384Sjkim felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^8 - 2^0 */ 713238384Sjkim felem_assign(ftmp2, ftmp3); 714238384Sjkim 715238384Sjkim for (i = 0; i < 8; i++) 716238384Sjkim { 717238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */ 718238384Sjkim } 719238384Sjkim felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^16 - 2^0 */ 720238384Sjkim felem_assign(ftmp2, ftmp3); 721238384Sjkim 722238384Sjkim for (i = 0; i < 16; i++) 723238384Sjkim { 724238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */ 725238384Sjkim } 726238384Sjkim felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^32 - 2^0 */ 727238384Sjkim felem_assign(ftmp2, ftmp3); 728238384Sjkim 729238384Sjkim for (i = 0; i < 32; i++) 730238384Sjkim { 731238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */ 732238384Sjkim } 733238384Sjkim felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^64 - 2^0 */ 734238384Sjkim felem_assign(ftmp2, ftmp3); 735238384Sjkim 736238384Sjkim for (i = 0; i < 64; i++) 737238384Sjkim { 738238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */ 739238384Sjkim } 740238384Sjkim felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^128 - 2^0 */ 741238384Sjkim felem_assign(ftmp2, ftmp3); 742238384Sjkim 743238384Sjkim for (i = 0; i < 128; i++) 744238384Sjkim { 745238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */ 746238384Sjkim } 747238384Sjkim felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^256 - 2^0 */ 748238384Sjkim felem_assign(ftmp2, ftmp3); 749238384Sjkim 750238384Sjkim for (i = 0; i < 256; i++) 751238384Sjkim { 752238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */ 753238384Sjkim } 754238384Sjkim felem_mul(tmp, ftmp3, ftmp2); felem_reduce(ftmp3, tmp); /* 2^512 - 2^0 */ 755238384Sjkim 756238384Sjkim for (i = 0; i < 9; i++) 757238384Sjkim { 758238384Sjkim felem_square(tmp, ftmp3); felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */ 759238384Sjkim } 760238384Sjkim felem_mul(tmp, ftmp3, ftmp4); felem_reduce(ftmp3, tmp); /* 2^512 - 2^2 */ 761238384Sjkim felem_mul(tmp, ftmp3, in); felem_reduce(out, tmp); /* 2^512 - 3 */ 762238384Sjkim} 763238384Sjkim 764238384Sjkim/* This is 2^521-1, expressed as an felem */ 765238384Sjkimstatic const felem kPrime = 766238384Sjkim { 767238384Sjkim 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 768238384Sjkim 0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff, 769238384Sjkim 0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff 770238384Sjkim }; 771238384Sjkim 772238384Sjkim/* felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0 773238384Sjkim * otherwise. 774238384Sjkim * On entry: 775238384Sjkim * in[i] < 2^59 + 2^14 776238384Sjkim */ 777238384Sjkimstatic limb felem_is_zero(const felem in) 778238384Sjkim { 779238384Sjkim felem ftmp; 780238384Sjkim limb is_zero, is_p; 781238384Sjkim felem_assign(ftmp, in); 782238384Sjkim 783238384Sjkim ftmp[0] += ftmp[8] >> 57; ftmp[8] &= bottom57bits; 784238384Sjkim /* ftmp[8] < 2^57 */ 785238384Sjkim ftmp[1] += ftmp[0] >> 58; ftmp[0] &= bottom58bits; 786238384Sjkim ftmp[2] += ftmp[1] >> 58; ftmp[1] &= bottom58bits; 787238384Sjkim ftmp[3] += ftmp[2] >> 58; ftmp[2] &= bottom58bits; 788238384Sjkim ftmp[4] += ftmp[3] >> 58; ftmp[3] &= bottom58bits; 789238384Sjkim ftmp[5] += ftmp[4] >> 58; ftmp[4] &= bottom58bits; 790238384Sjkim ftmp[6] += ftmp[5] >> 58; ftmp[5] &= bottom58bits; 791238384Sjkim ftmp[7] += ftmp[6] >> 58; ftmp[6] &= bottom58bits; 792238384Sjkim ftmp[8] += ftmp[7] >> 58; ftmp[7] &= bottom58bits; 793238384Sjkim /* ftmp[8] < 2^57 + 4 */ 794238384Sjkim 795238384Sjkim /* The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is 796238384Sjkim * greater than our bound for ftmp[8]. Therefore we only have to check 797238384Sjkim * if the zero is zero or 2^521-1. */ 798238384Sjkim 799238384Sjkim is_zero = 0; 800238384Sjkim is_zero |= ftmp[0]; 801238384Sjkim is_zero |= ftmp[1]; 802238384Sjkim is_zero |= ftmp[2]; 803238384Sjkim is_zero |= ftmp[3]; 804238384Sjkim is_zero |= ftmp[4]; 805238384Sjkim is_zero |= ftmp[5]; 806238384Sjkim is_zero |= ftmp[6]; 807238384Sjkim is_zero |= ftmp[7]; 808238384Sjkim is_zero |= ftmp[8]; 809238384Sjkim 810238384Sjkim is_zero--; 811238384Sjkim /* We know that ftmp[i] < 2^63, therefore the only way that the top bit 812238384Sjkim * can be set is if is_zero was 0 before the decrement. */ 813238384Sjkim is_zero = ((s64) is_zero) >> 63; 814238384Sjkim 815238384Sjkim is_p = ftmp[0] ^ kPrime[0]; 816238384Sjkim is_p |= ftmp[1] ^ kPrime[1]; 817238384Sjkim is_p |= ftmp[2] ^ kPrime[2]; 818238384Sjkim is_p |= ftmp[3] ^ kPrime[3]; 819238384Sjkim is_p |= ftmp[4] ^ kPrime[4]; 820238384Sjkim is_p |= ftmp[5] ^ kPrime[5]; 821238384Sjkim is_p |= ftmp[6] ^ kPrime[6]; 822238384Sjkim is_p |= ftmp[7] ^ kPrime[7]; 823238384Sjkim is_p |= ftmp[8] ^ kPrime[8]; 824238384Sjkim 825238384Sjkim is_p--; 826238384Sjkim is_p = ((s64) is_p) >> 63; 827238384Sjkim 828238384Sjkim is_zero |= is_p; 829238384Sjkim return is_zero; 830238384Sjkim } 831238384Sjkim 832238384Sjkimstatic int felem_is_zero_int(const felem in) 833238384Sjkim { 834238384Sjkim return (int) (felem_is_zero(in) & ((limb)1)); 835238384Sjkim } 836238384Sjkim 837238384Sjkim/* felem_contract converts |in| to its unique, minimal representation. 838238384Sjkim * On entry: 839238384Sjkim * in[i] < 2^59 + 2^14 840238384Sjkim */ 841238384Sjkimstatic void felem_contract(felem out, const felem in) 842238384Sjkim { 843238384Sjkim limb is_p, is_greater, sign; 844238384Sjkim static const limb two58 = ((limb)1) << 58; 845238384Sjkim 846238384Sjkim felem_assign(out, in); 847238384Sjkim 848238384Sjkim out[0] += out[8] >> 57; out[8] &= bottom57bits; 849238384Sjkim /* out[8] < 2^57 */ 850238384Sjkim out[1] += out[0] >> 58; out[0] &= bottom58bits; 851238384Sjkim out[2] += out[1] >> 58; out[1] &= bottom58bits; 852238384Sjkim out[3] += out[2] >> 58; out[2] &= bottom58bits; 853238384Sjkim out[4] += out[3] >> 58; out[3] &= bottom58bits; 854238384Sjkim out[5] += out[4] >> 58; out[4] &= bottom58bits; 855238384Sjkim out[6] += out[5] >> 58; out[5] &= bottom58bits; 856238384Sjkim out[7] += out[6] >> 58; out[6] &= bottom58bits; 857238384Sjkim out[8] += out[7] >> 58; out[7] &= bottom58bits; 858238384Sjkim /* out[8] < 2^57 + 4 */ 859238384Sjkim 860238384Sjkim /* If the value is greater than 2^521-1 then we have to subtract 861238384Sjkim * 2^521-1 out. See the comments in felem_is_zero regarding why we 862238384Sjkim * don't test for other multiples of the prime. */ 863238384Sjkim 864238384Sjkim /* First, if |out| is equal to 2^521-1, we subtract it out to get zero. */ 865238384Sjkim 866238384Sjkim is_p = out[0] ^ kPrime[0]; 867238384Sjkim is_p |= out[1] ^ kPrime[1]; 868238384Sjkim is_p |= out[2] ^ kPrime[2]; 869238384Sjkim is_p |= out[3] ^ kPrime[3]; 870238384Sjkim is_p |= out[4] ^ kPrime[4]; 871238384Sjkim is_p |= out[5] ^ kPrime[5]; 872238384Sjkim is_p |= out[6] ^ kPrime[6]; 873238384Sjkim is_p |= out[7] ^ kPrime[7]; 874238384Sjkim is_p |= out[8] ^ kPrime[8]; 875238384Sjkim 876238384Sjkim is_p--; 877238384Sjkim is_p &= is_p << 32; 878238384Sjkim is_p &= is_p << 16; 879238384Sjkim is_p &= is_p << 8; 880238384Sjkim is_p &= is_p << 4; 881238384Sjkim is_p &= is_p << 2; 882238384Sjkim is_p &= is_p << 1; 883238384Sjkim is_p = ((s64) is_p) >> 63; 884238384Sjkim is_p = ~is_p; 885238384Sjkim 886238384Sjkim /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */ 887238384Sjkim 888238384Sjkim out[0] &= is_p; 889238384Sjkim out[1] &= is_p; 890238384Sjkim out[2] &= is_p; 891238384Sjkim out[3] &= is_p; 892238384Sjkim out[4] &= is_p; 893238384Sjkim out[5] &= is_p; 894238384Sjkim out[6] &= is_p; 895238384Sjkim out[7] &= is_p; 896238384Sjkim out[8] &= is_p; 897238384Sjkim 898238384Sjkim /* In order to test that |out| >= 2^521-1 we need only test if out[8] 899238384Sjkim * >> 57 is greater than zero as (2^521-1) + x >= 2^522 */ 900238384Sjkim is_greater = out[8] >> 57; 901238384Sjkim is_greater |= is_greater << 32; 902238384Sjkim is_greater |= is_greater << 16; 903238384Sjkim is_greater |= is_greater << 8; 904238384Sjkim is_greater |= is_greater << 4; 905238384Sjkim is_greater |= is_greater << 2; 906238384Sjkim is_greater |= is_greater << 1; 907238384Sjkim is_greater = ((s64) is_greater) >> 63; 908238384Sjkim 909238384Sjkim out[0] -= kPrime[0] & is_greater; 910238384Sjkim out[1] -= kPrime[1] & is_greater; 911238384Sjkim out[2] -= kPrime[2] & is_greater; 912238384Sjkim out[3] -= kPrime[3] & is_greater; 913238384Sjkim out[4] -= kPrime[4] & is_greater; 914238384Sjkim out[5] -= kPrime[5] & is_greater; 915238384Sjkim out[6] -= kPrime[6] & is_greater; 916238384Sjkim out[7] -= kPrime[7] & is_greater; 917238384Sjkim out[8] -= kPrime[8] & is_greater; 918238384Sjkim 919238384Sjkim /* Eliminate negative coefficients */ 920238384Sjkim sign = -(out[0] >> 63); out[0] += (two58 & sign); out[1] -= (1 & sign); 921238384Sjkim sign = -(out[1] >> 63); out[1] += (two58 & sign); out[2] -= (1 & sign); 922238384Sjkim sign = -(out[2] >> 63); out[2] += (two58 & sign); out[3] -= (1 & sign); 923238384Sjkim sign = -(out[3] >> 63); out[3] += (two58 & sign); out[4] -= (1 & sign); 924238384Sjkim sign = -(out[4] >> 63); out[4] += (two58 & sign); out[5] -= (1 & sign); 925238384Sjkim sign = -(out[0] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign); 926238384Sjkim sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign); 927238384Sjkim sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign); 928238384Sjkim sign = -(out[5] >> 63); out[5] += (two58 & sign); out[6] -= (1 & sign); 929238384Sjkim sign = -(out[6] >> 63); out[6] += (two58 & sign); out[7] -= (1 & sign); 930238384Sjkim sign = -(out[7] >> 63); out[7] += (two58 & sign); out[8] -= (1 & sign); 931238384Sjkim } 932238384Sjkim 933238384Sjkim/* Group operations 934238384Sjkim * ---------------- 935238384Sjkim * 936238384Sjkim * Building on top of the field operations we have the operations on the 937238384Sjkim * elliptic curve group itself. Points on the curve are represented in Jacobian 938238384Sjkim * coordinates */ 939238384Sjkim 940238384Sjkim/* point_double calcuates 2*(x_in, y_in, z_in) 941238384Sjkim * 942238384Sjkim * The method is taken from: 943238384Sjkim * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b 944238384Sjkim * 945238384Sjkim * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed. 946238384Sjkim * while x_out == y_in is not (maybe this works, but it's not tested). */ 947238384Sjkimstatic void 948238384Sjkimpoint_double(felem x_out, felem y_out, felem z_out, 949238384Sjkim const felem x_in, const felem y_in, const felem z_in) 950238384Sjkim { 951238384Sjkim largefelem tmp, tmp2; 952238384Sjkim felem delta, gamma, beta, alpha, ftmp, ftmp2; 953238384Sjkim 954238384Sjkim felem_assign(ftmp, x_in); 955238384Sjkim felem_assign(ftmp2, x_in); 956238384Sjkim 957238384Sjkim /* delta = z^2 */ 958238384Sjkim felem_square(tmp, z_in); 959238384Sjkim felem_reduce(delta, tmp); /* delta[i] < 2^59 + 2^14 */ 960238384Sjkim 961238384Sjkim /* gamma = y^2 */ 962238384Sjkim felem_square(tmp, y_in); 963238384Sjkim felem_reduce(gamma, tmp); /* gamma[i] < 2^59 + 2^14 */ 964238384Sjkim 965238384Sjkim /* beta = x*gamma */ 966238384Sjkim felem_mul(tmp, x_in, gamma); 967238384Sjkim felem_reduce(beta, tmp); /* beta[i] < 2^59 + 2^14 */ 968238384Sjkim 969238384Sjkim /* alpha = 3*(x-delta)*(x+delta) */ 970238384Sjkim felem_diff64(ftmp, delta); 971238384Sjkim /* ftmp[i] < 2^61 */ 972238384Sjkim felem_sum64(ftmp2, delta); 973238384Sjkim /* ftmp2[i] < 2^60 + 2^15 */ 974238384Sjkim felem_scalar64(ftmp2, 3); 975238384Sjkim /* ftmp2[i] < 3*2^60 + 3*2^15 */ 976238384Sjkim felem_mul(tmp, ftmp, ftmp2); 977238384Sjkim /* tmp[i] < 17(3*2^121 + 3*2^76) 978238384Sjkim * = 61*2^121 + 61*2^76 979238384Sjkim * < 64*2^121 + 64*2^76 980238384Sjkim * = 2^127 + 2^82 981238384Sjkim * < 2^128 */ 982238384Sjkim felem_reduce(alpha, tmp); 983238384Sjkim 984238384Sjkim /* x' = alpha^2 - 8*beta */ 985238384Sjkim felem_square(tmp, alpha); 986238384Sjkim /* tmp[i] < 17*2^120 987238384Sjkim * < 2^125 */ 988238384Sjkim felem_assign(ftmp, beta); 989238384Sjkim felem_scalar64(ftmp, 8); 990238384Sjkim /* ftmp[i] < 2^62 + 2^17 */ 991238384Sjkim felem_diff_128_64(tmp, ftmp); 992238384Sjkim /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */ 993238384Sjkim felem_reduce(x_out, tmp); 994238384Sjkim 995238384Sjkim /* z' = (y + z)^2 - gamma - delta */ 996238384Sjkim felem_sum64(delta, gamma); 997238384Sjkim /* delta[i] < 2^60 + 2^15 */ 998238384Sjkim felem_assign(ftmp, y_in); 999238384Sjkim felem_sum64(ftmp, z_in); 1000238384Sjkim /* ftmp[i] < 2^60 + 2^15 */ 1001238384Sjkim felem_square(tmp, ftmp); 1002238384Sjkim /* tmp[i] < 17(2^122) 1003238384Sjkim * < 2^127 */ 1004238384Sjkim felem_diff_128_64(tmp, delta); 1005238384Sjkim /* tmp[i] < 2^127 + 2^63 */ 1006238384Sjkim felem_reduce(z_out, tmp); 1007238384Sjkim 1008238384Sjkim /* y' = alpha*(4*beta - x') - 8*gamma^2 */ 1009238384Sjkim felem_scalar64(beta, 4); 1010238384Sjkim /* beta[i] < 2^61 + 2^16 */ 1011238384Sjkim felem_diff64(beta, x_out); 1012238384Sjkim /* beta[i] < 2^61 + 2^60 + 2^16 */ 1013238384Sjkim felem_mul(tmp, alpha, beta); 1014238384Sjkim /* tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16)) 1015238384Sjkim * = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30) 1016238384Sjkim * = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30) 1017238384Sjkim * < 2^128 */ 1018238384Sjkim felem_square(tmp2, gamma); 1019238384Sjkim /* tmp2[i] < 17*(2^59 + 2^14)^2 1020238384Sjkim * = 17*(2^118 + 2^74 + 2^28) */ 1021238384Sjkim felem_scalar128(tmp2, 8); 1022238384Sjkim /* tmp2[i] < 8*17*(2^118 + 2^74 + 2^28) 1023238384Sjkim * = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31 1024238384Sjkim * < 2^126 */ 1025238384Sjkim felem_diff128(tmp, tmp2); 1026238384Sjkim /* tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30) 1027238384Sjkim * = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 + 1028238384Sjkim * 2^74 + 2^69 + 2^34 + 2^30 1029238384Sjkim * < 2^128 */ 1030238384Sjkim felem_reduce(y_out, tmp); 1031238384Sjkim } 1032238384Sjkim 1033238384Sjkim/* copy_conditional copies in to out iff mask is all ones. */ 1034238384Sjkimstatic void 1035238384Sjkimcopy_conditional(felem out, const felem in, limb mask) 1036238384Sjkim { 1037238384Sjkim unsigned i; 1038238384Sjkim for (i = 0; i < NLIMBS; ++i) 1039238384Sjkim { 1040238384Sjkim const limb tmp = mask & (in[i] ^ out[i]); 1041238384Sjkim out[i] ^= tmp; 1042238384Sjkim } 1043238384Sjkim } 1044238384Sjkim 1045238384Sjkim/* point_add calcuates (x1, y1, z1) + (x2, y2, z2) 1046238384Sjkim * 1047238384Sjkim * The method is taken from 1048238384Sjkim * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl, 1049238384Sjkim * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity). 1050238384Sjkim * 1051238384Sjkim * This function includes a branch for checking whether the two input points 1052238384Sjkim * are equal (while not equal to the point at infinity). This case never 1053238384Sjkim * happens during single point multiplication, so there is no timing leak for 1054238384Sjkim * ECDH or ECDSA signing. */ 1055238384Sjkimstatic void point_add(felem x3, felem y3, felem z3, 1056238384Sjkim const felem x1, const felem y1, const felem z1, 1057238384Sjkim const int mixed, const felem x2, const felem y2, const felem z2) 1058238384Sjkim { 1059238384Sjkim felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out; 1060238384Sjkim largefelem tmp, tmp2; 1061238384Sjkim limb x_equal, y_equal, z1_is_zero, z2_is_zero; 1062238384Sjkim 1063238384Sjkim z1_is_zero = felem_is_zero(z1); 1064238384Sjkim z2_is_zero = felem_is_zero(z2); 1065238384Sjkim 1066238384Sjkim /* ftmp = z1z1 = z1**2 */ 1067238384Sjkim felem_square(tmp, z1); 1068238384Sjkim felem_reduce(ftmp, tmp); 1069238384Sjkim 1070238384Sjkim if (!mixed) 1071238384Sjkim { 1072238384Sjkim /* ftmp2 = z2z2 = z2**2 */ 1073238384Sjkim felem_square(tmp, z2); 1074238384Sjkim felem_reduce(ftmp2, tmp); 1075238384Sjkim 1076238384Sjkim /* u1 = ftmp3 = x1*z2z2 */ 1077238384Sjkim felem_mul(tmp, x1, ftmp2); 1078238384Sjkim felem_reduce(ftmp3, tmp); 1079238384Sjkim 1080238384Sjkim /* ftmp5 = z1 + z2 */ 1081238384Sjkim felem_assign(ftmp5, z1); 1082238384Sjkim felem_sum64(ftmp5, z2); 1083238384Sjkim /* ftmp5[i] < 2^61 */ 1084238384Sjkim 1085238384Sjkim /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */ 1086238384Sjkim felem_square(tmp, ftmp5); 1087238384Sjkim /* tmp[i] < 17*2^122 */ 1088238384Sjkim felem_diff_128_64(tmp, ftmp); 1089238384Sjkim /* tmp[i] < 17*2^122 + 2^63 */ 1090238384Sjkim felem_diff_128_64(tmp, ftmp2); 1091238384Sjkim /* tmp[i] < 17*2^122 + 2^64 */ 1092238384Sjkim felem_reduce(ftmp5, tmp); 1093238384Sjkim 1094238384Sjkim /* ftmp2 = z2 * z2z2 */ 1095238384Sjkim felem_mul(tmp, ftmp2, z2); 1096238384Sjkim felem_reduce(ftmp2, tmp); 1097238384Sjkim 1098238384Sjkim /* s1 = ftmp6 = y1 * z2**3 */ 1099238384Sjkim felem_mul(tmp, y1, ftmp2); 1100238384Sjkim felem_reduce(ftmp6, tmp); 1101238384Sjkim } 1102238384Sjkim else 1103238384Sjkim { 1104238384Sjkim /* We'll assume z2 = 1 (special case z2 = 0 is handled later) */ 1105238384Sjkim 1106238384Sjkim /* u1 = ftmp3 = x1*z2z2 */ 1107238384Sjkim felem_assign(ftmp3, x1); 1108238384Sjkim 1109238384Sjkim /* ftmp5 = 2*z1z2 */ 1110238384Sjkim felem_scalar(ftmp5, z1, 2); 1111238384Sjkim 1112238384Sjkim /* s1 = ftmp6 = y1 * z2**3 */ 1113238384Sjkim felem_assign(ftmp6, y1); 1114238384Sjkim } 1115238384Sjkim 1116238384Sjkim /* u2 = x2*z1z1 */ 1117238384Sjkim felem_mul(tmp, x2, ftmp); 1118238384Sjkim /* tmp[i] < 17*2^120 */ 1119238384Sjkim 1120238384Sjkim /* h = ftmp4 = u2 - u1 */ 1121238384Sjkim felem_diff_128_64(tmp, ftmp3); 1122238384Sjkim /* tmp[i] < 17*2^120 + 2^63 */ 1123238384Sjkim felem_reduce(ftmp4, tmp); 1124238384Sjkim 1125238384Sjkim x_equal = felem_is_zero(ftmp4); 1126238384Sjkim 1127238384Sjkim /* z_out = ftmp5 * h */ 1128238384Sjkim felem_mul(tmp, ftmp5, ftmp4); 1129238384Sjkim felem_reduce(z_out, tmp); 1130238384Sjkim 1131238384Sjkim /* ftmp = z1 * z1z1 */ 1132238384Sjkim felem_mul(tmp, ftmp, z1); 1133238384Sjkim felem_reduce(ftmp, tmp); 1134238384Sjkim 1135238384Sjkim /* s2 = tmp = y2 * z1**3 */ 1136238384Sjkim felem_mul(tmp, y2, ftmp); 1137238384Sjkim /* tmp[i] < 17*2^120 */ 1138238384Sjkim 1139238384Sjkim /* r = ftmp5 = (s2 - s1)*2 */ 1140238384Sjkim felem_diff_128_64(tmp, ftmp6); 1141238384Sjkim /* tmp[i] < 17*2^120 + 2^63 */ 1142238384Sjkim felem_reduce(ftmp5, tmp); 1143238384Sjkim y_equal = felem_is_zero(ftmp5); 1144238384Sjkim felem_scalar64(ftmp5, 2); 1145238384Sjkim /* ftmp5[i] < 2^61 */ 1146238384Sjkim 1147238384Sjkim if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) 1148238384Sjkim { 1149238384Sjkim point_double(x3, y3, z3, x1, y1, z1); 1150238384Sjkim return; 1151238384Sjkim } 1152238384Sjkim 1153238384Sjkim /* I = ftmp = (2h)**2 */ 1154238384Sjkim felem_assign(ftmp, ftmp4); 1155238384Sjkim felem_scalar64(ftmp, 2); 1156238384Sjkim /* ftmp[i] < 2^61 */ 1157238384Sjkim felem_square(tmp, ftmp); 1158238384Sjkim /* tmp[i] < 17*2^122 */ 1159238384Sjkim felem_reduce(ftmp, tmp); 1160238384Sjkim 1161238384Sjkim /* J = ftmp2 = h * I */ 1162238384Sjkim felem_mul(tmp, ftmp4, ftmp); 1163238384Sjkim felem_reduce(ftmp2, tmp); 1164238384Sjkim 1165238384Sjkim /* V = ftmp4 = U1 * I */ 1166238384Sjkim felem_mul(tmp, ftmp3, ftmp); 1167238384Sjkim felem_reduce(ftmp4, tmp); 1168238384Sjkim 1169238384Sjkim /* x_out = r**2 - J - 2V */ 1170238384Sjkim felem_square(tmp, ftmp5); 1171238384Sjkim /* tmp[i] < 17*2^122 */ 1172238384Sjkim felem_diff_128_64(tmp, ftmp2); 1173238384Sjkim /* tmp[i] < 17*2^122 + 2^63 */ 1174238384Sjkim felem_assign(ftmp3, ftmp4); 1175238384Sjkim felem_scalar64(ftmp4, 2); 1176238384Sjkim /* ftmp4[i] < 2^61 */ 1177238384Sjkim felem_diff_128_64(tmp, ftmp4); 1178238384Sjkim /* tmp[i] < 17*2^122 + 2^64 */ 1179238384Sjkim felem_reduce(x_out, tmp); 1180238384Sjkim 1181238384Sjkim /* y_out = r(V-x_out) - 2 * s1 * J */ 1182238384Sjkim felem_diff64(ftmp3, x_out); 1183238384Sjkim /* ftmp3[i] < 2^60 + 2^60 1184238384Sjkim * = 2^61 */ 1185238384Sjkim felem_mul(tmp, ftmp5, ftmp3); 1186238384Sjkim /* tmp[i] < 17*2^122 */ 1187238384Sjkim felem_mul(tmp2, ftmp6, ftmp2); 1188238384Sjkim /* tmp2[i] < 17*2^120 */ 1189238384Sjkim felem_scalar128(tmp2, 2); 1190238384Sjkim /* tmp2[i] < 17*2^121 */ 1191238384Sjkim felem_diff128(tmp, tmp2); 1192238384Sjkim /* tmp[i] < 2^127 - 2^69 + 17*2^122 1193238384Sjkim * = 2^126 - 2^122 - 2^6 - 2^2 - 1 1194238384Sjkim * < 2^127 */ 1195238384Sjkim felem_reduce(y_out, tmp); 1196238384Sjkim 1197238384Sjkim copy_conditional(x_out, x2, z1_is_zero); 1198238384Sjkim copy_conditional(x_out, x1, z2_is_zero); 1199238384Sjkim copy_conditional(y_out, y2, z1_is_zero); 1200238384Sjkim copy_conditional(y_out, y1, z2_is_zero); 1201238384Sjkim copy_conditional(z_out, z2, z1_is_zero); 1202238384Sjkim copy_conditional(z_out, z1, z2_is_zero); 1203238384Sjkim felem_assign(x3, x_out); 1204238384Sjkim felem_assign(y3, y_out); 1205238384Sjkim felem_assign(z3, z_out); 1206238384Sjkim } 1207238384Sjkim 1208238384Sjkim/* Base point pre computation 1209238384Sjkim * -------------------------- 1210238384Sjkim * 1211238384Sjkim * Two different sorts of precomputed tables are used in the following code. 1212238384Sjkim * Each contain various points on the curve, where each point is three field 1213238384Sjkim * elements (x, y, z). 1214238384Sjkim * 1215238384Sjkim * For the base point table, z is usually 1 (0 for the point at infinity). 1216238384Sjkim * This table has 16 elements: 1217238384Sjkim * index | bits | point 1218238384Sjkim * ------+---------+------------------------------ 1219238384Sjkim * 0 | 0 0 0 0 | 0G 1220238384Sjkim * 1 | 0 0 0 1 | 1G 1221238384Sjkim * 2 | 0 0 1 0 | 2^130G 1222238384Sjkim * 3 | 0 0 1 1 | (2^130 + 1)G 1223238384Sjkim * 4 | 0 1 0 0 | 2^260G 1224238384Sjkim * 5 | 0 1 0 1 | (2^260 + 1)G 1225238384Sjkim * 6 | 0 1 1 0 | (2^260 + 2^130)G 1226238384Sjkim * 7 | 0 1 1 1 | (2^260 + 2^130 + 1)G 1227238384Sjkim * 8 | 1 0 0 0 | 2^390G 1228238384Sjkim * 9 | 1 0 0 1 | (2^390 + 1)G 1229238384Sjkim * 10 | 1 0 1 0 | (2^390 + 2^130)G 1230238384Sjkim * 11 | 1 0 1 1 | (2^390 + 2^130 + 1)G 1231238384Sjkim * 12 | 1 1 0 0 | (2^390 + 2^260)G 1232238384Sjkim * 13 | 1 1 0 1 | (2^390 + 2^260 + 1)G 1233238384Sjkim * 14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G 1234238384Sjkim * 15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G 1235238384Sjkim * 1236238384Sjkim * The reason for this is so that we can clock bits into four different 1237238384Sjkim * locations when doing simple scalar multiplies against the base point. 1238238384Sjkim * 1239238384Sjkim * Tables for other points have table[i] = iG for i in 0 .. 16. */ 1240238384Sjkim 1241238384Sjkim/* gmul is the table of precomputed base points */ 1242238384Sjkimstatic const felem gmul[16][3] = 1243238384Sjkim {{{0, 0, 0, 0, 0, 0, 0, 0, 0}, 1244238384Sjkim {0, 0, 0, 0, 0, 0, 0, 0, 0}, 1245238384Sjkim {0, 0, 0, 0, 0, 0, 0, 0, 0}}, 1246238384Sjkim {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334, 1247238384Sjkim 0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8, 1248238384Sjkim 0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404}, 1249238384Sjkim {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353, 1250238384Sjkim 0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45, 1251238384Sjkim 0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b}, 1252238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1253238384Sjkim {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad, 1254238384Sjkim 0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e, 1255238384Sjkim 0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5}, 1256238384Sjkim {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58, 1257238384Sjkim 0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c, 1258238384Sjkim 0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7}, 1259238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1260238384Sjkim {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873, 1261238384Sjkim 0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c, 1262238384Sjkim 0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9}, 1263238384Sjkim {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52, 1264238384Sjkim 0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e, 1265238384Sjkim 0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe}, 1266238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1267238384Sjkim {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2, 1268238384Sjkim 0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561, 1269238384Sjkim 0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065}, 1270238384Sjkim {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a, 1271238384Sjkim 0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e, 1272238384Sjkim 0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524}, 1273238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1274238384Sjkim {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6, 1275238384Sjkim 0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51, 1276238384Sjkim 0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe}, 1277238384Sjkim {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d, 1278238384Sjkim 0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c, 1279238384Sjkim 0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7}, 1280238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1281238384Sjkim {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27, 1282238384Sjkim 0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f, 1283238384Sjkim 0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256}, 1284238384Sjkim {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa, 1285238384Sjkim 0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2, 1286238384Sjkim 0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd}, 1287238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1288238384Sjkim {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890, 1289238384Sjkim 0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74, 1290238384Sjkim 0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23}, 1291238384Sjkim {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516, 1292238384Sjkim 0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1, 1293238384Sjkim 0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e}, 1294238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1295238384Sjkim {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce, 1296238384Sjkim 0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7, 1297238384Sjkim 0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5}, 1298238384Sjkim {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318, 1299238384Sjkim 0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83, 1300238384Sjkim 0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242}, 1301238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1302238384Sjkim {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae, 1303238384Sjkim 0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef, 1304238384Sjkim 0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203}, 1305238384Sjkim {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447, 1306238384Sjkim 0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283, 1307238384Sjkim 0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f}, 1308238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1309238384Sjkim {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5, 1310238384Sjkim 0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c, 1311238384Sjkim 0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a}, 1312238384Sjkim {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df, 1313238384Sjkim 0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645, 1314238384Sjkim 0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a}, 1315238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1316238384Sjkim {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292, 1317238384Sjkim 0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422, 1318238384Sjkim 0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b}, 1319238384Sjkim {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30, 1320238384Sjkim 0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb, 1321238384Sjkim 0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f}, 1322238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1323238384Sjkim {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767, 1324238384Sjkim 0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3, 1325238384Sjkim 0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf}, 1326238384Sjkim {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2, 1327238384Sjkim 0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692, 1328238384Sjkim 0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d}, 1329238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1330238384Sjkim {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3, 1331238384Sjkim 0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade, 1332238384Sjkim 0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684}, 1333238384Sjkim {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8, 1334238384Sjkim 0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a, 1335238384Sjkim 0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81}, 1336238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1337238384Sjkim {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608, 1338238384Sjkim 0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610, 1339238384Sjkim 0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d}, 1340238384Sjkim {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006, 1341238384Sjkim 0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86, 1342238384Sjkim 0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42}, 1343238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}, 1344238384Sjkim {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c, 1345238384Sjkim 0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9, 1346238384Sjkim 0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f}, 1347238384Sjkim {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7, 1348238384Sjkim 0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c, 1349238384Sjkim 0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055}, 1350238384Sjkim {1, 0, 0, 0, 0, 0, 0, 0, 0}}}; 1351238384Sjkim 1352238384Sjkim/* select_point selects the |idx|th point from a precomputation table and 1353238384Sjkim * copies it to out. */ 1354238384Sjkimstatic void select_point(const limb idx, unsigned int size, const felem pre_comp[/* size */][3], 1355238384Sjkim felem out[3]) 1356238384Sjkim { 1357238384Sjkim unsigned i, j; 1358238384Sjkim limb *outlimbs = &out[0][0]; 1359238384Sjkim memset(outlimbs, 0, 3 * sizeof(felem)); 1360238384Sjkim 1361238384Sjkim for (i = 0; i < size; i++) 1362238384Sjkim { 1363238384Sjkim const limb *inlimbs = &pre_comp[i][0][0]; 1364238384Sjkim limb mask = i ^ idx; 1365238384Sjkim mask |= mask >> 4; 1366238384Sjkim mask |= mask >> 2; 1367238384Sjkim mask |= mask >> 1; 1368238384Sjkim mask &= 1; 1369238384Sjkim mask--; 1370238384Sjkim for (j = 0; j < NLIMBS * 3; j++) 1371238384Sjkim outlimbs[j] |= inlimbs[j] & mask; 1372238384Sjkim } 1373238384Sjkim } 1374238384Sjkim 1375238384Sjkim/* get_bit returns the |i|th bit in |in| */ 1376238384Sjkimstatic char get_bit(const felem_bytearray in, int i) 1377238384Sjkim { 1378238384Sjkim if (i < 0) 1379238384Sjkim return 0; 1380238384Sjkim return (in[i >> 3] >> (i & 7)) & 1; 1381238384Sjkim } 1382238384Sjkim 1383238384Sjkim/* Interleaved point multiplication using precomputed point multiples: 1384238384Sjkim * The small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], 1385238384Sjkim * the scalars in scalars[]. If g_scalar is non-NULL, we also add this multiple 1386238384Sjkim * of the generator, using certain (large) precomputed multiples in g_pre_comp. 1387238384Sjkim * Output point (X, Y, Z) is stored in x_out, y_out, z_out */ 1388238384Sjkimstatic void batch_mul(felem x_out, felem y_out, felem z_out, 1389238384Sjkim const felem_bytearray scalars[], const unsigned num_points, const u8 *g_scalar, 1390238384Sjkim const int mixed, const felem pre_comp[][17][3], const felem g_pre_comp[16][3]) 1391238384Sjkim { 1392238384Sjkim int i, skip; 1393238384Sjkim unsigned num, gen_mul = (g_scalar != NULL); 1394238384Sjkim felem nq[3], tmp[4]; 1395238384Sjkim limb bits; 1396238384Sjkim u8 sign, digit; 1397238384Sjkim 1398238384Sjkim /* set nq to the point at infinity */ 1399238384Sjkim memset(nq, 0, 3 * sizeof(felem)); 1400238384Sjkim 1401238384Sjkim /* Loop over all scalars msb-to-lsb, interleaving additions 1402238384Sjkim * of multiples of the generator (last quarter of rounds) 1403238384Sjkim * and additions of other points multiples (every 5th round). 1404238384Sjkim */ 1405238384Sjkim skip = 1; /* save two point operations in the first round */ 1406238384Sjkim for (i = (num_points ? 520 : 130); i >= 0; --i) 1407238384Sjkim { 1408238384Sjkim /* double */ 1409238384Sjkim if (!skip) 1410238384Sjkim point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); 1411238384Sjkim 1412238384Sjkim /* add multiples of the generator */ 1413238384Sjkim if (gen_mul && (i <= 130)) 1414238384Sjkim { 1415238384Sjkim bits = get_bit(g_scalar, i + 390) << 3; 1416238384Sjkim if (i < 130) 1417238384Sjkim { 1418238384Sjkim bits |= get_bit(g_scalar, i + 260) << 2; 1419238384Sjkim bits |= get_bit(g_scalar, i + 130) << 1; 1420238384Sjkim bits |= get_bit(g_scalar, i); 1421238384Sjkim } 1422238384Sjkim /* select the point to add, in constant time */ 1423238384Sjkim select_point(bits, 16, g_pre_comp, tmp); 1424238384Sjkim if (!skip) 1425238384Sjkim { 1426238384Sjkim point_add(nq[0], nq[1], nq[2], 1427238384Sjkim nq[0], nq[1], nq[2], 1428238384Sjkim 1 /* mixed */, tmp[0], tmp[1], tmp[2]); 1429238384Sjkim } 1430238384Sjkim else 1431238384Sjkim { 1432238384Sjkim memcpy(nq, tmp, 3 * sizeof(felem)); 1433238384Sjkim skip = 0; 1434238384Sjkim } 1435238384Sjkim } 1436238384Sjkim 1437238384Sjkim /* do other additions every 5 doublings */ 1438238384Sjkim if (num_points && (i % 5 == 0)) 1439238384Sjkim { 1440238384Sjkim /* loop over all scalars */ 1441238384Sjkim for (num = 0; num < num_points; ++num) 1442238384Sjkim { 1443238384Sjkim bits = get_bit(scalars[num], i + 4) << 5; 1444238384Sjkim bits |= get_bit(scalars[num], i + 3) << 4; 1445238384Sjkim bits |= get_bit(scalars[num], i + 2) << 3; 1446238384Sjkim bits |= get_bit(scalars[num], i + 1) << 2; 1447238384Sjkim bits |= get_bit(scalars[num], i) << 1; 1448238384Sjkim bits |= get_bit(scalars[num], i - 1); 1449238384Sjkim ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); 1450238384Sjkim 1451238384Sjkim /* select the point to add or subtract, in constant time */ 1452238384Sjkim select_point(digit, 17, pre_comp[num], tmp); 1453238384Sjkim felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative point */ 1454238384Sjkim copy_conditional(tmp[1], tmp[3], (-(limb) sign)); 1455238384Sjkim 1456238384Sjkim if (!skip) 1457238384Sjkim { 1458238384Sjkim point_add(nq[0], nq[1], nq[2], 1459238384Sjkim nq[0], nq[1], nq[2], 1460238384Sjkim mixed, tmp[0], tmp[1], tmp[2]); 1461238384Sjkim } 1462238384Sjkim else 1463238384Sjkim { 1464238384Sjkim memcpy(nq, tmp, 3 * sizeof(felem)); 1465238384Sjkim skip = 0; 1466238384Sjkim } 1467238384Sjkim } 1468238384Sjkim } 1469238384Sjkim } 1470238384Sjkim felem_assign(x_out, nq[0]); 1471238384Sjkim felem_assign(y_out, nq[1]); 1472238384Sjkim felem_assign(z_out, nq[2]); 1473238384Sjkim } 1474238384Sjkim 1475238384Sjkim 1476238384Sjkim/* Precomputation for the group generator. */ 1477238384Sjkimtypedef struct { 1478238384Sjkim felem g_pre_comp[16][3]; 1479238384Sjkim int references; 1480238384Sjkim} NISTP521_PRE_COMP; 1481238384Sjkim 1482238384Sjkimconst EC_METHOD *EC_GFp_nistp521_method(void) 1483238384Sjkim { 1484238384Sjkim static const EC_METHOD ret = { 1485238384Sjkim EC_FLAGS_DEFAULT_OCT, 1486238384Sjkim NID_X9_62_prime_field, 1487238384Sjkim ec_GFp_nistp521_group_init, 1488238384Sjkim ec_GFp_simple_group_finish, 1489238384Sjkim ec_GFp_simple_group_clear_finish, 1490238384Sjkim ec_GFp_nist_group_copy, 1491238384Sjkim ec_GFp_nistp521_group_set_curve, 1492238384Sjkim ec_GFp_simple_group_get_curve, 1493238384Sjkim ec_GFp_simple_group_get_degree, 1494238384Sjkim ec_GFp_simple_group_check_discriminant, 1495238384Sjkim ec_GFp_simple_point_init, 1496238384Sjkim ec_GFp_simple_point_finish, 1497238384Sjkim ec_GFp_simple_point_clear_finish, 1498238384Sjkim ec_GFp_simple_point_copy, 1499238384Sjkim ec_GFp_simple_point_set_to_infinity, 1500238384Sjkim ec_GFp_simple_set_Jprojective_coordinates_GFp, 1501238384Sjkim ec_GFp_simple_get_Jprojective_coordinates_GFp, 1502238384Sjkim ec_GFp_simple_point_set_affine_coordinates, 1503238384Sjkim ec_GFp_nistp521_point_get_affine_coordinates, 1504238384Sjkim 0 /* point_set_compressed_coordinates */, 1505238384Sjkim 0 /* point2oct */, 1506238384Sjkim 0 /* oct2point */, 1507238384Sjkim ec_GFp_simple_add, 1508238384Sjkim ec_GFp_simple_dbl, 1509238384Sjkim ec_GFp_simple_invert, 1510238384Sjkim ec_GFp_simple_is_at_infinity, 1511238384Sjkim ec_GFp_simple_is_on_curve, 1512238384Sjkim ec_GFp_simple_cmp, 1513238384Sjkim ec_GFp_simple_make_affine, 1514238384Sjkim ec_GFp_simple_points_make_affine, 1515238384Sjkim ec_GFp_nistp521_points_mul, 1516238384Sjkim ec_GFp_nistp521_precompute_mult, 1517238384Sjkim ec_GFp_nistp521_have_precompute_mult, 1518238384Sjkim ec_GFp_nist_field_mul, 1519238384Sjkim ec_GFp_nist_field_sqr, 1520238384Sjkim 0 /* field_div */, 1521238384Sjkim 0 /* field_encode */, 1522238384Sjkim 0 /* field_decode */, 1523238384Sjkim 0 /* field_set_to_one */ }; 1524238384Sjkim 1525238384Sjkim return &ret; 1526238384Sjkim } 1527238384Sjkim 1528238384Sjkim 1529238384Sjkim/******************************************************************************/ 1530238384Sjkim/* FUNCTIONS TO MANAGE PRECOMPUTATION 1531238384Sjkim */ 1532238384Sjkim 1533238384Sjkimstatic NISTP521_PRE_COMP *nistp521_pre_comp_new() 1534238384Sjkim { 1535238384Sjkim NISTP521_PRE_COMP *ret = NULL; 1536238384Sjkim ret = (NISTP521_PRE_COMP *)OPENSSL_malloc(sizeof(NISTP521_PRE_COMP)); 1537238384Sjkim if (!ret) 1538238384Sjkim { 1539238384Sjkim ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 1540238384Sjkim return ret; 1541238384Sjkim } 1542238384Sjkim memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp)); 1543238384Sjkim ret->references = 1; 1544238384Sjkim return ret; 1545238384Sjkim } 1546238384Sjkim 1547238384Sjkimstatic void *nistp521_pre_comp_dup(void *src_) 1548238384Sjkim { 1549238384Sjkim NISTP521_PRE_COMP *src = src_; 1550238384Sjkim 1551238384Sjkim /* no need to actually copy, these objects never change! */ 1552238384Sjkim CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP); 1553238384Sjkim 1554238384Sjkim return src_; 1555238384Sjkim } 1556238384Sjkim 1557238384Sjkimstatic void nistp521_pre_comp_free(void *pre_) 1558238384Sjkim { 1559238384Sjkim int i; 1560238384Sjkim NISTP521_PRE_COMP *pre = pre_; 1561238384Sjkim 1562238384Sjkim if (!pre) 1563238384Sjkim return; 1564238384Sjkim 1565238384Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1566238384Sjkim if (i > 0) 1567238384Sjkim return; 1568238384Sjkim 1569238384Sjkim OPENSSL_free(pre); 1570238384Sjkim } 1571238384Sjkim 1572238384Sjkimstatic void nistp521_pre_comp_clear_free(void *pre_) 1573238384Sjkim { 1574238384Sjkim int i; 1575238384Sjkim NISTP521_PRE_COMP *pre = pre_; 1576238384Sjkim 1577238384Sjkim if (!pre) 1578238384Sjkim return; 1579238384Sjkim 1580238384Sjkim i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP); 1581238384Sjkim if (i > 0) 1582238384Sjkim return; 1583238384Sjkim 1584238384Sjkim OPENSSL_cleanse(pre, sizeof(*pre)); 1585238384Sjkim OPENSSL_free(pre); 1586238384Sjkim } 1587238384Sjkim 1588238384Sjkim/******************************************************************************/ 1589238384Sjkim/* OPENSSL EC_METHOD FUNCTIONS 1590238384Sjkim */ 1591238384Sjkim 1592238384Sjkimint ec_GFp_nistp521_group_init(EC_GROUP *group) 1593238384Sjkim { 1594238384Sjkim int ret; 1595238384Sjkim ret = ec_GFp_simple_group_init(group); 1596238384Sjkim group->a_is_minus3 = 1; 1597238384Sjkim return ret; 1598238384Sjkim } 1599238384Sjkim 1600238384Sjkimint ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p, 1601238384Sjkim const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 1602238384Sjkim { 1603238384Sjkim int ret = 0; 1604238384Sjkim BN_CTX *new_ctx = NULL; 1605238384Sjkim BIGNUM *curve_p, *curve_a, *curve_b; 1606238384Sjkim 1607238384Sjkim if (ctx == NULL) 1608238384Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 1609238384Sjkim BN_CTX_start(ctx); 1610238384Sjkim if (((curve_p = BN_CTX_get(ctx)) == NULL) || 1611238384Sjkim ((curve_a = BN_CTX_get(ctx)) == NULL) || 1612238384Sjkim ((curve_b = BN_CTX_get(ctx)) == NULL)) goto err; 1613238384Sjkim BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p); 1614238384Sjkim BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a); 1615238384Sjkim BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b); 1616238384Sjkim if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || 1617238384Sjkim (BN_cmp(curve_b, b))) 1618238384Sjkim { 1619238384Sjkim ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE, 1620238384Sjkim EC_R_WRONG_CURVE_PARAMETERS); 1621238384Sjkim goto err; 1622238384Sjkim } 1623238384Sjkim group->field_mod_func = BN_nist_mod_521; 1624238384Sjkim ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx); 1625238384Sjkimerr: 1626238384Sjkim BN_CTX_end(ctx); 1627238384Sjkim if (new_ctx != NULL) 1628238384Sjkim BN_CTX_free(new_ctx); 1629238384Sjkim return ret; 1630238384Sjkim } 1631238384Sjkim 1632238384Sjkim/* Takes the Jacobian coordinates (X, Y, Z) of a point and returns 1633238384Sjkim * (X', Y') = (X/Z^2, Y/Z^3) */ 1634238384Sjkimint ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group, 1635238384Sjkim const EC_POINT *point, BIGNUM *x, BIGNUM *y, BN_CTX *ctx) 1636238384Sjkim { 1637238384Sjkim felem z1, z2, x_in, y_in, x_out, y_out; 1638238384Sjkim largefelem tmp; 1639238384Sjkim 1640238384Sjkim if (EC_POINT_is_at_infinity(group, point)) 1641238384Sjkim { 1642238384Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, 1643238384Sjkim EC_R_POINT_AT_INFINITY); 1644238384Sjkim return 0; 1645238384Sjkim } 1646238384Sjkim if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) || 1647238384Sjkim (!BN_to_felem(z1, &point->Z))) return 0; 1648238384Sjkim felem_inv(z2, z1); 1649238384Sjkim felem_square(tmp, z2); felem_reduce(z1, tmp); 1650238384Sjkim felem_mul(tmp, x_in, z1); felem_reduce(x_in, tmp); 1651238384Sjkim felem_contract(x_out, x_in); 1652238384Sjkim if (x != NULL) 1653238384Sjkim { 1654238384Sjkim if (!felem_to_BN(x, x_out)) 1655238384Sjkim { 1656238384Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB); 1657238384Sjkim return 0; 1658238384Sjkim } 1659238384Sjkim } 1660238384Sjkim felem_mul(tmp, z1, z2); felem_reduce(z1, tmp); 1661238384Sjkim felem_mul(tmp, y_in, z1); felem_reduce(y_in, tmp); 1662238384Sjkim felem_contract(y_out, y_in); 1663238384Sjkim if (y != NULL) 1664238384Sjkim { 1665238384Sjkim if (!felem_to_BN(y, y_out)) 1666238384Sjkim { 1667238384Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES, ERR_R_BN_LIB); 1668238384Sjkim return 0; 1669238384Sjkim } 1670238384Sjkim } 1671238384Sjkim return 1; 1672238384Sjkim } 1673238384Sjkim 1674238384Sjkimstatic void make_points_affine(size_t num, felem points[/* num */][3], felem tmp_felems[/* num+1 */]) 1675238384Sjkim { 1676238384Sjkim /* Runs in constant time, unless an input is the point at infinity 1677238384Sjkim * (which normally shouldn't happen). */ 1678238384Sjkim ec_GFp_nistp_points_make_affine_internal( 1679238384Sjkim num, 1680238384Sjkim points, 1681238384Sjkim sizeof(felem), 1682238384Sjkim tmp_felems, 1683238384Sjkim (void (*)(void *)) felem_one, 1684238384Sjkim (int (*)(const void *)) felem_is_zero_int, 1685238384Sjkim (void (*)(void *, const void *)) felem_assign, 1686238384Sjkim (void (*)(void *, const void *)) felem_square_reduce, 1687238384Sjkim (void (*)(void *, const void *, const void *)) felem_mul_reduce, 1688238384Sjkim (void (*)(void *, const void *)) felem_inv, 1689238384Sjkim (void (*)(void *, const void *)) felem_contract); 1690238384Sjkim } 1691238384Sjkim 1692238384Sjkim/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values 1693238384Sjkim * Result is stored in r (r can equal one of the inputs). */ 1694238384Sjkimint ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r, 1695238384Sjkim const BIGNUM *scalar, size_t num, const EC_POINT *points[], 1696238384Sjkim const BIGNUM *scalars[], BN_CTX *ctx) 1697238384Sjkim { 1698238384Sjkim int ret = 0; 1699238384Sjkim int j; 1700238384Sjkim int mixed = 0; 1701238384Sjkim BN_CTX *new_ctx = NULL; 1702238384Sjkim BIGNUM *x, *y, *z, *tmp_scalar; 1703238384Sjkim felem_bytearray g_secret; 1704238384Sjkim felem_bytearray *secrets = NULL; 1705238384Sjkim felem (*pre_comp)[17][3] = NULL; 1706238384Sjkim felem *tmp_felems = NULL; 1707238384Sjkim felem_bytearray tmp; 1708238384Sjkim unsigned i, num_bytes; 1709238384Sjkim int have_pre_comp = 0; 1710238384Sjkim size_t num_points = num; 1711238384Sjkim felem x_in, y_in, z_in, x_out, y_out, z_out; 1712238384Sjkim NISTP521_PRE_COMP *pre = NULL; 1713238384Sjkim felem (*g_pre_comp)[3] = NULL; 1714238384Sjkim EC_POINT *generator = NULL; 1715238384Sjkim const EC_POINT *p = NULL; 1716238384Sjkim const BIGNUM *p_scalar = NULL; 1717238384Sjkim 1718238384Sjkim if (ctx == NULL) 1719238384Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 1720238384Sjkim BN_CTX_start(ctx); 1721238384Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || 1722238384Sjkim ((y = BN_CTX_get(ctx)) == NULL) || 1723238384Sjkim ((z = BN_CTX_get(ctx)) == NULL) || 1724238384Sjkim ((tmp_scalar = BN_CTX_get(ctx)) == NULL)) 1725238384Sjkim goto err; 1726238384Sjkim 1727238384Sjkim if (scalar != NULL) 1728238384Sjkim { 1729238384Sjkim pre = EC_EX_DATA_get_data(group->extra_data, 1730238384Sjkim nistp521_pre_comp_dup, nistp521_pre_comp_free, 1731238384Sjkim nistp521_pre_comp_clear_free); 1732238384Sjkim if (pre) 1733238384Sjkim /* we have precomputation, try to use it */ 1734238384Sjkim g_pre_comp = &pre->g_pre_comp[0]; 1735238384Sjkim else 1736238384Sjkim /* try to use the standard precomputation */ 1737238384Sjkim g_pre_comp = (felem (*)[3]) gmul; 1738238384Sjkim generator = EC_POINT_new(group); 1739238384Sjkim if (generator == NULL) 1740238384Sjkim goto err; 1741238384Sjkim /* get the generator from precomputation */ 1742238384Sjkim if (!felem_to_BN(x, g_pre_comp[1][0]) || 1743238384Sjkim !felem_to_BN(y, g_pre_comp[1][1]) || 1744238384Sjkim !felem_to_BN(z, g_pre_comp[1][2])) 1745238384Sjkim { 1746238384Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1747238384Sjkim goto err; 1748238384Sjkim } 1749238384Sjkim if (!EC_POINT_set_Jprojective_coordinates_GFp(group, 1750238384Sjkim generator, x, y, z, ctx)) 1751238384Sjkim goto err; 1752238384Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1753238384Sjkim /* precomputation matches generator */ 1754238384Sjkim have_pre_comp = 1; 1755238384Sjkim else 1756238384Sjkim /* we don't have valid precomputation: 1757238384Sjkim * treat the generator as a random point */ 1758238384Sjkim num_points++; 1759238384Sjkim } 1760238384Sjkim 1761238384Sjkim if (num_points > 0) 1762238384Sjkim { 1763238384Sjkim if (num_points >= 2) 1764238384Sjkim { 1765238384Sjkim /* unless we precompute multiples for just one point, 1766238384Sjkim * converting those into affine form is time well spent */ 1767238384Sjkim mixed = 1; 1768238384Sjkim } 1769238384Sjkim secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray)); 1770238384Sjkim pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem)); 1771238384Sjkim if (mixed) 1772238384Sjkim tmp_felems = OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem)); 1773238384Sjkim if ((secrets == NULL) || (pre_comp == NULL) || (mixed && (tmp_felems == NULL))) 1774238384Sjkim { 1775238384Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE); 1776238384Sjkim goto err; 1777238384Sjkim } 1778238384Sjkim 1779238384Sjkim /* we treat NULL scalars as 0, and NULL points as points at infinity, 1780238384Sjkim * i.e., they contribute nothing to the linear combination */ 1781238384Sjkim memset(secrets, 0, num_points * sizeof(felem_bytearray)); 1782238384Sjkim memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem)); 1783238384Sjkim for (i = 0; i < num_points; ++i) 1784238384Sjkim { 1785238384Sjkim if (i == num) 1786238384Sjkim /* we didn't have a valid precomputation, so we pick 1787238384Sjkim * the generator */ 1788238384Sjkim { 1789238384Sjkim p = EC_GROUP_get0_generator(group); 1790238384Sjkim p_scalar = scalar; 1791238384Sjkim } 1792238384Sjkim else 1793238384Sjkim /* the i^th point */ 1794238384Sjkim { 1795238384Sjkim p = points[i]; 1796238384Sjkim p_scalar = scalars[i]; 1797238384Sjkim } 1798238384Sjkim if ((p_scalar != NULL) && (p != NULL)) 1799238384Sjkim { 1800238384Sjkim /* reduce scalar to 0 <= scalar < 2^521 */ 1801238384Sjkim if ((BN_num_bits(p_scalar) > 521) || (BN_is_negative(p_scalar))) 1802238384Sjkim { 1803238384Sjkim /* this is an unusual input, and we don't guarantee 1804238384Sjkim * constant-timeness */ 1805238384Sjkim if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) 1806238384Sjkim { 1807238384Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1808238384Sjkim goto err; 1809238384Sjkim } 1810238384Sjkim num_bytes = BN_bn2bin(tmp_scalar, tmp); 1811238384Sjkim } 1812238384Sjkim else 1813238384Sjkim num_bytes = BN_bn2bin(p_scalar, tmp); 1814238384Sjkim flip_endian(secrets[i], tmp, num_bytes); 1815238384Sjkim /* precompute multiples */ 1816238384Sjkim if ((!BN_to_felem(x_out, &p->X)) || 1817238384Sjkim (!BN_to_felem(y_out, &p->Y)) || 1818238384Sjkim (!BN_to_felem(z_out, &p->Z))) goto err; 1819238384Sjkim memcpy(pre_comp[i][1][0], x_out, sizeof(felem)); 1820238384Sjkim memcpy(pre_comp[i][1][1], y_out, sizeof(felem)); 1821238384Sjkim memcpy(pre_comp[i][1][2], z_out, sizeof(felem)); 1822238384Sjkim for (j = 2; j <= 16; ++j) 1823238384Sjkim { 1824238384Sjkim if (j & 1) 1825238384Sjkim { 1826238384Sjkim point_add( 1827238384Sjkim pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2], 1828238384Sjkim pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2], 1829238384Sjkim 0, pre_comp[i][j-1][0], pre_comp[i][j-1][1], pre_comp[i][j-1][2]); 1830238384Sjkim } 1831238384Sjkim else 1832238384Sjkim { 1833238384Sjkim point_double( 1834238384Sjkim pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2], 1835238384Sjkim pre_comp[i][j/2][0], pre_comp[i][j/2][1], pre_comp[i][j/2][2]); 1836238384Sjkim } 1837238384Sjkim } 1838238384Sjkim } 1839238384Sjkim } 1840238384Sjkim if (mixed) 1841238384Sjkim make_points_affine(num_points * 17, pre_comp[0], tmp_felems); 1842238384Sjkim } 1843238384Sjkim 1844238384Sjkim /* the scalar for the generator */ 1845238384Sjkim if ((scalar != NULL) && (have_pre_comp)) 1846238384Sjkim { 1847238384Sjkim memset(g_secret, 0, sizeof(g_secret)); 1848238384Sjkim /* reduce scalar to 0 <= scalar < 2^521 */ 1849238384Sjkim if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) 1850238384Sjkim { 1851238384Sjkim /* this is an unusual input, and we don't guarantee 1852238384Sjkim * constant-timeness */ 1853238384Sjkim if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) 1854238384Sjkim { 1855238384Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1856238384Sjkim goto err; 1857238384Sjkim } 1858238384Sjkim num_bytes = BN_bn2bin(tmp_scalar, tmp); 1859238384Sjkim } 1860238384Sjkim else 1861238384Sjkim num_bytes = BN_bn2bin(scalar, tmp); 1862238384Sjkim flip_endian(g_secret, tmp, num_bytes); 1863238384Sjkim /* do the multiplication with generator precomputation*/ 1864238384Sjkim batch_mul(x_out, y_out, z_out, 1865238384Sjkim (const felem_bytearray (*)) secrets, num_points, 1866238384Sjkim g_secret, 1867238384Sjkim mixed, (const felem (*)[17][3]) pre_comp, 1868238384Sjkim (const felem (*)[3]) g_pre_comp); 1869238384Sjkim } 1870238384Sjkim else 1871238384Sjkim /* do the multiplication without generator precomputation */ 1872238384Sjkim batch_mul(x_out, y_out, z_out, 1873238384Sjkim (const felem_bytearray (*)) secrets, num_points, 1874238384Sjkim NULL, mixed, (const felem (*)[17][3]) pre_comp, NULL); 1875238384Sjkim /* reduce the output to its unique minimal representation */ 1876238384Sjkim felem_contract(x_in, x_out); 1877238384Sjkim felem_contract(y_in, y_out); 1878238384Sjkim felem_contract(z_in, z_out); 1879238384Sjkim if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) || 1880238384Sjkim (!felem_to_BN(z, z_in))) 1881238384Sjkim { 1882238384Sjkim ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB); 1883238384Sjkim goto err; 1884238384Sjkim } 1885238384Sjkim ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx); 1886238384Sjkim 1887238384Sjkimerr: 1888238384Sjkim BN_CTX_end(ctx); 1889238384Sjkim if (generator != NULL) 1890238384Sjkim EC_POINT_free(generator); 1891238384Sjkim if (new_ctx != NULL) 1892238384Sjkim BN_CTX_free(new_ctx); 1893238384Sjkim if (secrets != NULL) 1894238384Sjkim OPENSSL_free(secrets); 1895238384Sjkim if (pre_comp != NULL) 1896238384Sjkim OPENSSL_free(pre_comp); 1897238384Sjkim if (tmp_felems != NULL) 1898238384Sjkim OPENSSL_free(tmp_felems); 1899238384Sjkim return ret; 1900238384Sjkim } 1901238384Sjkim 1902238384Sjkimint ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 1903238384Sjkim { 1904238384Sjkim int ret = 0; 1905238384Sjkim NISTP521_PRE_COMP *pre = NULL; 1906238384Sjkim int i, j; 1907238384Sjkim BN_CTX *new_ctx = NULL; 1908238384Sjkim BIGNUM *x, *y; 1909238384Sjkim EC_POINT *generator = NULL; 1910238384Sjkim felem tmp_felems[16]; 1911238384Sjkim 1912238384Sjkim /* throw away old precomputation */ 1913238384Sjkim EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup, 1914238384Sjkim nistp521_pre_comp_free, nistp521_pre_comp_clear_free); 1915238384Sjkim if (ctx == NULL) 1916238384Sjkim if ((ctx = new_ctx = BN_CTX_new()) == NULL) return 0; 1917238384Sjkim BN_CTX_start(ctx); 1918238384Sjkim if (((x = BN_CTX_get(ctx)) == NULL) || 1919238384Sjkim ((y = BN_CTX_get(ctx)) == NULL)) 1920238384Sjkim goto err; 1921238384Sjkim /* get the generator */ 1922238384Sjkim if (group->generator == NULL) goto err; 1923238384Sjkim generator = EC_POINT_new(group); 1924238384Sjkim if (generator == NULL) 1925238384Sjkim goto err; 1926238384Sjkim BN_bin2bn(nistp521_curve_params[3], sizeof (felem_bytearray), x); 1927238384Sjkim BN_bin2bn(nistp521_curve_params[4], sizeof (felem_bytearray), y); 1928238384Sjkim if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx)) 1929238384Sjkim goto err; 1930238384Sjkim if ((pre = nistp521_pre_comp_new()) == NULL) 1931238384Sjkim goto err; 1932238384Sjkim /* if the generator is the standard one, use built-in precomputation */ 1933238384Sjkim if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) 1934238384Sjkim { 1935238384Sjkim memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp)); 1936238384Sjkim ret = 1; 1937238384Sjkim goto err; 1938238384Sjkim } 1939238384Sjkim if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) || 1940238384Sjkim (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) || 1941238384Sjkim (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z))) 1942238384Sjkim goto err; 1943238384Sjkim /* compute 2^130*G, 2^260*G, 2^390*G */ 1944238384Sjkim for (i = 1; i <= 4; i <<= 1) 1945238384Sjkim { 1946238384Sjkim point_double(pre->g_pre_comp[2*i][0], pre->g_pre_comp[2*i][1], 1947238384Sjkim pre->g_pre_comp[2*i][2], pre->g_pre_comp[i][0], 1948238384Sjkim pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]); 1949238384Sjkim for (j = 0; j < 129; ++j) 1950238384Sjkim { 1951238384Sjkim point_double(pre->g_pre_comp[2*i][0], 1952238384Sjkim pre->g_pre_comp[2*i][1], 1953238384Sjkim pre->g_pre_comp[2*i][2], 1954238384Sjkim pre->g_pre_comp[2*i][0], 1955238384Sjkim pre->g_pre_comp[2*i][1], 1956238384Sjkim pre->g_pre_comp[2*i][2]); 1957238384Sjkim } 1958238384Sjkim } 1959238384Sjkim /* g_pre_comp[0] is the point at infinity */ 1960238384Sjkim memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0])); 1961238384Sjkim /* the remaining multiples */ 1962238384Sjkim /* 2^130*G + 2^260*G */ 1963238384Sjkim point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1], 1964238384Sjkim pre->g_pre_comp[6][2], pre->g_pre_comp[4][0], 1965238384Sjkim pre->g_pre_comp[4][1], pre->g_pre_comp[4][2], 1966238384Sjkim 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], 1967238384Sjkim pre->g_pre_comp[2][2]); 1968238384Sjkim /* 2^130*G + 2^390*G */ 1969238384Sjkim point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1], 1970238384Sjkim pre->g_pre_comp[10][2], pre->g_pre_comp[8][0], 1971238384Sjkim pre->g_pre_comp[8][1], pre->g_pre_comp[8][2], 1972238384Sjkim 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], 1973238384Sjkim pre->g_pre_comp[2][2]); 1974238384Sjkim /* 2^260*G + 2^390*G */ 1975238384Sjkim point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], 1976238384Sjkim pre->g_pre_comp[12][2], pre->g_pre_comp[8][0], 1977238384Sjkim pre->g_pre_comp[8][1], pre->g_pre_comp[8][2], 1978238384Sjkim 0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1], 1979238384Sjkim pre->g_pre_comp[4][2]); 1980238384Sjkim /* 2^130*G + 2^260*G + 2^390*G */ 1981238384Sjkim point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1], 1982238384Sjkim pre->g_pre_comp[14][2], pre->g_pre_comp[12][0], 1983238384Sjkim pre->g_pre_comp[12][1], pre->g_pre_comp[12][2], 1984238384Sjkim 0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], 1985238384Sjkim pre->g_pre_comp[2][2]); 1986238384Sjkim for (i = 1; i < 8; ++i) 1987238384Sjkim { 1988238384Sjkim /* odd multiples: add G */ 1989238384Sjkim point_add(pre->g_pre_comp[2*i+1][0], pre->g_pre_comp[2*i+1][1], 1990238384Sjkim pre->g_pre_comp[2*i+1][2], pre->g_pre_comp[2*i][0], 1991238384Sjkim pre->g_pre_comp[2*i][1], pre->g_pre_comp[2*i][2], 1992238384Sjkim 0, pre->g_pre_comp[1][0], pre->g_pre_comp[1][1], 1993238384Sjkim pre->g_pre_comp[1][2]); 1994238384Sjkim } 1995238384Sjkim make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems); 1996238384Sjkim 1997238384Sjkim if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup, 1998238384Sjkim nistp521_pre_comp_free, nistp521_pre_comp_clear_free)) 1999238384Sjkim goto err; 2000238384Sjkim ret = 1; 2001238384Sjkim pre = NULL; 2002238384Sjkim err: 2003238384Sjkim BN_CTX_end(ctx); 2004238384Sjkim if (generator != NULL) 2005238384Sjkim EC_POINT_free(generator); 2006238384Sjkim if (new_ctx != NULL) 2007238384Sjkim BN_CTX_free(new_ctx); 2008238384Sjkim if (pre) 2009238384Sjkim nistp521_pre_comp_free(pre); 2010238384Sjkim return ret; 2011238384Sjkim } 2012238384Sjkim 2013238384Sjkimint ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group) 2014238384Sjkim { 2015238384Sjkim if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup, 2016238384Sjkim nistp521_pre_comp_free, nistp521_pre_comp_clear_free) 2017238384Sjkim != NULL) 2018238384Sjkim return 1; 2019238384Sjkim else 2020238384Sjkim return 0; 2021238384Sjkim } 2022238384Sjkim 2023238384Sjkim#else 2024238384Sjkimstatic void *dummy=&dummy; 2025238384Sjkim#endif 2026