1238106Sdes/* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */ 2238106Sdes/* 3238106Sdesversion 20081011 4238106SdesMatthew Dempsky 5238106SdesPublic domain. 6238106SdesDerived from public domain code by D. J. Bernstein. 7238106Sdes*/ 8238106Sdes 9238106Sdesint crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *); 10238106Sdes 11238106Sdesstatic void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) 12238106Sdes{ 13238106Sdes unsigned int j; 14238106Sdes unsigned int u; 15238106Sdes u = 0; 16238106Sdes for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; } 17238106Sdes u += a[31] + b[31]; out[31] = u; 18238106Sdes} 19238106Sdes 20238106Sdesstatic void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) 21238106Sdes{ 22238106Sdes unsigned int j; 23238106Sdes unsigned int u; 24269257Sdes u = 218; 25269257Sdes for (j = 0;j < 31;++j) { 26269257Sdes u += a[j] + 65280 - b[j]; 27269257Sdes out[j] = u & 255; 28269257Sdes u >>= 8; 29269257Sdes } 30269257Sdes u += a[31] - b[31]; 31269257Sdes out[31] = u; 32269257Sdes} 33269257Sdes 34238106Sdesstatic void squeeze(unsigned int a[32]) 35238106Sdes{ 36238106Sdes unsigned int j; 37238106Sdes unsigned int u; 38238106Sdes u = 0; 39238106Sdes for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } 40238106Sdes u += a[31]; a[31] = u & 127; 41238106Sdes u = 19 * (u >> 7); 42238106Sdes for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } 43238106Sdes u += a[31]; a[31] = u; 44238106Sdes} 45238106Sdes 46255588Sdesstatic const unsigned int minusp[32] = { 47238106Sdes 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 48238106Sdes} ; 49238106Sdes 50238106Sdesstatic void freeze(unsigned int a[32]) 51238106Sdes{ 52238106Sdes unsigned int aorig[32]; 53238106Sdes unsigned int j; 54285206Sdes unsigned int negative; 55238106Sdes 56238106Sdes for (j = 0;j < 32;++j) aorig[j] = a[j]; 57238106Sdes add(a,a,minusp); 58238106Sdes negative = -((a[31] >> 7) & 1); 59238106Sdes for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]); 60238106Sdes} 61238106Sdes 62238106Sdesstatic void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) 63238106Sdes{ 64238106Sdes unsigned int i; 65238106Sdes unsigned int j; 66238106Sdes unsigned int u; 67238106Sdes 68238106Sdes for (i = 0;i < 32;++i) { 69238106Sdes u = 0; 70238106Sdes for (j = 0;j <= i;++j) u += a[j] * b[i - j]; 71238106Sdes for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j]; 72238106Sdes out[i] = u; 73238106Sdes } 74238106Sdes squeeze(out); 75238106Sdes} 76238106Sdes 77238106Sdesstatic void mult121665(unsigned int out[32],const unsigned int a[32]) 78238106Sdes{ 79238106Sdes unsigned int j; 80238106Sdes unsigned int u; 81238106Sdes 82238106Sdes u = 0; 83238106Sdes for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; } 84238106Sdes u += 121665 * a[31]; out[31] = u & 127; 85238106Sdes u = 19 * (u >> 7); 86238106Sdes for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; } 87238106Sdes u += out[j]; out[j] = u; 88238106Sdes} 89238106Sdes 90238106Sdesstatic void square(unsigned int out[32],const unsigned int a[32]) 91238106Sdes{ 92238106Sdes unsigned int i; 93238106Sdes unsigned int j; 94238106Sdes unsigned int u; 95238106Sdes 96238106Sdes for (i = 0;i < 32;++i) { 97238106Sdes u = 0; 98238106Sdes for (j = 0;j < i - j;++j) u += a[j] * a[i - j]; 99238106Sdes for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j]; 100238106Sdes u *= 2; 101238106Sdes if ((i & 1) == 0) { 102238106Sdes u += a[i / 2] * a[i / 2]; 103238106Sdes u += 38 * a[i / 2 + 16] * a[i / 2 + 16]; 104238106Sdes } 105238106Sdes out[i] = u; 106291767Sdes } 107291767Sdes squeeze(out); 108291767Sdes} 109291767Sdes 110238106Sdesstatic void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b) 111238106Sdes{ 112238106Sdes unsigned int j; 113238106Sdes unsigned int t; 114238106Sdes unsigned int bminus1; 115238106Sdes 116238106Sdes bminus1 = b - 1; 117238106Sdes for (j = 0;j < 64;++j) { 118238106Sdes t = bminus1 & (r[j] ^ s[j]); 119238106Sdes p[j] = s[j] ^ t; 120238106Sdes q[j] = r[j] ^ t; 121238106Sdes } 122238106Sdes} 123238106Sdes 124285206Sdesstatic void mainloop(unsigned int work[64],const unsigned char e[32]) 125285206Sdes{ 126285206Sdes unsigned int xzm1[64]; 127285206Sdes unsigned int xzm[64]; 128285206Sdes unsigned int xzmb[64]; 129238106Sdes unsigned int xzm1b[64]; 130238106Sdes unsigned int xznb[64]; 131238106Sdes unsigned int xzn1b[64]; 132238106Sdes unsigned int a0[64]; 133238106Sdes unsigned int a1[64]; 134238106Sdes unsigned int b0[64]; 135238106Sdes unsigned int b1[64]; 136238106Sdes unsigned int c1[64]; 137238106Sdes unsigned int r[32]; 138238106Sdes unsigned int s[32]; 139238106Sdes unsigned int t[32]; 140238106Sdes unsigned int u[32]; 141238106Sdes unsigned int j; 142238106Sdes unsigned int b; 143238106Sdes int pos; 144238106Sdes 145238106Sdes for (j = 0;j < 32;++j) xzm1[j] = work[j]; 146238106Sdes xzm1[32] = 1; 147238106Sdes for (j = 33;j < 64;++j) xzm1[j] = 0; 148238106Sdes 149238106Sdes xzm[0] = 1; 150238106Sdes for (j = 1;j < 64;++j) xzm[j] = 0; 151238106Sdes 152238106Sdes for (pos = 254;pos >= 0;--pos) { 153238106Sdes b = e[pos / 8] >> (pos & 7); 154238106Sdes b &= 1; 155238106Sdes select(xzmb,xzm1b,xzm,xzm1,b); 156238106Sdes add(a0,xzmb,xzmb + 32); 157238106Sdes sub(a0 + 32,xzmb,xzmb + 32); 158238106Sdes add(a1,xzm1b,xzm1b + 32); 159238106Sdes sub(a1 + 32,xzm1b,xzm1b + 32); 160238106Sdes square(b0,a0); 161238106Sdes square(b0 + 32,a0 + 32); 162238106Sdes mult(b1,a1,a0 + 32); 163238106Sdes mult(b1 + 32,a1 + 32,a0); 164238106Sdes add(c1,b1,b1 + 32); 165238106Sdes sub(c1 + 32,b1,b1 + 32); 166238106Sdes square(r,c1 + 32); 167238106Sdes sub(s,b0,b0 + 32); 168238106Sdes mult121665(t,s); 169238106Sdes add(u,t,b0); 170238106Sdes mult(xznb,b0,b0 + 32); 171238106Sdes mult(xznb + 32,s,u); 172238106Sdes square(xzn1b,c1); 173238106Sdes mult(xzn1b + 32,r,work); 174238106Sdes select(xzm,xzm1,xznb,xzn1b,b); 175238106Sdes } 176238106Sdes 177238106Sdes for (j = 0;j < 64;++j) work[j] = xzm[j]; 178} 179 180static void recip(unsigned int out[32],const unsigned int z[32]) 181{ 182 unsigned int z2[32]; 183 unsigned int z9[32]; 184 unsigned int z11[32]; 185 unsigned int z2_5_0[32]; 186 unsigned int z2_10_0[32]; 187 unsigned int z2_20_0[32]; 188 unsigned int z2_50_0[32]; 189 unsigned int z2_100_0[32]; 190 unsigned int t0[32]; 191 unsigned int t1[32]; 192 int i; 193 194 /* 2 */ square(z2,z); 195 /* 4 */ square(t1,z2); 196 /* 8 */ square(t0,t1); 197 /* 9 */ mult(z9,t0,z); 198 /* 11 */ mult(z11,z9,z2); 199 /* 22 */ square(t0,z11); 200 /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9); 201 202 /* 2^6 - 2^1 */ square(t0,z2_5_0); 203 /* 2^7 - 2^2 */ square(t1,t0); 204 /* 2^8 - 2^3 */ square(t0,t1); 205 /* 2^9 - 2^4 */ square(t1,t0); 206 /* 2^10 - 2^5 */ square(t0,t1); 207 /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0); 208 209 /* 2^11 - 2^1 */ square(t0,z2_10_0); 210 /* 2^12 - 2^2 */ square(t1,t0); 211 /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); } 212 /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0); 213 214 /* 2^21 - 2^1 */ square(t0,z2_20_0); 215 /* 2^22 - 2^2 */ square(t1,t0); 216 /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); } 217 /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0); 218 219 /* 2^41 - 2^1 */ square(t1,t0); 220 /* 2^42 - 2^2 */ square(t0,t1); 221 /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); } 222 /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0); 223 224 /* 2^51 - 2^1 */ square(t0,z2_50_0); 225 /* 2^52 - 2^2 */ square(t1,t0); 226 /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } 227 /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0); 228 229 /* 2^101 - 2^1 */ square(t1,z2_100_0); 230 /* 2^102 - 2^2 */ square(t0,t1); 231 /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); } 232 /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0); 233 234 /* 2^201 - 2^1 */ square(t0,t1); 235 /* 2^202 - 2^2 */ square(t1,t0); 236 /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } 237 /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0); 238 239 /* 2^251 - 2^1 */ square(t1,t0); 240 /* 2^252 - 2^2 */ square(t0,t1); 241 /* 2^253 - 2^3 */ square(t1,t0); 242 /* 2^254 - 2^4 */ square(t0,t1); 243 /* 2^255 - 2^5 */ square(t1,t0); 244 /* 2^255 - 21 */ mult(out,t1,z11); 245} 246 247int crypto_scalarmult_curve25519(unsigned char *q, 248 const unsigned char *n, 249 const unsigned char *p) 250{ 251 unsigned int work[96]; 252 unsigned char e[32]; 253 unsigned int i; 254 for (i = 0;i < 32;++i) e[i] = n[i]; 255 e[0] &= 248; 256 e[31] &= 127; 257 e[31] |= 64; 258 for (i = 0;i < 32;++i) work[i] = p[i]; 259 mainloop(work,e); 260 recip(work + 32,work + 32); 261 mult(work + 64,work,work + 32); 262 freeze(work + 64); 263 for (i = 0;i < 32;++i) q[i] = work[64 + i]; 264 return 0; 265} 266