1238106Sdes/* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */
2238106Sdes/*
3238106Sdesversion 20081011
4238106SdesMatthew Dempsky
5238106SdesPublic domain.
6238106SdesDerived from public domain code by D. J. Bernstein.
7238106Sdes*/
8238106Sdes
9238106Sdesint crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *);
10238106Sdes
11238106Sdesstatic void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
12238106Sdes{
13238106Sdes  unsigned int j;
14238106Sdes  unsigned int u;
15238106Sdes  u = 0;
16238106Sdes  for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
17238106Sdes  u += a[31] + b[31]; out[31] = u;
18238106Sdes}
19238106Sdes
20238106Sdesstatic void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
21238106Sdes{
22238106Sdes  unsigned int j;
23238106Sdes  unsigned int u;
24269257Sdes  u = 218;
25269257Sdes  for (j = 0;j < 31;++j) {
26269257Sdes    u += a[j] + 65280 - b[j];
27269257Sdes    out[j] = u & 255;
28269257Sdes    u >>= 8;
29269257Sdes  }
30269257Sdes  u += a[31] - b[31];
31269257Sdes  out[31] = u;
32269257Sdes}
33269257Sdes
34238106Sdesstatic void squeeze(unsigned int a[32])
35238106Sdes{
36238106Sdes  unsigned int j;
37238106Sdes  unsigned int u;
38238106Sdes  u = 0;
39238106Sdes  for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
40238106Sdes  u += a[31]; a[31] = u & 127;
41238106Sdes  u = 19 * (u >> 7);
42238106Sdes  for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
43238106Sdes  u += a[31]; a[31] = u;
44238106Sdes}
45238106Sdes
46255588Sdesstatic const unsigned int minusp[32] = {
47238106Sdes 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
48238106Sdes} ;
49238106Sdes
50238106Sdesstatic void freeze(unsigned int a[32])
51238106Sdes{
52238106Sdes  unsigned int aorig[32];
53238106Sdes  unsigned int j;
54285206Sdes  unsigned int negative;
55238106Sdes
56238106Sdes  for (j = 0;j < 32;++j) aorig[j] = a[j];
57238106Sdes  add(a,a,minusp);
58238106Sdes  negative = -((a[31] >> 7) & 1);
59238106Sdes  for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
60238106Sdes}
61238106Sdes
62238106Sdesstatic void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
63238106Sdes{
64238106Sdes  unsigned int i;
65238106Sdes  unsigned int j;
66238106Sdes  unsigned int u;
67238106Sdes
68238106Sdes  for (i = 0;i < 32;++i) {
69238106Sdes    u = 0;
70238106Sdes    for (j = 0;j <= i;++j) u += a[j] * b[i - j];
71238106Sdes    for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
72238106Sdes    out[i] = u;
73238106Sdes  }
74238106Sdes  squeeze(out);
75238106Sdes}
76238106Sdes
77238106Sdesstatic void mult121665(unsigned int out[32],const unsigned int a[32])
78238106Sdes{
79238106Sdes  unsigned int j;
80238106Sdes  unsigned int u;
81238106Sdes
82238106Sdes  u = 0;
83238106Sdes  for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
84238106Sdes  u += 121665 * a[31]; out[31] = u & 127;
85238106Sdes  u = 19 * (u >> 7);
86238106Sdes  for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
87238106Sdes  u += out[j]; out[j] = u;
88238106Sdes}
89238106Sdes
90238106Sdesstatic void square(unsigned int out[32],const unsigned int a[32])
91238106Sdes{
92238106Sdes  unsigned int i;
93238106Sdes  unsigned int j;
94238106Sdes  unsigned int u;
95238106Sdes
96238106Sdes  for (i = 0;i < 32;++i) {
97238106Sdes    u = 0;
98238106Sdes    for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
99238106Sdes    for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
100238106Sdes    u *= 2;
101238106Sdes    if ((i & 1) == 0) {
102238106Sdes      u += a[i / 2] * a[i / 2];
103238106Sdes      u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
104238106Sdes    }
105238106Sdes    out[i] = u;
106291767Sdes  }
107291767Sdes  squeeze(out);
108291767Sdes}
109291767Sdes
110238106Sdesstatic void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
111238106Sdes{
112238106Sdes  unsigned int j;
113238106Sdes  unsigned int t;
114238106Sdes  unsigned int bminus1;
115238106Sdes
116238106Sdes  bminus1 = b - 1;
117238106Sdes  for (j = 0;j < 64;++j) {
118238106Sdes    t = bminus1 & (r[j] ^ s[j]);
119238106Sdes    p[j] = s[j] ^ t;
120238106Sdes    q[j] = r[j] ^ t;
121238106Sdes  }
122238106Sdes}
123238106Sdes
124285206Sdesstatic void mainloop(unsigned int work[64],const unsigned char e[32])
125285206Sdes{
126285206Sdes  unsigned int xzm1[64];
127285206Sdes  unsigned int xzm[64];
128285206Sdes  unsigned int xzmb[64];
129238106Sdes  unsigned int xzm1b[64];
130238106Sdes  unsigned int xznb[64];
131238106Sdes  unsigned int xzn1b[64];
132238106Sdes  unsigned int a0[64];
133238106Sdes  unsigned int a1[64];
134238106Sdes  unsigned int b0[64];
135238106Sdes  unsigned int b1[64];
136238106Sdes  unsigned int c1[64];
137238106Sdes  unsigned int r[32];
138238106Sdes  unsigned int s[32];
139238106Sdes  unsigned int t[32];
140238106Sdes  unsigned int u[32];
141238106Sdes  unsigned int j;
142238106Sdes  unsigned int b;
143238106Sdes  int pos;
144238106Sdes
145238106Sdes  for (j = 0;j < 32;++j) xzm1[j] = work[j];
146238106Sdes  xzm1[32] = 1;
147238106Sdes  for (j = 33;j < 64;++j) xzm1[j] = 0;
148238106Sdes
149238106Sdes  xzm[0] = 1;
150238106Sdes  for (j = 1;j < 64;++j) xzm[j] = 0;
151238106Sdes
152238106Sdes  for (pos = 254;pos >= 0;--pos) {
153238106Sdes    b = e[pos / 8] >> (pos & 7);
154238106Sdes    b &= 1;
155238106Sdes    select(xzmb,xzm1b,xzm,xzm1,b);
156238106Sdes    add(a0,xzmb,xzmb + 32);
157238106Sdes    sub(a0 + 32,xzmb,xzmb + 32);
158238106Sdes    add(a1,xzm1b,xzm1b + 32);
159238106Sdes    sub(a1 + 32,xzm1b,xzm1b + 32);
160238106Sdes    square(b0,a0);
161238106Sdes    square(b0 + 32,a0 + 32);
162238106Sdes    mult(b1,a1,a0 + 32);
163238106Sdes    mult(b1 + 32,a1 + 32,a0);
164238106Sdes    add(c1,b1,b1 + 32);
165238106Sdes    sub(c1 + 32,b1,b1 + 32);
166238106Sdes    square(r,c1 + 32);
167238106Sdes    sub(s,b0,b0 + 32);
168238106Sdes    mult121665(t,s);
169238106Sdes    add(u,t,b0);
170238106Sdes    mult(xznb,b0,b0 + 32);
171238106Sdes    mult(xznb + 32,s,u);
172238106Sdes    square(xzn1b,c1);
173238106Sdes    mult(xzn1b + 32,r,work);
174238106Sdes    select(xzm,xzm1,xznb,xzn1b,b);
175238106Sdes  }
176238106Sdes
177238106Sdes  for (j = 0;j < 64;++j) work[j] = xzm[j];
178}
179
180static void recip(unsigned int out[32],const unsigned int z[32])
181{
182  unsigned int z2[32];
183  unsigned int z9[32];
184  unsigned int z11[32];
185  unsigned int z2_5_0[32];
186  unsigned int z2_10_0[32];
187  unsigned int z2_20_0[32];
188  unsigned int z2_50_0[32];
189  unsigned int z2_100_0[32];
190  unsigned int t0[32];
191  unsigned int t1[32];
192  int i;
193
194  /* 2 */ square(z2,z);
195  /* 4 */ square(t1,z2);
196  /* 8 */ square(t0,t1);
197  /* 9 */ mult(z9,t0,z);
198  /* 11 */ mult(z11,z9,z2);
199  /* 22 */ square(t0,z11);
200  /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
201
202  /* 2^6 - 2^1 */ square(t0,z2_5_0);
203  /* 2^7 - 2^2 */ square(t1,t0);
204  /* 2^8 - 2^3 */ square(t0,t1);
205  /* 2^9 - 2^4 */ square(t1,t0);
206  /* 2^10 - 2^5 */ square(t0,t1);
207  /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
208
209  /* 2^11 - 2^1 */ square(t0,z2_10_0);
210  /* 2^12 - 2^2 */ square(t1,t0);
211  /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
212  /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
213
214  /* 2^21 - 2^1 */ square(t0,z2_20_0);
215  /* 2^22 - 2^2 */ square(t1,t0);
216  /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
217  /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
218
219  /* 2^41 - 2^1 */ square(t1,t0);
220  /* 2^42 - 2^2 */ square(t0,t1);
221  /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
222  /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
223
224  /* 2^51 - 2^1 */ square(t0,z2_50_0);
225  /* 2^52 - 2^2 */ square(t1,t0);
226  /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
227  /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
228
229  /* 2^101 - 2^1 */ square(t1,z2_100_0);
230  /* 2^102 - 2^2 */ square(t0,t1);
231  /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
232  /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
233
234  /* 2^201 - 2^1 */ square(t0,t1);
235  /* 2^202 - 2^2 */ square(t1,t0);
236  /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
237  /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
238
239  /* 2^251 - 2^1 */ square(t1,t0);
240  /* 2^252 - 2^2 */ square(t0,t1);
241  /* 2^253 - 2^3 */ square(t1,t0);
242  /* 2^254 - 2^4 */ square(t0,t1);
243  /* 2^255 - 2^5 */ square(t1,t0);
244  /* 2^255 - 21 */ mult(out,t1,z11);
245}
246
247int crypto_scalarmult_curve25519(unsigned char *q,
248  const unsigned char *n,
249  const unsigned char *p)
250{
251  unsigned int work[96];
252  unsigned char e[32];
253  unsigned int i;
254  for (i = 0;i < 32;++i) e[i] = n[i];
255  e[0] &= 248;
256  e[31] &= 127;
257  e[31] |= 64;
258  for (i = 0;i < 32;++i) work[i] = p[i];
259  mainloop(work,e);
260  recip(work + 32,work + 32);
261  mult(work + 64,work,work + 32);
262  freeze(work + 64);
263  for (i = 0;i < 32;++i) q[i] = work[64 + i];
264  return 0;
265}
266