1/*	$OpenBSD: poly1305.c,v 1.2 2020/07/22 13:54:30 tobhe Exp $	*/
2/*
3 * Public Domain poly1305 from Andrew Moon
4 * Based on poly1305-donna.c, poly1305-donna-32.h and poly1305-donna.h from:
5 *   https://github.com/floodyberry/poly1305-donna
6 */
7
8#include <sys/types.h>
9#include <sys/systm.h>
10
11#include "poly1305.h"
12
13/*
14 * poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication
15 * and 64 bit addition.
16 */
17
/*
 * Load the four bytes at p as one little endian 32 bit value: p[0] is
 * the least significant byte and p[3] the most significant.
 */
static unsigned long
U8TO32(const unsigned char *p)
{
	unsigned long word = 0;
	int i;

	/* fold the bytes in from most significant to least significant */
	for (i = 3; i >= 0; i--)
		word = (word << 8) | (unsigned long)(p[i] & 0xff);

	return (word);
}
27
/*
 * Store the low 32 bits of v at p as four bytes in little endian order:
 * p[0] receives the least significant byte.  Bits of v above bit 31
 * are not written anywhere.
 */
static void
U32TO8(unsigned char *p, unsigned long v)
{
	int i;

	for (i = 0; i < 4; i++) {
		p[i] = v & 0xff;
		v >>= 8;
	}
}
37
38void
39poly1305_init(poly1305_state *st, const unsigned char key[32])
40{
41	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
42	st->r[0] = (U8TO32(&key[0])) & 0x3ffffff;
43	st->r[1] = (U8TO32(&key[3]) >> 2) & 0x3ffff03;
44	st->r[2] = (U8TO32(&key[6]) >> 4) & 0x3ffc0ff;
45	st->r[3] = (U8TO32(&key[9]) >> 6) & 0x3f03fff;
46	st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
47
48	/* h = 0 */
49	st->h[0] = 0;
50	st->h[1] = 0;
51	st->h[2] = 0;
52	st->h[3] = 0;
53	st->h[4] = 0;
54
55	/* save pad for later */
56	st->pad[0] = U8TO32(&key[16]);
57	st->pad[1] = U8TO32(&key[20]);
58	st->pad[2] = U8TO32(&key[24]);
59	st->pad[3] = U8TO32(&key[28]);
60
61	st->leftover = 0;
62	st->final = 0;
63}
64
65static void
66poly1305_blocks(poly1305_state *st, const unsigned char *m, size_t bytes)
67{
68	const unsigned long hibit = (st->final) ? 0 : (1 << 24); /* 1 << 128 */
69	unsigned long r0, r1, r2, r3, r4;
70	unsigned long s1, s2, s3, s4;
71	unsigned long h0, h1, h2, h3, h4;
72	unsigned long long d0, d1, d2, d3, d4;
73	unsigned long c;
74
75	r0 = st->r[0];
76	r1 = st->r[1];
77	r2 = st->r[2];
78	r3 = st->r[3];
79	r4 = st->r[4];
80
81	s1 = r1 * 5;
82	s2 = r2 * 5;
83	s3 = r3 * 5;
84	s4 = r4 * 5;
85
86	h0 = st->h[0];
87	h1 = st->h[1];
88	h2 = st->h[2];
89	h3 = st->h[3];
90	h4 = st->h[4];
91
92	while (bytes >= poly1305_block_size) {
93		/* h += m[i] */
94		h0 += (U8TO32(m + 0)) & 0x3ffffff;
95		h1 += (U8TO32(m + 3) >> 2) & 0x3ffffff;
96		h2 += (U8TO32(m + 6) >> 4) & 0x3ffffff;
97		h3 += (U8TO32(m + 9) >> 6) & 0x3ffffff;
98		h4 += (U8TO32(m + 12) >> 8) | hibit;
99
100		/* h *= r */
101		d0 = ((unsigned long long)h0 * r0) +
102		    ((unsigned long long)h1 * s4) +
103		    ((unsigned long long)h2 * s3) +
104		    ((unsigned long long)h3 * s2) +
105		    ((unsigned long long)h4 * s1);
106		d1 = ((unsigned long long)h0 * r1) +
107		    ((unsigned long long)h1 * r0) +
108		    ((unsigned long long)h2 * s4) +
109		    ((unsigned long long)h3 * s3) +
110		    ((unsigned long long)h4 * s2);
111		d2 = ((unsigned long long)h0 * r2) +
112		    ((unsigned long long)h1 * r1) +
113		    ((unsigned long long)h2 * r0) +
114		    ((unsigned long long)h3 * s4) +
115		    ((unsigned long long)h4 * s3);
116		d3 = ((unsigned long long)h0 * r3) +
117		    ((unsigned long long)h1 * r2) +
118		    ((unsigned long long)h2 * r1) +
119		    ((unsigned long long)h3 * r0) +
120		    ((unsigned long long)h4 * s4);
121		d4 = ((unsigned long long)h0 * r4) +
122		    ((unsigned long long)h1 * r3) +
123		    ((unsigned long long)h2 * r2) +
124		    ((unsigned long long)h3 * r1) +
125		    ((unsigned long long)h4 * r0);
126
127		/* (partial) h %= p */
128		c = (unsigned long)(d0 >> 26);
129		h0 = (unsigned long)d0 & 0x3ffffff;
130		d1 += c;
131		c = (unsigned long)(d1 >> 26);
132		h1 = (unsigned long)d1 & 0x3ffffff;
133		d2 += c;
134		c = (unsigned long)(d2 >> 26);
135		h2 = (unsigned long)d2 & 0x3ffffff;
136		d3 += c;
137		c = (unsigned long)(d3 >> 26);
138		h3 = (unsigned long)d3 & 0x3ffffff;
139		d4 += c;
140		c = (unsigned long)(d4 >> 26);
141		h4 = (unsigned long)d4 & 0x3ffffff;
142		h0 += c * 5;
143		c = (h0 >> 26);
144		h0 = h0 & 0x3ffffff;
145		h1 += c;
146
147		m += poly1305_block_size;
148		bytes -= poly1305_block_size;
149	}
150
151	st->h[0] = h0;
152	st->h[1] = h1;
153	st->h[2] = h2;
154	st->h[3] = h3;
155	st->h[4] = h4;
156}
157
158void
159poly1305_update(poly1305_state *st, const unsigned char *m, size_t bytes)
160{
161	size_t i;
162
163	/* handle leftover */
164	if (st->leftover) {
165		size_t want = (poly1305_block_size - st->leftover);
166		if (want > bytes)
167			want = bytes;
168		for (i = 0; i < want; i++)
169			st->buffer[st->leftover + i] = m[i];
170		bytes -= want;
171		m += want;
172		st->leftover += want;
173		if (st->leftover < poly1305_block_size)
174			return;
175		poly1305_blocks(st, st->buffer, poly1305_block_size);
176		st->leftover = 0;
177	}
178
179	/* process full blocks */
180	if (bytes >= poly1305_block_size) {
181		size_t want = (bytes & ~(poly1305_block_size - 1));
182		poly1305_blocks(st, m, want);
183		m += want;
184		bytes -= want;
185	}
186
187	/* store leftover */
188	if (bytes) {
189		for (i = 0; i < bytes; i++)
190			st->buffer[st->leftover + i] = m[i];
191		st->leftover += bytes;
192	}
193}
194
195void
196poly1305_finish(poly1305_state *st, unsigned char mac[16])
197{
198	unsigned long h0, h1, h2, h3, h4, c;
199	unsigned long g0, g1, g2, g3, g4;
200	unsigned long long f;
201	unsigned long mask;
202
203	/* process the remaining block */
204	if (st->leftover) {
205		size_t i = st->leftover;
206		st->buffer[i++] = 1;
207		for (; i < poly1305_block_size; i++)
208			st->buffer[i] = 0;
209		st->final = 1;
210		poly1305_blocks(st, st->buffer, poly1305_block_size);
211	}
212
213	/* fully carry h */
214	h0 = st->h[0];
215	h1 = st->h[1];
216	h2 = st->h[2];
217	h3 = st->h[3];
218	h4 = st->h[4];
219
220	c = h1 >> 26;
221	h1 = h1 & 0x3ffffff;
222	h2 += c;
223	c = h2 >> 26;
224	h2 = h2 & 0x3ffffff;
225	h3 += c;
226	c = h3 >> 26;
227	h3 = h3 & 0x3ffffff;
228	h4 += c;
229	c = h4 >> 26;
230	h4 = h4 & 0x3ffffff;
231	h0 += c * 5;
232	c = h0 >> 26;
233	h0 = h0 & 0x3ffffff;
234	h1 += c;
235
236	/* compute h + -p */
237	g0 = h0 + 5;
238	c = g0 >> 26;
239	g0 &= 0x3ffffff;
240	g1 = h1 + c;
241	c = g1 >> 26;
242	g1 &= 0x3ffffff;
243	g2 = h2 + c;
244	c = g2 >> 26;
245	g2 &= 0x3ffffff;
246	g3 = h3 + c;
247	c = g3 >> 26;
248	g3 &= 0x3ffffff;
249	g4 = h4 + c - (1 << 26);
250
251	/* select h if h < p, or h + -p if h >= p */
252	mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1;
253	g0 &= mask;
254	g1 &= mask;
255	g2 &= mask;
256	g3 &= mask;
257	g4 &= mask;
258	mask = ~mask;
259	h0 = (h0 & mask) | g0;
260	h1 = (h1 & mask) | g1;
261	h2 = (h2 & mask) | g2;
262	h3 = (h3 & mask) | g3;
263	h4 = (h4 & mask) | g4;
264
265	/* h = h % (2^128) */
266	h0 = ((h0) | (h1 << 26)) & 0xffffffff;
267	h1 = ((h1 >>  6) | (h2 << 20)) & 0xffffffff;
268	h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
269	h3 = ((h3 >> 18) | (h4 <<  8)) & 0xffffffff;
270
271	/* mac = (h + pad) % (2^128) */
272	f = (unsigned long long)h0 + st->pad[0];
273	h0 = (unsigned long)f;
274	f = (unsigned long long)h1 + st->pad[1] + (f >> 32);
275	h1 = (unsigned long)f;
276	f = (unsigned long long)h2 + st->pad[2] + (f >> 32);
277	h2 = (unsigned long)f;
278	f = (unsigned long long)h3 + st->pad[3] + (f >> 32);
279	h3 = (unsigned long)f;
280
281	U32TO8(mac +  0, h0);
282	U32TO8(mac +  4, h1);
283	U32TO8(mac +  8, h2);
284	U32TO8(mac + 12, h3);
285
286	/* zero out the state */
287	st->h[0] = 0;
288	st->h[1] = 0;
289	st->h[2] = 0;
290	st->h[3] = 0;
291	st->h[4] = 0;
292	st->r[0] = 0;
293	st->r[1] = 0;
294	st->r[2] = 0;
295	st->r[3] = 0;
296	st->r[4] = 0;
297	st->pad[0] = 0;
298	st->pad[1] = 0;
299	st->pad[2] = 0;
300	st->pad[3] = 0;
301}
302