1139825Simp/*
242805Skato * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
342805Skato *
442805Skato * Permission is hereby granted, free of charge, to any person obtaining
542805Skato * a copy of this software and associated documentation files (the
642805Skato * "Software"), to deal in the Software without restriction, including
742805Skato * without limitation the rights to use, copy, modify, merge, publish,
842805Skato * distribute, sublicense, and/or sell copies of the Software, and to
942805Skato * permit persons to whom the Software is furnished to do so, subject to
1042805Skato * the following conditions:
1142805Skato *
1242805Skato * The above copyright notice and this permission notice shall be
1342805Skato * included in all copies or substantial portions of the Software.
1442805Skato *
1542805Skato * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1642805Skato * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1742805Skato * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1842805Skato * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
1942805Skato * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
2042805Skato * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
2142805Skato * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2242805Skato * SOFTWARE.
2342805Skato */
2442805Skato
2542805Skato#include "inner.h"
2642805Skato
2742805Skato/*
2850477Speter * This is the 64-bit variant of br_ghash_ctmul32(), with 64-bit operands
2942795Skato * and bit reversal of 64-bit words.
3042795Skato */
31162711Sru
/*
 * Carry-less (XOR-based) multiplication of two 64-bit words, keeping
 * only the low 64 bits of the product.
 *
 * Each operand is split into four interleaved lanes (bits 4k, 4k+1,
 * 4k+2 and 4k+3). Inside one lane, set bits are separated by three
 * zero bits, so the carries generated by an ordinary integer
 * multiplication of two lanes land in those gaps (or beyond bit 63,
 * where they are truncated). Masking each combined result with the
 * lane mask it belongs to then throws the carries away. The function
 * is straight-line code: no branch or table lookup depends on x or y.
 */
static inline uint64_t
bmul64(uint64_t x, uint64_t y)
{
	const uint64_t lane0 = (uint64_t)0x1111111111111111;
	const uint64_t lane1 = (uint64_t)0x2222222222222222;
	const uint64_t lane2 = (uint64_t)0x4444444444444444;
	const uint64_t lane3 = (uint64_t)0x8888888888888888;

	/* Lanes of the first operand. */
	const uint64_t a0 = x & lane0;
	const uint64_t a1 = x & lane1;
	const uint64_t a2 = x & lane2;
	const uint64_t a3 = x & lane3;

	/* Lanes of the second operand. */
	const uint64_t b0 = y & lane0;
	const uint64_t b1 = y & lane1;
	const uint64_t b2 = y & lane2;
	const uint64_t b3 = y & lane3;

	uint64_t r;

	/*
	 * Output lane k gathers every pair (ai, bj) with i + j == k mod 4;
	 * the unsigned products wrap modulo 2^64.
	 */
	r = ((a0 * b0) ^ (a1 * b3) ^ (a2 * b2) ^ (a3 * b1)) & lane0;
	r |= ((a0 * b1) ^ (a1 * b0) ^ (a2 * b3) ^ (a3 * b2)) & lane1;
	r |= ((a0 * b2) ^ (a1 * b1) ^ (a2 * b0) ^ (a3 * b3)) & lane2;
	r |= ((a0 * b3) ^ (a1 * b2) ^ (a2 * b1) ^ (a3 * b0)) & lane3;
	return r;
}
5745783Skato
/*
 * Reverse the order of the 64 bits of x (bit 0 swaps with bit 63,
 * bit 1 with bit 62, and so on).
 *
 * The reversal is done by swapping neighbouring groups of growing
 * size: single bits, bit pairs, nibbles, bytes, 16-bit words and
 * finally the two 32-bit halves. Only shifts, masks and ORs are used.
 */
static uint64_t
rev64(uint64_t x)
{
	const uint64_t k1 = (uint64_t)0x5555555555555555;
	const uint64_t k2 = (uint64_t)0x3333333333333333;
	const uint64_t k4 = (uint64_t)0x0F0F0F0F0F0F0F0F;
	const uint64_t k8 = (uint64_t)0x00FF00FF00FF00FF;
	const uint64_t k16 = (uint64_t)0x0000FFFF0000FFFF;

	/* Swap adjacent bits. */
	x = ((x & k1) << 1) | ((x >> 1) & k1);
	/* Swap adjacent 2-bit groups. */
	x = ((x & k2) << 2) | ((x >> 2) & k2);
	/* Swap adjacent nibbles. */
	x = ((x & k4) << 4) | ((x >> 4) & k4);
	/* Swap adjacent bytes. */
	x = ((x & k8) << 8) | ((x >> 8) & k8);
	/* Swap adjacent 16-bit words. */
	x = ((x & k16) << 16) | ((x >> 16) & k16);
	/* Swap the two 32-bit halves. */
	return (x >> 32) | (x << 32);
}
7542795Skato
7645783Skato/* see bearssl_ghash.h */
/*
 * GHASH update built on bmul64() and rev64().
 *
 *   y     16-byte running value; read on entry (two 64-bit words
 *         through br_dec64be() at offsets 0 and 8) and overwritten
 *         with the new value on exit.
 *   h     16-byte hash key, read the same way; not modified.
 *   data  input bytes, consumed in 16-byte blocks.
 *   len   number of input bytes. A final partial block is padded with
 *         zero bytes up to 16. When len is 0 the loop is skipped and
 *         y is simply written back as it was decoded.
 *
 * For every block the value is XORed with the block, then multiplied
 * by the key as a 128x128-bit carry-less product assembled from three
 * 64x64 products (Karatsuba). The 256-bit result is folded back to 128
 * bits; the shift counts 1, 2 and 7 used for the fold match the low
 * terms of the GCM field polynomial X^128 + X^7 + X^2 + X + 1.
 */
void
br_ghash_ctmul64(void *y, const void *h, const void *data, size_t len)
{
	const unsigned char *in = data;
	const unsigned char *key = h;
	unsigned char *state = y;
	uint64_t acc_hi, acc_lo;
	uint64_t k_hi, k_lo, k_mid;
	uint64_t k_hi_rev, k_lo_rev, k_mid_rev;

	acc_hi = br_dec64be(state);
	acc_lo = br_dec64be(state + 8);

	/*
	 * Key-dependent values are computed once: both halves, their XOR
	 * (the Karatsuba middle operand), and the bit-reversed versions
	 * needed to obtain the upper halves of the products.
	 */
	k_hi = br_dec64be(key);
	k_lo = br_dec64be(key + 8);
	k_lo_rev = rev64(k_lo);
	k_hi_rev = rev64(k_hi);
	k_mid = k_lo ^ k_hi;
	k_mid_rev = k_lo_rev ^ k_hi_rev;

	while (len != 0) {
		unsigned char padded[16];
		const unsigned char *block;
		uint64_t acc_mid, acc_lo_rev, acc_hi_rev, acc_mid_rev;
		uint64_t lo, hi, mid;
		uint64_t lo_up, hi_up, mid_up;
		uint64_t w0, w1, w2, w3;

		/* Select the next block; zero-pad a short final block. */
		if (len < 16) {
			memcpy(padded, in, len);
			memset(padded + len, 0, (sizeof padded) - len);
			block = padded;
			len = 0;
		} else {
			block = in;
			in += 16;
			len -= 16;
		}

		/* Mix the block into the running value. */
		acc_hi ^= br_dec64be(block);
		acc_lo ^= br_dec64be(block + 8);

		acc_lo_rev = rev64(acc_lo);
		acc_hi_rev = rev64(acc_hi);
		acc_mid = acc_lo ^ acc_hi;
		acc_mid_rev = acc_lo_rev ^ acc_hi_rev;

		/*
		 * bmul64() yields only the low 64 bits of a product. The
		 * upper bits come from multiplying the bit-reversed
		 * operands, reversing the result and shifting right by one.
		 * The middle Karatsuba term has the two outer products
		 * XORed back out of it.
		 */
		lo = bmul64(acc_lo, k_lo);
		hi = bmul64(acc_hi, k_hi);
		mid = bmul64(acc_mid, k_mid) ^ lo ^ hi;
		lo_up = bmul64(acc_lo_rev, k_lo_rev);
		hi_up = bmul64(acc_hi_rev, k_hi_rev);
		mid_up = bmul64(acc_mid_rev, k_mid_rev) ^ lo_up ^ hi_up;
		lo_up = rev64(lo_up) >> 1;
		hi_up = rev64(hi_up) >> 1;
		mid_up = rev64(mid_up) >> 1;

		/* Assemble the product as four 64-bit words, w0 lowest. */
		w0 = lo;
		w1 = lo_up ^ mid;
		w2 = hi ^ mid_up;
		w3 = hi_up;

		/*
		 * Shift the whole 256-bit value left by one bit, working
		 * from the top word down so each step sees the unshifted
		 * word below it.
		 */
		w3 = (w3 << 1) | (w2 >> 63);
		w2 = (w2 << 1) | (w1 >> 63);
		w1 = (w1 << 1) | (w0 >> 63);
		w0 <<= 1;

		/*
		 * Fold w0 into w1/w2, then the updated w1 into w2/w3. The
		 * folded 128-bit result ends up in w3:w2.
		 */
		w1 ^= (w0 << 63) ^ (w0 << 62) ^ (w0 << 57);
		w2 ^= w0 ^ (w0 >> 1) ^ (w0 >> 2) ^ (w0 >> 7);
		w3 ^= w1 ^ (w1 >> 1) ^ (w1 >> 2) ^ (w1 >> 7);
		w2 ^= (w1 << 63) ^ (w1 << 62) ^ (w1 << 57);

		acc_lo = w2;
		acc_hi = w3;
	}

	br_enc64be(state, acc_hi);
	br_enc64be(state + 8, acc_lo);
}
15545783Skato