/*-
 * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */
29255187Sjmg
30255187Sjmg#include <wmmintrin.h>
31255187Sjmg
/*
 * Encrypt eight independent 128-bit blocks under one key schedule,
 * advancing all eight blocks one round at a time.
 *
 * Each block is XORed with keysched[0], run through `rounds' AESENC steps
 * using keysched[1] .. keysched[rounds], and finished with one AESENCLAST
 * step using keysched[rounds + 1].  The results for a..h are stored, in
 * that order, in out[0] .. out[7].
 *
 * `rounds' counts only the AESENC steps (the AESENCLAST step is extra), so
 * the schedule must provide at least rounds + 2 entries.  If rounds <= 0 no
 * AESENC step is executed and keysched[1] is used for the last step.
 */
static inline void
aesni_enc8(int rounds, const __m128i *keysched, __m128i a,
    __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
    __m128i h, __m128i out[8])
{
	const __m128i *rk;
	__m128i key;
	int left;

	/* Mix round key 0 into every block. */
	rk = keysched;
	key = *rk;
	a = _mm_xor_si128(a, key);
	b = _mm_xor_si128(b, key);
	c = _mm_xor_si128(c, key);
	d = _mm_xor_si128(d, key);
	e = _mm_xor_si128(e, key);
	f = _mm_xor_si128(f, key);
	g = _mm_xor_si128(g, key);
	h = _mm_xor_si128(h, key);

	/* Middle rounds: step to the next schedule entry, apply it to all. */
	for (left = rounds; left > 0; left--) {
		key = *++rk;
		a = _mm_aesenc_si128(a, key);
		b = _mm_aesenc_si128(b, key);
		c = _mm_aesenc_si128(c, key);
		d = _mm_aesenc_si128(d, key);
		e = _mm_aesenc_si128(e, key);
		f = _mm_aesenc_si128(f, key);
		g = _mm_aesenc_si128(g, key);
		h = _mm_aesenc_si128(h, key);
	}

	/*
	 * Last round: the entry following the final one consumed above.  It
	 * is dereferenced for every lane, interleaved with the stores to out.
	 */
	rk++;
	out[0] = _mm_aesenclast_si128(a, *rk);
	out[1] = _mm_aesenclast_si128(b, *rk);
	out[2] = _mm_aesenclast_si128(c, *rk);
	out[3] = _mm_aesenclast_si128(d, *rk);
	out[4] = _mm_aesenclast_si128(e, *rk);
	out[5] = _mm_aesenclast_si128(f, *rk);
	out[6] = _mm_aesenclast_si128(g, *rk);
	out[7] = _mm_aesenclast_si128(h, *rk);
}
68255187Sjmg
/*
 * Decrypt eight independent 128-bit blocks under one key schedule,
 * advancing all eight blocks one round at a time.
 *
 * Each block is XORed with keysched[0], run through `rounds' AESDEC steps
 * using keysched[1] .. keysched[rounds], and finished with one AESDECLAST
 * step using keysched[rounds + 1].  The results for a..h are stored, in
 * that order, in out[0] .. out[7].
 *
 * The schedule is consumed in ascending index order, so it must already be
 * laid out in the order and form the AESDEC instruction expects; nothing is
 * reordered or transformed here.  `rounds' counts only the AESDEC steps, so
 * at least rounds + 2 entries are required.  If rounds <= 0 no AESDEC step
 * is executed and keysched[1] is used for the last step.
 */
static inline void
aesni_dec8(int rounds, const __m128i *keysched, __m128i a,
    __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
    __m128i h, __m128i out[8])
{
	const __m128i *rk;
	__m128i key;
	int left;

	/* Mix round key 0 into every block. */
	rk = keysched;
	key = *rk;
	a = _mm_xor_si128(a, key);
	b = _mm_xor_si128(b, key);
	c = _mm_xor_si128(c, key);
	d = _mm_xor_si128(d, key);
	e = _mm_xor_si128(e, key);
	f = _mm_xor_si128(f, key);
	g = _mm_xor_si128(g, key);
	h = _mm_xor_si128(h, key);

	/* Middle rounds: step to the next schedule entry, apply it to all. */
	for (left = rounds; left > 0; left--) {
		key = *++rk;
		a = _mm_aesdec_si128(a, key);
		b = _mm_aesdec_si128(b, key);
		c = _mm_aesdec_si128(c, key);
		d = _mm_aesdec_si128(d, key);
		e = _mm_aesdec_si128(e, key);
		f = _mm_aesdec_si128(f, key);
		g = _mm_aesdec_si128(g, key);
		h = _mm_aesdec_si128(h, key);
	}

	/*
	 * Last round: the entry following the final one consumed above.  It
	 * is dereferenced for every lane, interleaved with the stores to out.
	 */
	rk++;
	out[0] = _mm_aesdeclast_si128(a, *rk);
	out[1] = _mm_aesdeclast_si128(b, *rk);
	out[2] = _mm_aesdeclast_si128(c, *rk);
	out[3] = _mm_aesdeclast_si128(d, *rk);
	out[4] = _mm_aesdeclast_si128(e, *rk);
	out[5] = _mm_aesdeclast_si128(f, *rk);
	out[6] = _mm_aesdeclast_si128(g, *rk);
	out[7] = _mm_aesdeclast_si128(h, *rk);
}
105255187Sjmg
/*
 * Encrypt a single 128-bit block and return the result.
 *
 * `from' is XORed with keysched[0], run through `rounds' AESENC steps using
 * keysched[1] .. keysched[rounds], and finished with one AESENCLAST step
 * using keysched[rounds + 1].  `rounds' therefore excludes the last step,
 * and the schedule must provide at least rounds + 2 entries.  If
 * rounds <= 0 no AESENC step is executed and keysched[1] is used for the
 * last step.
 */
static inline __m128i
aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
{
	const __m128i *rk;
	__m128i state;
	int left;

	/* Mix in round key 0. */
	rk = keysched;
	state = _mm_xor_si128(from, *rk);

	/* Middle rounds walk the schedule one entry at a time. */
	for (left = rounds; left > 0; left--) {
		rk++;
		state = _mm_aesenc_si128(state, *rk);
	}

	/* The last round uses the next unused schedule entry. */
	rk++;
	return _mm_aesenclast_si128(state, *rk);
}
119255187Sjmg
/*
 * Decrypt a single 128-bit block and return the result.
 *
 * `from' is XORed with keysched[0], run through `rounds' AESDEC steps using
 * keysched[1] .. keysched[rounds], and finished with one AESDECLAST step
 * using keysched[rounds + 1].  The schedule is consumed in ascending index
 * order and is not transformed here, so the caller must pass it in the
 * order and form the AESDEC instruction expects.  `rounds' excludes the
 * last step, and at least rounds + 2 entries are required.  If rounds <= 0
 * no AESDEC step is executed and keysched[1] is used for the last step.
 */
static inline __m128i
aesni_dec(int rounds, const __m128i *keysched, const __m128i from)
{
	const __m128i *rk;
	__m128i state;
	int left;

	/* Mix in round key 0. */
	rk = keysched;
	state = _mm_xor_si128(from, *rk);

	/* Middle rounds walk the schedule one entry at a time. */
	for (left = rounds; left > 0; left--) {
		rk++;
		state = _mm_aesdec_si128(state, *rk);
	}

	/* The last round uses the next unused schedule entry. */
	rk++;
	return _mm_aesdeclast_si128(state, *rk);
}
133