/*-
 * Copyright 2013 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */

#include <wmmintrin.h>

/*
 * Encrypt eight 128-bit blocks (a..h) in one pass and store the results
 * in out[0..7], in the same order as the arguments.
 *
 * Each block is XORed with keysched[0], run through `rounds' AESENC rounds
 * using keysched[1..rounds], and finished with one AESENCLAST round using
 * keysched[rounds + 1].  The schedule must therefore hold at least
 * rounds + 2 entries.
 *
 * NOTE(review): `rounds' is the number of AESENC rounds, not counting the
 * final one; presumably callers pass 9/11/13 for AES-128/192/256 --
 * confirm against the call sites.
 *
 * The eight blocks are processed side by side so that the independent AES
 * instructions can overlap.
 */
static inline void
aesni_enc8(int rounds, const __m128i *keysched, __m128i a,
    __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
    __m128i h, __m128i out[8])
{
	__m128i rk;
	int i;

	/* Initial key whitening. */
	rk = keysched[0];
	a = _mm_xor_si128(a, rk);
	b = _mm_xor_si128(b, rk);
	c = _mm_xor_si128(c, rk);
	d = _mm_xor_si128(d, rk);
	e = _mm_xor_si128(e, rk);
	f = _mm_xor_si128(f, rk);
	g = _mm_xor_si128(g, rk);
	h = _mm_xor_si128(h, rk);

	for (i = 0; i < rounds; i++) {
		rk = keysched[i + 1];
		a = _mm_aesenc_si128(a, rk);
		b = _mm_aesenc_si128(b, rk);
		c = _mm_aesenc_si128(c, rk);
		d = _mm_aesenc_si128(d, rk);
		e = _mm_aesenc_si128(e, rk);
		f = _mm_aesenc_si128(f, rk);
		g = _mm_aesenc_si128(g, rk);
		h = _mm_aesenc_si128(h, rk);
	}

	/* `i' is left at the loop's exit value; this selects the last key. */
	rk = keysched[i + 1];
	out[0] = _mm_aesenclast_si128(a, rk);
	out[1] = _mm_aesenclast_si128(b, rk);
	out[2] = _mm_aesenclast_si128(c, rk);
	out[3] = _mm_aesenclast_si128(d, rk);
	out[4] = _mm_aesenclast_si128(e, rk);
	out[5] = _mm_aesenclast_si128(f, rk);
	out[6] = _mm_aesenclast_si128(g, rk);
	out[7] = _mm_aesenclast_si128(h, rk);
}

/*
 * Decrypt eight 128-bit blocks (a..h) in one pass and store the results
 * in out[0..7], in the same order as the arguments.
 *
 * Same structure as aesni_enc8(), but built from AESDEC / AESDECLAST:
 * XOR with keysched[0], `rounds' AESDEC rounds using keysched[1..rounds],
 * then AESDECLAST with keysched[rounds + 1].
 *
 * NOTE(review): keysched is presumably a decryption schedule already in
 * the form AESDEC expects (reversed, with inverse MixColumns applied to
 * the middle keys); this routine performs no conversion itself -- confirm
 * against the key setup code.
 */
static inline void
aesni_dec8(int rounds, const __m128i *keysched, __m128i a,
    __m128i b, __m128i c, __m128i d, __m128i e, __m128i f, __m128i g,
    __m128i h, __m128i out[8])
{
	__m128i rk;
	int i;

	/* Initial key whitening. */
	rk = keysched[0];
	a = _mm_xor_si128(a, rk);
	b = _mm_xor_si128(b, rk);
	c = _mm_xor_si128(c, rk);
	d = _mm_xor_si128(d, rk);
	e = _mm_xor_si128(e, rk);
	f = _mm_xor_si128(f, rk);
	g = _mm_xor_si128(g, rk);
	h = _mm_xor_si128(h, rk);

	for (i = 0; i < rounds; i++) {
		rk = keysched[i + 1];
		a = _mm_aesdec_si128(a, rk);
		b = _mm_aesdec_si128(b, rk);
		c = _mm_aesdec_si128(c, rk);
		d = _mm_aesdec_si128(d, rk);
		e = _mm_aesdec_si128(e, rk);
		f = _mm_aesdec_si128(f, rk);
		g = _mm_aesdec_si128(g, rk);
		h = _mm_aesdec_si128(h, rk);
	}

	/* `i' is left at the loop's exit value; this selects the last key. */
	rk = keysched[i + 1];
	out[0] = _mm_aesdeclast_si128(a, rk);
	out[1] = _mm_aesdeclast_si128(b, rk);
	out[2] = _mm_aesdeclast_si128(c, rk);
	out[3] = _mm_aesdeclast_si128(d, rk);
	out[4] = _mm_aesdeclast_si128(e, rk);
	out[5] = _mm_aesdeclast_si128(f, rk);
	out[6] = _mm_aesdeclast_si128(g, rk);
	out[7] = _mm_aesdeclast_si128(h, rk);
}

/*
 * Encrypt a single 128-bit block and return the result.
 *
 * `from' is XORed with keysched[0], run through `rounds' AESENC rounds
 * using keysched[1..rounds], and finished with AESENCLAST using
 * keysched[rounds + 1].
 */
static inline __m128i
aesni_enc(int rounds, const __m128i *keysched, const __m128i from)
{
	__m128i tmp;
	int i;

	tmp = _mm_xor_si128(from, keysched[0]);

	for (i = 0; i < rounds; i++)
		tmp = _mm_aesenc_si128(tmp, keysched[i + 1]);

	/* `i' is left at the loop's exit value; this selects the last key. */
	return _mm_aesenclast_si128(tmp, keysched[i + 1]);
}

/*
 * Decrypt a single 128-bit block and return the result.
 *
 * `from' is XORed with keysched[0], run through `rounds' AESDEC rounds
 * using keysched[1..rounds], and finished with AESDECLAST using
 * keysched[rounds + 1].
 *
 * NOTE(review): as with aesni_dec8(), keysched is presumably already a
 * decryption schedule suitable for AESDEC -- confirm against the key
 * setup code.
 */
static inline __m128i
aesni_dec(int rounds, const __m128i *keysched, const __m128i from)
{
	__m128i tmp;
	int i;

	tmp = _mm_xor_si128(from, keysched[0]);

	for (i = 0; i < rounds; i++)
		tmp = _mm_aesdec_si128(tmp, keysched[i + 1]);

	/* `i' is left at the loop's exit value; this selects the last key. */
	return _mm_aesdeclast_si128(tmp, keysched[i + 1]);
}