1295016Sjkim/* crypto/aes/aes_core.c */
2238384Sjkim/**
3238384Sjkim * rijndael-alg-fst.c
4238384Sjkim *
5238384Sjkim * @version 3.0 (December 2000)
6238384Sjkim *
7238384Sjkim * Optimised ANSI C code for the Rijndael cipher (now AES)
8238384Sjkim *
9238384Sjkim * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
10238384Sjkim * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
11238384Sjkim * @author Paulo Barreto <paulo.barreto@terra.com.br>
12238384Sjkim *
13238384Sjkim * This code is hereby placed in the public domain.
14238384Sjkim *
15238384Sjkim * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
16238384Sjkim * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17238384Sjkim * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18238384Sjkim * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
19238384Sjkim * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20238384Sjkim * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21238384Sjkim * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22238384Sjkim * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23238384Sjkim * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24238384Sjkim * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25238384Sjkim * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26238384Sjkim */
27238384Sjkim
/*
 * This is an experimental x86[_64] derivative. It assumes little-endian
 * byte order and expects the CPU to sustain unaligned memory references.
 * It is used as a playground for cache-timing attack mitigations and
 * serves as the reference C implementation for the x86[_64] assembler.
 *
 *                  <appro@fy.chalmers.se>
 */
36238384Sjkim
37238384Sjkim
38238384Sjkim#ifndef AES_DEBUG
39238384Sjkim# ifndef NDEBUG
40238384Sjkim#  define NDEBUG
41238384Sjkim# endif
42238384Sjkim#endif
43238384Sjkim#include <assert.h>
44238384Sjkim
45238384Sjkim#include <stdlib.h>
46238384Sjkim#include <openssl/aes.h>
47238384Sjkim#include "aes_locl.h"
48238384Sjkim
/*
 * These two parameters control which table, the 256-byte one or the 2KB
 * one, is referenced in the outer and, respectively, the inner rounds.
 * Defining AES_COMPACT_IN_OUTER_ROUNDS selects the compact variant for the
 * outer rounds; AES_COMPACT_IN_INNER_ROUNDS is explicitly left undefined
 * whenever the outer-round option is on.
 */
#define AES_COMPACT_IN_OUTER_ROUNDS
#ifdef  AES_COMPACT_IN_OUTER_ROUNDS
/*
 * AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
 * adding AES_COMPACT_IN_INNER_ROUNDS reduces the benchmark *further*
 * by a factor of ~2.
 */
# undef  AES_COMPACT_IN_INNER_ROUNDS
#endif
60238384Sjkim
#if 1
/*
 * Read one byte out of every 32-byte stride of a 256-byte table and fold the
 * values into a volatile sink, so that the loads are really issued.
 * Presumably used to pull a lookup table into the data cache before it is
 * indexed (callers are outside this part of the file).
 *
 * table: start of a readable region of at least 256 bytes; it is never
 *        written.  No alignment is required because the region is accessed
 *        only through an unsigned char pointer.
 */
static void prefetch256(const void *table)
{
    const volatile unsigned char *bytes = table;
    volatile unsigned char sink;
    unsigned char acc = 0;
    unsigned int off;

    /* 32 is common least cache-line size */
    for (off = 0; off < 256; off += 32)
        acc ^= bytes[off];

    /* volatile store/load pair: the accumulated value is deliberately unused */
    sink = acc;
    (void)sink;
}
#else
# define prefetch256(t)
#endif
76238384Sjkim
/*
 * GETU32(p): read the u32 stored at address p in native byte order by
 * dereferencing p as a u32 pointer.  Any earlier definition is discarded
 * first.  The access is performed at whatever alignment p has, so this form
 * relies on the target tolerating unaligned 32-bit loads.
 */
#undef GETU32
#define GETU32(p) (*((u32*)(p)))
79238384Sjkim
/*
 * u64 is the 64-bit unsigned type used for the combined round tables and
 * U64(C) appends the matching literal suffix to the constant C:
 *   - MSVC-style Windows compilers (not MinGW): unsigned __int64 / UI64
 *   - targets defining __arch64__:              unsigned long    / UL
 *   - everything else:                          unsigned long long / ULL
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
typedef unsigned __int64 u64;
#define U64(C)  C##UI64
#elif defined(__arch64__)
typedef unsigned long u64;
#define U64(C)  C##UL
#else
typedef unsigned long long u64;
#define U64(C)  C##ULL
#endif
90238384Sjkim
/*
 * ROTATE(a,n): rotate the 32-bit value a left by n bits.
 *   - MSVC / Intel compiler: the _lrotl intrinsic.
 *   - GCC-compatible compilers on x86/x86_64: a single "roll" instruction;
 *     the "I" constraint requires n to be a compile-time constant.
 * On any other toolchain or target ROTATE is left undefined, so users must
 * check for it with #ifdef/#if defined(ROTATE).
 *
 * __asm__ is used instead of asm so the macro also works when the compiler
 * is run in a strict ISO mode (-std=c99/-std=c11), and the temporary has a
 * distinctive name so that an argument expression mentioning a variable
 * called "ret" is not captured by the macro's own local.
 */
#undef ROTATE
#if defined(_MSC_VER) || defined(__ICC)
# define ROTATE(a,n)	_lrotl(a,n)
#elif defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
#   define ROTATE(a,n)  ({ register unsigned int aes_rot_out_;  \
                __asm__ (               \
                "roll %1,%0"            \
                : "=r"(aes_rot_out_)    \
                : "I"(n), "0"(a)        \
                : "cc");                \
               aes_rot_out_;            \
            })
# endif
#endif
/*-
Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
Te0[x] = S [x].[02, 01, 01, 03];
Te1[x] = S [x].[03, 02, 01, 01];
Te2[x] = S [x].[01, 03, 02, 01];
Te3[x] = S [x].[01, 01, 03, 02];
*/
/*
 * Te0..Te3 are views into the single Te table and are meant to be followed
 * by a subscript.  In "TeN[i]" the postfix [i] binds tighter than the (u32)
 * cast, so the expression loads the u64 located i*8 bytes after (u8*)Te+k
 * and then truncates it to its low 32 bits.  Because every Te entry holds
 * the same 4-byte pattern twice, starting the load k = 3, 2 or 1 bytes into
 * the entry yields the byte-rotated words listed above on a little-endian
 * machine, at the cost of an unaligned access.
 *
 * The casts keep the const qualifier of Te; the result is an rvalue, so
 * this does not restrict any use.
 *
 * NOTE(review): for i == 255 and k > 0 the 8-byte load formally extends up
 * to 3 bytes past the end of Te; only the low 32 bits are kept.
 */
#define Te0 (u32)((const u64*)((const u8*)Te+0))
#define Te1 (u32)((const u64*)((const u8*)Te+3))
#define Te2 (u32)((const u64*)((const u8*)Te+2))
#define Te3 (u32)((const u64*)((const u8*)Te+1))
/*-
Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
Td0[x] = Si[x].[0e, 09, 0d, 0b];
Td1[x] = Si[x].[0b, 0e, 09, 0d];
Td2[x] = Si[x].[0d, 0b, 0e, 09];
Td3[x] = Si[x].[09, 0d, 0b, 0e];
Td4[x] = Si[x].[01];
*/
/*
 * Td0..Td3 are views into the single Td table and are meant to be followed
 * by a subscript.  In "TdN[i]" the postfix [i] binds tighter than the (u32)
 * cast, so the expression loads the u64 located i*8 bytes after (u8*)Td+k
 * and then truncates it to its low 32 bits.  Because every Td entry holds
 * the same 4-byte pattern twice, starting the load k = 3, 2 or 1 bytes into
 * the entry yields the byte-rotated words listed above on a little-endian
 * machine, at the cost of an unaligned access.
 *
 * The casts keep the const qualifier of Td; the result is an rvalue, so
 * this does not restrict any use.
 *
 * NOTE(review): for i == 255 and k > 0 the 8-byte load formally extends up
 * to 3 bytes past the end of Td; only the low 32 bits are kept.
 */
#define Td0 (u32)((const u64*)((const u8*)Td+0))
#define Td1 (u32)((const u64*)((const u8*)Td+3))
#define Td2 (u32)((const u64*)((const u8*)Td+2))
#define Td3 (u32)((const u64*)((const u8*)Td+1))
129238384Sjkim
130238384Sjkimstatic const u64 Te[256] = {
131238384Sjkim    U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
132238384Sjkim    U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
133238384Sjkim    U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
134238384Sjkim    U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
135238384Sjkim    U64(0x5030306050303060), U64(0x0301010203010102),
136238384Sjkim    U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
137238384Sjkim    U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
138238384Sjkim    U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
139238384Sjkim    U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
140238384Sjkim    U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
141238384Sjkim    U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
142238384Sjkim    U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
143238384Sjkim    U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
144238384Sjkim    U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
145238384Sjkim    U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
146238384Sjkim    U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
147238384Sjkim    U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
148238384Sjkim    U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
149238384Sjkim    U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
150238384Sjkim    U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
151238384Sjkim    U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
152238384Sjkim    U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
153238384Sjkim    U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
154238384Sjkim    U64(0x5331316253313162), U64(0x3f15152a3f15152a),
155238384Sjkim    U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
156238384Sjkim    U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
157238384Sjkim    U64(0x2818183028181830), U64(0xa1969637a1969637),
158238384Sjkim    U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
159238384Sjkim    U64(0x0907070e0907070e), U64(0x3612122436121224),
160238384Sjkim    U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
161238384Sjkim    U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
162238384Sjkim    U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
163238384Sjkim    U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
164238384Sjkim    U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
165238384Sjkim    U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
166238384Sjkim    U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
167238384Sjkim    U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
168238384Sjkim    U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
169238384Sjkim    U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
170238384Sjkim    U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
171238384Sjkim    U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
172238384Sjkim    U64(0x0000000000000000), U64(0x2cededc12cededc1),
173238384Sjkim    U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
174238384Sjkim    U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
175238384Sjkim    U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
176238384Sjkim    U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
177238384Sjkim    U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
178238384Sjkim    U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
179238384Sjkim    U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
180238384Sjkim    U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
181238384Sjkim    U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
182238384Sjkim    U64(0x5533336655333366), U64(0x9485851194858511),
183238384Sjkim    U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
184238384Sjkim    U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
185238384Sjkim    U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
186238384Sjkim    U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
187238384Sjkim    U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
188238384Sjkim    U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
189238384Sjkim    U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
190238384Sjkim    U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
191238384Sjkim    U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
192238384Sjkim    U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
193238384Sjkim    U64(0x3010102030101020), U64(0x1affffe51affffe5),
194238384Sjkim    U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
195238384Sjkim    U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
196238384Sjkim    U64(0x3513132635131326), U64(0x2fececc32fececc3),
197238384Sjkim    U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
198238384Sjkim    U64(0xcc444488cc444488), U64(0x3917172e3917172e),
199238384Sjkim    U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
200238384Sjkim    U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
201238384Sjkim    U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
202238384Sjkim    U64(0x2b1919322b191932), U64(0x957373e6957373e6),
203238384Sjkim    U64(0xa06060c0a06060c0), U64(0x9881811998818119),
204238384Sjkim    U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
205238384Sjkim    U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
206238384Sjkim    U64(0xab90903bab90903b), U64(0x8388880b8388880b),
207238384Sjkim    U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
208238384Sjkim    U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
209238384Sjkim    U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
210238384Sjkim    U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
211238384Sjkim    U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
212238384Sjkim    U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
213238384Sjkim    U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
214238384Sjkim    U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
215238384Sjkim    U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
216238384Sjkim    U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
217238384Sjkim    U64(0xa8919139a8919139), U64(0xa4959531a4959531),
218238384Sjkim    U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
219238384Sjkim    U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
220238384Sjkim    U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
221238384Sjkim    U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
222238384Sjkim    U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
223238384Sjkim    U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
224238384Sjkim    U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
225238384Sjkim    U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
226238384Sjkim    U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
227238384Sjkim    U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
228238384Sjkim    U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
229238384Sjkim    U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
230238384Sjkim    U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
231238384Sjkim    U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
232238384Sjkim    U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
233238384Sjkim    U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
234238384Sjkim    U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
235238384Sjkim    U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
236238384Sjkim    U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
237238384Sjkim    U64(0xd8484890d8484890), U64(0x0503030605030306),
238238384Sjkim    U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
239238384Sjkim    U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
240238384Sjkim    U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
241238384Sjkim    U64(0x9186861791868617), U64(0x58c1c19958c1c199),
242238384Sjkim    U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
243238384Sjkim    U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
244238384Sjkim    U64(0xb398982bb398982b), U64(0x3311112233111122),
245238384Sjkim    U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
246238384Sjkim    U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
247238384Sjkim    U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
248238384Sjkim    U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
249238384Sjkim    U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
250238384Sjkim    U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
251238384Sjkim    U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
252238384Sjkim    U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
253238384Sjkim    U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
254238384Sjkim    U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
255238384Sjkim    U64(0xc3414182c3414182), U64(0xb0999929b0999929),
256238384Sjkim    U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
257238384Sjkim    U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
258238384Sjkim    U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
259238384Sjkim};
260238384Sjkim
261238384Sjkimstatic const u8 Te4[256] = {
262238384Sjkim    0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
263238384Sjkim    0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
264238384Sjkim    0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
265238384Sjkim    0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
266238384Sjkim    0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
267238384Sjkim    0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
268238384Sjkim    0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
269238384Sjkim    0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
270238384Sjkim    0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
271238384Sjkim    0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
272238384Sjkim    0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
273238384Sjkim    0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
274238384Sjkim    0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
275238384Sjkim    0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
276238384Sjkim    0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
277238384Sjkim    0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
278238384Sjkim    0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
279238384Sjkim    0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
280238384Sjkim    0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
281238384Sjkim    0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
282238384Sjkim    0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
283238384Sjkim    0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
284238384Sjkim    0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
285238384Sjkim    0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
286238384Sjkim    0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
287238384Sjkim    0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
288238384Sjkim    0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
289238384Sjkim    0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
290238384Sjkim    0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
291238384Sjkim    0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
292238384Sjkim    0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
293238384Sjkim    0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
294238384Sjkim};
295238384Sjkim
296238384Sjkimstatic const u64 Td[256] = {
297238384Sjkim    U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
298238384Sjkim    U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
299238384Sjkim    U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
300238384Sjkim    U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
301238384Sjkim    U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
302238384Sjkim    U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
303238384Sjkim    U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
304238384Sjkim    U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
305238384Sjkim    U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
306238384Sjkim    U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
307238384Sjkim    U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
308238384Sjkim    U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
309238384Sjkim    U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
310238384Sjkim    U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
311238384Sjkim    U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
312238384Sjkim    U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
313238384Sjkim    U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
314238384Sjkim    U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
315238384Sjkim    U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
316238384Sjkim    U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
317238384Sjkim    U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
318238384Sjkim    U64(0x6033519760335197), U64(0x457f5362457f5362),
319238384Sjkim    U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
320238384Sjkim    U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
321238384Sjkim    U64(0x5868487058684870), U64(0x19fd458f19fd458f),
322238384Sjkim    U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
323238384Sjkim    U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
324238384Sjkim    U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
325238384Sjkim    U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
326238384Sjkim    U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
327238384Sjkim    U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
328238384Sjkim    U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
329238384Sjkim    U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
330238384Sjkim    U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
331238384Sjkim    U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
332238384Sjkim    U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
333238384Sjkim    U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
334238384Sjkim    U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
335238384Sjkim    U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
336238384Sjkim    U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
337238384Sjkim    U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
338238384Sjkim    U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
339238384Sjkim    U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
340238384Sjkim    U64(0x6fd406046fd40604), U64(0xff155060ff155060),
341238384Sjkim    U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
342238384Sjkim    U64(0xcc434089cc434089), U64(0x779ed967779ed967),
343238384Sjkim    U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
344238384Sjkim    U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
345238384Sjkim    U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
346238384Sjkim    U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
347238384Sjkim    U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
348238384Sjkim    U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
349238384Sjkim    U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
350238384Sjkim    U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
351238384Sjkim    U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
352238384Sjkim    U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
353238384Sjkim    U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
354238384Sjkim    U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
355238384Sjkim    U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
356238384Sjkim    U64(0x694b775a694b775a), U64(0x161a121c161a121c),
357238384Sjkim    U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
358238384Sjkim    U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
359238384Sjkim    U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
360238384Sjkim    U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
361238384Sjkim    U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
362238384Sjkim    U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
363238384Sjkim    U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
364238384Sjkim    U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
365238384Sjkim    U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
366238384Sjkim    U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
367238384Sjkim    U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
368238384Sjkim    U64(0x4022971340229713), U64(0x2011c6842011c684),
369238384Sjkim    U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
370238384Sjkim    U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
371238384Sjkim    U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
372238384Sjkim    U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
373238384Sjkim    U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
374238384Sjkim    U64(0xfa489411fa489411), U64(0x2264e9472264e947),
375238384Sjkim    U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
376238384Sjkim    U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
377238384Sjkim    U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
378238384Sjkim    U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
379238384Sjkim    U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
380238384Sjkim    U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
381238384Sjkim    U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
382238384Sjkim    U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
383238384Sjkim    U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
384238384Sjkim    U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
385238384Sjkim    U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
386238384Sjkim    U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
387238384Sjkim    U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
388238384Sjkim    U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
389238384Sjkim    U64(0x097826cd097826cd), U64(0xf418596ef418596e),
390238384Sjkim    U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
391238384Sjkim    U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
392238384Sjkim    U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
393238384Sjkim    U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
394238384Sjkim    U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
395238384Sjkim    U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
396238384Sjkim    U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
397238384Sjkim    U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
398238384Sjkim    U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
399238384Sjkim    U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
400238384Sjkim    U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
401238384Sjkim    U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
402238384Sjkim    U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
403238384Sjkim    U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
404238384Sjkim    U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
405238384Sjkim    U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
406238384Sjkim    U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
407238384Sjkim    U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
408238384Sjkim    U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
409238384Sjkim    U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
410238384Sjkim    U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
411238384Sjkim    U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
412238384Sjkim    U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
413238384Sjkim    U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
414238384Sjkim    U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
415238384Sjkim    U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
416238384Sjkim    U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
417238384Sjkim    U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
418238384Sjkim    U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
419238384Sjkim    U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
420238384Sjkim    U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
421238384Sjkim    U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
422238384Sjkim    U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
423238384Sjkim    U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
424238384Sjkim    U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
425238384Sjkim};
426238384Sjkimstatic const u8 Td4[256] = {
427238384Sjkim    0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
428238384Sjkim    0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
429238384Sjkim    0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
430238384Sjkim    0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
431238384Sjkim    0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
432238384Sjkim    0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
433238384Sjkim    0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
434238384Sjkim    0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
435238384Sjkim    0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
436238384Sjkim    0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
437238384Sjkim    0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
438238384Sjkim    0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
439238384Sjkim    0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
440238384Sjkim    0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
441238384Sjkim    0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
442238384Sjkim    0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
443238384Sjkim    0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
444238384Sjkim    0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
445238384Sjkim    0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
446238384Sjkim    0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
447238384Sjkim    0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
448238384Sjkim    0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
449238384Sjkim    0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
450238384Sjkim    0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
451238384Sjkim    0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
452238384Sjkim    0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
453238384Sjkim    0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
454238384Sjkim    0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
455238384Sjkim    0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
456238384Sjkim    0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
457238384Sjkim    0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
458238384Sjkim    0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
459238384Sjkim};
460238384Sjkim
461238384Sjkimstatic const u32 rcon[] = {
462238384Sjkim    0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
463238384Sjkim    0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
464238384Sjkim    0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
465238384Sjkim};
466238384Sjkim
467238384Sjkim/**
468238384Sjkim * Expand the cipher key into the encryption key schedule.
469238384Sjkim */
470238384Sjkimint AES_set_encrypt_key(const unsigned char *userKey, const int bits,
471280304Sjkim                        AES_KEY *key)
472280304Sjkim{
473238384Sjkim
474280304Sjkim    u32 *rk;
475280304Sjkim    int i = 0;
476280304Sjkim    u32 temp;
477238384Sjkim
478280304Sjkim    if (!userKey || !key)
479280304Sjkim        return -1;
480280304Sjkim    if (bits != 128 && bits != 192 && bits != 256)
481280304Sjkim        return -2;
482238384Sjkim
483280304Sjkim    rk = key->rd_key;
484238384Sjkim
485280304Sjkim    if (bits==128)
486280304Sjkim        key->rounds = 10;
487280304Sjkim    else if (bits==192)
488280304Sjkim        key->rounds = 12;
489280304Sjkim    else
490280304Sjkim        key->rounds = 14;
491238384Sjkim
492280304Sjkim    rk[0] = GETU32(userKey     );
493280304Sjkim    rk[1] = GETU32(userKey +  4);
494280304Sjkim    rk[2] = GETU32(userKey +  8);
495280304Sjkim    rk[3] = GETU32(userKey + 12);
496280304Sjkim    if (bits == 128) {
497280304Sjkim        while (1) {
498280304Sjkim            temp  = rk[3];
499280304Sjkim            rk[4] = rk[0] ^
500280304Sjkim                ((u32)Te4[(temp >>  8) & 0xff]      ) ^
501280304Sjkim                ((u32)Te4[(temp >> 16) & 0xff] <<  8) ^
502280304Sjkim                ((u32)Te4[(temp >> 24)       ] << 16) ^
503280304Sjkim                ((u32)Te4[(temp      ) & 0xff] << 24) ^
504280304Sjkim                rcon[i];
505280304Sjkim            rk[5] = rk[1] ^ rk[4];
506280304Sjkim            rk[6] = rk[2] ^ rk[5];
507280304Sjkim            rk[7] = rk[3] ^ rk[6];
508280304Sjkim            if (++i == 10) {
509280304Sjkim                return 0;
510280304Sjkim            }
511280304Sjkim            rk += 4;
512280304Sjkim        }
513280304Sjkim    }
514280304Sjkim    rk[4] = GETU32(userKey + 16);
515280304Sjkim    rk[5] = GETU32(userKey + 20);
516280304Sjkim    if (bits == 192) {
517280304Sjkim        while (1) {
518280304Sjkim            temp = rk[ 5];
519280304Sjkim            rk[ 6] = rk[ 0] ^
520280304Sjkim                ((u32)Te4[(temp >>  8) & 0xff]      ) ^
521280304Sjkim                ((u32)Te4[(temp >> 16) & 0xff] <<  8) ^
522280304Sjkim                ((u32)Te4[(temp >> 24)       ] << 16) ^
523280304Sjkim                ((u32)Te4[(temp      ) & 0xff] << 24) ^
524280304Sjkim                rcon[i];
525280304Sjkim            rk[ 7] = rk[ 1] ^ rk[ 6];
526280304Sjkim            rk[ 8] = rk[ 2] ^ rk[ 7];
527280304Sjkim            rk[ 9] = rk[ 3] ^ rk[ 8];
528280304Sjkim            if (++i == 8) {
529280304Sjkim                return 0;
530280304Sjkim            }
531280304Sjkim            rk[10] = rk[ 4] ^ rk[ 9];
532280304Sjkim            rk[11] = rk[ 5] ^ rk[10];
533280304Sjkim            rk += 6;
534280304Sjkim        }
535280304Sjkim    }
536280304Sjkim    rk[6] = GETU32(userKey + 24);
537280304Sjkim    rk[7] = GETU32(userKey + 28);
538280304Sjkim    if (bits == 256) {
539280304Sjkim        while (1) {
540280304Sjkim            temp = rk[ 7];
541280304Sjkim            rk[ 8] = rk[ 0] ^
542280304Sjkim                ((u32)Te4[(temp >>  8) & 0xff]      ) ^
543280304Sjkim                ((u32)Te4[(temp >> 16) & 0xff] <<  8) ^
544280304Sjkim                ((u32)Te4[(temp >> 24)       ] << 16) ^
545280304Sjkim                ((u32)Te4[(temp      ) & 0xff] << 24) ^
546280304Sjkim                rcon[i];
547280304Sjkim            rk[ 9] = rk[ 1] ^ rk[ 8];
548280304Sjkim            rk[10] = rk[ 2] ^ rk[ 9];
549280304Sjkim            rk[11] = rk[ 3] ^ rk[10];
550280304Sjkim            if (++i == 7) {
551280304Sjkim                return 0;
552280304Sjkim            }
553280304Sjkim            temp = rk[11];
554280304Sjkim            rk[12] = rk[ 4] ^
555280304Sjkim                ((u32)Te4[(temp      ) & 0xff]      ) ^
556280304Sjkim                ((u32)Te4[(temp >>  8) & 0xff] <<  8) ^
557280304Sjkim                ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
558280304Sjkim                ((u32)Te4[(temp >> 24)       ] << 24);
559280304Sjkim            rk[13] = rk[ 5] ^ rk[12];
560280304Sjkim            rk[14] = rk[ 6] ^ rk[13];
561280304Sjkim            rk[15] = rk[ 7] ^ rk[14];
562238384Sjkim
563280304Sjkim            rk += 8;
564280304Sjkim            }
565280304Sjkim    }
566280304Sjkim    return 0;
567238384Sjkim}
568238384Sjkim
569238384Sjkim/**
570238384Sjkim * Expand the cipher key into the decryption key schedule.
 *
 * Runs AES_set_encrypt_key() on the same arguments and then rewrites
 * key->rd_key in place: the 4-word round keys are put in reverse order,
 * and every round key except the first and the last is passed through
 * the inverse MixColumn transform.
 *
 * userKey, bits and key are handed to AES_set_encrypt_key() untouched;
 * no additional validation is done here.
 *
 * Returns the status of AES_set_encrypt_key() unchanged when that status
 * is negative (the schedule is not post-processed in that case);
 * otherwise returns 0.
571238384Sjkim */
572238384Sjkimint AES_set_decrypt_key(const unsigned char *userKey, const int bits,
573280304Sjkim                        AES_KEY *key)
574280304Sjkim{
575238384Sjkim
576280304Sjkim    u32 *rk;
577280304Sjkim    int i, j, status;
578280304Sjkim    u32 temp;
579238384Sjkim
580280304Sjkim    /* first, start with an encryption schedule */
581280304Sjkim    status = AES_set_encrypt_key(userKey, bits, key);
    /* only a negative status aborts; any other value is treated as success */
582280304Sjkim    if (status < 0)
583280304Sjkim        return status;
584238384Sjkim
585280304Sjkim    rk = key->rd_key;
586238384Sjkim
587280304Sjkim    /* invert the order of the round keys: */
    /*
     * i walks up from word 0, j walks down from word 4*rounds (the last
     * round key); each pass swaps one whole 4-word round key and the loop
     * stops once the two indices meet.
     */
588280304Sjkim    for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
589280304Sjkim        temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
590280304Sjkim        temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
591280304Sjkim        temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
592280304Sjkim        temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
593280304Sjkim    }
594280304Sjkim    /* apply the inverse MixColumn transform to all round keys but the first and the last: */
595280304Sjkim    for (i = 1; i < (key->rounds); i++) {
        /* step to the next round key; rk[0..3] below is round key number i */
596280304Sjkim        rk += 4;
        /* the "#if 1" selects the arithmetic variant; the table variant after #else is compiled out */
597238384Sjkim#if 1
598280304Sjkim        for (j = 0; j < 4; j++) {
599280304Sjkim            u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
600238384Sjkim
            /*
             * Packed doubling, applied three times (tp1 -> tp2 -> tp4 -> tp8):
             * all four bytes of the word are shifted left by one bit at once,
             * and 0x1b is XORed into each byte whose top bit was set.
             * m keeps those top bits; m - (m >> 7) turns every kept 0x80 into
             * 0x7f, which the 0x1b1b1b1b mask then narrows to 0x1b.
             */
601280304Sjkim            tp1 = rk[j];
602280304Sjkim            m = tp1 & 0x80808080;
603280304Sjkim            tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
604280304Sjkim                ((m - (m >> 7)) & 0x1b1b1b1b);
605280304Sjkim            m = tp2 & 0x80808080;
606280304Sjkim            tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
607280304Sjkim                ((m - (m >> 7)) & 0x1b1b1b1b);
608280304Sjkim            m = tp4 & 0x80808080;
609280304Sjkim            tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
610280304Sjkim                ((m - (m >> 7)) & 0x1b1b1b1b);
            /*
             * XOR combinations of the multiples above; the hex suffix is the
             * sum of the multipliers: 9 = 8+1, b = 8+2+1, d = 8+4+1, e = 8+4+2.
             */
611280304Sjkim            tp9 = tp8 ^ tp1;
612280304Sjkim            tpb = tp9 ^ tp2;
613280304Sjkim            tpd = tp9 ^ tp4;
614280304Sjkim            tpe = tp8 ^ tp4 ^ tp2;
            /*
             * Combine the four multiples, each rotated into its byte lane.
             * ROTATE is defined outside this block; the shift-based fallback
             * below spells out left rotations by 16, 8 and 24 bits, so ROTATE
             * is presumably a left rotate -- confirm against its definition.
             */
615238384Sjkim#if defined(ROTATE)
616280304Sjkim            rk[j] = tpe ^ ROTATE(tpd,16) ^
617280304Sjkim                ROTATE(tp9,8) ^ ROTATE(tpb,24);
618238384Sjkim#else
619280304Sjkim            rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
620280304Sjkim                (tp9 >> 24) ^ (tp9 << 8) ^
621280304Sjkim                (tpb >> 8) ^ (tpb << 24);
622238384Sjkim#endif
623280304Sjkim        }
624238384Sjkim#else
        /*
         * Disabled alternative: the same per-word update written as lookups
         * in Td0..Td3, indexed through the low byte of Te2 entries.
         */
625280304Sjkim        rk[0] =
626280304Sjkim            Td0[Te2[(rk[0]      ) & 0xff] & 0xff] ^
627280304Sjkim            Td1[Te2[(rk[0] >>  8) & 0xff] & 0xff] ^
628280304Sjkim            Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
629280304Sjkim            Td3[Te2[(rk[0] >> 24)       ] & 0xff];
630280304Sjkim        rk[1] =
631280304Sjkim            Td0[Te2[(rk[1]      ) & 0xff] & 0xff] ^
632280304Sjkim            Td1[Te2[(rk[1] >>  8) & 0xff] & 0xff] ^
633280304Sjkim            Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
634280304Sjkim            Td3[Te2[(rk[1] >> 24)       ] & 0xff];
635280304Sjkim        rk[2] =
636280304Sjkim            Td0[Te2[(rk[2]      ) & 0xff] & 0xff] ^
637280304Sjkim            Td1[Te2[(rk[2] >>  8) & 0xff] & 0xff] ^
638280304Sjkim            Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
639280304Sjkim            Td3[Te2[(rk[2] >> 24)       ] & 0xff];
640280304Sjkim        rk[3] =
641280304Sjkim            Td0[Te2[(rk[3]      ) & 0xff] & 0xff] ^
642280304Sjkim            Td1[Te2[(rk[3] >>  8) & 0xff] & 0xff] ^
643280304Sjkim            Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
644280304Sjkim            Td3[Te2[(rk[3] >> 24)       ] & 0xff];
645238384Sjkim#endif
646280304Sjkim    }
647280304Sjkim    return 0;
648238384Sjkim}
649238384Sjkim
650238384Sjkim/*
651238384Sjkim * Encrypt a single block
652238384Sjkim * in and out can overlap
 *
 * Loads four words from in (offsets 0, 4, 8, 12), runs them through the
 * rounds using the round keys in key->rd_key and the round count in
 * key->rounds, and stores four words to out at the same offsets.
 * All input words are loaded into s0..s3 before anything is stored to
 * out, which is why the two buffers may overlap.
 *
 * in, out and key are checked with assert() only.
 *
 * Two preprocessor switches pick the implementation of a round:
 *   AES_COMPACT_IN_OUTER_ROUNDS - first and last round use the Te4 table
 *   AES_COMPACT_IN_INNER_ROUNDS - the middle rounds use the Te4 table
 * When a switch is not defined the word tables Te0..Te3 are used instead.
653238384Sjkim */
654238384Sjkimvoid AES_encrypt(const unsigned char *in, unsigned char *out,
655280304Sjkim                 const AES_KEY *key)
656280304Sjkim{
657238384Sjkim
658280304Sjkim    const u32 *rk;
    /* s0..s3 hold the current state; t[] receives the next state while s0..s3 are still being read */
659280304Sjkim    u32 s0, s1, s2, s3, t[4];
660280304Sjkim    int r;
661238384Sjkim
662280304Sjkim    assert(in && out && key);
663280304Sjkim    rk = key->rd_key;
664238384Sjkim
665280304Sjkim    /*
666280304Sjkim     * map byte array block to cipher state
667280304Sjkim     * and add initial round key:
668280304Sjkim     */
669280304Sjkim    s0 = GETU32(in     ) ^ rk[0];
670280304Sjkim    s1 = GETU32(in +  4) ^ rk[1];
671280304Sjkim    s2 = GETU32(in +  8) ^ rk[2];
672280304Sjkim    s3 = GETU32(in + 12) ^ rk[3];
673238384Sjkim
    /* first round; consumes round key words rk[4..7] in either variant */
674238384Sjkim#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
    /* prefetch256 is defined elsewhere; presumably it pulls Te4 into cache before the lookups -- confirm */
675280304Sjkim    prefetch256(Te4);
676238384Sjkim
    /*
     * Substitute through Te4 and reassemble each word: byte 0 of word k,
     * byte 1 of word k+1, byte 2 of word k+2, byte 3 of word k+3 (indices
     * taken modulo 4).
     */
677280304Sjkim    t[0] = (u32)Te4[(s0      ) & 0xff]       ^
678280304Sjkim           (u32)Te4[(s1 >>  8) & 0xff] <<  8 ^
679280304Sjkim           (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
680280304Sjkim           (u32)Te4[(s3 >> 24)       ] << 24;
681280304Sjkim    t[1] = (u32)Te4[(s1      ) & 0xff]       ^
682280304Sjkim           (u32)Te4[(s2 >>  8) & 0xff] <<  8 ^
683280304Sjkim           (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
684280304Sjkim           (u32)Te4[(s0 >> 24)       ] << 24;
685280304Sjkim    t[2] = (u32)Te4[(s2      ) & 0xff]       ^
686280304Sjkim           (u32)Te4[(s3 >>  8) & 0xff] <<  8 ^
687280304Sjkim           (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
688280304Sjkim           (u32)Te4[(s1 >> 24)       ] << 24;
689280304Sjkim    t[3] = (u32)Te4[(s3      ) & 0xff]       ^
690280304Sjkim           (u32)Te4[(s0 >>  8) & 0xff] <<  8 ^
691280304Sjkim           (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
692280304Sjkim           (u32)Te4[(s2 >> 24)       ] << 24;
693238384Sjkim
694280304Sjkim    /* now do the linear transform using words */
695280304Sjkim    {   int i;
696280304Sjkim        u32 r0, r1, r2;
697238384Sjkim
698280304Sjkim        for (i = 0; i < 4; i++) {
            /*
             * r2 is r0 with each of its four bytes doubled: the bytes are
             * shifted left one bit in parallel and 0x1b is XORed into every
             * byte whose top bit (kept in r1) was set.
             */
699280304Sjkim            r0 = t[i];
700280304Sjkim            r1 = r0 & 0x80808080;
701280304Sjkim            r2 = ((r0 & 0x7f7f7f7f) << 1) ^
702280304Sjkim                ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
            /* both variants below mix r2 and r0 with rotations by 24, 16 and 8 bits; the #else form writes the rotations as shift pairs */
703238384Sjkim#if defined(ROTATE)
704280304Sjkim            t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
705280304Sjkim                ROTATE(r0,16) ^ ROTATE(r0,8);
706238384Sjkim#else
707280304Sjkim            t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
708280304Sjkim                (r0 << 16) ^ (r0 >> 16) ^
709280304Sjkim                (r0 << 8) ^ (r0 >> 24);
710238384Sjkim#endif
711280304Sjkim            t[i] ^= rk[4+i];
712280304Sjkim        }
713280304Sjkim    }
714238384Sjkim#else
    /* table variant: one lookup per state byte in Te0..Te3, XORed together with the round key word */
715280304Sjkim    t[0] =  Te0[(s0      ) & 0xff] ^
716280304Sjkim        Te1[(s1 >>  8) & 0xff] ^
717280304Sjkim        Te2[(s2 >> 16) & 0xff] ^
718280304Sjkim        Te3[(s3 >> 24)       ] ^
719280304Sjkim        rk[4];
720280304Sjkim    t[1] =  Te0[(s1      ) & 0xff] ^
721280304Sjkim        Te1[(s2 >>  8) & 0xff] ^
722280304Sjkim        Te2[(s3 >> 16) & 0xff] ^
723280304Sjkim        Te3[(s0 >> 24)       ] ^
724280304Sjkim        rk[5];
725280304Sjkim    t[2] =  Te0[(s2      ) & 0xff] ^
726280304Sjkim        Te1[(s3 >>  8) & 0xff] ^
727280304Sjkim        Te2[(s0 >> 16) & 0xff] ^
728280304Sjkim        Te3[(s1 >> 24)       ] ^
729280304Sjkim        rk[6];
730280304Sjkim    t[3] =  Te0[(s3      ) & 0xff] ^
731280304Sjkim        Te1[(s0 >>  8) & 0xff] ^
732280304Sjkim        Te2[(s1 >> 16) & 0xff] ^
733280304Sjkim        Te3[(s2 >> 24)       ] ^
734280304Sjkim        rk[7];
735238384Sjkim#endif
736280304Sjkim    s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
737238384Sjkim
738238384Sjkim    /*
739238384Sjkim     * Nr - 2 full rounds:
740238384Sjkim     */
    /*
     * rk first skips the eight words already used (initial key and first
     * round); each pass then reads rk[0..3] and advances by one round key.
     */
741238384Sjkim    for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
742238384Sjkim#if defined(AES_COMPACT_IN_INNER_ROUNDS)
743280304Sjkim        t[0] = (u32)Te4[(s0      ) & 0xff]       ^
744280304Sjkim               (u32)Te4[(s1 >>  8) & 0xff] <<  8 ^
745280304Sjkim               (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
746280304Sjkim               (u32)Te4[(s3 >> 24)       ] << 24;
747280304Sjkim        t[1] = (u32)Te4[(s1      ) & 0xff]       ^
748280304Sjkim               (u32)Te4[(s2 >>  8) & 0xff] <<  8 ^
749280304Sjkim               (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
750280304Sjkim               (u32)Te4[(s0 >> 24)       ] << 24;
751280304Sjkim        t[2] = (u32)Te4[(s2      ) & 0xff]       ^
752280304Sjkim               (u32)Te4[(s3 >>  8) & 0xff] <<  8 ^
753280304Sjkim               (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
754280304Sjkim               (u32)Te4[(s1 >> 24)       ] << 24;
755280304Sjkim        t[3] = (u32)Te4[(s3      ) & 0xff]       ^
756280304Sjkim               (u32)Te4[(s0 >>  8) & 0xff] <<  8 ^
757280304Sjkim               (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
758280304Sjkim               (u32)Te4[(s2 >> 24)       ] << 24;
759238384Sjkim
760280304Sjkim        /* now do the linear transform using words */
761280304Sjkim        {
762280304Sjkim            int i;
763280304Sjkim            u32 r0, r1, r2;
764238384Sjkim
765280304Sjkim            for (i = 0; i < 4; i++) {
                /* same packed doubling and rotation mix as in the first round, but keyed with rk[i] */
766280304Sjkim                r0 = t[i];
767280304Sjkim                r1 = r0 & 0x80808080;
768280304Sjkim                r2 = ((r0 & 0x7f7f7f7f) << 1) ^
769280304Sjkim                    ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
770238384Sjkim#if defined(ROTATE)
771280304Sjkim                t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
772280304Sjkim                    ROTATE(r0,16) ^ ROTATE(r0,8);
773238384Sjkim#else
774280304Sjkim                t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
775280304Sjkim                    (r0 << 16) ^ (r0 >> 16) ^
776280304Sjkim                    (r0 << 8) ^ (r0 >> 24);
777238384Sjkim#endif
778280304Sjkim                t[i] ^= rk[i];
779280304Sjkim            }
780280304Sjkim        }
781238384Sjkim#else
782280304Sjkim        t[0] =  Te0[(s0      ) & 0xff] ^
783280304Sjkim            Te1[(s1 >>  8) & 0xff] ^
784280304Sjkim            Te2[(s2 >> 16) & 0xff] ^
785280304Sjkim            Te3[(s3 >> 24)       ] ^
786280304Sjkim            rk[0];
787280304Sjkim        t[1] =  Te0[(s1      ) & 0xff] ^
788280304Sjkim            Te1[(s2 >>  8) & 0xff] ^
789280304Sjkim            Te2[(s3 >> 16) & 0xff] ^
790280304Sjkim            Te3[(s0 >> 24)       ] ^
791280304Sjkim            rk[1];
792280304Sjkim        t[2] =  Te0[(s2      ) & 0xff] ^
793280304Sjkim            Te1[(s3 >>  8) & 0xff] ^
794280304Sjkim            Te2[(s0 >> 16) & 0xff] ^
795280304Sjkim            Te3[(s1 >> 24)       ] ^
796280304Sjkim            rk[2];
797280304Sjkim        t[3] =  Te0[(s3      ) & 0xff] ^
798280304Sjkim            Te1[(s0 >>  8) & 0xff] ^
799280304Sjkim            Te2[(s1 >> 16) & 0xff] ^
800280304Sjkim            Te3[(s2 >> 24)       ] ^
801280304Sjkim            rk[3];
802238384Sjkim#endif
803280304Sjkim        s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
804238384Sjkim    }
805238384Sjkim    /*
806280304Sjkim     * apply last round and
807280304Sjkim     * map cipher state to byte array block:
808280304Sjkim     */
    /*
     * After the loop rk sits at word offset 4*rounds, i.e. the last round
     * key. The last round has no linear step, and results are stored
     * directly through u32 pointers, which relies on the little-endian,
     * unaligned-access assumptions stated in the file header.
     */
809238384Sjkim#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
810280304Sjkim    prefetch256(Te4);
811238384Sjkim
812280304Sjkim    *(u32*)(out+0) =
813280304Sjkim           (u32)Te4[(s0      ) & 0xff]       ^
814280304Sjkim           (u32)Te4[(s1 >>  8) & 0xff] <<  8 ^
815280304Sjkim           (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
816280304Sjkim           (u32)Te4[(s3 >> 24)       ] << 24 ^
817280304Sjkim        rk[0];
818280304Sjkim    *(u32*)(out+4) =
819280304Sjkim           (u32)Te4[(s1      ) & 0xff]       ^
820280304Sjkim           (u32)Te4[(s2 >>  8) & 0xff] <<  8 ^
821280304Sjkim           (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
822280304Sjkim           (u32)Te4[(s0 >> 24)       ] << 24 ^
823280304Sjkim        rk[1];
824280304Sjkim    *(u32*)(out+8) =
825280304Sjkim           (u32)Te4[(s2      ) & 0xff]       ^
826280304Sjkim           (u32)Te4[(s3 >>  8) & 0xff] <<  8 ^
827280304Sjkim           (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
828280304Sjkim           (u32)Te4[(s1 >> 24)       ] << 24 ^
829280304Sjkim        rk[2];
830280304Sjkim    *(u32*)(out+12) =
831280304Sjkim           (u32)Te4[(s3      ) & 0xff]       ^
832280304Sjkim           (u32)Te4[(s0 >>  8) & 0xff] <<  8 ^
833280304Sjkim           (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
834280304Sjkim           (u32)Te4[(s2 >> 24)       ] << 24 ^
835280304Sjkim        rk[3];
836238384Sjkim#else
    /* table variant: each Te entry is masked down to the single byte lane it contributes */
837280304Sjkim    *(u32*)(out+0) =
838280304Sjkim        (Te2[(s0      ) & 0xff] & 0x000000ffU) ^
839280304Sjkim        (Te3[(s1 >>  8) & 0xff] & 0x0000ff00U) ^
840280304Sjkim        (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
841280304Sjkim        (Te1[(s3 >> 24)       ] & 0xff000000U) ^
842280304Sjkim        rk[0];
843280304Sjkim    *(u32*)(out+4) =
844280304Sjkim        (Te2[(s1      ) & 0xff] & 0x000000ffU) ^
845280304Sjkim        (Te3[(s2 >>  8) & 0xff] & 0x0000ff00U) ^
846280304Sjkim        (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
847280304Sjkim        (Te1[(s0 >> 24)       ] & 0xff000000U) ^
848280304Sjkim        rk[1];
849280304Sjkim    *(u32*)(out+8) =
850280304Sjkim        (Te2[(s2      ) & 0xff] & 0x000000ffU) ^
851280304Sjkim        (Te3[(s3 >>  8) & 0xff] & 0x0000ff00U) ^
852280304Sjkim        (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
853280304Sjkim        (Te1[(s1 >> 24)       ] & 0xff000000U) ^
854280304Sjkim        rk[2];
855280304Sjkim    *(u32*)(out+12) =
856280304Sjkim        (Te2[(s3      ) & 0xff] & 0x000000ffU) ^
857280304Sjkim        (Te3[(s0 >>  8) & 0xff] & 0x0000ff00U) ^
858280304Sjkim        (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
859280304Sjkim        (Te1[(s2 >> 24)       ] & 0xff000000U) ^
860280304Sjkim        rk[3];
861238384Sjkim#endif
862238384Sjkim}
863238384Sjkim
864238384Sjkim/*
865238384Sjkim * Decrypt a single block
866238384Sjkim * in and out can overlap
 *
 * Loads four words from in (offsets 0, 4, 8, 12), runs them through the
 * rounds using the round keys in key->rd_key and the round count in
 * key->rounds, and stores four words to out at the same offsets.
 * All input words are loaded into s0..s3 before anything is stored to
 * out, which is why the two buffers may overlap.
 *
 * in, out and key are checked with assert() only.
 *
 * AES_COMPACT_IN_OUTER_ROUNDS selects the Td4-based form of the first
 * round and AES_COMPACT_IN_INNER_ROUNDS that of the middle rounds; when
 * a switch is not defined the word tables Td0..Td3 are used for those
 * rounds. The last round always uses Td4.
867238384Sjkim */
868238384Sjkimvoid AES_decrypt(const unsigned char *in, unsigned char *out,
869280304Sjkim                 const AES_KEY *key)
870280304Sjkim{
871238384Sjkim
872280304Sjkim    const u32 *rk;
    /* s0..s3 hold the current state; t[] receives the next state while s0..s3 are still being read */
873280304Sjkim    u32 s0, s1, s2, s3, t[4];
874280304Sjkim    int r;
875238384Sjkim
876280304Sjkim    assert(in && out && key);
877280304Sjkim    rk = key->rd_key;
878238384Sjkim
879280304Sjkim    /*
880280304Sjkim     * map byte array block to cipher state
881280304Sjkim     * and add initial round key:
882280304Sjkim     */
883280304Sjkim    s0 = GETU32(in     ) ^ rk[0];
884280304Sjkim    s1 = GETU32(in +  4) ^ rk[1];
885280304Sjkim    s2 = GETU32(in +  8) ^ rk[2];
886280304Sjkim    s3 = GETU32(in + 12) ^ rk[3];
887238384Sjkim
    /* first round; consumes round key words rk[4..7] in either variant */
888238384Sjkim#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
    /* prefetch256 is defined elsewhere; presumably it pulls Td4 into cache before the lookups -- confirm */
889280304Sjkim    prefetch256(Td4);
890238384Sjkim
    /*
     * Substitute through Td4 and reassemble each word: byte 0 of word k,
     * byte 1 of word k-1, byte 2 of word k-2, byte 3 of word k-3 (indices
     * taken modulo 4) -- the opposite walking direction to AES_encrypt.
     */
891280304Sjkim    t[0] = (u32)Td4[(s0      ) & 0xff]       ^
892280304Sjkim           (u32)Td4[(s3 >>  8) & 0xff] <<  8 ^
893280304Sjkim           (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
894280304Sjkim           (u32)Td4[(s1 >> 24)       ] << 24;
895280304Sjkim    t[1] = (u32)Td4[(s1      ) & 0xff]       ^
896280304Sjkim           (u32)Td4[(s0 >>  8) & 0xff] <<  8 ^
897280304Sjkim           (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
898280304Sjkim           (u32)Td4[(s2 >> 24)       ] << 24;
899280304Sjkim    t[2] = (u32)Td4[(s2      ) & 0xff]       ^
900280304Sjkim           (u32)Td4[(s1 >>  8) & 0xff] <<  8 ^
901280304Sjkim           (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
902280304Sjkim           (u32)Td4[(s3 >> 24)       ] << 24;
903280304Sjkim    t[3] = (u32)Td4[(s3      ) & 0xff]       ^
904280304Sjkim           (u32)Td4[(s2 >>  8) & 0xff] <<  8 ^
905280304Sjkim           (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
906280304Sjkim           (u32)Td4[(s0 >> 24)       ] << 24;
907238384Sjkim
908280304Sjkim    /* now do the linear transform using words */
909280304Sjkim    {
910280304Sjkim        int i;
911280304Sjkim        u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
912238384Sjkim
913280304Sjkim        for (i = 0; i < 4; i++) {
            /*
             * Packed doubling, applied three times (tp1 -> tp2 -> tp4 -> tp8):
             * all four bytes are shifted left one bit in parallel and 0x1b is
             * XORed into each byte whose top bit (kept in m) was set.
             */
914280304Sjkim            tp1 = t[i];
915280304Sjkim            m = tp1 & 0x80808080;
916280304Sjkim            tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
917280304Sjkim                ((m - (m >> 7)) & 0x1b1b1b1b);
918280304Sjkim            m = tp2 & 0x80808080;
919280304Sjkim            tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
920280304Sjkim                ((m - (m >> 7)) & 0x1b1b1b1b);
921280304Sjkim            m = tp4 & 0x80808080;
922280304Sjkim            tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
923280304Sjkim                ((m - (m >> 7)) & 0x1b1b1b1b);
            /* XOR combinations; the hex suffix is the sum of the multipliers: 9 = 8+1, b = 8+2+1, d = 8+4+1, e = 8+4+2 */
924280304Sjkim            tp9 = tp8 ^ tp1;
925280304Sjkim            tpb = tp9 ^ tp2;
926280304Sjkim            tpd = tp9 ^ tp4;
927280304Sjkim            tpe = tp8 ^ tp4 ^ tp2;
            /* the #else form spells the same 16-, 8- and 24-bit rotations as shift pairs */
928238384Sjkim#if defined(ROTATE)
929280304Sjkim            t[i] = tpe ^ ROTATE(tpd,16) ^
930280304Sjkim                ROTATE(tp9,8) ^ ROTATE(tpb,24);
931238384Sjkim#else
932280304Sjkim            t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
933280304Sjkim                (tp9 >> 24) ^ (tp9 << 8) ^
934280304Sjkim                (tpb >> 8) ^ (tpb << 24);
935238384Sjkim#endif
936280304Sjkim            t[i] ^= rk[4+i];
937280304Sjkim        }
938280304Sjkim    }
939238384Sjkim#else
    /* table variant: one lookup per state byte in Td0..Td3, XORed together with the round key word */
940280304Sjkim    t[0] =  Td0[(s0      ) & 0xff] ^
941280304Sjkim        Td1[(s3 >>  8) & 0xff] ^
942280304Sjkim        Td2[(s2 >> 16) & 0xff] ^
943280304Sjkim        Td3[(s1 >> 24)       ] ^
944280304Sjkim        rk[4];
945280304Sjkim    t[1] =  Td0[(s1      ) & 0xff] ^
946280304Sjkim        Td1[(s0 >>  8) & 0xff] ^
947280304Sjkim        Td2[(s3 >> 16) & 0xff] ^
948280304Sjkim        Td3[(s2 >> 24)       ] ^
949280304Sjkim        rk[5];
950280304Sjkim    t[2] =  Td0[(s2      ) & 0xff] ^
951280304Sjkim        Td1[(s1 >>  8) & 0xff] ^
952280304Sjkim        Td2[(s0 >> 16) & 0xff] ^
953280304Sjkim        Td3[(s3 >> 24)       ] ^
954280304Sjkim        rk[6];
955280304Sjkim    t[3] =  Td0[(s3      ) & 0xff] ^
956280304Sjkim        Td1[(s2 >>  8) & 0xff] ^
957280304Sjkim        Td2[(s1 >> 16) & 0xff] ^
958280304Sjkim        Td3[(s0 >> 24)       ] ^
959280304Sjkim        rk[7];
960238384Sjkim#endif
961280304Sjkim    s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
962238384Sjkim
963238384Sjkim    /*
964238384Sjkim     * Nr - 2 full rounds:
965238384Sjkim     */
    /*
     * rk first skips the eight words already used (initial key and first
     * round); each pass then reads rk[0..3] and advances by one round key.
     */
966238384Sjkim    for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
967238384Sjkim#if defined(AES_COMPACT_IN_INNER_ROUNDS)
968280304Sjkim        t[0] = (u32)Td4[(s0      ) & 0xff]       ^
969280304Sjkim               (u32)Td4[(s3 >>  8) & 0xff] <<  8 ^
970280304Sjkim               (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
971280304Sjkim               (u32)Td4[(s1 >> 24)       ] << 24;
972280304Sjkim        t[1] = (u32)Td4[(s1      ) & 0xff]       ^
973280304Sjkim               (u32)Td4[(s0 >>  8) & 0xff] <<  8 ^
974280304Sjkim               (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
975280304Sjkim               (u32)Td4[(s2 >> 24)       ] << 24;
976280304Sjkim        t[2] = (u32)Td4[(s2      ) & 0xff]       ^
977280304Sjkim               (u32)Td4[(s1 >>  8) & 0xff] <<  8 ^
978280304Sjkim               (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
979280304Sjkim               (u32)Td4[(s3 >> 24)       ] << 24;
980280304Sjkim        t[3] = (u32)Td4[(s3      ) & 0xff]       ^
981280304Sjkim               (u32)Td4[(s2 >>  8) & 0xff] <<  8 ^
982280304Sjkim               (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
983280304Sjkim               (u32)Td4[(s0 >> 24)       ] << 24;
984238384Sjkim
985280304Sjkim    /* now do the linear transform using words */
986280304Sjkim    {
987280304Sjkim        int i;
988280304Sjkim        u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
989238384Sjkim
990280304Sjkim        for (i = 0; i < 4; i++) {
            /* same packed doubling and rotation mix as in the first round, but keyed with rk[i] */
991280304Sjkim            tp1 = t[i];
992280304Sjkim            m = tp1 & 0x80808080;
993280304Sjkim            tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
994280304Sjkim                ((m - (m >> 7)) & 0x1b1b1b1b);
995280304Sjkim            m = tp2 & 0x80808080;
996280304Sjkim            tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
997280304Sjkim                ((m - (m >> 7)) & 0x1b1b1b1b);
998280304Sjkim            m = tp4 & 0x80808080;
999280304Sjkim            tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1000280304Sjkim                ((m - (m >> 7)) & 0x1b1b1b1b);
1001280304Sjkim            tp9 = tp8 ^ tp1;
1002280304Sjkim            tpb = tp9 ^ tp2;
1003280304Sjkim            tpd = tp9 ^ tp4;
1004280304Sjkim            tpe = tp8 ^ tp4 ^ tp2;
1005238384Sjkim#if defined(ROTATE)
1006280304Sjkim            t[i] = tpe ^ ROTATE(tpd,16) ^
1007280304Sjkim                ROTATE(tp9,8) ^ ROTATE(tpb,24);
1008238384Sjkim#else
1009280304Sjkim            t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1010280304Sjkim                (tp9 >> 24) ^ (tp9 << 8) ^
1011280304Sjkim                (tpb >> 8) ^ (tpb << 24);
1012238384Sjkim#endif
1013280304Sjkim            t[i] ^= rk[i];
1014280304Sjkim        }
1015280304Sjkim    }
1016238384Sjkim#else
1017280304Sjkim    t[0] =  Td0[(s0      ) & 0xff] ^
1018280304Sjkim        Td1[(s3 >>  8) & 0xff] ^
1019280304Sjkim        Td2[(s2 >> 16) & 0xff] ^
1020280304Sjkim        Td3[(s1 >> 24)       ] ^
1021280304Sjkim        rk[0];
1022280304Sjkim    t[1] =  Td0[(s1      ) & 0xff] ^
1023280304Sjkim        Td1[(s0 >>  8) & 0xff] ^
1024280304Sjkim        Td2[(s3 >> 16) & 0xff] ^
1025280304Sjkim        Td3[(s2 >> 24)       ] ^
1026280304Sjkim        rk[1];
1027280304Sjkim    t[2] =  Td0[(s2      ) & 0xff] ^
1028280304Sjkim        Td1[(s1 >>  8) & 0xff] ^
1029280304Sjkim        Td2[(s0 >> 16) & 0xff] ^
1030280304Sjkim        Td3[(s3 >> 24)       ] ^
1031280304Sjkim        rk[2];
1032280304Sjkim    t[3] =  Td0[(s3      ) & 0xff] ^
1033280304Sjkim        Td1[(s2 >>  8) & 0xff] ^
1034280304Sjkim        Td2[(s1 >> 16) & 0xff] ^
1035280304Sjkim        Td3[(s0 >> 24)       ] ^
1036280304Sjkim        rk[3];
1037238384Sjkim#endif
1038280304Sjkim    s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
1039238384Sjkim    }
1040238384Sjkim    /*
1041280304Sjkim     * apply last round and
1042280304Sjkim     * map cipher state to byte array block:
1043280304Sjkim     */
    /*
     * After the loop rk sits at word offset 4*rounds, i.e. the last round
     * key. Unlike the earlier rounds this one is not switchable: it always
     * goes through Td4, has no linear step, and stores directly through
     * u32 pointers, which relies on the little-endian, unaligned-access
     * assumptions stated in the file header.
     */
1044280304Sjkim    prefetch256(Td4);
1045238384Sjkim
1046280304Sjkim    *(u32*)(out+0) =
1047280304Sjkim        ((u32)Td4[(s0      ) & 0xff])    ^
1048280304Sjkim        ((u32)Td4[(s3 >>  8) & 0xff] <<  8) ^
1049280304Sjkim        ((u32)Td4[(s2 >> 16) & 0xff] << 16) ^
1050280304Sjkim        ((u32)Td4[(s1 >> 24)       ] << 24) ^
1051280304Sjkim        rk[0];
1052280304Sjkim    *(u32*)(out+4) =
1053280304Sjkim        ((u32)Td4[(s1      ) & 0xff])     ^
1054280304Sjkim        ((u32)Td4[(s0 >>  8) & 0xff] <<  8) ^
1055280304Sjkim        ((u32)Td4[(s3 >> 16) & 0xff] << 16) ^
1056280304Sjkim        ((u32)Td4[(s2 >> 24)       ] << 24) ^
1057280304Sjkim        rk[1];
1058280304Sjkim    *(u32*)(out+8) =
1059280304Sjkim        ((u32)Td4[(s2      ) & 0xff])     ^
1060280304Sjkim        ((u32)Td4[(s1 >>  8) & 0xff] <<  8) ^
1061280304Sjkim        ((u32)Td4[(s0 >> 16) & 0xff] << 16) ^
1062280304Sjkim        ((u32)Td4[(s3 >> 24)       ] << 24) ^
1063280304Sjkim        rk[2];
1064280304Sjkim    *(u32*)(out+12) =
1065280304Sjkim        ((u32)Td4[(s3      ) & 0xff])     ^
1066280304Sjkim        ((u32)Td4[(s2 >>  8) & 0xff] <<  8) ^
1067280304Sjkim        ((u32)Td4[(s1 >> 16) & 0xff] << 16) ^
1068280304Sjkim        ((u32)Td4[(s0 >> 24)       ] << 24) ^
1069280304Sjkim        rk[3];
1070238384Sjkim}
1071