1295016Sjkim/* crypto/aes/aes_core.c */ 2238384Sjkim/** 3238384Sjkim * rijndael-alg-fst.c 4238384Sjkim * 5238384Sjkim * @version 3.0 (December 2000) 6238384Sjkim * 7238384Sjkim * Optimised ANSI C code for the Rijndael cipher (now AES) 8238384Sjkim * 9238384Sjkim * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be> 10238384Sjkim * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be> 11238384Sjkim * @author Paulo Barreto <paulo.barreto@terra.com.br> 12238384Sjkim * 13238384Sjkim * This code is hereby placed in the public domain. 14238384Sjkim * 15238384Sjkim * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS 16238384Sjkim * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17238384Sjkim * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18238384Sjkim * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE 19238384Sjkim * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20238384Sjkim * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21238384Sjkim * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22238384Sjkim * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23238384Sjkim * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 24238384Sjkim * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 25238384Sjkim * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26238384Sjkim */ 27238384Sjkim 28238384Sjkim/* 29238384Sjkim * This is experimental x86[_64] derivative. It assumes little-endian 30238384Sjkim * byte order and expects CPU to sustain unaligned memory references. 31238384Sjkim * It is used as playground for cache-time attack mitigations and 32238384Sjkim * serves as reference C implementation for x86[_64] assembler. 33238384Sjkim * 34280304Sjkim * <appro@fy.chalmers.se> 35238384Sjkim */ 36238384Sjkim 37238384Sjkim 38238384Sjkim#ifndef AES_DEBUG 39238384Sjkim# ifndef NDEBUG 40238384Sjkim# define NDEBUG 41238384Sjkim# endif 42238384Sjkim#endif 43238384Sjkim#include <assert.h> 44238384Sjkim 45238384Sjkim#include <stdlib.h> 46238384Sjkim#include <openssl/aes.h> 47238384Sjkim#include "aes_locl.h" 48238384Sjkim 49238384Sjkim/* 50238384Sjkim * These two parameters control which table, 256-byte or 2KB, is 51238384Sjkim * referenced in outer and respectively inner rounds. 52238384Sjkim */ 53238384Sjkim#define AES_COMPACT_IN_OUTER_ROUNDS 54238384Sjkim#ifdef AES_COMPACT_IN_OUTER_ROUNDS 55238384Sjkim/* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while 56238384Sjkim * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further* 57238384Sjkim * by factor of ~2. */ 58238384Sjkim# undef AES_COMPACT_IN_INNER_ROUNDS 59238384Sjkim#endif 60238384Sjkim 61238384Sjkim#if 1 62238384Sjkimstatic void prefetch256(const void *table) 63238384Sjkim{ 64280304Sjkim volatile unsigned long *t=(void *)table,ret; 65280304Sjkim unsigned long sum; 66280304Sjkim int i; 67238384Sjkim 68280304Sjkim /* 32 is common least cache-line size */ 69280304Sjkim for (sum=0,i=0;i<256/sizeof(t[0]);i+=32/sizeof(t[0])) sum ^= t[i]; 70238384Sjkim 71280304Sjkim ret = sum; 72238384Sjkim} 73238384Sjkim#else 74238384Sjkim# define prefetch256(t) 75238384Sjkim#endif 76238384Sjkim 77238384Sjkim#undef GETU32 78238384Sjkim#define GETU32(p) (*((u32*)(p))) 79238384Sjkim 80238384Sjkim#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) 81238384Sjkimtypedef unsigned __int64 u64; 82280304Sjkim#define U64(C) C##UI64 83238384Sjkim#elif defined(__arch64__) 84238384Sjkimtypedef unsigned long u64; 85280304Sjkim#define U64(C) C##UL 86238384Sjkim#else 87238384Sjkimtypedef unsigned long long u64; 88280304Sjkim#define U64(C) C##ULL 89238384Sjkim#endif 90238384Sjkim 91238384Sjkim#undef ROTATE 92238384Sjkim#if defined(_MSC_VER) || defined(__ICC) 93238384Sjkim# define ROTATE(a,n) _lrotl(a,n) 94238384Sjkim#elif defined(__GNUC__) && __GNUC__>=2 95238384Sjkim# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__) 96280304Sjkim# define ROTATE(a,n) ({ register unsigned int ret; \ 97280304Sjkim asm ( \ 98280304Sjkim "roll %1,%0" \ 99280304Sjkim : "=r"(ret) \ 100280304Sjkim : "I"(n), "0"(a) \ 101280304Sjkim : "cc"); \ 102280304Sjkim ret; \ 103280304Sjkim }) 104238384Sjkim# endif 105238384Sjkim#endif 106280304Sjkim/*- 107238384SjkimTe [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03]; 108238384SjkimTe0[x] = S [x].[02, 01, 01, 03]; 109238384SjkimTe1[x] = S [x].[03, 02, 01, 01]; 110238384SjkimTe2[x] = S [x].[01, 03, 02, 01]; 111238384SjkimTe3[x] = S [x].[01, 01, 03, 02]; 112238384Sjkim*/ 113238384Sjkim#define Te0 (u32)((u64*)((u8*)Te+0)) 114238384Sjkim#define Te1 (u32)((u64*)((u8*)Te+3)) 115238384Sjkim#define Te2 (u32)((u64*)((u8*)Te+2)) 116238384Sjkim#define Te3 (u32)((u64*)((u8*)Te+1)) 117280304Sjkim/*- 118238384SjkimTd [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b]; 119238384SjkimTd0[x] = Si[x].[0e, 09, 0d, 0b]; 120238384SjkimTd1[x] = Si[x].[0b, 0e, 09, 0d]; 121238384SjkimTd2[x] = Si[x].[0d, 0b, 0e, 09]; 122238384SjkimTd3[x] = Si[x].[09, 0d, 0b, 0e]; 123238384SjkimTd4[x] = Si[x].[01]; 124238384Sjkim*/ 125238384Sjkim#define Td0 (u32)((u64*)((u8*)Td+0)) 126238384Sjkim#define Td1 (u32)((u64*)((u8*)Td+3)) 127238384Sjkim#define Td2 (u32)((u64*)((u8*)Td+2)) 128238384Sjkim#define Td3 (u32)((u64*)((u8*)Td+1)) 129238384Sjkim 130238384Sjkimstatic const u64 Te[256] = { 131238384Sjkim U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8), 132238384Sjkim U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6), 133238384Sjkim U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6), 134238384Sjkim U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591), 135238384Sjkim U64(0x5030306050303060), U64(0x0301010203010102), 136238384Sjkim U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56), 137238384Sjkim U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5), 138238384Sjkim U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec), 139238384Sjkim U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f), 140238384Sjkim U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa), 141238384Sjkim U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2), 142238384Sjkim U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb), 143238384Sjkim U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3), 144238384Sjkim U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45), 145238384Sjkim U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453), 146238384Sjkim U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b), 147238384Sjkim U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1), 148238384Sjkim U64(0xae93933dae93933d), U64(0x6a26264c6a26264c), 149238384Sjkim U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e), 150238384Sjkim U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83), 151238384Sjkim U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551), 152238384Sjkim U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9), 153238384Sjkim U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab), 154238384Sjkim U64(0x5331316253313162), U64(0x3f15152a3f15152a), 155238384Sjkim U64(0x0c0404080c040408), U64(0x52c7c79552c7c795), 156238384Sjkim U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d), 157238384Sjkim U64(0x2818183028181830), U64(0xa1969637a1969637), 158238384Sjkim U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f), 159238384Sjkim U64(0x0907070e0907070e), U64(0x3612122436121224), 160238384Sjkim U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df), 161238384Sjkim U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e), 162238384Sjkim U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea), 163238384Sjkim U64(0x1b0909121b090912), U64(0x9e83831d9e83831d), 164238384Sjkim U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34), 165238384Sjkim U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc), 166238384Sjkim U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b), 167238384Sjkim U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76), 168238384Sjkim U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d), 169238384Sjkim U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd), 170238384Sjkim U64(0x712f2f5e712f2f5e), U64(0x9784841397848413), 171238384Sjkim U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9), 172238384Sjkim U64(0x0000000000000000), U64(0x2cededc12cededc1), 173238384Sjkim U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3), 174238384Sjkim U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6), 175238384Sjkim U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d), 176238384Sjkim U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972), 177238384Sjkim U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98), 178238384Sjkim U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85), 179238384Sjkim U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5), 180238384Sjkim U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed), 181238384Sjkim U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a), 182238384Sjkim U64(0x5533336655333366), U64(0x9485851194858511), 183238384Sjkim U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9), 184238384Sjkim U64(0x0602020406020204), U64(0x817f7ffe817f7ffe), 185238384Sjkim U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78), 186238384Sjkim U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b), 187238384Sjkim U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d), 188238384Sjkim U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05), 189238384Sjkim U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21), 190238384Sjkim U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1), 191238384Sjkim U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677), 192238384Sjkim U64(0x75dadaaf75dadaaf), U64(0x6321214263212142), 193238384Sjkim U64(0x3010102030101020), U64(0x1affffe51affffe5), 194238384Sjkim U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf), 195238384Sjkim U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18), 196238384Sjkim U64(0x3513132635131326), U64(0x2fececc32fececc3), 197238384Sjkim U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735), 198238384Sjkim U64(0xcc444488cc444488), U64(0x3917172e3917172e), 199238384Sjkim U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755), 200238384Sjkim U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a), 201238384Sjkim U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba), 202238384Sjkim U64(0x2b1919322b191932), U64(0x957373e6957373e6), 203238384Sjkim U64(0xa06060c0a06060c0), U64(0x9881811998818119), 204238384Sjkim U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3), 205238384Sjkim U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54), 206238384Sjkim U64(0xab90903bab90903b), U64(0x8388880b8388880b), 207238384Sjkim U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7), 208238384Sjkim U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428), 209238384Sjkim U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc), 210238384Sjkim U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad), 211238384Sjkim U64(0x3be0e0db3be0e0db), U64(0x5632326456323264), 212238384Sjkim U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14), 213238384Sjkim U64(0xdb494992db494992), U64(0x0a06060c0a06060c), 214238384Sjkim U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8), 215238384Sjkim U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd), 216238384Sjkim U64(0xefacac43efacac43), U64(0xa66262c4a66262c4), 217238384Sjkim U64(0xa8919139a8919139), U64(0xa4959531a4959531), 218238384Sjkim U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2), 219238384Sjkim U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b), 220238384Sjkim U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda), 221238384Sjkim U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1), 222238384Sjkim U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949), 223238384Sjkim U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac), 224238384Sjkim U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf), 225238384Sjkim U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4), 226238384Sjkim U64(0xe9aeae47e9aeae47), U64(0x1808081018080810), 227238384Sjkim U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0), 228238384Sjkim U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c), 229238384Sjkim U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657), 230238384Sjkim U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697), 231238384Sjkim U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1), 232238384Sjkim U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e), 233238384Sjkim U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61), 234238384Sjkim U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f), 235238384Sjkim U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c), 236238384Sjkim U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc), 237238384Sjkim U64(0xd8484890d8484890), U64(0x0503030605030306), 238238384Sjkim U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c), 239238384Sjkim U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a), 240238384Sjkim U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969), 241238384Sjkim U64(0x9186861791868617), U64(0x58c1c19958c1c199), 242238384Sjkim U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27), 243238384Sjkim U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb), 244238384Sjkim U64(0xb398982bb398982b), U64(0x3311112233111122), 245238384Sjkim U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9), 246238384Sjkim U64(0x898e8e07898e8e07), U64(0xa7949433a7949433), 247238384Sjkim U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c), 248238384Sjkim U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9), 249238384Sjkim U64(0x49cece8749cece87), U64(0xff5555aaff5555aa), 250238384Sjkim U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5), 251238384Sjkim U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159), 252238384Sjkim U64(0x8089890980898909), U64(0x170d0d1a170d0d1a), 253238384Sjkim U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7), 254238384Sjkim U64(0xc6424284c6424284), U64(0xb86868d0b86868d0), 255238384Sjkim U64(0xc3414182c3414182), U64(0xb0999929b0999929), 256238384Sjkim U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e), 257238384Sjkim U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8), 258238384Sjkim U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c) 259238384Sjkim}; 260238384Sjkim 261238384Sjkimstatic const u8 Te4[256] = { 262238384Sjkim 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U, 263238384Sjkim 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U, 264238384Sjkim 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U, 265238384Sjkim 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U, 266238384Sjkim 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU, 267238384Sjkim 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U, 268238384Sjkim 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU, 269238384Sjkim 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U, 270238384Sjkim 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U, 271238384Sjkim 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U, 272238384Sjkim 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU, 273238384Sjkim 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU, 274238384Sjkim 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U, 275238384Sjkim 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U, 276238384Sjkim 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U, 277238384Sjkim 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U, 278238384Sjkim 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U, 279238384Sjkim 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U, 280238384Sjkim 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U, 281238384Sjkim 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU, 282238384Sjkim 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU, 283238384Sjkim 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U, 284238384Sjkim 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U, 285238384Sjkim 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U, 286238384Sjkim 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U, 287238384Sjkim 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU, 288238384Sjkim 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU, 289238384Sjkim 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU, 290238384Sjkim 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U, 291238384Sjkim 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU, 292238384Sjkim 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U, 293238384Sjkim 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U 294238384Sjkim}; 295238384Sjkim 296238384Sjkimstatic const u64 Td[256] = { 297238384Sjkim U64(0x50a7f45150a7f451), U64(0x5365417e5365417e), 298238384Sjkim U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a), 299238384Sjkim U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f), 300238384Sjkim U64(0xab58faacab58faac), U64(0x9303e34b9303e34b), 301238384Sjkim U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad), 302238384Sjkim U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5), 303238384Sjkim U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5), 304238384Sjkim U64(0x8044352680443526), U64(0x8fa362b58fa362b5), 305238384Sjkim U64(0x495ab1de495ab1de), U64(0x671bba25671bba25), 306238384Sjkim U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d), 307238384Sjkim U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81), 308238384Sjkim U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b), 309238384Sjkim U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215), 310238384Sjkim U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295), 311238384Sjkim U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458), 312238384Sjkim U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e), 313238384Sjkim U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4), 314238384Sjkim U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927), 315238384Sjkim U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0), 316238384Sjkim U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d), 317238384Sjkim U64(0x184adf63184adf63), U64(0x82311ae582311ae5), 318238384Sjkim U64(0x6033519760335197), U64(0x457f5362457f5362), 319238384Sjkim U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb), 320238384Sjkim U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9), 321238384Sjkim U64(0x5868487058684870), U64(0x19fd458f19fd458f), 322238384Sjkim U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52), 323238384Sjkim U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72), 324238384Sjkim U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566), 325238384Sjkim U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f), 326238384Sjkim U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3), 327238384Sjkim U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23), 328238384Sjkim U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed), 329238384Sjkim U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7), 330238384Sjkim U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e), 331238384Sjkim U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506), 332238384Sjkim U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4), 333238384Sjkim U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2), 334238384Sjkim U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4), 335238384Sjkim U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040), 336238384Sjkim U64(0x069f715e069f715e), U64(0x51106ebd51106ebd), 337238384Sjkim U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96), 338238384Sjkim U64(0xae053eddae053edd), U64(0x46bde64d46bde64d), 339238384Sjkim U64(0xb58d5491b58d5491), U64(0x055dc471055dc471), 340238384Sjkim U64(0x6fd406046fd40604), U64(0xff155060ff155060), 341238384Sjkim U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6), 342238384Sjkim U64(0xcc434089cc434089), U64(0x779ed967779ed967), 343238384Sjkim U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907), 344238384Sjkim U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879), 345238384Sjkim U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c), 346238384Sjkim U64(0xc91e84f8c91e84f8), U64(0x0000000000000000), 347238384Sjkim U64(0x8386800983868009), U64(0x48ed2b3248ed2b32), 348238384Sjkim U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c), 349238384Sjkim U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f), 350238384Sjkim U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36), 351238384Sjkim U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68), 352238384Sjkim U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624), 353238384Sjkim U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793), 354238384Sjkim U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b), 355238384Sjkim U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61), 356238384Sjkim U64(0x694b775a694b775a), U64(0x161a121c161a121c), 357238384Sjkim U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0), 358238384Sjkim U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12), 359238384Sjkim U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2), 360238384Sjkim U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14), 361238384Sjkim U64(0x8519f1578519f157), U64(0x4c0775af4c0775af), 362238384Sjkim U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3), 363238384Sjkim U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c), 364238384Sjkim U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b), 365238384Sjkim U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb), 366238384Sjkim U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8), 367238384Sjkim U64(0xcadc31d7cadc31d7), U64(0x1085634210856342), 368238384Sjkim U64(0x4022971340229713), U64(0x2011c6842011c684), 369238384Sjkim U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2), 370238384Sjkim U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7), 371238384Sjkim U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc), 372238384Sjkim U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177), 373238384Sjkim U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9), 374238384Sjkim U64(0xfa489411fa489411), U64(0x2264e9472264e947), 375238384Sjkim U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0), 376238384Sjkim U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322), 377238384Sjkim U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9), 378238384Sjkim U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498), 379238384Sjkim U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5), 380238384Sjkim U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f), 381238384Sjkim U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850), 382238384Sjkim U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54), 383238384Sjkim U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890), 384238384Sjkim U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382), 385238384Sjkim U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069), 386238384Sjkim U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf), 387238384Sjkim U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810), 388238384Sjkim U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb), 389238384Sjkim U64(0x097826cd097826cd), U64(0xf418596ef418596e), 390238384Sjkim U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83), 391238384Sjkim U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa), 392238384Sjkim U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef), 393238384Sjkim U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a), 394238384Sjkim U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029), 395238384Sjkim U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a), 396238384Sjkim U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235), 397238384Sjkim U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc), 398238384Sjkim U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733), 399238384Sjkim U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41), 400238384Sjkim U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117), 401238384Sjkim U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43), 402238384Sjkim U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4), 403238384Sjkim U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c), 404238384Sjkim U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546), 405238384Sjkim U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01), 406238384Sjkim U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb), 407238384Sjkim U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92), 408238384Sjkim U64(0x335610e9335610e9), U64(0x1347d66d1347d66d), 409238384Sjkim U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137), 410238384Sjkim U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb), 411238384Sjkim U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7), 412238384Sjkim U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a), 413238384Sjkim U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255), 414238384Sjkim U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773), 415238384Sjkim U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f), 416238384Sjkim U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478), 417238384Sjkim U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9), 418238384Sjkim U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2), 419238384Sjkim U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc), 420238384Sjkim U64(0x8b493c288b493c28), U64(0x41950dff41950dff), 421238384Sjkim U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08), 422238384Sjkim U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664), 423238384Sjkim U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5), 424238384Sjkim U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0) 425238384Sjkim}; 426238384Sjkimstatic const u8 Td4[256] = { 427238384Sjkim 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U, 428238384Sjkim 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU, 429238384Sjkim 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U, 430238384Sjkim 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU, 431238384Sjkim 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU, 432238384Sjkim 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU, 433238384Sjkim 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U, 434238384Sjkim 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U, 435238384Sjkim 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U, 436238384Sjkim 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U, 437238384Sjkim 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU, 438238384Sjkim 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U, 439238384Sjkim 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU, 440238384Sjkim 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U, 441238384Sjkim 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U, 442238384Sjkim 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU, 443238384Sjkim 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU, 444238384Sjkim 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U, 445238384Sjkim 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U, 446238384Sjkim 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU, 447238384Sjkim 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U, 448238384Sjkim 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU, 449238384Sjkim 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U, 450238384Sjkim 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U, 451238384Sjkim 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U, 452238384Sjkim 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU, 453238384Sjkim 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU, 454238384Sjkim 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU, 455238384Sjkim 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U, 456238384Sjkim 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U, 457238384Sjkim 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U, 458238384Sjkim 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU 459238384Sjkim}; 460238384Sjkim 461238384Sjkimstatic const u32 rcon[] = { 462238384Sjkim 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U, 463238384Sjkim 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U, 464238384Sjkim 0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ 465238384Sjkim}; 466238384Sjkim 467238384Sjkim/** 468238384Sjkim * Expand the cipher key into the encryption key schedule. 469238384Sjkim */ 470238384Sjkimint AES_set_encrypt_key(const unsigned char *userKey, const int bits, 471280304Sjkim AES_KEY *key) 472280304Sjkim{ 473238384Sjkim 474280304Sjkim u32 *rk; 475280304Sjkim int i = 0; 476280304Sjkim u32 temp; 477238384Sjkim 478280304Sjkim if (!userKey || !key) 479280304Sjkim return -1; 480280304Sjkim if (bits != 128 && bits != 192 && bits != 256) 481280304Sjkim return -2; 482238384Sjkim 483280304Sjkim rk = key->rd_key; 484238384Sjkim 485280304Sjkim if (bits==128) 486280304Sjkim key->rounds = 10; 487280304Sjkim else if (bits==192) 488280304Sjkim key->rounds = 12; 489280304Sjkim else 490280304Sjkim key->rounds = 14; 491238384Sjkim 492280304Sjkim rk[0] = GETU32(userKey ); 493280304Sjkim rk[1] = GETU32(userKey + 4); 494280304Sjkim rk[2] = GETU32(userKey + 8); 495280304Sjkim rk[3] = GETU32(userKey + 12); 496280304Sjkim if (bits == 128) { 497280304Sjkim while (1) { 498280304Sjkim temp = rk[3]; 499280304Sjkim rk[4] = rk[0] ^ 500280304Sjkim ((u32)Te4[(temp >> 8) & 0xff] ) ^ 501280304Sjkim ((u32)Te4[(temp >> 16) & 0xff] << 8) ^ 502280304Sjkim ((u32)Te4[(temp >> 24) ] << 16) ^ 503280304Sjkim ((u32)Te4[(temp ) & 0xff] << 24) ^ 504280304Sjkim rcon[i]; 505280304Sjkim rk[5] = rk[1] ^ rk[4]; 506280304Sjkim rk[6] = rk[2] ^ rk[5]; 507280304Sjkim rk[7] = rk[3] ^ rk[6]; 508280304Sjkim if (++i == 10) { 509280304Sjkim return 0; 510280304Sjkim } 511280304Sjkim rk += 4; 512280304Sjkim } 513280304Sjkim } 514280304Sjkim rk[4] = GETU32(userKey + 16); 515280304Sjkim rk[5] = GETU32(userKey + 20); 516280304Sjkim if (bits == 192) { 517280304Sjkim while (1) { 518280304Sjkim temp = rk[ 5]; 519280304Sjkim rk[ 6] = rk[ 0] ^ 520280304Sjkim ((u32)Te4[(temp >> 8) & 0xff] ) ^ 521280304Sjkim ((u32)Te4[(temp >> 16) & 0xff] << 8) ^ 522280304Sjkim ((u32)Te4[(temp >> 24) ] << 16) ^ 523280304Sjkim ((u32)Te4[(temp ) & 0xff] << 24) ^ 524280304Sjkim rcon[i]; 525280304Sjkim rk[ 7] = rk[ 1] ^ rk[ 6]; 526280304Sjkim rk[ 8] = rk[ 2] ^ rk[ 7]; 527280304Sjkim rk[ 9] = rk[ 3] ^ rk[ 8]; 528280304Sjkim if (++i == 8) { 529280304Sjkim return 0; 530280304Sjkim } 531280304Sjkim rk[10] = rk[ 4] ^ rk[ 9]; 532280304Sjkim rk[11] = rk[ 5] ^ rk[10]; 533280304Sjkim rk += 6; 534280304Sjkim } 535280304Sjkim } 536280304Sjkim rk[6] = GETU32(userKey + 24); 537280304Sjkim rk[7] = GETU32(userKey + 28); 538280304Sjkim if (bits == 256) { 539280304Sjkim while (1) { 540280304Sjkim temp = rk[ 7]; 541280304Sjkim rk[ 8] = rk[ 0] ^ 542280304Sjkim ((u32)Te4[(temp >> 8) & 0xff] ) ^ 543280304Sjkim ((u32)Te4[(temp >> 16) & 0xff] << 8) ^ 544280304Sjkim ((u32)Te4[(temp >> 24) ] << 16) ^ 545280304Sjkim ((u32)Te4[(temp ) & 0xff] << 24) ^ 546280304Sjkim rcon[i]; 547280304Sjkim rk[ 9] = rk[ 1] ^ rk[ 8]; 548280304Sjkim rk[10] = rk[ 2] ^ rk[ 9]; 549280304Sjkim rk[11] = rk[ 3] ^ rk[10]; 550280304Sjkim if (++i == 7) { 551280304Sjkim return 0; 552280304Sjkim } 553280304Sjkim temp = rk[11]; 554280304Sjkim rk[12] = rk[ 4] ^ 555280304Sjkim ((u32)Te4[(temp ) & 0xff] ) ^ 556280304Sjkim ((u32)Te4[(temp >> 8) & 0xff] << 8) ^ 557280304Sjkim ((u32)Te4[(temp >> 16) & 0xff] << 16) ^ 558280304Sjkim ((u32)Te4[(temp >> 24) ] << 24); 559280304Sjkim rk[13] = rk[ 5] ^ rk[12]; 560280304Sjkim rk[14] = rk[ 6] ^ rk[13]; 561280304Sjkim rk[15] = rk[ 7] ^ rk[14]; 562238384Sjkim 563280304Sjkim rk += 8; 564280304Sjkim } 565280304Sjkim } 566280304Sjkim return 0; 567238384Sjkim} 568238384Sjkim 569238384Sjkim/** 570238384Sjkim * Expand the cipher key into the decryption key schedule. 571238384Sjkim */ 572238384Sjkimint AES_set_decrypt_key(const unsigned char *userKey, const int bits, 573280304Sjkim AES_KEY *key) 574280304Sjkim{ 575238384Sjkim 576280304Sjkim u32 *rk; 577280304Sjkim int i, j, status; 578280304Sjkim u32 temp; 579238384Sjkim 580280304Sjkim /* first, start with an encryption schedule */ 581280304Sjkim status = AES_set_encrypt_key(userKey, bits, key); 582280304Sjkim if (status < 0) 583280304Sjkim return status; 584238384Sjkim 585280304Sjkim rk = key->rd_key; 586238384Sjkim 587280304Sjkim /* invert the order of the round keys: */ 588280304Sjkim for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) { 589280304Sjkim temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; 590280304Sjkim temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; 591280304Sjkim temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; 592280304Sjkim temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; 593280304Sjkim } 594280304Sjkim /* apply the inverse MixColumn transform to all round keys but the first and the last: */ 595280304Sjkim for (i = 1; i < (key->rounds); i++) { 596280304Sjkim rk += 4; 597238384Sjkim#if 1 598280304Sjkim for (j = 0; j < 4; j++) { 599280304Sjkim u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; 600238384Sjkim 601280304Sjkim tp1 = rk[j]; 602280304Sjkim m = tp1 & 0x80808080; 603280304Sjkim tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ 604280304Sjkim ((m - (m >> 7)) & 0x1b1b1b1b); 605280304Sjkim m = tp2 & 0x80808080; 606280304Sjkim tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^ 607280304Sjkim ((m - (m >> 7)) & 0x1b1b1b1b); 608280304Sjkim m = tp4 & 0x80808080; 609280304Sjkim tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^ 610280304Sjkim ((m - (m >> 7)) & 0x1b1b1b1b); 611280304Sjkim tp9 = tp8 ^ tp1; 612280304Sjkim tpb = tp9 ^ tp2; 613280304Sjkim tpd = tp9 ^ tp4; 614280304Sjkim tpe = tp8 ^ tp4 ^ tp2; 615238384Sjkim#if defined(ROTATE) 616280304Sjkim rk[j] = tpe ^ ROTATE(tpd,16) ^ 617280304Sjkim ROTATE(tp9,8) ^ ROTATE(tpb,24); 618238384Sjkim#else 619280304Sjkim rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ 620280304Sjkim (tp9 >> 24) ^ (tp9 << 8) ^ 621280304Sjkim (tpb >> 8) ^ (tpb << 24); 622238384Sjkim#endif 623280304Sjkim } 624238384Sjkim#else 625280304Sjkim rk[0] = 626280304Sjkim Td0[Te2[(rk[0] ) & 0xff] & 0xff] ^ 627280304Sjkim Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^ 628280304Sjkim Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^ 629280304Sjkim Td3[Te2[(rk[0] >> 24) ] & 0xff]; 630280304Sjkim rk[1] = 631280304Sjkim Td0[Te2[(rk[1] ) & 0xff] & 0xff] ^ 632280304Sjkim Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^ 633280304Sjkim Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^ 634280304Sjkim Td3[Te2[(rk[1] >> 24) ] & 0xff]; 635280304Sjkim rk[2] = 636280304Sjkim Td0[Te2[(rk[2] ) & 0xff] & 0xff] ^ 637280304Sjkim Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^ 638280304Sjkim Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^ 639280304Sjkim Td3[Te2[(rk[2] >> 24) ] & 0xff]; 640280304Sjkim rk[3] = 641280304Sjkim Td0[Te2[(rk[3] ) & 0xff] & 0xff] ^ 642280304Sjkim Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^ 643280304Sjkim Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^ 644280304Sjkim Td3[Te2[(rk[3] >> 24) ] & 0xff]; 645238384Sjkim#endif 646280304Sjkim } 647280304Sjkim return 0; 648238384Sjkim} 649238384Sjkim 650238384Sjkim/* 651238384Sjkim * Encrypt a single block 652238384Sjkim * in and out can overlap 653238384Sjkim */ 654238384Sjkimvoid AES_encrypt(const unsigned char *in, unsigned char *out, 655280304Sjkim const AES_KEY *key) 656280304Sjkim{ 657238384Sjkim 658280304Sjkim const u32 *rk; 659280304Sjkim u32 s0, s1, s2, s3, t[4]; 660280304Sjkim int r; 661238384Sjkim 662280304Sjkim assert(in && out && key); 663280304Sjkim rk = key->rd_key; 664238384Sjkim 665280304Sjkim /* 666280304Sjkim * map byte array block to cipher state 667280304Sjkim * and add initial round key: 668280304Sjkim */ 669280304Sjkim s0 = GETU32(in ) ^ rk[0]; 670280304Sjkim s1 = GETU32(in + 4) ^ rk[1]; 671280304Sjkim s2 = GETU32(in + 8) ^ rk[2]; 672280304Sjkim s3 = GETU32(in + 12) ^ rk[3]; 673238384Sjkim 674238384Sjkim#if defined(AES_COMPACT_IN_OUTER_ROUNDS) 675280304Sjkim prefetch256(Te4); 676238384Sjkim 677280304Sjkim t[0] = (u32)Te4[(s0 ) & 0xff] ^ 678280304Sjkim (u32)Te4[(s1 >> 8) & 0xff] << 8 ^ 679280304Sjkim (u32)Te4[(s2 >> 16) & 0xff] << 16 ^ 680280304Sjkim (u32)Te4[(s3 >> 24) ] << 24; 681280304Sjkim t[1] = (u32)Te4[(s1 ) & 0xff] ^ 682280304Sjkim (u32)Te4[(s2 >> 8) & 0xff] << 8 ^ 683280304Sjkim (u32)Te4[(s3 >> 16) & 0xff] << 16 ^ 684280304Sjkim (u32)Te4[(s0 >> 24) ] << 24; 685280304Sjkim t[2] = (u32)Te4[(s2 ) & 0xff] ^ 686280304Sjkim (u32)Te4[(s3 >> 8) & 0xff] << 8 ^ 687280304Sjkim (u32)Te4[(s0 >> 16) & 0xff] << 16 ^ 688280304Sjkim (u32)Te4[(s1 >> 24) ] << 24; 689280304Sjkim t[3] = (u32)Te4[(s3 ) & 0xff] ^ 690280304Sjkim (u32)Te4[(s0 >> 8) & 0xff] << 8 ^ 691280304Sjkim (u32)Te4[(s1 >> 16) & 0xff] << 16 ^ 692280304Sjkim (u32)Te4[(s2 >> 24) ] << 24; 693238384Sjkim 694280304Sjkim /* now do the linear transform using words */ 695280304Sjkim { int i; 696280304Sjkim u32 r0, r1, r2; 697238384Sjkim 698280304Sjkim for (i = 0; i < 4; i++) { 699280304Sjkim r0 = t[i]; 700280304Sjkim r1 = r0 & 0x80808080; 701280304Sjkim r2 = ((r0 & 0x7f7f7f7f) << 1) ^ 702280304Sjkim ((r1 - (r1 >> 7)) & 0x1b1b1b1b); 703238384Sjkim#if defined(ROTATE) 704280304Sjkim t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^ 705280304Sjkim ROTATE(r0,16) ^ ROTATE(r0,8); 706238384Sjkim#else 707280304Sjkim t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^ 708280304Sjkim (r0 << 16) ^ (r0 >> 16) ^ 709280304Sjkim (r0 << 8) ^ (r0 >> 24); 710238384Sjkim#endif 711280304Sjkim t[i] ^= rk[4+i]; 712280304Sjkim } 713280304Sjkim } 714238384Sjkim#else 715280304Sjkim t[0] = Te0[(s0 ) & 0xff] ^ 716280304Sjkim Te1[(s1 >> 8) & 0xff] ^ 717280304Sjkim Te2[(s2 >> 16) & 0xff] ^ 718280304Sjkim Te3[(s3 >> 24) ] ^ 719280304Sjkim rk[4]; 720280304Sjkim t[1] = Te0[(s1 ) & 0xff] ^ 721280304Sjkim Te1[(s2 >> 8) & 0xff] ^ 722280304Sjkim Te2[(s3 >> 16) & 0xff] ^ 723280304Sjkim Te3[(s0 >> 24) ] ^ 724280304Sjkim rk[5]; 725280304Sjkim t[2] = Te0[(s2 ) & 0xff] ^ 726280304Sjkim Te1[(s3 >> 8) & 0xff] ^ 727280304Sjkim Te2[(s0 >> 16) & 0xff] ^ 728280304Sjkim Te3[(s1 >> 24) ] ^ 729280304Sjkim rk[6]; 730280304Sjkim t[3] = Te0[(s3 ) & 0xff] ^ 731280304Sjkim Te1[(s0 >> 8) & 0xff] ^ 732280304Sjkim Te2[(s1 >> 16) & 0xff] ^ 733280304Sjkim Te3[(s2 >> 24) ] ^ 734280304Sjkim rk[7]; 735238384Sjkim#endif 736280304Sjkim s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; 737238384Sjkim 738238384Sjkim /* 739238384Sjkim * Nr - 2 full rounds: 740238384Sjkim */ 741238384Sjkim for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) { 742238384Sjkim#if defined(AES_COMPACT_IN_INNER_ROUNDS) 743280304Sjkim t[0] = (u32)Te4[(s0 ) & 0xff] ^ 744280304Sjkim (u32)Te4[(s1 >> 8) & 0xff] << 8 ^ 745280304Sjkim (u32)Te4[(s2 >> 16) & 0xff] << 16 ^ 746280304Sjkim (u32)Te4[(s3 >> 24) ] << 24; 747280304Sjkim t[1] = (u32)Te4[(s1 ) & 0xff] ^ 748280304Sjkim (u32)Te4[(s2 >> 8) & 0xff] << 8 ^ 749280304Sjkim (u32)Te4[(s3 >> 16) & 0xff] << 16 ^ 750280304Sjkim (u32)Te4[(s0 >> 24) ] << 24; 751280304Sjkim t[2] = (u32)Te4[(s2 ) & 0xff] ^ 752280304Sjkim (u32)Te4[(s3 >> 8) & 0xff] << 8 ^ 753280304Sjkim (u32)Te4[(s0 >> 16) & 0xff] << 16 ^ 754280304Sjkim (u32)Te4[(s1 >> 24) ] << 24; 755280304Sjkim t[3] = (u32)Te4[(s3 ) & 0xff] ^ 756280304Sjkim (u32)Te4[(s0 >> 8) & 0xff] << 8 ^ 757280304Sjkim (u32)Te4[(s1 >> 16) & 0xff] << 16 ^ 758280304Sjkim (u32)Te4[(s2 >> 24) ] << 24; 759238384Sjkim 760280304Sjkim /* now do the linear transform using words */ 761280304Sjkim { 762280304Sjkim int i; 763280304Sjkim u32 r0, r1, r2; 764238384Sjkim 765280304Sjkim for (i = 0; i < 4; i++) { 766280304Sjkim r0 = t[i]; 767280304Sjkim r1 = r0 & 0x80808080; 768280304Sjkim r2 = ((r0 & 0x7f7f7f7f) << 1) ^ 769280304Sjkim ((r1 - (r1 >> 7)) & 0x1b1b1b1b); 770238384Sjkim#if defined(ROTATE) 771280304Sjkim t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^ 772280304Sjkim ROTATE(r0,16) ^ ROTATE(r0,8); 773238384Sjkim#else 774280304Sjkim t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^ 775280304Sjkim (r0 << 16) ^ (r0 >> 16) ^ 776280304Sjkim (r0 << 8) ^ (r0 >> 24); 777238384Sjkim#endif 778280304Sjkim t[i] ^= rk[i]; 779280304Sjkim } 780280304Sjkim } 781238384Sjkim#else 782280304Sjkim t[0] = Te0[(s0 ) & 0xff] ^ 783280304Sjkim Te1[(s1 >> 8) & 0xff] ^ 784280304Sjkim Te2[(s2 >> 16) & 0xff] ^ 785280304Sjkim Te3[(s3 >> 24) ] ^ 786280304Sjkim rk[0]; 787280304Sjkim t[1] = Te0[(s1 ) & 0xff] ^ 788280304Sjkim Te1[(s2 >> 8) & 0xff] ^ 789280304Sjkim Te2[(s3 >> 16) & 0xff] ^ 790280304Sjkim Te3[(s0 >> 24) ] ^ 791280304Sjkim rk[1]; 792280304Sjkim t[2] = Te0[(s2 ) & 0xff] ^ 793280304Sjkim Te1[(s3 >> 8) & 0xff] ^ 794280304Sjkim Te2[(s0 >> 16) & 0xff] ^ 795280304Sjkim Te3[(s1 >> 24) ] ^ 796280304Sjkim rk[2]; 797280304Sjkim t[3] = Te0[(s3 ) & 0xff] ^ 798280304Sjkim Te1[(s0 >> 8) & 0xff] ^ 799280304Sjkim Te2[(s1 >> 16) & 0xff] ^ 800280304Sjkim Te3[(s2 >> 24) ] ^ 801280304Sjkim rk[3]; 802238384Sjkim#endif 803280304Sjkim s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; 804238384Sjkim } 805238384Sjkim /* 806280304Sjkim * apply last round and 807280304Sjkim * map cipher state to byte array block: 808280304Sjkim */ 809238384Sjkim#if defined(AES_COMPACT_IN_OUTER_ROUNDS) 810280304Sjkim prefetch256(Te4); 811238384Sjkim 812280304Sjkim *(u32*)(out+0) = 813280304Sjkim (u32)Te4[(s0 ) & 0xff] ^ 814280304Sjkim (u32)Te4[(s1 >> 8) & 0xff] << 8 ^ 815280304Sjkim (u32)Te4[(s2 >> 16) & 0xff] << 16 ^ 816280304Sjkim (u32)Te4[(s3 >> 24) ] << 24 ^ 817280304Sjkim rk[0]; 818280304Sjkim *(u32*)(out+4) = 819280304Sjkim (u32)Te4[(s1 ) & 0xff] ^ 820280304Sjkim (u32)Te4[(s2 >> 8) & 0xff] << 8 ^ 821280304Sjkim (u32)Te4[(s3 >> 16) & 0xff] << 16 ^ 822280304Sjkim (u32)Te4[(s0 >> 24) ] << 24 ^ 823280304Sjkim rk[1]; 824280304Sjkim *(u32*)(out+8) = 825280304Sjkim (u32)Te4[(s2 ) & 0xff] ^ 826280304Sjkim (u32)Te4[(s3 >> 8) & 0xff] << 8 ^ 827280304Sjkim (u32)Te4[(s0 >> 16) & 0xff] << 16 ^ 828280304Sjkim (u32)Te4[(s1 >> 24) ] << 24 ^ 829280304Sjkim rk[2]; 830280304Sjkim *(u32*)(out+12) = 831280304Sjkim (u32)Te4[(s3 ) & 0xff] ^ 832280304Sjkim (u32)Te4[(s0 >> 8) & 0xff] << 8 ^ 833280304Sjkim (u32)Te4[(s1 >> 16) & 0xff] << 16 ^ 834280304Sjkim (u32)Te4[(s2 >> 24) ] << 24 ^ 835280304Sjkim rk[3]; 836238384Sjkim#else 837280304Sjkim *(u32*)(out+0) = 838280304Sjkim (Te2[(s0 ) & 0xff] & 0x000000ffU) ^ 839280304Sjkim (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^ 840280304Sjkim (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^ 841280304Sjkim (Te1[(s3 >> 24) ] & 0xff000000U) ^ 842280304Sjkim rk[0]; 843280304Sjkim *(u32*)(out+4) = 844280304Sjkim (Te2[(s1 ) & 0xff] & 0x000000ffU) ^ 845280304Sjkim (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^ 846280304Sjkim (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^ 847280304Sjkim (Te1[(s0 >> 24) ] & 0xff000000U) ^ 848280304Sjkim rk[1]; 849280304Sjkim *(u32*)(out+8) = 850280304Sjkim (Te2[(s2 ) & 0xff] & 0x000000ffU) ^ 851280304Sjkim (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^ 852280304Sjkim (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^ 853280304Sjkim (Te1[(s1 >> 24) ] & 0xff000000U) ^ 854280304Sjkim rk[2]; 855280304Sjkim *(u32*)(out+12) = 856280304Sjkim (Te2[(s3 ) & 0xff] & 0x000000ffU) ^ 857280304Sjkim (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^ 858280304Sjkim (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^ 859280304Sjkim (Te1[(s2 >> 24) ] & 0xff000000U) ^ 860280304Sjkim rk[3]; 861238384Sjkim#endif 862238384Sjkim} 863238384Sjkim 864238384Sjkim/* 865238384Sjkim * Decrypt a single block 866238384Sjkim * in and out can overlap 867238384Sjkim */ 868238384Sjkimvoid AES_decrypt(const unsigned char *in, unsigned char *out, 869280304Sjkim const AES_KEY *key) 870280304Sjkim{ 871238384Sjkim 872280304Sjkim const u32 *rk; 873280304Sjkim u32 s0, s1, s2, s3, t[4]; 874280304Sjkim int r; 875238384Sjkim 876280304Sjkim assert(in && out && key); 877280304Sjkim rk = key->rd_key; 878238384Sjkim 879280304Sjkim /* 880280304Sjkim * map byte array block to cipher state 881280304Sjkim * and add initial round key: 882280304Sjkim */ 883280304Sjkim s0 = GETU32(in ) ^ rk[0]; 884280304Sjkim s1 = GETU32(in + 4) ^ rk[1]; 885280304Sjkim s2 = GETU32(in + 8) ^ rk[2]; 886280304Sjkim s3 = GETU32(in + 12) ^ rk[3]; 887238384Sjkim 888238384Sjkim#if defined(AES_COMPACT_IN_OUTER_ROUNDS) 889280304Sjkim prefetch256(Td4); 890238384Sjkim 891280304Sjkim t[0] = (u32)Td4[(s0 ) & 0xff] ^ 892280304Sjkim (u32)Td4[(s3 >> 8) & 0xff] << 8 ^ 893280304Sjkim (u32)Td4[(s2 >> 16) & 0xff] << 16 ^ 894280304Sjkim (u32)Td4[(s1 >> 24) ] << 24; 895280304Sjkim t[1] = (u32)Td4[(s1 ) & 0xff] ^ 896280304Sjkim (u32)Td4[(s0 >> 8) & 0xff] << 8 ^ 897280304Sjkim (u32)Td4[(s3 >> 16) & 0xff] << 16 ^ 898280304Sjkim (u32)Td4[(s2 >> 24) ] << 24; 899280304Sjkim t[2] = (u32)Td4[(s2 ) & 0xff] ^ 900280304Sjkim (u32)Td4[(s1 >> 8) & 0xff] << 8 ^ 901280304Sjkim (u32)Td4[(s0 >> 16) & 0xff] << 16 ^ 902280304Sjkim (u32)Td4[(s3 >> 24) ] << 24; 903280304Sjkim t[3] = (u32)Td4[(s3 ) & 0xff] ^ 904280304Sjkim (u32)Td4[(s2 >> 8) & 0xff] << 8 ^ 905280304Sjkim (u32)Td4[(s1 >> 16) & 0xff] << 16 ^ 906280304Sjkim (u32)Td4[(s0 >> 24) ] << 24; 907238384Sjkim 908280304Sjkim /* now do the linear transform using words */ 909280304Sjkim { 910280304Sjkim int i; 911280304Sjkim u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; 912238384Sjkim 913280304Sjkim for (i = 0; i < 4; i++) { 914280304Sjkim tp1 = t[i]; 915280304Sjkim m = tp1 & 0x80808080; 916280304Sjkim tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ 917280304Sjkim ((m - (m >> 7)) & 0x1b1b1b1b); 918280304Sjkim m = tp2 & 0x80808080; 919280304Sjkim tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^ 920280304Sjkim ((m - (m >> 7)) & 0x1b1b1b1b); 921280304Sjkim m = tp4 & 0x80808080; 922280304Sjkim tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^ 923280304Sjkim ((m - (m >> 7)) & 0x1b1b1b1b); 924280304Sjkim tp9 = tp8 ^ tp1; 925280304Sjkim tpb = tp9 ^ tp2; 926280304Sjkim tpd = tp9 ^ tp4; 927280304Sjkim tpe = tp8 ^ tp4 ^ tp2; 928238384Sjkim#if defined(ROTATE) 929280304Sjkim t[i] = tpe ^ ROTATE(tpd,16) ^ 930280304Sjkim ROTATE(tp9,8) ^ ROTATE(tpb,24); 931238384Sjkim#else 932280304Sjkim t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ 933280304Sjkim (tp9 >> 24) ^ (tp9 << 8) ^ 934280304Sjkim (tpb >> 8) ^ (tpb << 24); 935238384Sjkim#endif 936280304Sjkim t[i] ^= rk[4+i]; 937280304Sjkim } 938280304Sjkim } 939238384Sjkim#else 940280304Sjkim t[0] = Td0[(s0 ) & 0xff] ^ 941280304Sjkim Td1[(s3 >> 8) & 0xff] ^ 942280304Sjkim Td2[(s2 >> 16) & 0xff] ^ 943280304Sjkim Td3[(s1 >> 24) ] ^ 944280304Sjkim rk[4]; 945280304Sjkim t[1] = Td0[(s1 ) & 0xff] ^ 946280304Sjkim Td1[(s0 >> 8) & 0xff] ^ 947280304Sjkim Td2[(s3 >> 16) & 0xff] ^ 948280304Sjkim Td3[(s2 >> 24) ] ^ 949280304Sjkim rk[5]; 950280304Sjkim t[2] = Td0[(s2 ) & 0xff] ^ 951280304Sjkim Td1[(s1 >> 8) & 0xff] ^ 952280304Sjkim Td2[(s0 >> 16) & 0xff] ^ 953280304Sjkim Td3[(s3 >> 24) ] ^ 954280304Sjkim rk[6]; 955280304Sjkim t[3] = Td0[(s3 ) & 0xff] ^ 956280304Sjkim Td1[(s2 >> 8) & 0xff] ^ 957280304Sjkim Td2[(s1 >> 16) & 0xff] ^ 958280304Sjkim Td3[(s0 >> 24) ] ^ 959280304Sjkim rk[7]; 960238384Sjkim#endif 961280304Sjkim s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; 962238384Sjkim 963238384Sjkim /* 964238384Sjkim * Nr - 2 full rounds: 965238384Sjkim */ 966238384Sjkim for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) { 967238384Sjkim#if defined(AES_COMPACT_IN_INNER_ROUNDS) 968280304Sjkim t[0] = (u32)Td4[(s0 ) & 0xff] ^ 969280304Sjkim (u32)Td4[(s3 >> 8) & 0xff] << 8 ^ 970280304Sjkim (u32)Td4[(s2 >> 16) & 0xff] << 16 ^ 971280304Sjkim (u32)Td4[(s1 >> 24) ] << 24; 972280304Sjkim t[1] = (u32)Td4[(s1 ) & 0xff] ^ 973280304Sjkim (u32)Td4[(s0 >> 8) & 0xff] << 8 ^ 974280304Sjkim (u32)Td4[(s3 >> 16) & 0xff] << 16 ^ 975280304Sjkim (u32)Td4[(s2 >> 24) ] << 24; 976280304Sjkim t[2] = (u32)Td4[(s2 ) & 0xff] ^ 977280304Sjkim (u32)Td4[(s1 >> 8) & 0xff] << 8 ^ 978280304Sjkim (u32)Td4[(s0 >> 16) & 0xff] << 16 ^ 979280304Sjkim (u32)Td4[(s3 >> 24) ] << 24; 980280304Sjkim t[3] = (u32)Td4[(s3 ) & 0xff] ^ 981280304Sjkim (u32)Td4[(s2 >> 8) & 0xff] << 8 ^ 982280304Sjkim (u32)Td4[(s1 >> 16) & 0xff] << 16 ^ 983280304Sjkim (u32)Td4[(s0 >> 24) ] << 24; 984238384Sjkim 985280304Sjkim /* now do the linear transform using words */ 986280304Sjkim { 987280304Sjkim int i; 988280304Sjkim u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m; 989238384Sjkim 990280304Sjkim for (i = 0; i < 4; i++) { 991280304Sjkim tp1 = t[i]; 992280304Sjkim m = tp1 & 0x80808080; 993280304Sjkim tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^ 994280304Sjkim ((m - (m >> 7)) & 0x1b1b1b1b); 995280304Sjkim m = tp2 & 0x80808080; 996280304Sjkim tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^ 997280304Sjkim ((m - (m >> 7)) & 0x1b1b1b1b); 998280304Sjkim m = tp4 & 0x80808080; 999280304Sjkim tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^ 1000280304Sjkim ((m - (m >> 7)) & 0x1b1b1b1b); 1001280304Sjkim tp9 = tp8 ^ tp1; 1002280304Sjkim tpb = tp9 ^ tp2; 1003280304Sjkim tpd = tp9 ^ tp4; 1004280304Sjkim tpe = tp8 ^ tp4 ^ tp2; 1005238384Sjkim#if defined(ROTATE) 1006280304Sjkim t[i] = tpe ^ ROTATE(tpd,16) ^ 1007280304Sjkim ROTATE(tp9,8) ^ ROTATE(tpb,24); 1008238384Sjkim#else 1009280304Sjkim t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^ 1010280304Sjkim (tp9 >> 24) ^ (tp9 << 8) ^ 1011280304Sjkim (tpb >> 8) ^ (tpb << 24); 1012238384Sjkim#endif 1013280304Sjkim t[i] ^= rk[i]; 1014280304Sjkim } 1015280304Sjkim } 1016238384Sjkim#else 1017280304Sjkim t[0] = Td0[(s0 ) & 0xff] ^ 1018280304Sjkim Td1[(s3 >> 8) & 0xff] ^ 1019280304Sjkim Td2[(s2 >> 16) & 0xff] ^ 1020280304Sjkim Td3[(s1 >> 24) ] ^ 1021280304Sjkim rk[0]; 1022280304Sjkim t[1] = Td0[(s1 ) & 0xff] ^ 1023280304Sjkim Td1[(s0 >> 8) & 0xff] ^ 1024280304Sjkim Td2[(s3 >> 16) & 0xff] ^ 1025280304Sjkim Td3[(s2 >> 24) ] ^ 1026280304Sjkim rk[1]; 1027280304Sjkim t[2] = Td0[(s2 ) & 0xff] ^ 1028280304Sjkim Td1[(s1 >> 8) & 0xff] ^ 1029280304Sjkim Td2[(s0 >> 16) & 0xff] ^ 1030280304Sjkim Td3[(s3 >> 24) ] ^ 1031280304Sjkim rk[2]; 1032280304Sjkim t[3] = Td0[(s3 ) & 0xff] ^ 1033280304Sjkim Td1[(s2 >> 8) & 0xff] ^ 1034280304Sjkim Td2[(s1 >> 16) & 0xff] ^ 1035280304Sjkim Td3[(s0 >> 24) ] ^ 1036280304Sjkim rk[3]; 1037238384Sjkim#endif 1038280304Sjkim s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3]; 1039238384Sjkim } 1040238384Sjkim /* 1041280304Sjkim * apply last round and 1042280304Sjkim * map cipher state to byte array block: 1043280304Sjkim */ 1044280304Sjkim prefetch256(Td4); 1045238384Sjkim 1046280304Sjkim *(u32*)(out+0) = 1047280304Sjkim ((u32)Td4[(s0 ) & 0xff]) ^ 1048280304Sjkim ((u32)Td4[(s3 >> 8) & 0xff] << 8) ^ 1049280304Sjkim ((u32)Td4[(s2 >> 16) & 0xff] << 16) ^ 1050280304Sjkim ((u32)Td4[(s1 >> 24) ] << 24) ^ 1051280304Sjkim rk[0]; 1052280304Sjkim *(u32*)(out+4) = 1053280304Sjkim ((u32)Td4[(s1 ) & 0xff]) ^ 1054280304Sjkim ((u32)Td4[(s0 >> 8) & 0xff] << 8) ^ 1055280304Sjkim ((u32)Td4[(s3 >> 16) & 0xff] << 16) ^ 1056280304Sjkim ((u32)Td4[(s2 >> 24) ] << 24) ^ 1057280304Sjkim rk[1]; 1058280304Sjkim *(u32*)(out+8) = 1059280304Sjkim ((u32)Td4[(s2 ) & 0xff]) ^ 1060280304Sjkim ((u32)Td4[(s1 >> 8) & 0xff] << 8) ^ 1061280304Sjkim ((u32)Td4[(s0 >> 16) & 0xff] << 16) ^ 1062280304Sjkim ((u32)Td4[(s3 >> 24) ] << 24) ^ 1063280304Sjkim rk[2]; 1064280304Sjkim *(u32*)(out+12) = 1065280304Sjkim ((u32)Td4[(s3 ) & 0xff]) ^ 1066280304Sjkim ((u32)Td4[(s2 >> 8) & 0xff] << 8) ^ 1067280304Sjkim ((u32)Td4[(s1 >> 16) & 0xff] << 16) ^ 1068280304Sjkim ((u32)Td4[(s0 >> 24) ] << 24) ^ 1069280304Sjkim rk[3]; 1070238384Sjkim} 1071