1104476Ssam/* $OpenBSD: skipjack.c,v 1.3 2001/05/05 00:31:34 angelos Exp $ */ 2139825Simp/*- 3104476Ssam * Further optimized test implementation of SKIPJACK algorithm 4104476Ssam * Mark Tillotson <markt@chaos.org.uk>, 25 June 98 5104476Ssam * Optimizations suit RISC (lots of registers) machine best. 6104476Ssam * 7104476Ssam * based on unoptimized implementation of 8104476Ssam * Panu Rissanen <bande@lut.fi> 960624 9104476Ssam * 10104476Ssam * SKIPJACK and KEA Algorithm Specifications 11104476Ssam * Version 2.0 12104476Ssam * 29 May 1998 13104476Ssam*/ 14104476Ssam 15116191Sobrien#include <sys/cdefs.h> 16116191Sobrien__FBSDID("$FreeBSD$"); 17116191Sobrien 18104476Ssam#include <sys/param.h> 19104476Ssam 20104476Ssam#include <opencrypto/skipjack.h> 21104476Ssam 22104476Ssamstatic const u_int8_t ftable[0x100] = 23104476Ssam{ 24104476Ssam 0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4, 25104476Ssam 0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9, 26104476Ssam 0xe7, 0x2d, 0x4d, 0x8a, 0xce, 0x4c, 0xca, 0x2e, 27104476Ssam 0x52, 0x95, 0xd9, 0x1e, 0x4e, 0x38, 0x44, 0x28, 28104476Ssam 0x0a, 0xdf, 0x02, 0xa0, 0x17, 0xf1, 0x60, 0x68, 29104476Ssam 0x12, 0xb7, 0x7a, 0xc3, 0xe9, 0xfa, 0x3d, 0x53, 30104476Ssam 0x96, 0x84, 0x6b, 0xba, 0xf2, 0x63, 0x9a, 0x19, 31104476Ssam 0x7c, 0xae, 0xe5, 0xf5, 0xf7, 0x16, 0x6a, 0xa2, 32104476Ssam 0x39, 0xb6, 0x7b, 0x0f, 0xc1, 0x93, 0x81, 0x1b, 33104476Ssam 0xee, 0xb4, 0x1a, 0xea, 0xd0, 0x91, 0x2f, 0xb8, 34104476Ssam 0x55, 0xb9, 0xda, 0x85, 0x3f, 0x41, 0xbf, 0xe0, 35104476Ssam 0x5a, 0x58, 0x80, 0x5f, 0x66, 0x0b, 0xd8, 0x90, 36104476Ssam 0x35, 0xd5, 0xc0, 0xa7, 0x33, 0x06, 0x65, 0x69, 37104476Ssam 0x45, 0x00, 0x94, 0x56, 0x6d, 0x98, 0x9b, 0x76, 38104476Ssam 0x97, 0xfc, 0xb2, 0xc2, 0xb0, 0xfe, 0xdb, 0x20, 39104476Ssam 0xe1, 0xeb, 0xd6, 0xe4, 0xdd, 0x47, 0x4a, 0x1d, 40104476Ssam 0x42, 0xed, 0x9e, 0x6e, 0x49, 0x3c, 0xcd, 0x43, 41104476Ssam 0x27, 0xd2, 0x07, 0xd4, 0xde, 0xc7, 0x67, 0x18, 42104476Ssam 0x89, 0xcb, 0x30, 0x1f, 0x8d, 0xc6, 0x8f, 0xaa, 43104476Ssam 0xc8, 0x74, 0xdc, 0xc9, 0x5d, 0x5c, 0x31, 0xa4, 44104476Ssam 0x70, 0x88, 0x61, 0x2c, 0x9f, 0x0d, 0x2b, 0x87, 45104476Ssam 0x50, 0x82, 0x54, 0x64, 0x26, 0x7d, 0x03, 0x40, 46104476Ssam 0x34, 0x4b, 0x1c, 0x73, 0xd1, 0xc4, 0xfd, 0x3b, 47104476Ssam 0xcc, 0xfb, 0x7f, 0xab, 0xe6, 0x3e, 0x5b, 0xa5, 48104476Ssam 0xad, 0x04, 0x23, 0x9c, 0x14, 0x51, 0x22, 0xf0, 49104476Ssam 0x29, 0x79, 0x71, 0x7e, 0xff, 0x8c, 0x0e, 0xe2, 50104476Ssam 0x0c, 0xef, 0xbc, 0x72, 0x75, 0x6f, 0x37, 0xa1, 51104476Ssam 0xec, 0xd3, 0x8e, 0x62, 0x8b, 0x86, 0x10, 0xe8, 52104476Ssam 0x08, 0x77, 0x11, 0xbe, 0x92, 0x4f, 0x24, 0xc5, 53104476Ssam 0x32, 0x36, 0x9d, 0xcf, 0xf3, 0xa6, 0xbb, 0xac, 54104476Ssam 0x5e, 0x6c, 0xa9, 0x13, 0x57, 0x25, 0xb5, 0xe3, 55104476Ssam 0xbd, 0xa8, 0x3a, 0x01, 0x05, 0x59, 0x2a, 0x46 56104476Ssam}; 57104476Ssam 58104476Ssam/* 59104476Ssam * For each key byte generate a table to represent the function 60104476Ssam * ftable [in ^ keybyte] 61104476Ssam * 62104476Ssam * These tables used to save an XOR in each stage of the G-function 63104476Ssam * the tables are hopefully pointed to by register allocated variables 64104476Ssam * k0, k1..k9 65104476Ssam */ 66116191Sobrien 67104476Ssamvoid 68104476Ssamsubkey_table_gen (u_int8_t *key, u_int8_t **key_tables) 69104476Ssam{ 70104476Ssam int i, k; 71104476Ssam 72104476Ssam for (k = 0; k < 10; k++) { 73104476Ssam u_int8_t key_byte = key [k]; 74104476Ssam u_int8_t * table = key_tables[k]; 75104476Ssam for (i = 0; i < 0x100; i++) 76104476Ssam table [i] = ftable [i ^ key_byte]; 77104476Ssam } 78104476Ssam} 79104476Ssam 80104476Ssam 81104476Ssam#define g(k0, k1, k2, k3, ih, il, oh, ol) \ 82104476Ssam{ \ 83104476Ssam oh = k##k0 [il] ^ ih; \ 84104476Ssam ol = k##k1 [oh] ^ il; \ 85104476Ssam oh = k##k2 [ol] ^ oh; \ 86104476Ssam ol = k##k3 [oh] ^ ol; \ 87104476Ssam} 88104476Ssam 89104476Ssam#define g0(ih, il, oh, ol) g(0, 1, 2, 3, ih, il, oh, ol) 90104476Ssam#define g4(ih, il, oh, ol) g(4, 5, 6, 7, ih, il, oh, ol) 91104476Ssam#define g8(ih, il, oh, ol) g(8, 9, 0, 1, ih, il, oh, ol) 92104476Ssam#define g2(ih, il, oh, ol) g(2, 3, 4, 5, ih, il, oh, ol) 93104476Ssam#define g6(ih, il, oh, ol) g(6, 7, 8, 9, ih, il, oh, ol) 94104476Ssam 95104476Ssam 96104476Ssam#define g_inv(k0, k1, k2, k3, ih, il, oh, ol) \ 97104476Ssam{ \ 98104476Ssam ol = k##k3 [ih] ^ il; \ 99104476Ssam oh = k##k2 [ol] ^ ih; \ 100104476Ssam ol = k##k1 [oh] ^ ol; \ 101104476Ssam oh = k##k0 [ol] ^ oh; \ 102104476Ssam} 103104476Ssam 104104476Ssam 105104476Ssam#define g0_inv(ih, il, oh, ol) g_inv(0, 1, 2, 3, ih, il, oh, ol) 106104476Ssam#define g4_inv(ih, il, oh, ol) g_inv(4, 5, 6, 7, ih, il, oh, ol) 107104476Ssam#define g8_inv(ih, il, oh, ol) g_inv(8, 9, 0, 1, ih, il, oh, ol) 108104476Ssam#define g2_inv(ih, il, oh, ol) g_inv(2, 3, 4, 5, ih, il, oh, ol) 109104476Ssam#define g6_inv(ih, il, oh, ol) g_inv(6, 7, 8, 9, ih, il, oh, ol) 110104476Ssam 111104476Ssam/* optimized version of Skipjack algorithm 112104476Ssam * 113104476Ssam * the appropriate g-function is inlined for each round 114104476Ssam * 115104476Ssam * the data movement is minimized by rotating the names of the 116104476Ssam * variables w1..w4, not their contents (saves 3 moves per round) 117104476Ssam * 118104476Ssam * the loops are completely unrolled (needed to staticize choice of g) 119104476Ssam * 120104476Ssam * compiles to about 470 instructions on a Sparc (gcc -O) 121104476Ssam * which is about 58 instructions per byte, 14 per round. 122104476Ssam * gcc seems to leave in some unnecessary and with 0xFF operations 123104476Ssam * but only in the latter part of the functions. Perhaps it 124104476Ssam * runs out of resources to properly optimize long inlined function? 125104476Ssam * in theory should get about 11 instructions per round, not 14 126104476Ssam */ 127104476Ssam 128104476Ssamvoid 129104476Ssamskipjack_forwards(u_int8_t *plain, u_int8_t *cipher, u_int8_t **key_tables) 130104476Ssam{ 131104476Ssam u_int8_t wh1 = plain[0]; u_int8_t wl1 = plain[1]; 132104476Ssam u_int8_t wh2 = plain[2]; u_int8_t wl2 = plain[3]; 133104476Ssam u_int8_t wh3 = plain[4]; u_int8_t wl3 = plain[5]; 134104476Ssam u_int8_t wh4 = plain[6]; u_int8_t wl4 = plain[7]; 135104476Ssam 136104476Ssam u_int8_t * k0 = key_tables [0]; 137104476Ssam u_int8_t * k1 = key_tables [1]; 138104476Ssam u_int8_t * k2 = key_tables [2]; 139104476Ssam u_int8_t * k3 = key_tables [3]; 140104476Ssam u_int8_t * k4 = key_tables [4]; 141104476Ssam u_int8_t * k5 = key_tables [5]; 142104476Ssam u_int8_t * k6 = key_tables [6]; 143104476Ssam u_int8_t * k7 = key_tables [7]; 144104476Ssam u_int8_t * k8 = key_tables [8]; 145104476Ssam u_int8_t * k9 = key_tables [9]; 146104476Ssam 147104476Ssam /* first 8 rounds */ 148104476Ssam g0 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 1; wh4 ^= wh1; 149104476Ssam g4 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 2; wh3 ^= wh4; 150104476Ssam g8 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 3; wh2 ^= wh3; 151104476Ssam g2 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 4; wh1 ^= wh2; 152104476Ssam g6 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 5; wh4 ^= wh1; 153104476Ssam g0 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 6; wh3 ^= wh4; 154104476Ssam g4 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 7; wh2 ^= wh3; 155104476Ssam g8 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 8; wh1 ^= wh2; 156104476Ssam 157104476Ssam /* second 8 rounds */ 158104476Ssam wh2 ^= wh1; wl2 ^= wl1 ^ 9 ; g2 (wh1,wl1, wh1,wl1); 159104476Ssam wh1 ^= wh4; wl1 ^= wl4 ^ 10; g6 (wh4,wl4, wh4,wl4); 160104476Ssam wh4 ^= wh3; wl4 ^= wl3 ^ 11; g0 (wh3,wl3, wh3,wl3); 161104476Ssam wh3 ^= wh2; wl3 ^= wl2 ^ 12; g4 (wh2,wl2, wh2,wl2); 162104476Ssam wh2 ^= wh1; wl2 ^= wl1 ^ 13; g8 (wh1,wl1, wh1,wl1); 163104476Ssam wh1 ^= wh4; wl1 ^= wl4 ^ 14; g2 (wh4,wl4, wh4,wl4); 164104476Ssam wh4 ^= wh3; wl4 ^= wl3 ^ 15; g6 (wh3,wl3, wh3,wl3); 165104476Ssam wh3 ^= wh2; wl3 ^= wl2 ^ 16; g0 (wh2,wl2, wh2,wl2); 166104476Ssam 167104476Ssam /* third 8 rounds */ 168104476Ssam g4 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 17; wh4 ^= wh1; 169104476Ssam g8 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 18; wh3 ^= wh4; 170104476Ssam g2 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 19; wh2 ^= wh3; 171104476Ssam g6 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 20; wh1 ^= wh2; 172104476Ssam g0 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 21; wh4 ^= wh1; 173104476Ssam g4 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 22; wh3 ^= wh4; 174104476Ssam g8 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 23; wh2 ^= wh3; 175104476Ssam g2 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 24; wh1 ^= wh2; 176104476Ssam 177104476Ssam /* last 8 rounds */ 178104476Ssam wh2 ^= wh1; wl2 ^= wl1 ^ 25; g6 (wh1,wl1, wh1,wl1); 179104476Ssam wh1 ^= wh4; wl1 ^= wl4 ^ 26; g0 (wh4,wl4, wh4,wl4); 180104476Ssam wh4 ^= wh3; wl4 ^= wl3 ^ 27; g4 (wh3,wl3, wh3,wl3); 181104476Ssam wh3 ^= wh2; wl3 ^= wl2 ^ 28; g8 (wh2,wl2, wh2,wl2); 182104476Ssam wh2 ^= wh1; wl2 ^= wl1 ^ 29; g2 (wh1,wl1, wh1,wl1); 183104476Ssam wh1 ^= wh4; wl1 ^= wl4 ^ 30; g6 (wh4,wl4, wh4,wl4); 184104476Ssam wh4 ^= wh3; wl4 ^= wl3 ^ 31; g0 (wh3,wl3, wh3,wl3); 185104476Ssam wh3 ^= wh2; wl3 ^= wl2 ^ 32; g4 (wh2,wl2, wh2,wl2); 186104476Ssam 187104476Ssam /* pack into byte vector */ 188104476Ssam cipher [0] = wh1; cipher [1] = wl1; 189104476Ssam cipher [2] = wh2; cipher [3] = wl2; 190104476Ssam cipher [4] = wh3; cipher [5] = wl3; 191104476Ssam cipher [6] = wh4; cipher [7] = wl4; 192104476Ssam} 193104476Ssam 194104476Ssam 195104476Ssamvoid 196104476Ssamskipjack_backwards (u_int8_t *cipher, u_int8_t *plain, u_int8_t **key_tables) 197104476Ssam{ 198104476Ssam /* setup 4 16-bit portions */ 199104476Ssam u_int8_t wh1 = cipher[0]; u_int8_t wl1 = cipher[1]; 200104476Ssam u_int8_t wh2 = cipher[2]; u_int8_t wl2 = cipher[3]; 201104476Ssam u_int8_t wh3 = cipher[4]; u_int8_t wl3 = cipher[5]; 202104476Ssam u_int8_t wh4 = cipher[6]; u_int8_t wl4 = cipher[7]; 203104476Ssam 204104476Ssam u_int8_t * k0 = key_tables [0]; 205104476Ssam u_int8_t * k1 = key_tables [1]; 206104476Ssam u_int8_t * k2 = key_tables [2]; 207104476Ssam u_int8_t * k3 = key_tables [3]; 208104476Ssam u_int8_t * k4 = key_tables [4]; 209104476Ssam u_int8_t * k5 = key_tables [5]; 210104476Ssam u_int8_t * k6 = key_tables [6]; 211104476Ssam u_int8_t * k7 = key_tables [7]; 212104476Ssam u_int8_t * k8 = key_tables [8]; 213104476Ssam u_int8_t * k9 = key_tables [9]; 214104476Ssam 215104476Ssam /* first 8 rounds */ 216104476Ssam g4_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 32; wh3 ^= wh2; 217104476Ssam g0_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 31; wh4 ^= wh3; 218104476Ssam g6_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 30; wh1 ^= wh4; 219104476Ssam g2_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 29; wh2 ^= wh1; 220104476Ssam g8_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 28; wh3 ^= wh2; 221104476Ssam g4_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 27; wh4 ^= wh3; 222104476Ssam g0_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 26; wh1 ^= wh4; 223104476Ssam g6_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 25; wh2 ^= wh1; 224104476Ssam 225104476Ssam /* second 8 rounds */ 226104476Ssam wh1 ^= wh2; wl1 ^= wl2 ^ 24; g2_inv (wh2,wl2, wh2,wl2); 227104476Ssam wh2 ^= wh3; wl2 ^= wl3 ^ 23; g8_inv (wh3,wl3, wh3,wl3); 228104476Ssam wh3 ^= wh4; wl3 ^= wl4 ^ 22; g4_inv (wh4,wl4, wh4,wl4); 229104476Ssam wh4 ^= wh1; wl4 ^= wl1 ^ 21; g0_inv (wh1,wl1, wh1,wl1); 230104476Ssam wh1 ^= wh2; wl1 ^= wl2 ^ 20; g6_inv (wh2,wl2, wh2,wl2); 231104476Ssam wh2 ^= wh3; wl2 ^= wl3 ^ 19; g2_inv (wh3,wl3, wh3,wl3); 232104476Ssam wh3 ^= wh4; wl3 ^= wl4 ^ 18; g8_inv (wh4,wl4, wh4,wl4); 233104476Ssam wh4 ^= wh1; wl4 ^= wl1 ^ 17; g4_inv (wh1,wl1, wh1,wl1); 234104476Ssam 235104476Ssam /* third 8 rounds */ 236104476Ssam g0_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 16; wh3 ^= wh2; 237104476Ssam g6_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 15; wh4 ^= wh3; 238104476Ssam g2_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 14; wh1 ^= wh4; 239104476Ssam g8_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 13; wh2 ^= wh1; 240104476Ssam g4_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 12; wh3 ^= wh2; 241104476Ssam g0_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 11; wh4 ^= wh3; 242104476Ssam g6_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 10; wh1 ^= wh4; 243104476Ssam g2_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 9; wh2 ^= wh1; 244104476Ssam 245104476Ssam /* last 8 rounds */ 246104476Ssam wh1 ^= wh2; wl1 ^= wl2 ^ 8; g8_inv (wh2,wl2, wh2,wl2); 247104476Ssam wh2 ^= wh3; wl2 ^= wl3 ^ 7; g4_inv (wh3,wl3, wh3,wl3); 248104476Ssam wh3 ^= wh4; wl3 ^= wl4 ^ 6; g0_inv (wh4,wl4, wh4,wl4); 249104476Ssam wh4 ^= wh1; wl4 ^= wl1 ^ 5; g6_inv (wh1,wl1, wh1,wl1); 250104476Ssam wh1 ^= wh2; wl1 ^= wl2 ^ 4; g2_inv (wh2,wl2, wh2,wl2); 251104476Ssam wh2 ^= wh3; wl2 ^= wl3 ^ 3; g8_inv (wh3,wl3, wh3,wl3); 252104476Ssam wh3 ^= wh4; wl3 ^= wl4 ^ 2; g4_inv (wh4,wl4, wh4,wl4); 253104476Ssam wh4 ^= wh1; wl4 ^= wl1 ^ 1; g0_inv (wh1,wl1, wh1,wl1); 254104476Ssam 255104476Ssam /* pack into byte vector */ 256104476Ssam plain [0] = wh1; plain [1] = wl1; 257104476Ssam plain [2] = wh2; plain [3] = wl2; 258104476Ssam plain [4] = wh3; plain [5] = wl3; 259104476Ssam plain [6] = wh4; plain [7] = wl4; 260104476Ssam} 261