1104476Ssam/*	$OpenBSD: skipjack.c,v 1.3 2001/05/05 00:31:34 angelos Exp $	*/
2139825Simp/*-
3104476Ssam * Further optimized test implementation of SKIPJACK algorithm
4104476Ssam * Mark Tillotson <markt@chaos.org.uk>, 25 June 98
5104476Ssam * Optimizations suit RISC (lots of registers) machine best.
6104476Ssam *
7104476Ssam * based on unoptimized implementation of
8104476Ssam * Panu Rissanen <bande@lut.fi> 960624
9104476Ssam *
10104476Ssam * SKIPJACK and KEA Algorithm Specifications
11104476Ssam * Version 2.0
12104476Ssam * 29 May 1998
13104476Ssam*/
14104476Ssam
15116191Sobrien#include <sys/cdefs.h>
16116191Sobrien__FBSDID("$FreeBSD$");
17116191Sobrien
18104476Ssam#include <sys/param.h>
19104476Ssam
20104476Ssam#include <opencrypto/skipjack.h>
21104476Ssam
22104476Ssamstatic const u_int8_t ftable[0x100] =
23104476Ssam{
24104476Ssam	0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4,
25104476Ssam	0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9,
26104476Ssam	0xe7, 0x2d, 0x4d, 0x8a, 0xce, 0x4c, 0xca, 0x2e,
27104476Ssam	0x52, 0x95, 0xd9, 0x1e, 0x4e, 0x38, 0x44, 0x28,
28104476Ssam	0x0a, 0xdf, 0x02, 0xa0, 0x17, 0xf1, 0x60, 0x68,
29104476Ssam	0x12, 0xb7, 0x7a, 0xc3, 0xe9, 0xfa, 0x3d, 0x53,
30104476Ssam	0x96, 0x84, 0x6b, 0xba, 0xf2, 0x63, 0x9a, 0x19,
31104476Ssam	0x7c, 0xae, 0xe5, 0xf5, 0xf7, 0x16, 0x6a, 0xa2,
32104476Ssam	0x39, 0xb6, 0x7b, 0x0f, 0xc1, 0x93, 0x81, 0x1b,
33104476Ssam	0xee, 0xb4, 0x1a, 0xea, 0xd0, 0x91, 0x2f, 0xb8,
34104476Ssam	0x55, 0xb9, 0xda, 0x85, 0x3f, 0x41, 0xbf, 0xe0,
35104476Ssam	0x5a, 0x58, 0x80, 0x5f, 0x66, 0x0b, 0xd8, 0x90,
36104476Ssam	0x35, 0xd5, 0xc0, 0xa7, 0x33, 0x06, 0x65, 0x69,
37104476Ssam	0x45, 0x00, 0x94, 0x56, 0x6d, 0x98, 0x9b, 0x76,
38104476Ssam	0x97, 0xfc, 0xb2, 0xc2, 0xb0, 0xfe, 0xdb, 0x20,
39104476Ssam	0xe1, 0xeb, 0xd6, 0xe4, 0xdd, 0x47, 0x4a, 0x1d,
40104476Ssam	0x42, 0xed, 0x9e, 0x6e, 0x49, 0x3c, 0xcd, 0x43,
41104476Ssam	0x27, 0xd2, 0x07, 0xd4, 0xde, 0xc7, 0x67, 0x18,
42104476Ssam	0x89, 0xcb, 0x30, 0x1f, 0x8d, 0xc6, 0x8f, 0xaa,
43104476Ssam	0xc8, 0x74, 0xdc, 0xc9, 0x5d, 0x5c, 0x31, 0xa4,
44104476Ssam	0x70, 0x88, 0x61, 0x2c, 0x9f, 0x0d, 0x2b, 0x87,
45104476Ssam	0x50, 0x82, 0x54, 0x64, 0x26, 0x7d, 0x03, 0x40,
46104476Ssam	0x34, 0x4b, 0x1c, 0x73, 0xd1, 0xc4, 0xfd, 0x3b,
47104476Ssam	0xcc, 0xfb, 0x7f, 0xab, 0xe6, 0x3e, 0x5b, 0xa5,
48104476Ssam	0xad, 0x04, 0x23, 0x9c, 0x14, 0x51, 0x22, 0xf0,
49104476Ssam	0x29, 0x79, 0x71, 0x7e, 0xff, 0x8c, 0x0e, 0xe2,
50104476Ssam	0x0c, 0xef, 0xbc, 0x72, 0x75, 0x6f, 0x37, 0xa1,
51104476Ssam	0xec, 0xd3, 0x8e, 0x62, 0x8b, 0x86, 0x10, 0xe8,
52104476Ssam	0x08, 0x77, 0x11, 0xbe, 0x92, 0x4f, 0x24, 0xc5,
53104476Ssam	0x32, 0x36, 0x9d, 0xcf, 0xf3, 0xa6, 0xbb, 0xac,
54104476Ssam	0x5e, 0x6c, 0xa9, 0x13, 0x57, 0x25, 0xb5, 0xe3,
55104476Ssam	0xbd, 0xa8, 0x3a, 0x01, 0x05, 0x59, 0x2a, 0x46
56104476Ssam};
57104476Ssam
58104476Ssam/*
59104476Ssam * For each key byte generate a table to represent the function
60104476Ssam *    ftable [in ^ keybyte]
61104476Ssam *
62104476Ssam * These tables used to save an XOR in each stage of the G-function
63104476Ssam * the tables are hopefully pointed to by register allocated variables
64104476Ssam * k0, k1..k9
65104476Ssam */
66116191Sobrien
67104476Ssamvoid
68104476Ssamsubkey_table_gen (u_int8_t *key, u_int8_t **key_tables)
69104476Ssam{
70104476Ssam	int i, k;
71104476Ssam
72104476Ssam	for (k = 0; k < 10; k++) {
73104476Ssam		u_int8_t   key_byte = key [k];
74104476Ssam		u_int8_t * table = key_tables[k];
75104476Ssam		for (i = 0; i < 0x100; i++)
76104476Ssam			table [i] = ftable [i ^ key_byte];
77104476Ssam	}
78104476Ssam}
79104476Ssam
80104476Ssam
81104476Ssam#define g(k0, k1, k2, k3, ih, il, oh, ol) \
82104476Ssam{ \
83104476Ssam	oh = k##k0 [il] ^ ih; \
84104476Ssam	ol = k##k1 [oh] ^ il; \
85104476Ssam	oh = k##k2 [ol] ^ oh; \
86104476Ssam	ol = k##k3 [oh] ^ ol; \
87104476Ssam}
88104476Ssam
89104476Ssam#define g0(ih, il, oh, ol) g(0, 1, 2, 3, ih, il, oh, ol)
90104476Ssam#define g4(ih, il, oh, ol) g(4, 5, 6, 7, ih, il, oh, ol)
91104476Ssam#define g8(ih, il, oh, ol) g(8, 9, 0, 1, ih, il, oh, ol)
92104476Ssam#define g2(ih, il, oh, ol) g(2, 3, 4, 5, ih, il, oh, ol)
93104476Ssam#define g6(ih, il, oh, ol) g(6, 7, 8, 9, ih, il, oh, ol)
94104476Ssam
95104476Ssam
96104476Ssam#define g_inv(k0, k1, k2, k3, ih, il, oh, ol) \
97104476Ssam{ \
98104476Ssam	ol = k##k3 [ih] ^ il; \
99104476Ssam	oh = k##k2 [ol] ^ ih; \
100104476Ssam	ol = k##k1 [oh] ^ ol; \
101104476Ssam	oh = k##k0 [ol] ^ oh; \
102104476Ssam}
103104476Ssam
104104476Ssam
105104476Ssam#define g0_inv(ih, il, oh, ol) g_inv(0, 1, 2, 3, ih, il, oh, ol)
106104476Ssam#define g4_inv(ih, il, oh, ol) g_inv(4, 5, 6, 7, ih, il, oh, ol)
107104476Ssam#define g8_inv(ih, il, oh, ol) g_inv(8, 9, 0, 1, ih, il, oh, ol)
108104476Ssam#define g2_inv(ih, il, oh, ol) g_inv(2, 3, 4, 5, ih, il, oh, ol)
109104476Ssam#define g6_inv(ih, il, oh, ol) g_inv(6, 7, 8, 9, ih, il, oh, ol)
110104476Ssam
111104476Ssam/* optimized version of Skipjack algorithm
112104476Ssam *
113104476Ssam * the appropriate g-function is inlined for each round
114104476Ssam *
115104476Ssam * the data movement is minimized by rotating the names of the
116104476Ssam * variables w1..w4, not their contents (saves 3 moves per round)
117104476Ssam *
118104476Ssam * the loops are completely unrolled (needed to staticize choice of g)
119104476Ssam *
120104476Ssam * compiles to about 470 instructions on a Sparc (gcc -O)
121104476Ssam * which is about 58 instructions per byte, 14 per round.
122104476Ssam * gcc seems to leave in some unnecessary and with 0xFF operations
123104476Ssam * but only in the latter part of the functions.  Perhaps it
124104476Ssam * runs out of resources to properly optimize long inlined function?
125104476Ssam * in theory should get about 11 instructions per round, not 14
126104476Ssam */
127104476Ssam
128104476Ssamvoid
129104476Ssamskipjack_forwards(u_int8_t *plain, u_int8_t *cipher, u_int8_t **key_tables)
130104476Ssam{
131104476Ssam	u_int8_t wh1 = plain[0];  u_int8_t wl1 = plain[1];
132104476Ssam	u_int8_t wh2 = plain[2];  u_int8_t wl2 = plain[3];
133104476Ssam	u_int8_t wh3 = plain[4];  u_int8_t wl3 = plain[5];
134104476Ssam	u_int8_t wh4 = plain[6];  u_int8_t wl4 = plain[7];
135104476Ssam
136104476Ssam	u_int8_t * k0 = key_tables [0];
137104476Ssam	u_int8_t * k1 = key_tables [1];
138104476Ssam	u_int8_t * k2 = key_tables [2];
139104476Ssam	u_int8_t * k3 = key_tables [3];
140104476Ssam	u_int8_t * k4 = key_tables [4];
141104476Ssam	u_int8_t * k5 = key_tables [5];
142104476Ssam	u_int8_t * k6 = key_tables [6];
143104476Ssam	u_int8_t * k7 = key_tables [7];
144104476Ssam	u_int8_t * k8 = key_tables [8];
145104476Ssam	u_int8_t * k9 = key_tables [9];
146104476Ssam
147104476Ssam	/* first 8 rounds */
148104476Ssam	g0 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 1; wh4 ^= wh1;
149104476Ssam	g4 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 2; wh3 ^= wh4;
150104476Ssam	g8 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 3; wh2 ^= wh3;
151104476Ssam	g2 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 4; wh1 ^= wh2;
152104476Ssam	g6 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 5; wh4 ^= wh1;
153104476Ssam	g0 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 6; wh3 ^= wh4;
154104476Ssam	g4 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 7; wh2 ^= wh3;
155104476Ssam	g8 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 8; wh1 ^= wh2;
156104476Ssam
157104476Ssam	/* second 8 rounds */
158104476Ssam	wh2 ^= wh1; wl2 ^= wl1 ^ 9 ; g2 (wh1,wl1, wh1,wl1);
159104476Ssam	wh1 ^= wh4; wl1 ^= wl4 ^ 10; g6 (wh4,wl4, wh4,wl4);
160104476Ssam	wh4 ^= wh3; wl4 ^= wl3 ^ 11; g0 (wh3,wl3, wh3,wl3);
161104476Ssam	wh3 ^= wh2; wl3 ^= wl2 ^ 12; g4 (wh2,wl2, wh2,wl2);
162104476Ssam	wh2 ^= wh1; wl2 ^= wl1 ^ 13; g8 (wh1,wl1, wh1,wl1);
163104476Ssam	wh1 ^= wh4; wl1 ^= wl4 ^ 14; g2 (wh4,wl4, wh4,wl4);
164104476Ssam	wh4 ^= wh3; wl4 ^= wl3 ^ 15; g6 (wh3,wl3, wh3,wl3);
165104476Ssam	wh3 ^= wh2; wl3 ^= wl2 ^ 16; g0 (wh2,wl2, wh2,wl2);
166104476Ssam
167104476Ssam	/* third 8 rounds */
168104476Ssam	g4 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 17; wh4 ^= wh1;
169104476Ssam	g8 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 18; wh3 ^= wh4;
170104476Ssam	g2 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 19; wh2 ^= wh3;
171104476Ssam	g6 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 20; wh1 ^= wh2;
172104476Ssam	g0 (wh1,wl1, wh1,wl1); wl4 ^= wl1 ^ 21; wh4 ^= wh1;
173104476Ssam	g4 (wh4,wl4, wh4,wl4); wl3 ^= wl4 ^ 22; wh3 ^= wh4;
174104476Ssam	g8 (wh3,wl3, wh3,wl3); wl2 ^= wl3 ^ 23; wh2 ^= wh3;
175104476Ssam	g2 (wh2,wl2, wh2,wl2); wl1 ^= wl2 ^ 24; wh1 ^= wh2;
176104476Ssam
177104476Ssam	/* last 8 rounds */
178104476Ssam	wh2 ^= wh1; wl2 ^= wl1 ^ 25; g6 (wh1,wl1, wh1,wl1);
179104476Ssam	wh1 ^= wh4; wl1 ^= wl4 ^ 26; g0 (wh4,wl4, wh4,wl4);
180104476Ssam	wh4 ^= wh3; wl4 ^= wl3 ^ 27; g4 (wh3,wl3, wh3,wl3);
181104476Ssam	wh3 ^= wh2; wl3 ^= wl2 ^ 28; g8 (wh2,wl2, wh2,wl2);
182104476Ssam	wh2 ^= wh1; wl2 ^= wl1 ^ 29; g2 (wh1,wl1, wh1,wl1);
183104476Ssam	wh1 ^= wh4; wl1 ^= wl4 ^ 30; g6 (wh4,wl4, wh4,wl4);
184104476Ssam	wh4 ^= wh3; wl4 ^= wl3 ^ 31; g0 (wh3,wl3, wh3,wl3);
185104476Ssam	wh3 ^= wh2; wl3 ^= wl2 ^ 32; g4 (wh2,wl2, wh2,wl2);
186104476Ssam
187104476Ssam	/* pack into byte vector */
188104476Ssam	cipher [0] = wh1;  cipher [1] = wl1;
189104476Ssam	cipher [2] = wh2;  cipher [3] = wl2;
190104476Ssam	cipher [4] = wh3;  cipher [5] = wl3;
191104476Ssam	cipher [6] = wh4;  cipher [7] = wl4;
192104476Ssam}
193104476Ssam
194104476Ssam
195104476Ssamvoid
196104476Ssamskipjack_backwards (u_int8_t *cipher, u_int8_t *plain, u_int8_t **key_tables)
197104476Ssam{
198104476Ssam	/* setup 4 16-bit portions */
199104476Ssam	u_int8_t wh1 = cipher[0];  u_int8_t wl1 = cipher[1];
200104476Ssam	u_int8_t wh2 = cipher[2];  u_int8_t wl2 = cipher[3];
201104476Ssam	u_int8_t wh3 = cipher[4];  u_int8_t wl3 = cipher[5];
202104476Ssam	u_int8_t wh4 = cipher[6];  u_int8_t wl4 = cipher[7];
203104476Ssam
204104476Ssam	u_int8_t * k0 = key_tables [0];
205104476Ssam	u_int8_t * k1 = key_tables [1];
206104476Ssam	u_int8_t * k2 = key_tables [2];
207104476Ssam	u_int8_t * k3 = key_tables [3];
208104476Ssam	u_int8_t * k4 = key_tables [4];
209104476Ssam	u_int8_t * k5 = key_tables [5];
210104476Ssam	u_int8_t * k6 = key_tables [6];
211104476Ssam	u_int8_t * k7 = key_tables [7];
212104476Ssam	u_int8_t * k8 = key_tables [8];
213104476Ssam	u_int8_t * k9 = key_tables [9];
214104476Ssam
215104476Ssam	/* first 8 rounds */
216104476Ssam	g4_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 32; wh3 ^= wh2;
217104476Ssam	g0_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 31; wh4 ^= wh3;
218104476Ssam	g6_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 30; wh1 ^= wh4;
219104476Ssam	g2_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 29; wh2 ^= wh1;
220104476Ssam	g8_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 28; wh3 ^= wh2;
221104476Ssam	g4_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 27; wh4 ^= wh3;
222104476Ssam	g0_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 26; wh1 ^= wh4;
223104476Ssam	g6_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 25; wh2 ^= wh1;
224104476Ssam
225104476Ssam	/* second 8 rounds */
226104476Ssam	wh1 ^= wh2; wl1 ^= wl2 ^ 24; g2_inv (wh2,wl2, wh2,wl2);
227104476Ssam	wh2 ^= wh3; wl2 ^= wl3 ^ 23; g8_inv (wh3,wl3, wh3,wl3);
228104476Ssam	wh3 ^= wh4; wl3 ^= wl4 ^ 22; g4_inv (wh4,wl4, wh4,wl4);
229104476Ssam	wh4 ^= wh1; wl4 ^= wl1 ^ 21; g0_inv (wh1,wl1, wh1,wl1);
230104476Ssam	wh1 ^= wh2; wl1 ^= wl2 ^ 20; g6_inv (wh2,wl2, wh2,wl2);
231104476Ssam	wh2 ^= wh3; wl2 ^= wl3 ^ 19; g2_inv (wh3,wl3, wh3,wl3);
232104476Ssam	wh3 ^= wh4; wl3 ^= wl4 ^ 18; g8_inv (wh4,wl4, wh4,wl4);
233104476Ssam	wh4 ^= wh1; wl4 ^= wl1 ^ 17; g4_inv (wh1,wl1, wh1,wl1);
234104476Ssam
235104476Ssam	/* third 8 rounds */
236104476Ssam	g0_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 16; wh3 ^= wh2;
237104476Ssam	g6_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 15; wh4 ^= wh3;
238104476Ssam	g2_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 14; wh1 ^= wh4;
239104476Ssam	g8_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 13; wh2 ^= wh1;
240104476Ssam	g4_inv (wh2,wl2, wh2,wl2); wl3 ^= wl2 ^ 12; wh3 ^= wh2;
241104476Ssam	g0_inv (wh3,wl3, wh3,wl3); wl4 ^= wl3 ^ 11; wh4 ^= wh3;
242104476Ssam	g6_inv (wh4,wl4, wh4,wl4); wl1 ^= wl4 ^ 10; wh1 ^= wh4;
243104476Ssam	g2_inv (wh1,wl1, wh1,wl1); wl2 ^= wl1 ^ 9;  wh2 ^= wh1;
244104476Ssam
245104476Ssam	/* last 8 rounds */
246104476Ssam	wh1 ^= wh2; wl1 ^= wl2 ^ 8; g8_inv (wh2,wl2, wh2,wl2);
247104476Ssam	wh2 ^= wh3; wl2 ^= wl3 ^ 7; g4_inv (wh3,wl3, wh3,wl3);
248104476Ssam	wh3 ^= wh4; wl3 ^= wl4 ^ 6; g0_inv (wh4,wl4, wh4,wl4);
249104476Ssam	wh4 ^= wh1; wl4 ^= wl1 ^ 5; g6_inv (wh1,wl1, wh1,wl1);
250104476Ssam	wh1 ^= wh2; wl1 ^= wl2 ^ 4; g2_inv (wh2,wl2, wh2,wl2);
251104476Ssam	wh2 ^= wh3; wl2 ^= wl3 ^ 3; g8_inv (wh3,wl3, wh3,wl3);
252104476Ssam	wh3 ^= wh4; wl3 ^= wl4 ^ 2; g4_inv (wh4,wl4, wh4,wl4);
253104476Ssam	wh4 ^= wh1; wl4 ^= wl1 ^ 1; g0_inv (wh1,wl1, wh1,wl1);
254104476Ssam
255104476Ssam	/* pack into byte vector */
256104476Ssam	plain [0] = wh1;  plain [1] = wl1;
257104476Ssam	plain [2] = wh2;  plain [3] = wl2;
258104476Ssam	plain [4] = wh3;  plain [5] = wl3;
259104476Ssam	plain [6] = wh4;  plain [7] = wl4;
260104476Ssam}
261