155714Skris/* crypto/rc4/rc4_enc.c */
255714Skris/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
355714Skris * All rights reserved.
455714Skris *
555714Skris * This package is an SSL implementation written
655714Skris * by Eric Young (eay@cryptsoft.com).
755714Skris * The implementation was written so as to conform with Netscapes SSL.
855714Skris *
955714Skris * This library is free for commercial and non-commercial use as long as
1055714Skris * the following conditions are aheared to.  The following conditions
1155714Skris * apply to all code found in this distribution, be it the RC4, RSA,
1255714Skris * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
1355714Skris * included with this distribution is covered by the same copyright terms
1455714Skris * except that the holder is Tim Hudson (tjh@cryptsoft.com).
1555714Skris *
1655714Skris * Copyright remains Eric Young's, and as such any Copyright notices in
1755714Skris * the code are not to be removed.
1855714Skris * If this package is used in a product, Eric Young should be given attribution
1955714Skris * as the author of the parts of the library used.
2055714Skris * This can be in the form of a textual message at program startup or
2155714Skris * in documentation (online or textual) provided with the package.
2255714Skris *
2355714Skris * Redistribution and use in source and binary forms, with or without
2455714Skris * modification, are permitted provided that the following conditions
2555714Skris * are met:
2655714Skris * 1. Redistributions of source code must retain the copyright
2755714Skris *    notice, this list of conditions and the following disclaimer.
2855714Skris * 2. Redistributions in binary form must reproduce the above copyright
2955714Skris *    notice, this list of conditions and the following disclaimer in the
3055714Skris *    documentation and/or other materials provided with the distribution.
3155714Skris * 3. All advertising materials mentioning features or use of this software
3255714Skris *    must display the following acknowledgement:
3355714Skris *    "This product includes cryptographic software written by
3455714Skris *     Eric Young (eay@cryptsoft.com)"
3555714Skris *    The word 'cryptographic' can be left out if the rouines from the library
3655714Skris *    being used are not cryptographic related :-).
3755714Skris * 4. If you include any Windows specific code (or a derivative thereof) from
3855714Skris *    the apps directory (application code) you must include an acknowledgement:
3955714Skris *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
4055714Skris *
4155714Skris * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
4255714Skris * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4355714Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4455714Skris * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
4555714Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4655714Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4755714Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4855714Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4955714Skris * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
5055714Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5155714Skris * SUCH DAMAGE.
5255714Skris *
5355714Skris * The licence and distribution terms for any publically available version or
5455714Skris * derivative of this code cannot be changed.  i.e. this code cannot simply be
5555714Skris * copied and put under another distribution licence
5655714Skris * [including the GNU Public Licence.]
5755714Skris */
5855714Skris
5955714Skris#include <openssl/rc4.h>
6055714Skris#include "rc4_locl.h"
6155714Skris
6255714Skris/* RC4 as implemented from a posting from
6355714Skris * Newsgroups: sci.crypt
6455714Skris * From: sterndark@netcom.com (David Sterndark)
6555714Skris * Subject: RC4 Algorithm revealed.
6655714Skris * Message-ID: <sternCvKL4B.Hyy@netcom.com>
6755714Skris * Date: Wed, 14 Sep 1994 06:35:31 GMT
6855714Skris */
6955714Skris
70238405Sjkimvoid RC4(RC4_KEY *key, size_t len, const unsigned char *indata,
7155714Skris	     unsigned char *outdata)
7255714Skris	{
7355714Skris        register RC4_INT *d;
7455714Skris        register RC4_INT x,y,tx,ty;
75238405Sjkim	size_t i;
7655714Skris
7755714Skris        x=key->x;
7855714Skris        y=key->y;
7955714Skris        d=key->data;
8055714Skris
8159191Skris#if defined(RC4_CHUNK)
8259191Skris	/*
8359191Skris	 * The original reason for implementing this(*) was the fact that
8459191Skris	 * pre-21164a Alpha CPUs don't have byte load/store instructions
8559191Skris	 * and e.g. a byte store has to be done with 64-bit load, shift,
8659191Skris	 * and, or and finally 64-bit store. Peaking data and operating
8759191Skris	 * at natural word size made it possible to reduce amount of
8859191Skris	 * instructions as well as to perform early read-ahead without
8959191Skris	 * suffering from RAW (read-after-write) hazard. This resulted
9059191Skris	 * in ~40%(**) performance improvement on 21064 box with gcc.
9159191Skris	 * But it's not only Alpha users who win here:-) Thanks to the
9259191Skris	 * early-n-wide read-ahead this implementation also exhibits
9359191Skris	 * >40% speed-up on SPARC and 20-30% on 64-bit MIPS (depending
9459191Skris	 * on sizeof(RC4_INT)).
9559191Skris	 *
9659191Skris	 * (*)	"this" means code which recognizes the case when input
9759191Skris	 *	and output pointers appear to be aligned at natural CPU
9859191Skris	 *	word boundary
9959191Skris	 * (**)	i.e. according to 'apps/openssl speed rc4' benchmark,
10059191Skris	 *	crypto/rc4/rc4speed.c exhibits almost 70% speed-up...
10159191Skris	 *
10259191Skris	 * Cavets.
10359191Skris	 *
10459191Skris	 * - RC4_CHUNK="unsigned long long" should be a #1 choice for
10559191Skris	 *   UltraSPARC. Unfortunately gcc generates very slow code
10659191Skris	 *   (2.5-3 times slower than one generated by Sun's WorkShop
10759191Skris	 *   C) and therefore gcc (at least 2.95 and earlier) should
10859191Skris	 *   always be told that RC4_CHUNK="unsigned long".
10959191Skris	 *
11059191Skris	 *					<appro@fy.chalmers.se>
11159191Skris	 */
11259191Skris
11359191Skris# define RC4_STEP	( \
11459191Skris			x=(x+1) &0xff,	\
11559191Skris			tx=d[x],	\
11659191Skris			y=(tx+y)&0xff,	\
11759191Skris			ty=d[y],	\
11859191Skris			d[y]=tx,	\
11959191Skris			d[x]=ty,	\
12059191Skris			(RC4_CHUNK)d[(tx+ty)&0xff]\
12159191Skris			)
12259191Skris
123238405Sjkim	if ( ( ((size_t)indata  & (sizeof(RC4_CHUNK)-1)) |
124238405Sjkim	       ((size_t)outdata & (sizeof(RC4_CHUNK)-1)) ) == 0 )
12559191Skris		{
12659191Skris		RC4_CHUNK ichunk,otp;
12759191Skris		const union { long one; char little; } is_endian = {1};
12859191Skris
12959191Skris		/*
13059191Skris		 * I reckon we can afford to implement both endian
13159191Skris		 * cases and to decide which way to take at run-time
13259191Skris		 * because the machine code appears to be very compact
13359191Skris		 * and redundant 1-2KB is perfectly tolerable (i.e.
13459191Skris		 * in case the compiler fails to eliminate it:-). By
13559191Skris		 * suggestion from Terrel Larson <terr@terralogic.net>
13659191Skris		 * who also stands for the is_endian union:-)
13759191Skris		 *
13859191Skris		 * Special notes.
13959191Skris		 *
14059191Skris		 * - is_endian is declared automatic as doing otherwise
14159191Skris		 *   (declaring static) prevents gcc from eliminating
14259191Skris		 *   the redundant code;
14359191Skris		 * - compilers (those I've tried) don't seem to have
14459191Skris		 *   problems eliminating either the operators guarded
14559191Skris		 *   by "if (sizeof(RC4_CHUNK)==8)" or the condition
14659191Skris		 *   expressions themselves so I've got 'em to replace
14759191Skris		 *   corresponding #ifdefs from the previous version;
14859191Skris		 * - I chose to let the redundant switch cases when
14959191Skris		 *   sizeof(RC4_CHUNK)!=8 be (were also #ifdefed
15059191Skris		 *   before);
15159191Skris		 * - in case you wonder "&(sizeof(RC4_CHUNK)*8-1)" in
15259191Skris		 *   [LB]ESHFT guards against "shift is out of range"
15359191Skris		 *   warnings when sizeof(RC4_CHUNK)!=8
15459191Skris		 *
15559191Skris		 *			<appro@fy.chalmers.se>
15659191Skris		 */
15759191Skris		if (!is_endian.little)
15859191Skris			{	/* BIG-ENDIAN CASE */
15959191Skris# define BESHFT(c)	(((sizeof(RC4_CHUNK)-(c)-1)*8)&(sizeof(RC4_CHUNK)*8-1))
160238405Sjkim			for (;len&(0-sizeof(RC4_CHUNK));len-=sizeof(RC4_CHUNK))
16159191Skris				{
16259191Skris				ichunk  = *(RC4_CHUNK *)indata;
16359191Skris				otp  = RC4_STEP<<BESHFT(0);
16459191Skris				otp |= RC4_STEP<<BESHFT(1);
16559191Skris				otp |= RC4_STEP<<BESHFT(2);
16659191Skris				otp |= RC4_STEP<<BESHFT(3);
16759191Skris				if (sizeof(RC4_CHUNK)==8)
16859191Skris					{
16959191Skris					otp |= RC4_STEP<<BESHFT(4);
17059191Skris					otp |= RC4_STEP<<BESHFT(5);
17159191Skris					otp |= RC4_STEP<<BESHFT(6);
17259191Skris					otp |= RC4_STEP<<BESHFT(7);
17359191Skris					}
17459191Skris				*(RC4_CHUNK *)outdata = otp^ichunk;
17559191Skris				indata  += sizeof(RC4_CHUNK);
17659191Skris				outdata += sizeof(RC4_CHUNK);
17759191Skris				}
17859191Skris			if (len)
17959191Skris				{
18059191Skris				RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk;
18159191Skris
18259191Skris				ichunk = *(RC4_CHUNK *)indata;
18359191Skris				ochunk = *(RC4_CHUNK *)outdata;
18459191Skris				otp = 0;
18559191Skris				i = BESHFT(0);
18659191Skris				mask <<= (sizeof(RC4_CHUNK)-len)<<3;
18759191Skris				switch (len&(sizeof(RC4_CHUNK)-1))
18859191Skris					{
18959191Skris					case 7:	otp  = RC4_STEP<<i, i-=8;
19059191Skris					case 6:	otp |= RC4_STEP<<i, i-=8;
19159191Skris					case 5:	otp |= RC4_STEP<<i, i-=8;
19259191Skris					case 4:	otp |= RC4_STEP<<i, i-=8;
19359191Skris					case 3:	otp |= RC4_STEP<<i, i-=8;
19459191Skris					case 2:	otp |= RC4_STEP<<i, i-=8;
19559191Skris					case 1:	otp |= RC4_STEP<<i, i-=8;
19659191Skris					case 0: ; /*
19759191Skris						   * it's never the case,
19859191Skris						   * but it has to be here
19959191Skris						   * for ultrix?
20059191Skris						   */
20159191Skris					}
20259191Skris				ochunk &= ~mask;
20359191Skris				ochunk |= (otp^ichunk) & mask;
20459191Skris				*(RC4_CHUNK *)outdata = ochunk;
20559191Skris				}
20659191Skris			key->x=x;
20759191Skris			key->y=y;
20859191Skris			return;
20959191Skris			}
21059191Skris		else
21159191Skris			{	/* LITTLE-ENDIAN CASE */
21259191Skris# define LESHFT(c)	(((c)*8)&(sizeof(RC4_CHUNK)*8-1))
213238405Sjkim			for (;len&(0-sizeof(RC4_CHUNK));len-=sizeof(RC4_CHUNK))
21459191Skris				{
21559191Skris				ichunk  = *(RC4_CHUNK *)indata;
21659191Skris				otp  = RC4_STEP;
21759191Skris				otp |= RC4_STEP<<8;
21859191Skris				otp |= RC4_STEP<<16;
21959191Skris				otp |= RC4_STEP<<24;
22059191Skris				if (sizeof(RC4_CHUNK)==8)
22159191Skris					{
22259191Skris					otp |= RC4_STEP<<LESHFT(4);
22359191Skris					otp |= RC4_STEP<<LESHFT(5);
22459191Skris					otp |= RC4_STEP<<LESHFT(6);
22559191Skris					otp |= RC4_STEP<<LESHFT(7);
22659191Skris					}
22759191Skris				*(RC4_CHUNK *)outdata = otp^ichunk;
22859191Skris				indata  += sizeof(RC4_CHUNK);
22959191Skris				outdata += sizeof(RC4_CHUNK);
23059191Skris				}
23159191Skris			if (len)
23259191Skris				{
23359191Skris				RC4_CHUNK mask=(RC4_CHUNK)-1, ochunk;
23459191Skris
23559191Skris				ichunk = *(RC4_CHUNK *)indata;
23659191Skris				ochunk = *(RC4_CHUNK *)outdata;
23759191Skris				otp = 0;
23859191Skris				i   = 0;
23959191Skris				mask >>= (sizeof(RC4_CHUNK)-len)<<3;
24059191Skris				switch (len&(sizeof(RC4_CHUNK)-1))
24159191Skris					{
24259191Skris					case 7:	otp  = RC4_STEP,    i+=8;
24359191Skris					case 6:	otp |= RC4_STEP<<i, i+=8;
24459191Skris					case 5:	otp |= RC4_STEP<<i, i+=8;
24559191Skris					case 4:	otp |= RC4_STEP<<i, i+=8;
24659191Skris					case 3:	otp |= RC4_STEP<<i, i+=8;
24759191Skris					case 2:	otp |= RC4_STEP<<i, i+=8;
24859191Skris					case 1:	otp |= RC4_STEP<<i, i+=8;
24959191Skris					case 0: ; /*
25059191Skris						   * it's never the case,
25159191Skris						   * but it has to be here
25259191Skris						   * for ultrix?
25359191Skris						   */
25459191Skris					}
25559191Skris				ochunk &= ~mask;
25659191Skris				ochunk |= (otp^ichunk) & mask;
25759191Skris				*(RC4_CHUNK *)outdata = ochunk;
25859191Skris				}
25959191Skris			key->x=x;
26059191Skris			key->y=y;
26159191Skris			return;
26259191Skris			}
26359191Skris		}
26459191Skris#endif
26555714Skris#define LOOP(in,out) \
26655714Skris		x=((x+1)&0xff); \
26755714Skris		tx=d[x]; \
26855714Skris		y=(tx+y)&0xff; \
26955714Skris		d[x]=ty=d[y]; \
27055714Skris		d[y]=tx; \
27155714Skris		(out) = d[(tx+ty)&0xff]^ (in);
27255714Skris
27355714Skris#ifndef RC4_INDEX
27455714Skris#define RC4_LOOP(a,b,i)	LOOP(*((a)++),*((b)++))
27555714Skris#else
27655714Skris#define RC4_LOOP(a,b,i)	LOOP(a[i],b[i])
27755714Skris#endif
27855714Skris
279238405Sjkim	i=len>>3;
28055714Skris	if (i)
28155714Skris		{
28255714Skris		for (;;)
28355714Skris			{
28455714Skris			RC4_LOOP(indata,outdata,0);
28555714Skris			RC4_LOOP(indata,outdata,1);
28655714Skris			RC4_LOOP(indata,outdata,2);
28755714Skris			RC4_LOOP(indata,outdata,3);
28855714Skris			RC4_LOOP(indata,outdata,4);
28955714Skris			RC4_LOOP(indata,outdata,5);
29055714Skris			RC4_LOOP(indata,outdata,6);
29155714Skris			RC4_LOOP(indata,outdata,7);
29255714Skris#ifdef RC4_INDEX
29355714Skris			indata+=8;
29455714Skris			outdata+=8;
29555714Skris#endif
29655714Skris			if (--i == 0) break;
29755714Skris			}
29855714Skris		}
299238405Sjkim	i=len&0x07;
30055714Skris	if (i)
30155714Skris		{
30255714Skris		for (;;)
30355714Skris			{
30455714Skris			RC4_LOOP(indata,outdata,0); if (--i == 0) break;
30555714Skris			RC4_LOOP(indata,outdata,1); if (--i == 0) break;
30655714Skris			RC4_LOOP(indata,outdata,2); if (--i == 0) break;
30755714Skris			RC4_LOOP(indata,outdata,3); if (--i == 0) break;
30855714Skris			RC4_LOOP(indata,outdata,4); if (--i == 0) break;
30955714Skris			RC4_LOOP(indata,outdata,5); if (--i == 0) break;
31055714Skris			RC4_LOOP(indata,outdata,6); if (--i == 0) break;
31155714Skris			}
31255714Skris		}
31355714Skris	key->x=x;
31455714Skris	key->y=y;
31555714Skris	}
316