#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...

######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in an ABI-neutral
# manner. Therefore let's stick to the NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. The following coding rules
# facilitate interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is the register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# The first command-line argument selects the ABI flavour; it is
# consumed here so that the remaining @ARGV can be scanned for the
# output file name.
$flavour = shift; # supported flavours are o32,n32,64,nubi32,nubi64

# Pick pointer-arithmetic and register save/restore mnemonics plus the
# size of a saved register slot: 8 bytes for the 64-bit and n32
# flavours, 4 bytes otherwise.
if ($flavour =~ /64|n32/i) {
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
#
# <appro@openssl.org>
#
######################################################################

# When a compiler is named in $ENV{CC}, run the literal token MIPSEL
# through its preprocessor. If the token comes back unchanged the target
# is treated as big-endian (1); if it was replaced - presumably because
# little-endian MIPS compilers predefine MIPSEL - as little-endian (0).
$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});

# The last argument that looks like a plain "name.ext" file name is
# taken as the output file.
for (@ARGV) {	$output=$_ if (/^\w[\w\-]*\.\w+$/);   }
# Redirect STDOUT only when an output file was actually named, and fail
# loudly if it cannot be created. An unconditional, unchecked open would
# close STDOUT when no file is given and silently discard the output.
if (defined($output)) {
	open(STDOUT,">",$output) || die "can't open $output: $!";
}

# Without a compiler to ask, fall back to the byte order of the machine
# running this script.
if (!defined($big_endian))
            {   $big_endian=(unpack('L',pack('N',1))==1);   }

# offsets of the Most and Least Significant Bytes
$MSB=$big_endian?0:3;
$LSB=3&~$MSB;

# Register allocation for the generated routine.
#
# @X holds the 16-word message schedule window in registers $8..$23.
@X=map("\$$_",(8..23));	# a4-a7,s0-s11

# Incoming arguments: context pointer, input pointer, block count.
$ctx=$a0;
$inp=$a1;
$num=$a2;
# The five SHA-1 working variables; @V is rotated after every round so
# that the round bodies can be written with fixed roles.
$A="\$1";
$B="\$2";
$C="\$3";
$D="\$7";
$E="\$24";	@V=($A,$B,$C,$D,$E);
# Scratch registers. Note that $t0/$t1/$t2 are deliberately rebound
# here, replacing the generic NUBI temporaries assigned earlier.
$t0="\$25";
$t1=$num;	# $num is offloaded to stack
$t2="\$30";	# fp
$K="\$31";	# ra

# BODY_00_14($i,$a,$b,$c,$d,$e)
#
# Appends the assembly for SHA-1 round $i (the caller uses 0..14) to the
# global $code. $a..$e are the register names currently playing the
# roles of the five working variables.
#
# The emitted code computes
#	e += K + rol(a,5) + (((c^d)&b)^d) + X[i];  b = rol(b,30)
# and, interleaved with it, loads the next input word X[i+1] with an
# lwl/lwr pair so that unaligned input is handled. On little-endian
# targets X[i] is byte-swapped in place before use.
#
# Relies on the globals @X, $t0, $t1, $t2, $K, $inp, $MSB, $LSB and
# $big_endian.
sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___	if (!$big_endian);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	$t1,$t2
	or	@X[$i],$t1
___
$code.=<<___;
	 lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	 lwr	@X[$j],$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_15_19($i,$a,$b,$c,$d,$e)
#
# Appends the assembly for SHA-1 round $i (the caller uses 15..19) to
# the global $code. $a..$e are the register names currently playing the
# roles of the five working variables.
#
# The round function is the same as in rounds 0..14,
#	e += K + rol(a,5) + (((c^d)&b)^d) + X[i%16];  b = rol(b,30)
# but instead of loading input, the next schedule word is derived in
# place (indices mod 16, j = i+1):
#	X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1)
#
# On little-endian targets round 15 first byte-swaps X[15], the last
# word that was loaded from the input.
#
# Relies on the globals @X, $t0, $t1, $t2, $K and $big_endian.
sub BODY_15_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___	if (!$big_endian && $i==15);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
___
$code.=<<___;
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	and	$t0,$b
	 srl	$t1,@X[$j%16],31
	 addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	xor	$t0,$d
	 or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Appends the assembly for one "parity" SHA-1 round to the global $code;
# the caller uses it for rounds 20..39 and again for 60..79 with a
# different constant in $K. $a..$e are the register names currently
# playing the roles of the five working variables.
#
# The emitted round computes
#	e += K + rol(a,5) + (b^c^d) + X[i%16];  b = rol(b,30)
#
# For $i < 79 the next schedule word is also derived in place (indices
# mod 16, j = i+1):
#	X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1)
# For the final round ($i == 79) no further schedule word is needed, so
# the free slots are used to preload the five context words from $ctx
# into X[0..4] for the final accumulation. Nothing is emitted for
# $i > 79.
#
# Relies on the globals @X, $t0, $t1, $t2, $K and $ctx.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	xor	$t0,$b
	 srl	$t1,@X[$j%16],31
	 addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	 or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
___
$code.=<<___ if ($i==79);
	 lw	@X[0],0($ctx)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	 lw	@X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	 lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	 lw	@X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	 lw	@X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Appends the assembly for one "majority" SHA-1 round (the caller uses
# 40..59) to the global $code. $a..$e are the register names currently
# playing the roles of the five working variables.
#
# The emitted round computes
#	e += K + rol(a,5) + (c&d) + ((c^d)&b) + X[i%16];  b = rol(b,30)
# and derives the next schedule word in place (indices mod 16, j = i+1):
#	X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1)
#
# Nothing is emitted for $i >= 79.
#
# Relies on the globals @X, $t0, $t1, $t2 and $K.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	addu	$e,$t0
	 srl	$t1,@X[$j%16],31
	xor	$t0,$c,$d
	 addu	@X[$j%16],@X[$j%16]
	and	$t0,$b
	srl	$b,$b,2
	 or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# Stack frame: $FRAMESIZE register-sized slots. Slot 0 holds the
# end-of-input pointer, the top slots hold the saved registers.
$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
# Bitmask of saved registers for the .mask directive: $ra, $fp and
# $s11..$s4 always; the nubi flavours additionally save $s3..$s0 and $gp.
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;

# Function header and the register saves common to every flavour.
$code=<<___;
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif

.text

.set	noat
.set	noreorder
.align	5
.globl	sha1_block_data_order
.ent	sha1_block_data_order
sha1_block_data_order:
	.frame	$sp,$FRAMESIZE*$SZREG,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,$FRAMESIZE*$SZREG
	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_S	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_S	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_S	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_S	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_S	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_S	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_S	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_S	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_S	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
# Turn the block count into an end pointer ($inp + $num*64), park it on
# the stack so that $num's register can serve as scratch, load the
# chaining values from the context and start each block by loading the
# first input word and the constant for rounds 0..19.
$code.=<<___;
	$PTR_SLL $num,6
	$PTR_ADD $num,$inp
	$REG_S	$num,0($sp)
	lw	$A,0($ctx)
	lw	$B,4($ctx)
	lw	$C,8($ctx)
	lw	$D,12($ctx)
	b	.Loop
	lw	$E,16($ctx)
.align	4
.Loop:
	.set	reorder
	lwl	@X[0],$MSB($inp)
	lui	$K,0x5a82
	lwr	@X[0],$LSB($inp)
	ori	$K,0x7999	# K_00_19
___
# Emit the 80 rounds. After every round @V is rotated right by one so
# that the register that just received the new value takes the role of
# "a" in the next round, the old "a" becomes "b", and so on. $i is
# deliberately shared between the loops so that each one continues
# where the previous one stopped.
for ($i=0;$i<15;$i++)	{ BODY_00_14($i,@V); unshift(@V,pop(@V)); }
for (;$i<20;$i++)	{ BODY_15_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x6ed9
	ori	$K,0xeba1	# K_20_39
___
for (;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x8f1b
	ori	$K,0xbcdc	# K_40_59
___
for (;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0xca62
	ori	$K,0xc1d6	# K_60_79
___
# Rounds 60..79 use the same round function as 20..39, only with a
# different constant in $K.
for (;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# End of one block: advance the input pointer, reload the end pointer
# from the stack, add the context words (preloaded into X[0..4] by round
# 79) to the working variables, store the result back and loop while
# input remains. Then restore the saved registers and return.
$code.=<<___;
	$PTR_ADD $inp,64
	$REG_L	$num,0($sp)

	addu	$A,$X[0]
	addu	$B,$X[1]
	sw	$A,0($ctx)
	addu	$C,$X[2]
	addu	$D,$X[3]
	sw	$B,4($ctx)
	addu	$E,$X[4]
	sw	$C,8($ctx)
	sw	$D,12($ctx)
	sw	$E,16($ctx)
	.set	noreorder
	bne	$inp,$num,.Loop
	nop

	.set	noreorder
	$REG_L	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_L	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_L	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_L	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_L	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_L	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_L	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_L	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_L	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_L	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_L	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_L	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_L	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_L	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end	sha1_block_data_order
.rdata
.asciiz	"SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___
print $code;
# Buffered write errors (e.g. a full disk) only surface at close, so a
# failure here must abort the build instead of leaving a truncated file.
close STDOUT or die "error closing STDOUT: $!";