#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...

# Usage: perl sha1-mips.pl <flavour> [<output-file>]
#
# The first argument selects the ABI flavour. Any later argument that
# looks like "name.ext" is taken as the output file; when none is given
# the generated assembly goes to standard output.

use strict;

######################################################################
# There are a number of MIPS ABIs in use, O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
my ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
my ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
my ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
my ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. The following coding rules
# facilitate interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
# Supported flavours are o32,n32,64,nubi32,nubi64. A missing flavour
# falls back to "o32", i.e. the 32-bit pointer/register settings below.
my $flavour = shift || "o32";

# Pointer arithmetic and register save/restore mnemonics, plus the size
# in bytes of one saved register slot, for the selected flavour.
my ($PTR_ADD,$PTR_SUB,$REG_S,$REG_L,$PTR_SLL,$SZREG);

if ($flavour =~ /64|n32/i) {
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
#
# <appro@openssl.org>
#
######################################################################

# Ask the C preprocessor named by $ENV{CC} whether it defines MIPSEL:
# if the token comes back unchanged the macro is not defined and the
# target is treated as big-endian. Declared separately from the
# conditional assignment on purpose ("my $x = ... if ..." is unsafe).
my $big_endian;
$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});

# The last argument that looks like "name.ext" names the output file.
my $output;
for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }

# Redirect STDOUT only when an output file was actually requested; a
# failed re-open would otherwise close STDOUT and silently drop the
# generated code.
if (defined($output)) {
	open STDOUT,">",$output or die "can't open $output: $!";
}

# Without a compiler to ask, fall back to the byte order of the host
# running this script.
if (!defined($big_endian))
	{ $big_endian=(unpack('L',pack('N',1))==1); }

# offsets of the Most and Least Significant Bytes
my $MSB=$big_endian?0:3;
my $LSB=3&~$MSB;

my @X=map("\$$_",(8..23));	# a4-a7,s0-s11

my $ctx=$a0;
my $inp=$a1;
my $num=$a2;
my $A="\$1";
my $B="\$2";
my $C="\$3";
my $D="\$7";
my $E="\$24";	my @V=($A,$B,$C,$D,$E);
$t0="\$25";
$t1=$num;	# $num is offloaded to stack
$t2="\$30";	# fp
my $K="\$31";	# ra

# Accumulates the generated assembly; printed once at the very end.
my $code;

# BODY_00_14($i,$a,$b,$c,$d,$e)
#
# Appends round $i (0..14) to $code. On little-endian targets the
# already loaded word X[$i] is byte-swapped first. The next input word
# X[$i+1] is fetched with an lwl/lwr pair interleaved with the round:
#	e += K + rol(a,5) + X[i] + (((c^d)&b)^d);  b = rol(b,30)
sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___	if (!$big_endian);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	$t1,$t2
	or	@X[$i],$t1
___
$code.=<<___;
	 lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	 lwr	@X[$j],$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_15_19($i,$a,$b,$c,$d,$e)
#
# Appends round $i (15..19) to $code. Same round function as rounds
# 0..14, but instead of loading input it computes the next schedule
# word in place in the 16-entry ring @X:
#	X[j] = rol(X[j] ^ X[j+2] ^ X[j+8] ^ X[j+13], 1)	(indices mod 16)
# On little-endian targets the last loaded word, X[15], is byte-swapped
# at $i==15.
sub BODY_15_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___	if (!$big_endian && $i==15);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
___
$code.=<<___;
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	and	$t0,$b
	 srl	$t1,@X[$j%16],31
	 addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	xor	$t0,$d
	 or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Appends a round using the parity function b^c^d; the caller uses it
# for rounds 20..39 and again for rounds 60..79. For $i<79 the next
# schedule word is computed as in BODY_15_19. For the final round
# ($i==79) there is no next word, so the slots are used to load the
# five state words from $ctx into X[0..4] for the final accumulation.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	xor	$t0,$b
	 srl	$t1,@X[$j%16],31
	 addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	 or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
___
$code.=<<___ if ($i==79);
	 lw	@X[0],0($ctx)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	 lw	@X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	 lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	 lw	@X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	 lw	@X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Appends round $i (40..59) to $code. The round function is added in
# two parts, (c&d) and ((c^d)&b), and the next schedule word is
# computed as in BODY_15_19. The "$i<79" guard always holds for the
# rounds this is called with.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	addu	$e,$t0
	 srl	$t1,@X[$j%16],31
	xor	$t0,$c,$d
	 addu	@X[$j%16],@X[$j%16]
	and	$t0,$b
	srl	$b,$b,2
	 or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

my $FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;

# Function prologue: allocate the frame and save $ra, $fp and the
# callee-saved registers that the routine overwrites.
$code=<<___;
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif

.text

.set	noat
.set	noreorder
.align	5
.globl	sha1_block_data_order
.ent	sha1_block_data_order
sha1_block_data_order:
	.frame	$sp,$FRAMESIZE*$SZREG,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,$FRAMESIZE*$SZREG
	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_S	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_S	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_S	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_S	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_S	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_S	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_S	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_S	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_S	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
# Turn the block count into an end-of-input pointer (inp + num*64) and
# park it at 0($sp), because the $num register doubles as scratch $t1.
# Then load the five state words and start the per-block loop.
$code.=<<___;
	$PTR_SLL $num,6
	$PTR_ADD $num,$inp
	$REG_S	$num,0($sp)
	lw	$A,0($ctx)
	lw	$B,4($ctx)
	lw	$C,8($ctx)
	lw	$D,12($ctx)
	b	.Loop
	lw	$E,16($ctx)
.align	4
.Loop:
	.set	reorder
	lwl	@X[0],$MSB($inp)
	lui	$K,0x5a82
	lwr	@X[0],$LSB($inp)
	ori	$K,0x7999	# K_00_19
___
# Emit the 80 rounds. After every round the working-variable list is
# rotated by one so that the roles (a,b,c,d,e) shift without any
# register moves in the generated code. $i deliberately carries over
# from one loop to the next.
my $i;
for ($i=0;$i<15;$i++)	{ BODY_00_14($i,@V); unshift(@V,pop(@V)); }
for (;$i<20;$i++)	{ BODY_15_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x6ed9
	ori	$K,0xeba1	# K_20_39
___
for (;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x8f1b
	ori	$K,0xbcdc	# K_40_59
___
for (;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0xca62
	ori	$K,0xc1d6	# K_60_79
___
for (;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Loop tail: advance the input pointer, reload the end pointer, add the
# previous state (loaded into X[0..4] during round 79) to the working
# variables, store the result and loop until all input is consumed.
# Then restore the saved registers and return.
$code.=<<___;
	$PTR_ADD $inp,64
	$REG_L	$num,0($sp)

	addu	$A,$X[0]
	addu	$B,$X[1]
	sw	$A,0($ctx)
	addu	$C,$X[2]
	addu	$D,$X[3]
	sw	$B,4($ctx)
	addu	$E,$X[4]
	sw	$C,8($ctx)
	sw	$D,12($ctx)
	sw	$E,16($ctx)
	.set	noreorder
	bne	$inp,$num,.Loop
	nop

	.set	noreorder
	$REG_L	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_L	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_L	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_L	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_L	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_L	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_L	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_L	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_L	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_L	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_L	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_L	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_L	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_L	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end	sha1_block_data_order
.rdata
.asciiz	"SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___
print $code;
# Buffered write errors only surface at close; fail loudly so that a
# truncated assembly file is never handed to the assembler.
close STDOUT or die "error closing STDOUT: $!";