.ident "s390x.S, version 1.1"
// ====================================================================
// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
// project.
//
// Rights for redistribution and usage in source and binary forms are
// granted according to the OpenSSL license. Warranty of any kind is
// disclaimed.
// ====================================================================
//
// Bignum word primitives for 64-bit s390x. The file is run through the
// C preprocessor (note the #define lines below). Arguments arrive in
// %r2..%r5 and the result is returned in %r2. Routines that use %r6 and
// above save them at 48(%r15) on entry and reload them before return.

.text

#define zero	%r0

// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
//
// rp[i] += ap[i]*w for i in [0,len), returns the final carry word.
//   %r2 = rp (moved to %r1, %r2 then becomes the byte offset i)
//   %r3 = ap, %r4 = len, %r5 = w
// The carry word alternates between %r8 and %r6, the carry bit lives in
// the condition code across iterations. Only instructions that leave
// the condition code alone (la, brct, lg, stg, lgr, mlgr) are placed
// between the add-with-carry steps.
.globl	bn_mul_add_words
.type	bn_mul_add_words,@function
.align	4
bn_mul_add_words:
	lghi	zero,0		// zero = 0
	la	%r1,0(%r2)	// put rp aside
	lghi	%r2,0		// i=0;
	ltgfr	%r4,%r4
	bler	%r14		// if (len<=0) return 0;

	stmg	%r6,%r10,48(%r15)
	lghi	%r10,3
	lghi	%r8,0		// carry = 0
	nr	%r10,%r4	// len%4
	sra	%r4,2		// cnt=len/4
	jz	.Loop1_madd	// carry is incidentally cleared if branch taken
	algr	zero,zero	// clear carry

.Loop4_madd:
	lg	%r7,0(%r2,%r3)	// ap[i]
	mlgr	%r6,%r5		// *=w
	alcgr	%r7,%r8		// +=carry
	alcgr	%r6,zero
	alg	%r7,0(%r2,%r1)	// +=rp[i]
	stg	%r7,0(%r2,%r1)	// rp[i]=

	lg	%r9,8(%r2,%r3)
	mlgr	%r8,%r5
	alcgr	%r9,%r6
	alcgr	%r8,zero
	alg	%r9,8(%r2,%r1)
	stg	%r9,8(%r2,%r1)

	lg	%r7,16(%r2,%r3)
	mlgr	%r6,%r5
	alcgr	%r7,%r8
	alcgr	%r6,zero
	alg	%r7,16(%r2,%r1)
	stg	%r7,16(%r2,%r1)

	lg	%r9,24(%r2,%r3)
	mlgr	%r8,%r5
	alcgr	%r9,%r6
	alcgr	%r8,zero
	alg	%r9,24(%r2,%r1)
	stg	%r9,24(%r2,%r1)

	la	%r2,32(%r2)	// i+=4
	brct	%r4,.Loop4_madd

	la	%r10,1(%r10)		// see if len%4 is zero ...
	brct	%r10,.Loop1_madd	// without touching condition code:-)

.Lend_madd:
	alcgr	%r8,zero	// collect carry bit
	lgr	%r2,%r8
	lmg	%r6,%r10,48(%r15)
	br	%r14

.Loop1_madd:
	lg	%r7,0(%r2,%r3)	// ap[i]
	mlgr	%r6,%r5		// *=w
	alcgr	%r7,%r8		// +=carry
	alcgr	%r6,zero
	alg	%r7,0(%r2,%r1)	// +=rp[i]
	stg	%r7,0(%r2,%r1)	// rp[i]=

	lgr	%r8,%r6		// high word becomes the next carry word
	la	%r2,8(%r2)	// i++
	brct	%r10,.Loop1_madd

	j	.Lend_madd
.size	bn_mul_add_words,.-bn_mul_add_words

// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
//
// rp[i] = ap[i]*w + carry for i in [0,len), returns the final carry
// word. Same register layout and loop structure as bn_mul_add_words,
// without the addition of the previous rp[i].
.globl	bn_mul_words
.type	bn_mul_words,@function
.align	4
bn_mul_words:
	lghi	zero,0		// zero = 0
	la	%r1,0(%r2)	// put rp aside
	lghi	%r2,0		// i=0;
	ltgfr	%r4,%r4
	bler	%r14		// if (len<=0) return 0;

	stmg	%r6,%r10,48(%r15)
	lghi	%r10,3
	lghi	%r8,0		// carry = 0
	nr	%r10,%r4	// len%4
	sra	%r4,2		// cnt=len/4
	jz	.Loop1_mul	// carry is incidentally cleared if branch taken
	algr	zero,zero	// clear carry

.Loop4_mul:
	lg	%r7,0(%r2,%r3)	// ap[i]
	mlgr	%r6,%r5		// *=w
	alcgr	%r7,%r8		// +=carry
	stg	%r7,0(%r2,%r1)	// rp[i]=

	lg	%r9,8(%r2,%r3)
	mlgr	%r8,%r5
	alcgr	%r9,%r6
	stg	%r9,8(%r2,%r1)

	lg	%r7,16(%r2,%r3)
	mlgr	%r6,%r5
	alcgr	%r7,%r8
	stg	%r7,16(%r2,%r1)

	lg	%r9,24(%r2,%r3)
	mlgr	%r8,%r5
	alcgr	%r9,%r6
	stg	%r9,24(%r2,%r1)

	la	%r2,32(%r2)	// i+=4
	brct	%r4,.Loop4_mul

	la	%r10,1(%r10)		// see if len%4 is zero ...
	brct	%r10,.Loop1_mul		// without touching condition code:-)

.Lend_mul:
	alcgr	%r8,zero	// collect carry bit
	lgr	%r2,%r8
	lmg	%r6,%r10,48(%r15)
	br	%r14

.Loop1_mul:
	lg	%r7,0(%r2,%r3)	// ap[i]
	mlgr	%r6,%r5		// *=w
	alcgr	%r7,%r8		// +=carry
	stg	%r7,0(%r2,%r1)	// rp[i]=

	lgr	%r8,%r6		// high word becomes the next carry word
	la	%r2,8(%r2)	// i++
	brct	%r10,.Loop1_mul

	j	.Lend_mul
.size	bn_mul_words,.-bn_mul_words

// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r3,int r4)
//
// For each of the len input words at %r3, stores the 128-bit square as
// two words at %r2 (low word first). Both pointers are advanced in
// place, %r1 counts groups of four, %r4 ends up counting the remainder.
.globl	bn_sqr_words
.type	bn_sqr_words,@function
.align	4
bn_sqr_words:
	ltgfr	%r4,%r4
	bler	%r14		// if (len<=0) return;

	stmg	%r6,%r7,48(%r15)
	srag	%r1,%r4,2	// cnt=len/4
	jz	.Loop1_sqr

.Loop4_sqr:
	lg	%r7,0(%r3)
	mlgr	%r6,%r7
	stg	%r7,0(%r2)
	stg	%r6,8(%r2)

	lg	%r7,8(%r3)
	mlgr	%r6,%r7
	stg	%r7,16(%r2)
	stg	%r6,24(%r2)

	lg	%r7,16(%r3)
	mlgr	%r6,%r7
	stg	%r7,32(%r2)
	stg	%r6,40(%r2)

	lg	%r7,24(%r3)
	mlgr	%r6,%r7
	stg	%r7,48(%r2)
	stg	%r6,56(%r2)

	la	%r3,32(%r3)
	la	%r2,64(%r2)
	brct	%r1,.Loop4_sqr

	lghi	%r1,3
	nr	%r4,%r1		// cnt=len%4
	jz	.Lend_sqr

.Loop1_sqr:
	lg	%r7,0(%r3)
	mlgr	%r6,%r7
	stg	%r7,0(%r2)
	stg	%r6,8(%r2)

	la	%r3,8(%r3)
	la	%r2,16(%r2)
	brct	%r4,.Loop1_sqr

.Lend_sqr:
	lmg	%r6,%r7,48(%r15)
	br	%r14
.size	bn_sqr_words,.-bn_sqr_words

// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
//
// Divides the 128-bit value h:l held in the %r2:%r3 pair by d (%r4)
// and returns the quotient, which dlgr leaves in %r3.
.globl	bn_div_words
.type	bn_div_words,@function
.align	4
bn_div_words:
	dlgr	%r2,%r4
	lgr	%r2,%r3
	br	%r14
.size	bn_div_words,.-bn_div_words

// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
//
// rp[i] = ap[i] + bp[i] with carry propagation for i in [0,len),
// returns the final carry (0 or 1).
//   %r2 = rp (moved to %r1, %r2 then becomes the byte offset i)
//   %r3 = ap, %r4 = bp, %r5 = len, %r6 = len%4 (saved and restored)
// The carry lives in the condition code for the whole loop.
.globl	bn_add_words
.type	bn_add_words,@function
.align	4
bn_add_words:
	la	%r1,0(%r2)	// put rp aside
	lghi	%r2,0		// i=0
	ltgfr	%r5,%r5
	bler	%r14		// if (len<=0) return 0;

	stg	%r6,48(%r15)
	lghi	%r6,3
	nr	%r6,%r5		// len%4
	sra	%r5,2		// len/4, use sra because it sets condition code
	jz	.Loop1_add	// carry is incidentally cleared if branch taken
	algr	%r2,%r2		// clear carry

.Loop4_add:
	lg	%r0,0(%r2,%r3)
	alcg	%r0,0(%r2,%r4)
	stg	%r0,0(%r2,%r1)
	lg	%r0,8(%r2,%r3)
	alcg	%r0,8(%r2,%r4)
	stg	%r0,8(%r2,%r1)
	lg	%r0,16(%r2,%r3)
	alcg	%r0,16(%r2,%r4)
	stg	%r0,16(%r2,%r1)
	lg	%r0,24(%r2,%r3)
	alcg	%r0,24(%r2,%r4)
	stg	%r0,24(%r2,%r1)

	la	%r2,32(%r2)	// i+=4
	brct	%r5,.Loop4_add

	la	%r6,1(%r6)	// see if len%4 is zero ...
	brct	%r6,.Loop1_add	// without touching condition code:-)

.Lexit_add:
	lghi	%r2,0
	alcgr	%r2,%r2		// return value = carry bit
	lg	%r6,48(%r15)
	br	%r14

.Loop1_add:
	lg	%r0,0(%r2,%r3)
	alcg	%r0,0(%r2,%r4)
	stg	%r0,0(%r2,%r1)

	la	%r2,8(%r2)	// i++
	brct	%r6,.Loop1_add

	j	.Lexit_add
.size	bn_add_words,.-bn_add_words

// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
//
// rp[i] = ap[i] - bp[i] with borrow propagation for i in [0,len),
// returns the final borrow (0 or 1). Register layout matches
// bn_add_words, the borrow lives in the condition code.
.globl	bn_sub_words
.type	bn_sub_words,@function
.align	4
bn_sub_words:
	la	%r1,0(%r2)	// put rp aside
	lghi	%r2,0		// i=0
	ltgfr	%r5,%r5
	bler	%r14		// if (len<=0) return 0;

	stg	%r6,48(%r15)
	lghi	%r6,3
	nr	%r6,%r5		// len%4
	sra	%r5,2		// len/4, use sra because it sets condition code
	jnz	.Loop4_sub	// borrow is incidentally cleared if branch taken
	slgr	%r2,%r2		// clear borrow

.Loop1_sub:
	lg	%r0,0(%r2,%r3)
	slbg	%r0,0(%r2,%r4)
	stg	%r0,0(%r2,%r1)

	la	%r2,8(%r2)	// i++
	brct	%r6,.Loop1_sub
	j	.Lexit_sub

.Loop4_sub:
	lg	%r0,0(%r2,%r3)
	slbg	%r0,0(%r2,%r4)
	stg	%r0,0(%r2,%r1)
	lg	%r0,8(%r2,%r3)
	slbg	%r0,8(%r2,%r4)
	stg	%r0,8(%r2,%r1)
	lg	%r0,16(%r2,%r3)
	slbg	%r0,16(%r2,%r4)
	stg	%r0,16(%r2,%r1)
	lg	%r0,24(%r2,%r3)
	slbg	%r0,24(%r2,%r4)
	stg	%r0,24(%r2,%r1)

	la	%r2,32(%r2)	// i+=4
	brct	%r5,.Loop4_sub

	la	%r6,1(%r6)	// see if len%4 is zero ...
	brct	%r6,.Loop1_sub	// without touching condition code:-)

.Lexit_sub:
	lghi	%r2,0
	slbgr	%r2,%r2		// 0 - 0 - borrow
	lcgr	%r2,%r2		// negate, so the return value is 0 or 1
	lg	%r6,48(%r15)
	br	%r14
.size	bn_sub_words,.-bn_sub_words

// Three-word column accumulator used by the comba routines. The three
// names are rotated by the callers after each result word is stored.
#define c1	%r1
#define c2	%r5
#define c3	%r8

// mul_add_c(ai,bi,c1,c2,c3): (c3,c2,c1) += a[ai]*b[bi]
// with a at %r3 and b at %r4. Uses %r6:%r7 for the product and relies
// on the zero register (%r0) holding 0.
#define mul_add_c(ai,bi,c1,c2,c3)	\
	lg	%r7,ai*8(%r3);		\
	mlg	%r6,bi*8(%r4);		\
	algr	c1,%r7;			\
	alcgr	c2,%r6;			\
	alcgr	c3,zero

// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
//
// r[0..15] = a[0..7] * b[0..7], computed column by column.
//   %r2 = r, %r3 = a, %r4 = b
.globl	bn_mul_comba8
.type	bn_mul_comba8,@function
.align	4
bn_mul_comba8:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	mul_add_c(0,0,c1,c2,c3);
	stg	c1,0*8(%r2)
	lghi	c1,0

	mul_add_c(0,1,c2,c3,c1);
	mul_add_c(1,0,c2,c3,c1);
	stg	c2,1*8(%r2)
	lghi	c2,0

	mul_add_c(2,0,c3,c1,c2);
	mul_add_c(1,1,c3,c1,c2);
	mul_add_c(0,2,c3,c1,c2);
	stg	c3,2*8(%r2)
	lghi	c3,0

	mul_add_c(0,3,c1,c2,c3);
	mul_add_c(1,2,c1,c2,c3);
	mul_add_c(2,1,c1,c2,c3);
	mul_add_c(3,0,c1,c2,c3);
	stg	c1,3*8(%r2)
	lghi	c1,0

	mul_add_c(4,0,c2,c3,c1);
	mul_add_c(3,1,c2,c3,c1);
	mul_add_c(2,2,c2,c3,c1);
	mul_add_c(1,3,c2,c3,c1);
	mul_add_c(0,4,c2,c3,c1);
	stg	c2,4*8(%r2)
	lghi	c2,0

	mul_add_c(0,5,c3,c1,c2);
	mul_add_c(1,4,c3,c1,c2);
	mul_add_c(2,3,c3,c1,c2);
	mul_add_c(3,2,c3,c1,c2);
	mul_add_c(4,1,c3,c1,c2);
	mul_add_c(5,0,c3,c1,c2);
	stg	c3,5*8(%r2)
	lghi	c3,0

	mul_add_c(6,0,c1,c2,c3);
	mul_add_c(5,1,c1,c2,c3);
	mul_add_c(4,2,c1,c2,c3);
	mul_add_c(3,3,c1,c2,c3);
	mul_add_c(2,4,c1,c2,c3);
	mul_add_c(1,5,c1,c2,c3);
	mul_add_c(0,6,c1,c2,c3);
	stg	c1,6*8(%r2)
	lghi	c1,0

	mul_add_c(0,7,c2,c3,c1);
	mul_add_c(1,6,c2,c3,c1);
	mul_add_c(2,5,c2,c3,c1);
	mul_add_c(3,4,c2,c3,c1);
	mul_add_c(4,3,c2,c3,c1);
	mul_add_c(5,2,c2,c3,c1);
	mul_add_c(6,1,c2,c3,c1);
	mul_add_c(7,0,c2,c3,c1);
	stg	c2,7*8(%r2)
	lghi	c2,0

	mul_add_c(7,1,c3,c1,c2);
	mul_add_c(6,2,c3,c1,c2);
	mul_add_c(5,3,c3,c1,c2);
	mul_add_c(4,4,c3,c1,c2);
	mul_add_c(3,5,c3,c1,c2);
	mul_add_c(2,6,c3,c1,c2);
	mul_add_c(1,7,c3,c1,c2);
	stg	c3,8*8(%r2)
	lghi	c3,0

	mul_add_c(2,7,c1,c2,c3);
	mul_add_c(3,6,c1,c2,c3);
	mul_add_c(4,5,c1,c2,c3);
	mul_add_c(5,4,c1,c2,c3);
	mul_add_c(6,3,c1,c2,c3);
	mul_add_c(7,2,c1,c2,c3);
	stg	c1,9*8(%r2)
	lghi	c1,0

	mul_add_c(7,3,c2,c3,c1);
	mul_add_c(6,4,c2,c3,c1);
	mul_add_c(5,5,c2,c3,c1);
	mul_add_c(4,6,c2,c3,c1);
	mul_add_c(3,7,c2,c3,c1);
	stg	c2,10*8(%r2)
	lghi	c2,0

	mul_add_c(4,7,c3,c1,c2);
	mul_add_c(5,6,c3,c1,c2);
	mul_add_c(6,5,c3,c1,c2);
	mul_add_c(7,4,c3,c1,c2);
	stg	c3,11*8(%r2)
	lghi	c3,0

	mul_add_c(7,5,c1,c2,c3);
	mul_add_c(6,6,c1,c2,c3);
	mul_add_c(5,7,c1,c2,c3);
	stg	c1,12*8(%r2)
	lghi	c1,0


	mul_add_c(6,7,c2,c3,c1);
	mul_add_c(7,6,c2,c3,c1);
	stg	c2,13*8(%r2)
	lghi	c2,0

	mul_add_c(7,7,c3,c1,c2);
	stg	c3,14*8(%r2)
	stg	c1,15*8(%r2)

	lmg	%r6,%r8,48(%r15)
	br	%r14
.size	bn_mul_comba8,.-bn_mul_comba8

// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
//
// r[0..7] = a[0..3] * b[0..3], computed column by column.
//   %r2 = r, %r3 = a, %r4 = b
.globl	bn_mul_comba4
.type	bn_mul_comba4,@function
.align	4
bn_mul_comba4:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	mul_add_c(0,0,c1,c2,c3);
	stg	c1,0*8(%r2)	// r[0] goes to the result array, not to a[]
	lghi	c1,0

	mul_add_c(0,1,c2,c3,c1);
	mul_add_c(1,0,c2,c3,c1);
	stg	c2,1*8(%r2)
	lghi	c2,0

	mul_add_c(2,0,c3,c1,c2);
	mul_add_c(1,1,c3,c1,c2);
	mul_add_c(0,2,c3,c1,c2);
	stg	c3,2*8(%r2)
	lghi	c3,0

	mul_add_c(0,3,c1,c2,c3);
	mul_add_c(1,2,c1,c2,c3);
	mul_add_c(2,1,c1,c2,c3);
	mul_add_c(3,0,c1,c2,c3);
	stg	c1,3*8(%r2)
	lghi	c1,0

	mul_add_c(3,1,c2,c3,c1);
	mul_add_c(2,2,c2,c3,c1);
	mul_add_c(1,3,c2,c3,c1);
	stg	c2,4*8(%r2)
	lghi	c2,0

	mul_add_c(2,3,c3,c1,c2);
	mul_add_c(3,2,c3,c1,c2);
	stg	c3,5*8(%r2)
	lghi	c3,0

	mul_add_c(3,3,c1,c2,c3);
	stg	c1,6*8(%r2)
	stg	c2,7*8(%r2)

	lmg	%r6,%r8,48(%r15)	// reload the registers saved on entry
	br	%r14
.size	bn_mul_comba4,.-bn_mul_comba4

// sqr_add_c(ai,c1,c2,c3): (c3,c2,c1) += a[ai]*a[ai], with a at %r3.
#define sqr_add_c(ai,c1,c2,c3)		\
	lg	%r7,ai*8(%r3);		\
	mlgr	%r6,%r7;		\
	algr	c1,%r7;			\
	alcgr	c2,%r6;			\
	alcgr	c3,zero

// sqr_add_c2(ai,aj,c1,c2,c3): (c3,c2,c1) += 2*a[ai]*a[aj]. The product
// is computed once and added to the accumulator twice.
#define sqr_add_c2(ai,aj,c1,c2,c3)	\
	lg	%r7,ai*8(%r3);		\
	mlg	%r6,aj*8(%r3);		\
	algr	c1,%r7;			\
	alcgr	c2,%r6;			\
	alcgr	c3,zero;		\
	algr	c1,%r7;			\
	alcgr	c2,%r6;			\
	alcgr	c3,zero

// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
//
// r[0..15] = a[0..7] squared, computed column by column.
//   %r2 = r, %r3 = a
.globl	bn_sqr_comba8
.type	bn_sqr_comba8,@function
.align	4
bn_sqr_comba8:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	sqr_add_c(0,c1,c2,c3);
	stg	c1,0*8(%r2)
	lghi	c1,0

	sqr_add_c2(1,0,c2,c3,c1);
	stg	c2,1*8(%r2)
	lghi	c2,0

	sqr_add_c(1,c3,c1,c2);
	sqr_add_c2(2,0,c3,c1,c2);
	stg	c3,2*8(%r2)
	lghi	c3,0

	sqr_add_c2(3,0,c1,c2,c3);
	sqr_add_c2(2,1,c1,c2,c3);
	stg	c1,3*8(%r2)
	lghi	c1,0

	sqr_add_c(2,c2,c3,c1);
	sqr_add_c2(3,1,c2,c3,c1);
	sqr_add_c2(4,0,c2,c3,c1);
	stg	c2,4*8(%r2)
	lghi	c2,0

	sqr_add_c2(5,0,c3,c1,c2);
	sqr_add_c2(4,1,c3,c1,c2);
	sqr_add_c2(3,2,c3,c1,c2);
	stg	c3,5*8(%r2)
	lghi	c3,0

	sqr_add_c(3,c1,c2,c3);
	sqr_add_c2(4,2,c1,c2,c3);
	sqr_add_c2(5,1,c1,c2,c3);
	sqr_add_c2(6,0,c1,c2,c3);
	stg	c1,6*8(%r2)
	lghi	c1,0

	sqr_add_c2(7,0,c2,c3,c1);
	sqr_add_c2(6,1,c2,c3,c1);
	sqr_add_c2(5,2,c2,c3,c1);
	sqr_add_c2(4,3,c2,c3,c1);
	stg	c2,7*8(%r2)
	lghi	c2,0

	sqr_add_c(4,c3,c1,c2);
	sqr_add_c2(5,3,c3,c1,c2);
	sqr_add_c2(6,2,c3,c1,c2);
	sqr_add_c2(7,1,c3,c1,c2);
	stg	c3,8*8(%r2)
	lghi	c3,0

	sqr_add_c2(7,2,c1,c2,c3);
	sqr_add_c2(6,3,c1,c2,c3);
	sqr_add_c2(5,4,c1,c2,c3);
	stg	c1,9*8(%r2)
	lghi	c1,0

	sqr_add_c(5,c2,c3,c1);
	sqr_add_c2(6,4,c2,c3,c1);
	sqr_add_c2(7,3,c2,c3,c1);
	stg	c2,10*8(%r2)
	lghi	c2,0

	sqr_add_c2(7,4,c3,c1,c2);
	sqr_add_c2(6,5,c3,c1,c2);
	stg	c3,11*8(%r2)
	lghi	c3,0

	sqr_add_c(6,c1,c2,c3);
	sqr_add_c2(7,5,c1,c2,c3);
	stg	c1,12*8(%r2)
	lghi	c1,0

	sqr_add_c2(7,6,c2,c3,c1);
	stg	c2,13*8(%r2)
	lghi	c2,0

	sqr_add_c(7,c3,c1,c2);
	stg	c3,14*8(%r2)
	stg	c1,15*8(%r2)

	lmg	%r6,%r8,48(%r15)
	br	%r14
.size	bn_sqr_comba8,.-bn_sqr_comba8

// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
//
// r[0..7] = a[0..3] squared, computed column by column.
//   %r2 = r, %r3 = a
.globl	bn_sqr_comba4
.type	bn_sqr_comba4,@function
.align	4
bn_sqr_comba4:
	stmg	%r6,%r8,48(%r15)

	lghi	c1,0
	lghi	c2,0
	lghi	c3,0
	lghi	zero,0

	sqr_add_c(0,c1,c2,c3);
	stg	c1,0*8(%r2)
	lghi	c1,0

	sqr_add_c2(1,0,c2,c3,c1);
	stg	c2,1*8(%r2)
	lghi	c2,0

	sqr_add_c(1,c3,c1,c2);
	sqr_add_c2(2,0,c3,c1,c2);
	stg	c3,2*8(%r2)
	lghi	c3,0

	sqr_add_c2(3,0,c1,c2,c3);
	sqr_add_c2(2,1,c1,c2,c3);
	stg	c1,3*8(%r2)
	lghi	c1,0

	sqr_add_c(2,c2,c3,c1);
	sqr_add_c2(3,1,c2,c3,c1);
	stg	c2,4*8(%r2)
	lghi	c2,0

	sqr_add_c2(3,2,c3,c1,c2);
	stg	c3,5*8(%r2)
	lghi	c3,0

	sqr_add_c(3,c1,c2,c3);
	stg	c1,6*8(%r2)
	stg	c2,7*8(%r2)

	lmg	%r6,%r8,48(%r15)
	br	%r14
.size	bn_sqr_comba4,.-bn_sqr_comba4