# $FreeBSD$
#
# Carry-less (GF(2)[x]) multiplication primitives for 32-bit x86.
# Syntax: GNU as, source-then-destination operand order, ELF directives.
#
# Contents:
#   _mul_1x1_mmx    - 32x32 to 64-bit product, MMX accumulator (file-local)
#   _mul_1x1_ialu   - 32x32 to 64-bit product, integer ALU only (file-local)
#   bn_GF2m_mul_2x2 - exported 64x64 to 128-bit product built on the above
#
# The instruction order inside each routine is hand-interleaved; keep it
# intact when editing.
.file	"x86-gf2m.s"
.text

#-----------------------------------------------------------------------
# _mul_1x1_mmx
#   Private helper, register interface (not a stack-argument function).
#   In:    %eax = a, %ebx = b
#   Out:   %mm0 = 64-bit carry-less product of a and b
#   Clobb: %ebx, %ecx, %edx, %ebp, %esi, %edi, %mm1-%mm5, flags
#   Stack: 36 bytes reserved and released; the first 32 hold tab[0..7]
#   Note:  leaves the MMX state live; the caller issues emms.
#
#   Method: with a1 = a and 0x3fffffff, tab[i] is the carry-less product
#   of the 3-bit value i with a1. b is consumed in eleven 3-bit windows
#   (shifts 0,3,...,30). The two top bits of a, dropped from the table,
#   are handled separately by xoring in b shl 30 and b shl 31.
#-----------------------------------------------------------------------
.type	_mul_1x1_mmx,@function
.p2align	4
_mul_1x1_mmx:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx		# edx = a*2 (unmasked)
	andl	$0x3fffffff,%ecx		# ecx = a1 = a without bits 31,30
	leal	(%edx,%edx,1),%ebp		# ebp = a4 = a*4
	movl	$0,(%esp)			# tab[0] = 0
	andl	$0x7fffffff,%edx		# edx = a2 = a1*2
	movd	%eax,%mm2			# mm2 = a (zero-extended)
	movd	%ebx,%mm3			# mm3 = b (zero-extended)
	movl	%ecx,4(%esp)			# tab[1] = a1
	xorl	%edx,%ecx
	pxor	%mm5,%mm5
	pxor	%mm4,%mm4
	movl	%edx,8(%esp)			# tab[2] = a2
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)			# tab[3] = a1^a2
	pcmpgtd	%mm2,%mm5			# mm5 = all-ones if bit 31 of a set
	paddd	%mm2,%mm2			# mm2 = a*2, moves bit 30 to sign
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)			# tab[4] = a4
	xorl	%edx,%ebp
	pand	%mm3,%mm5			# mm5 = b or 0
	pcmpgtd	%mm2,%mm4			# mm4 = all-ones if bit 30 of a set
	movl	%ecx,20(%esp)			# tab[5] = a1^a4
	xorl	%ecx,%ebp
	psllq	$31,%mm5			# term for bit 31 of a
	pand	%mm3,%mm4			# mm4 = b or 0
	movl	%edx,24(%esp)			# tab[6] = a2^a4
	movl	$7,%esi
	movl	%ebp,28(%esp)			# tab[7] = a1^a2^a4
	movl	%esi,%ebp			# ebp = 7, window mask from here on
	andl	%ebx,%esi			# window 0
	shrl	$3,%ebx
	movl	%ebp,%edi
	psllq	$30,%mm4			# term for bit 30 of a
	andl	%ebx,%edi			# window 1
	shrl	$3,%ebx
	movd	(%esp,%esi,4),%mm0		# acc = tab[w0]
	movl	%ebp,%esi
	andl	%ebx,%esi			# window 2
	shrl	$3,%ebx
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$3,%mm2
	andl	%ebx,%edi			# window 3
	shrl	$3,%ebx
	pxor	%mm2,%mm0			# acc ^= tab[w1] shl 3
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$6,%mm1
	andl	%ebx,%esi			# window 4
	shrl	$3,%ebx
	pxor	%mm1,%mm0			# acc ^= tab[w2] shl 6
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$9,%mm2
	andl	%ebx,%edi			# window 5
	shrl	$3,%ebx
	pxor	%mm2,%mm0			# acc ^= tab[w3] shl 9
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$12,%mm1
	andl	%ebx,%esi			# window 6
	shrl	$3,%ebx
	pxor	%mm1,%mm0			# acc ^= tab[w4] shl 12
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$15,%mm2
	andl	%ebx,%edi			# window 7
	shrl	$3,%ebx
	pxor	%mm2,%mm0			# acc ^= tab[w5] shl 15
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$18,%mm1
	andl	%ebx,%esi			# window 8
	shrl	$3,%ebx
	pxor	%mm1,%mm0			# acc ^= tab[w6] shl 18
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$21,%mm2
	andl	%ebx,%edi			# window 9
	shrl	$3,%ebx
	pxor	%mm2,%mm0			# acc ^= tab[w7] shl 21
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$24,%mm1
	andl	%ebx,%esi			# window 10 (top two bits of b)
	shrl	$3,%ebx
	pxor	%mm1,%mm0			# acc ^= tab[w8] shl 24
	movd	(%esp,%edi,4),%mm2
	pxor	%mm4,%mm0			# fold in the bit-30 term
	psllq	$27,%mm2
	pxor	%mm2,%mm0			# acc ^= tab[w9] shl 27
	movd	(%esp,%esi,4),%mm1
	pxor	%mm5,%mm0			# fold in the bit-31 term
	psllq	$30,%mm1
	addl	$36,%esp
	pxor	%mm1,%mm0			# acc ^= tab[w10] shl 30
	ret
.size	_mul_1x1_mmx,.-_mul_1x1_mmx

#-----------------------------------------------------------------------
# _mul_1x1_ialu
#   Private helper, register interface (not a stack-argument function).
#   In:    %eax = a, %ebx = b
#   Out:   %edx:%eax = 64-bit carry-less product (edx high, eax low)
#   Clobb: %ebx, %ecx, %ebp, %esi, %edi, flags
#   Stack: 36 bytes reserved and released; the first 32 hold tab[0..7]
#
#   Same table-and-window scheme as _mul_1x1_mmx, with every shifted
#   table entry split by hand into a low part (shll k) xored into eax
#   and a high part (shrl 32-k) xored into edx.
#-----------------------------------------------------------------------
.type	_mul_1x1_ialu,@function
.p2align	4
_mul_1x1_ialu:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx		# edx = a*2 (unmasked)
	leal	(,%eax,4),%ebp			# ebp = a4 = a*4
	andl	$0x3fffffff,%ecx		# ecx = a1 = a without bits 31,30
	leal	(%eax,%eax,1),%edi		# edi = a*2, bit 30 now in sign
	sarl	$31,%eax			# eax = all-ones if bit 31 of a set
	movl	$0,(%esp)			# tab[0] = 0
	andl	$0x7fffffff,%edx		# edx = a2 = a1*2
	movl	%ecx,4(%esp)			# tab[1] = a1
	xorl	%edx,%ecx
	movl	%edx,8(%esp)			# tab[2] = a2
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)			# tab[3] = a1^a2
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)			# tab[4] = a4
	xorl	%edx,%ebp
	movl	%ecx,20(%esp)			# tab[5] = a1^a4
	xorl	%ecx,%ebp
	sarl	$31,%edi			# edi = all-ones if bit 30 of a set
	andl	%ebx,%eax			# eax = b or 0 (bit-31 term)
	movl	%edx,24(%esp)			# tab[6] = a2^a4
	andl	%ebx,%edi			# edi = b or 0 (bit-30 term)
	movl	%ebp,28(%esp)			# tab[7] = a1^a2^a4
	movl	%eax,%edx
	shll	$31,%eax			# low  half of bit-31 term
	movl	%edi,%ecx
	shrl	$1,%edx				# high half of bit-31 term
	movl	$7,%esi
	shll	$30,%edi			# low  half of bit-30 term
	andl	%ebx,%esi			# window 0
	shrl	$2,%ecx				# high half of bit-30 term
	xorl	%edi,%eax
	shrl	$3,%ebx
	movl	$7,%edi
	andl	%ebx,%edi			# window 1
	shrl	$3,%ebx
	xorl	%ecx,%edx
	xorl	(%esp,%esi,4),%eax		# lo ^= tab[w0]
	movl	$7,%esi
	andl	%ebx,%esi			# window 2
	shrl	$3,%ebx
	movl	(%esp,%edi,4),%ebp		# tab[w1], shift 3
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$3,%ebp
	andl	%ebx,%edi			# window 3
	shrl	$29,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		# tab[w2], shift 6
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$6,%ecx
	andl	%ebx,%esi			# window 4
	shrl	$26,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		# tab[w3], shift 9
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$9,%ebp
	andl	%ebx,%edi			# window 5
	shrl	$23,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		# tab[w4], shift 12
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$12,%ecx
	andl	%ebx,%esi			# window 6
	shrl	$20,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		# tab[w5], shift 15
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$15,%ebp
	andl	%ebx,%edi			# window 7
	shrl	$17,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		# tab[w6], shift 18
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$18,%ecx
	andl	%ebx,%esi			# window 8
	shrl	$14,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		# tab[w7], shift 21
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$21,%ebp
	andl	%ebx,%edi			# window 9
	shrl	$11,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx		# tab[w8], shift 24
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$24,%ecx
	andl	%ebx,%esi			# window 10 (top two bits of b)
	shrl	$8,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp		# tab[w9], shift 27
	movl	%ebp,%ecx
	shll	$27,%ebp
	movl	(%esp,%esi,4),%edi		# tab[w10], shift 30
	shrl	$5,%ecx
	movl	%edi,%esi
	xorl	%ebp,%eax
	shll	$30,%edi
	xorl	%ecx,%edx
	shrl	$2,%esi
	xorl	%edi,%eax
	xorl	%esi,%edx
	addl	$36,%esp
	ret
.size	_mul_1x1_ialu,.-_mul_1x1_ialu

#-----------------------------------------------------------------------
# bn_GF2m_mul_2x2
#   Exported. Arguments are read from the stack at entry:
#     4(%esp)  r   pointer to four 32-bit result words, r[0] lowest
#     8(%esp)  hi word of first operand
#     12(%esp) lo word of first operand
#     16(%esp) hi word of second operand
#     20(%esp) lo word of second operand
#   Writes the 128-bit carry-less product of the two 64-bit operands
#   to r[0..3]. No value is deliberately returned in %eax.
#   %ebp, %ebx, %esi, %edi are saved and restored on the MMX and IALU
#   paths; the PCLMULQDQ path touches only %eax, %edx and %xmm0.
#
#   Path selection reads the two 32-bit words of OPENSSL_ia32cap_P.
#   NOTE(review): the bit names given below follow the usual CPUID.1
#   EDX/ECX layout; confirm against the OPENSSL_ia32cap documentation.
#-----------------------------------------------------------------------
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.p2align	4
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
	leal	OPENSSL_ia32cap_P,%edx
	movl	(%edx),%eax			# capability word 0
	movl	4(%edx),%edx			# capability word 1
	testl	$0x00800000,%eax		# word 0 bit 23 (presumably MMX)
	jz	.L000ialu
	testl	$0x01000000,%eax		# word 0 bit 24 (presumably FXSR)
	jz	.L001mmx
	testl	$2,%edx				# word 1 bit 1 (presumably PCLMULQDQ)
	jz	.L001mmx

	# Single-instruction path: load all four argument words at once.
	movups	8(%esp),%xmm0
	shufps	$0xb1,%xmm0,%xmm0		# swap dwords within each qword
.byte	0x66,0x0f,0x3a,0x44,0xc0,0x01		# pclmulqdq $1,%xmm0,%xmm0
	movl	4(%esp),%eax			# eax = r
	movups	%xmm0,(%eax)
	ret

	# MMX path: three 32x32 products combined Karatsuba-style.
.p2align	4
.L001mmx:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	# Four pushes: incoming arguments now start at 20(%esp).
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	call	_mul_1x1_mmx
	movq	%mm0,%mm7			# mm7 = hi*hi
	movl	28(%esp),%eax
	movl	36(%esp),%ebx
	call	_mul_1x1_mmx
	movq	%mm0,%mm6			# mm6 = lo*lo
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	xorl	28(%esp),%eax
	xorl	36(%esp),%ebx
	call	_mul_1x1_mmx			# mm0 = (hi^lo)*(hi^lo)
	pxor	%mm7,%mm0
	movl	20(%esp),%eax			# eax = r
	pxor	%mm6,%mm0			# mm0 = middle term
	movq	%mm0,%mm2
	psllq	$32,%mm0			# middle, low half moved up
	popl	%edi
	psrlq	$32,%mm2			# middle, high half moved down
	popl	%esi
	pxor	%mm6,%mm0
	popl	%ebx
	pxor	%mm7,%mm2
	movq	%mm0,(%eax)			# r[0..1]
	popl	%ebp
	movq	%mm2,8(%eax)			# r[2..3]
	emms
	ret

	# Integer path: same three products, partial results kept on stack.
.p2align	4
.L000ialu:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$20,%esp
	# 16 bytes of pushes plus 20 of locals: arguments start at 40(%esp).
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	call	_mul_1x1_ialu
	movl	%eax,8(%esp)			# hi*hi, low word
	movl	%edx,12(%esp)			# hi*hi, high word
	movl	48(%esp),%eax
	movl	56(%esp),%ebx
	call	_mul_1x1_ialu
	movl	%eax,(%esp)			# lo*lo, low word
	movl	%edx,4(%esp)			# lo*lo, high word
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	xorl	48(%esp),%eax
	xorl	56(%esp),%ebx
	call	_mul_1x1_ialu			# edx:eax = (hi^lo)*(hi^lo)
	movl	40(%esp),%ebp			# ebp = r
	movl	(%esp),%ebx
	movl	4(%esp),%ecx
	movl	8(%esp),%edi
	movl	12(%esp),%esi
	xorl	%edx,%eax
	xorl	%ecx,%edx
	xorl	%ebx,%eax
	movl	%ebx,(%ebp)			# r[0] = low word of lo*lo
	xorl	%edi,%edx
	movl	%esi,12(%ebp)			# r[3] = high word of hi*hi
	xorl	%esi,%eax
	addl	$20,%esp
	xorl	%esi,%edx
	popl	%edi
	xorl	%edx,%eax
	popl	%esi
	movl	%edx,8(%ebp)			# r[2]
	popl	%ebx
	movl	%eax,4(%ebp)			# r[1]
	popl	%ebp
	ret
.size	bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin

# NUL-terminated ASCII banner identifying the module and its author.
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
.byte	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte	62,0

# Two-word capability vector consulted by bn_GF2m_mul_2x2 (common symbol).
.comm	OPENSSL_ia32cap_P,8,4