# $FreeBSD$
/*
 * Carry-less (polynomial over GF(2)) multiplication for x86-64.
 * Syntax: GNU as, AT&T operand order.
 *
 *   _mul_1x1         file-local helper, 64x64 -> 128-bit product, private
 *                    register-based calling convention.
 *   bn_GF2m_mul_2x2  exported, 128x128 -> 256-bit product assembled from
 *                    three 64x64 products.
 *
 * Notation used below: "^" is XOR, "a * b" is the carry-less product.
 */
.text

/*
 * _mul_1x1: 64x64 -> 128-bit carry-less multiply, table driven.
 *
 * In:    %rax = a
 *        %rbp = b
 *        %r8  = mask ANDed with (shifted) b to form each table index;
 *               the only caller in this file passes 15
 * Out:   %rdx:%rax = a * b   (high:low)
 * Clobb: %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1, flags.
 *        %rbx, %rbp and %r12-%r14 are NOT preserved here;
 *        bn_GF2m_mul_2x2 saves them before calling.
 * Stack: 136 bytes; 0..120(%rsp) hold a 16-entry table of 8-byte values.
 *
 * Not C-callable: the operands are not passed in ABI argument registers.
 *
 * NOTE(review): the table loads are indexed by 4-bit groups of b, so the
 * memory access pattern depends on operand data. Whether that matters
 * depends on how callers use this routine - confirm.
 */
.type _mul_1x1,@function
.align 16
_mul_1x1:
        subq $128+8,%rsp
        /*
         * Two interleaved jobs:
         *
         * 1. %r9 = a1 = a with its top three bits cleared, so that every
         *    table entry (a1 times a polynomial of degree <= 3) fits in
         *    64 bits. Also %r10 = a1 << 1, %r11 = a1 << 2 and
         *    %r12 = a << 3 (same value as a1 << 3, the top bits shift out).
         *
         * 2. The three cleared bits are applied directly. For k = 63, 62,
         *    61: sar $63 turns bit k of a into an all-ones/all-zero mask,
         *    the mask is ANDed with b, and (b << k) as a 128-bit value is
         *    XORed into %rdx:%rax (shl by k for the low half, shr by
         *    64 - k for the high half).
         */
        movq $-1,%r9
        leaq (%rax,%rax,1),%rsi
        shrq $3,%r9
        leaq (,%rax,4),%rdi
        andq %rax,%r9
        leaq (,%rax,8),%r12
        sarq $63,%rax
        leaq (%r9,%r9,1),%r10
        sarq $63,%rsi
        leaq (,%r9,4),%r11
        andq %rbp,%rax
        sarq $63,%rdi
        movq %rax,%rdx
        shlq $63,%rax
        andq %rbp,%rsi
        shrq $1,%rdx
        movq %rsi,%rcx
        shlq $62,%rsi
        andq %rbp,%rdi
        shrq $2,%rcx
        xorq %rsi,%rax
        movq %rdi,%rbx
        shlq $61,%rdi
        xorq %rcx,%rdx
        shrq $3,%rbx
        xorq %rdi,%rax
        xorq %rbx,%rdx

        /*
         * Build tab[i] at 8*i(%rsp) for i = 0..15, where tab[i] is the XOR
         * of (a1 << j) over the set bits j of i, i.e. a1 * i.
         * %r13 starts as tab[3]; %r14 = (a1 << 2) ^ (a1 << 3) = tab[12] is
         * XORed in to move an entry from the 4..7 group to the 8..11 group.
         */
        movq %r9,%r13
        movq $0,0(%rsp)
        xorq %r10,%r13
        movq %r9,8(%rsp)
        movq %r11,%r14
        movq %r10,16(%rsp)
        xorq %r12,%r14
        movq %r13,24(%rsp)

        /* entries 4..7: previous three registers with (a1 << 2) XORed in */
        xorq %r11,%r9
        movq %r11,32(%rsp)
        xorq %r11,%r10
        movq %r9,40(%rsp)
        xorq %r11,%r13
        movq %r10,48(%rsp)
        xorq %r14,%r9
        movq %r13,56(%rsp)
        xorq %r14,%r10

        /* entries 8..11 */
        movq %r12,64(%rsp)
        xorq %r14,%r13
        movq %r9,72(%rsp)
        xorq %r11,%r9
        movq %r10,80(%rsp)
        xorq %r11,%r10
        movq %r13,88(%rsp)

        /*
         * Entries 12..15, interleaved with the first index extractions.
         * With idx(k) = (b >> 4k) & mask: %rsi = idx(0), %rdi = idx(1).
         */
        xorq %r11,%r13
        movq %r14,96(%rsp)
        movq %r8,%rsi
        movq %r9,104(%rsp)
        andq %rbp,%rsi
        movq %r10,112(%rsp)
        shrq $4,%rbp
        movq %r13,120(%rsp)
        movq %r8,%rdi
        andq %rbp,%rdi
        shrq $4,%rbp

        /*
         * Main accumulation: result ^= tab[idx(k)] << 4k for k = 0..15.
         * Even k are gathered in %xmm0 (tab[idx(2r)] shifted left by r
         * bytes with pslldq); odd k go straight into %rdx:%rax
         * (shl by 4k for the low half, shr by 64 - 4k for the high half).
         * Each round also extracts the next two indices into %rdi/%rsi.
         */
        movq (%rsp,%rsi,8),%xmm0        /* xmm0 = tab[idx(0)] */
        movq %r8,%rsi
        andq %rbp,%rsi
        shrq $4,%rbp
        /* round 1: idx(1) via GPRs (<< 4), idx(2) via XMM (<< 1 byte) */
        movq (%rsp,%rdi,8),%rcx
        movq %r8,%rdi
        movq %rcx,%rbx
        shlq $4,%rcx
        andq %rbp,%rdi
        movq (%rsp,%rsi,8),%xmm1
        shrq $60,%rbx
        xorq %rcx,%rax
        pslldq $1,%xmm1
        movq %r8,%rsi
        shrq $4,%rbp
        xorq %rbx,%rdx
        andq %rbp,%rsi
        shrq $4,%rbp
        pxor %xmm1,%xmm0
        /* round 2: idx(3) via GPRs (<< 12), idx(4) via XMM (<< 2 bytes) */
        movq (%rsp,%rdi,8),%rcx
        movq %r8,%rdi
        movq %rcx,%rbx
        shlq $12,%rcx
        andq %rbp,%rdi
        movq (%rsp,%rsi,8),%xmm1
        shrq $52,%rbx
        xorq %rcx,%rax
        pslldq $2,%xmm1
        movq %r8,%rsi
        shrq $4,%rbp
        xorq %rbx,%rdx
        andq %rbp,%rsi
        shrq $4,%rbp
        pxor %xmm1,%xmm0
        /* round 3: idx(5) via GPRs (<< 20), idx(6) via XMM (<< 3 bytes) */
        movq (%rsp,%rdi,8),%rcx
        movq %r8,%rdi
        movq %rcx,%rbx
        shlq $20,%rcx
        andq %rbp,%rdi
        movq (%rsp,%rsi,8),%xmm1
        shrq $44,%rbx
        xorq %rcx,%rax
        pslldq $3,%xmm1
        movq %r8,%rsi
        shrq $4,%rbp
        xorq %rbx,%rdx
        andq %rbp,%rsi
        shrq $4,%rbp
        pxor %xmm1,%xmm0
        /* round 4: idx(7) via GPRs (<< 28), idx(8) via XMM (<< 4 bytes) */
        movq (%rsp,%rdi,8),%rcx
        movq %r8,%rdi
        movq %rcx,%rbx
        shlq $28,%rcx
        andq %rbp,%rdi
        movq (%rsp,%rsi,8),%xmm1
        shrq $36,%rbx
        xorq %rcx,%rax
        pslldq $4,%xmm1
        movq %r8,%rsi
        shrq $4,%rbp
        xorq %rbx,%rdx
        andq %rbp,%rsi
        shrq $4,%rbp
        pxor %xmm1,%xmm0
        /* round 5: idx(9) via GPRs (<< 36), idx(10) via XMM (<< 5 bytes) */
        movq (%rsp,%rdi,8),%rcx
        movq %r8,%rdi
        movq %rcx,%rbx
        shlq $36,%rcx
        andq %rbp,%rdi
        movq (%rsp,%rsi,8),%xmm1
        shrq $28,%rbx
        xorq %rcx,%rax
        pslldq $5,%xmm1
        movq %r8,%rsi
        shrq $4,%rbp
        xorq %rbx,%rdx
        andq %rbp,%rsi
        shrq $4,%rbp
        pxor %xmm1,%xmm0
        /* round 6: idx(11) via GPRs (<< 44), idx(12) via XMM (<< 6 bytes) */
        movq (%rsp,%rdi,8),%rcx
        movq %r8,%rdi
        movq %rcx,%rbx
        shlq $44,%rcx
        andq %rbp,%rdi
        movq (%rsp,%rsi,8),%xmm1
        shrq $20,%rbx
        xorq %rcx,%rax
        pslldq $6,%xmm1
        movq %r8,%rsi
        shrq $4,%rbp
        xorq %rbx,%rdx
        andq %rbp,%rsi
        shrq $4,%rbp
        pxor %xmm1,%xmm0
        /*
         * round 7: idx(13) via GPRs (<< 52), idx(14) via XMM (<< 7 bytes).
         * The %rsi index computed at the end of this round is not consumed;
         * %rsi is overwritten from %xmm0 below.
         */
        movq (%rsp,%rdi,8),%rcx
        movq %r8,%rdi
        movq %rcx,%rbx
        shlq $52,%rcx
        andq %rbp,%rdi
        movq (%rsp,%rsi,8),%xmm1
        shrq $12,%rbx
        xorq %rcx,%rax
        pslldq $7,%xmm1
        movq %r8,%rsi
        shrq $4,%rbp
        xorq %rbx,%rdx
        andq %rbp,%rsi
        shrq $4,%rbp
        pxor %xmm1,%xmm0
        /*
         * Last step: idx(15) via GPRs (<< 60), then fold the 128-bit XMM
         * accumulator into %rdx:%rax (low qword into %rax, high into %rdx).
         */
        movq (%rsp,%rdi,8),%rcx
        movq %rcx,%rbx
        shlq $60,%rcx
/* 66 48 0F 7E C6 = movq %xmm0,%rsi */
.byte 102,72,15,126,198
        shrq $4,%rbx
        xorq %rcx,%rax
        psrldq $8,%xmm0
        xorq %rbx,%rdx
/* 66 48 0F 7E C7 = movq %xmm0,%rdi */
.byte 102,72,15,126,199
        xorq %rsi,%rax
        xorq %rdi,%rdx

        addq $128+8,%rsp
        .byte 0xf3,0xc3                 /* F3 C3 = rep ret */
.Lend_mul_1x1:
.size _mul_1x1,.-_mul_1x1

/*
 * bn_GF2m_mul_2x2: 128x128 -> 256-bit carry-less multiply.
 *
 * In:    %rdi = r, destination for four 64-bit words
 *        %rsi = a1, %rdx = a0   (first operand, high and low word)
 *        %rcx = b1, %r8  = b0   (second operand, high and low word)
 * Out:   r[0..3] = (a1:a0) * (b1:b0), r[0] least significant
 * Clobb: first path:  %rax, %xmm0-%xmm5, flags
 *        second path: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11,
 *                     %xmm0, %xmm1, flags
 *        %rbx, %rbp and %r12-%r14 are saved and restored.
 *
 * Both paths form three 64x64 products
 *        hi = a1 * b1,  lo = a0 * b0,  mid = (a1 ^ a0) * (b1 ^ b0)
 * and combine them as  hi << 128  ^  (mid ^ hi ^ lo) << 64  ^  lo.
 *
 * NOTE(review): the register assignment matches a System V AMD64 C
 * prototype of the form
 *   void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
 *                        BN_ULONG b1, BN_ULONG b0);
 * - confirm against the C declaration used by callers.
 */
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,@function
.align 16
bn_GF2m_mul_2x2:
        /*
         * Bit 33 of the 64-bit word at OPENSSL_ia32cap_P selects the path
         * below, which uses instructions encoded as PCLMULQDQ; presumably
         * this is the corresponding CPU feature flag - confirm against the
         * definition of OPENSSL_ia32cap_P. If the bit is clear, fall back
         * to the table-driven path.
         */
        movq OPENSSL_ia32cap_P(%rip),%rax
        btq $33,%rax
        jnc .Lvanilla_mul_2x2

/* 66 48 0F 6E C6 = movq %rsi,%xmm0   (a1) */
.byte 102,72,15,110,198
/* 66 48 0F 6E C9 = movq %rcx,%xmm1   (b1) */
.byte 102,72,15,110,201
/* 66 48 0F 6E D2 = movq %rdx,%xmm2   (a0) */
.byte 102,72,15,110,210
/* 66 49 0F 6E D8 = movq %r8,%xmm3    (b0) */
.byte 102,73,15,110,216
        movdqa %xmm0,%xmm4
        movdqa %xmm1,%xmm5
/* 66 0F 3A 44 C1 00 = pclmulqdq $0,%xmm1,%xmm0   (xmm0 = hi) */
.byte 102,15,58,68,193,0
        pxor %xmm2,%xmm4                /* xmm4 = a1 ^ a0 */
        pxor %xmm3,%xmm5                /* xmm5 = b1 ^ b0 */
/* 66 0F 3A 44 D3 00 = pclmulqdq $0,%xmm3,%xmm2   (xmm2 = lo) */
.byte 102,15,58,68,211,0
/* 66 0F 3A 44 E5 00 = pclmulqdq $0,%xmm5,%xmm4   (xmm4 = mid) */
.byte 102,15,58,68,229,0
        xorps %xmm0,%xmm4
        xorps %xmm2,%xmm4               /* xmm4 = mid ^ hi ^ lo */
        movdqa %xmm4,%xmm5
        pslldq $8,%xmm4                 /* low qword of the term -> r[1] lane */
        psrldq $8,%xmm5                 /* high qword of the term -> r[2] lane */
        pxor %xmm4,%xmm2
        pxor %xmm5,%xmm0
        movdqu %xmm2,0(%rdi)            /* r[0], r[1] */
        movdqu %xmm0,16(%rdi)           /* r[2], r[3] */
        .byte 0xf3,0xc3                 /* F3 C3 = rep ret */

/*
 * Table-driven path. Frame layout (136 bytes below the return address):
 *     0,   8   lo product (low, high)
 *    16,  24   hi product (low, high)
 *    32        r
 *    40,  48   a1, a0
 *    56,  64   b1, b0
 *    80..112   saved %r14, %r13, %r12, %rbp, %rbx
 * Offsets 72, 120 and 128 are not accessed.
 */
.align 16
.Lvanilla_mul_2x2:
        leaq -136(%rsp),%rsp
        movq %r14,80(%rsp)
        movq %r13,88(%rsp)
        movq %r12,96(%rsp)
        movq %rbp,104(%rsp)
        movq %rbx,112(%rsp)
/* .Lbody_mul_2x2 is not referenced anywhere in this file. */
.Lbody_mul_2x2:
        movq %rdi,32(%rsp)
        movq %rsi,40(%rsp)
        movq %rdx,48(%rsp)
        movq %rcx,56(%rsp)
        movq %r8,64(%rsp)

        /*
         * hi = a1 * b1. %r8 = 15 is the index mask for _mul_1x1; that
         * routine never writes %r8, so it stays valid for all three calls.
         */
        movq $15,%r8
        movq %rsi,%rax
        movq %rcx,%rbp
        call _mul_1x1
        movq %rax,16(%rsp)
        movq %rdx,24(%rsp)

        /* lo = a0 * b0 */
        movq 48(%rsp),%rax
        movq 64(%rsp),%rbp
        call _mul_1x1
        movq %rax,0(%rsp)
        movq %rdx,8(%rsp)

        /* mid = (a1 ^ a0) * (b1 ^ b0), left in %rdx:%rax */
        movq 40(%rsp),%rax
        movq 56(%rsp),%rbp
        xorq 48(%rsp),%rax
        xorq 64(%rsp),%rbp
        call _mul_1x1
        movq 0(%rsp),%rbx               /* lo, low word */
        movq 8(%rsp),%rcx               /* lo, high word */
        movq 16(%rsp),%rdi              /* hi, low word */
        movq 24(%rsp),%rsi              /* hi, high word */
        movq 32(%rsp),%rbp              /* r */

        /*
         * Combine. Writing X.l / X.h for the low / high word of X:
         *   r[0] = lo.l
         *   r[1] = lo.h ^ mid.l ^ hi.l ^ lo.l
         *   r[2] = hi.l ^ mid.h ^ hi.h ^ lo.h
         *   r[3] = hi.h
         * %rdx ends up as r[2]; %rax first collects
         * mid.l ^ mid.h ^ lo.l ^ hi.h and the final XOR with %rdx cancels
         * mid.h and hi.h, leaving r[1].
         */
        xorq %rdx,%rax
        xorq %rcx,%rdx
        xorq %rbx,%rax
        movq %rbx,0(%rbp)
        xorq %rdi,%rdx
        movq %rsi,24(%rbp)
        xorq %rsi,%rax
        xorq %rsi,%rdx
        xorq %rdx,%rax
        movq %rdx,16(%rbp)
        movq %rax,8(%rbp)

        /* restore the registers saved on entry and release the frame */
        movq 80(%rsp),%r14
        movq 88(%rsp),%r13
        movq 96(%rsp),%r12
        movq 104(%rsp),%rbp
        movq 112(%rsp),%rbx
        leaq 136(%rsp),%rsp
        .byte 0xf3,0xc3                 /* F3 C3 = rep ret */
.Lend_mul_2x2:
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
/*
 * NUL-terminated ASCII identification string:
 * "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
 */
.byte 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16