# x86_64-gf2m.S revision 299966
# $FreeBSD: stable/10/secure/lib/libcrypto/amd64/x86_64-gf2m.S 299966 2016-05-16 19:30:27Z jkim $
# Do not modify. This file is auto-generated from x86_64-gf2m.pl.
.text

# _mul_1x1 -- carry-less (polynomial over GF(2)) 64x64 -> 128-bit multiply.
#
# File-local helper (there is no .globl for it) with a private register
# convention rather than the C calling convention:
#
#   In:       %rax = operand a
#             %rbp = operand b
#             %r8  = table index mask (bn_GF2m_mul_2x2 passes 15)
#   Out:      %rdx:%rax = product, high:low
#   Clobbers: %rbx, %rcx, %rsi, %rdi, %rbp, %r9-%r14, %xmm0, %xmm1, flags.
#             %r8 is only read.
#   Stack:    frame of 128+8 bytes; bytes 0..127 hold tab[0..15].
#
# Method:
#   - Bits 63, 62 and 61 of a are peeled off first: each is turned into an
#     all-ones or all-zero mask (sarq by 63), ANDed with b, and the matching
#     shifted copies of b are folded into %rdx:%rax.
#   - With those three bits cleared (a1 = a & (-1 >> 3)), tab[i] is the XOR
#     of a1<<k over the set bits k of i, so every entry fits in 64 bits.
#   - b is consumed four bits at a time. Even-numbered nibbles are looked up
#     into %xmm1, byte-shifted and accumulated in %xmm0; odd-numbered nibbles
#     are looked up into %rcx and shifted into %rdx:%rax directly.
.type	_mul_1x1,@function
.align	16
_mul_1x1:
	subq	$128+8,%rsp

	# Top-bit handling interleaved with the computation of
	# r9 = a1, r10 = a1<<1, r11 = a1<<2, r12 = a<<3 (same value as a1<<3,
	# since the shift discards exactly the three cleared bits).
	movq	$-1,%r9
	leaq	(%rax,%rax,1),%rsi	# rsi = a<<1, bit 62 of a now in bit 63
	shrq	$3,%r9			# r9 = mask with the top three bits clear
	leaq	(,%rax,4),%rdi		# rdi = a<<2, bit 61 of a now in bit 63
	andq	%rax,%r9		# r9 = a1
	leaq	(,%rax,8),%r12		# r12 = a<<3
	sarq	$63,%rax		# rax = all ones when bit 63 of a is set
	leaq	(%r9,%r9,1),%r10	# r10 = a1<<1
	sarq	$63,%rsi		# rsi = all ones when bit 62 of a is set
	leaq	(,%r9,4),%r11		# r11 = a1<<2
	andq	%rbp,%rax		# rax = b or 0
	sarq	$63,%rdi		# rdi = all ones when bit 61 of a is set
	movq	%rax,%rdx
	shlq	$63,%rax		# low  word: b<<63
	andq	%rbp,%rsi
	shrq	$1,%rdx			# high word: b>>1
	movq	%rsi,%rcx
	shlq	$62,%rsi		# low  word: b<<62
	andq	%rbp,%rdi
	shrq	$2,%rcx			# high word: b>>2
	xorq	%rsi,%rax
	movq	%rdi,%rbx
	shlq	$61,%rdi		# low  word: b<<61
	xorq	%rcx,%rdx
	shrq	$3,%rbx			# high word: b>>3
	xorq	%rdi,%rax
	xorq	%rbx,%rdx

	# Build tab[0..15] at 0(%rsp)..120(%rsp); tab[i] combines
	# a1 (bit 0 of i), a1<<1 (bit 1), a1<<2 (bit 2) and a<<3 (bit 3).
	movq	%r9,%r13
	movq	$0,0(%rsp)		# tab[0]
	xorq	%r10,%r13
	movq	%r9,8(%rsp)		# tab[1]
	movq	%r11,%r14
	movq	%r10,16(%rsp)		# tab[2]
	xorq	%r12,%r14		# r14 = (a1<<2) ^ (a<<3)
	movq	%r13,24(%rsp)		# tab[3]

	xorq	%r11,%r9
	movq	%r11,32(%rsp)		# tab[4]
	xorq	%r11,%r10
	movq	%r9,40(%rsp)		# tab[5]
	xorq	%r11,%r13
	movq	%r10,48(%rsp)		# tab[6]
	xorq	%r14,%r9
	movq	%r13,56(%rsp)		# tab[7]
	xorq	%r14,%r10

	movq	%r12,64(%rsp)		# tab[8]
	xorq	%r14,%r13
	movq	%r9,72(%rsp)		# tab[9]
	xorq	%r11,%r9
	movq	%r10,80(%rsp)		# tab[10]
	xorq	%r11,%r10
	movq	%r13,88(%rsp)		# tab[11]

	# The last four table stores are interleaved with extracting the
	# first two nibbles of b (rsi = nibble 0, rdi = nibble 1).
	xorq	%r11,%r13
	movq	%r14,96(%rsp)		# tab[12]
	movq	%r8,%rsi
	movq	%r9,104(%rsp)		# tab[13]
	andq	%rbp,%rsi
	movq	%r10,112(%rsp)		# tab[14]
	shrq	$4,%rbp
	movq	%r13,120(%rsp)		# tab[15]
	movq	%r8,%rdi
	andq	%rbp,%rdi
	shrq	$4,%rbp

	# Nibble 0 seeds %xmm0 (the 64-bit load clears the upper half).
	movq	(%rsp,%rsi,8),%xmm0
	movq	%r8,%rsi
	andq	%rbp,%rsi
	shrq	$4,%rbp
	# Nibbles 1 (integer path, bit offset 4) and 2 (xmm path, 1 byte).
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$4,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$60,%rbx
	xorq	%rcx,%rax
	pslldq	$1,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	# Nibbles 3 (bit offset 12) and 4 (2 bytes).
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$12,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$52,%rbx
	xorq	%rcx,%rax
	pslldq	$2,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	# Nibbles 5 (bit offset 20) and 6 (3 bytes).
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$20,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$44,%rbx
	xorq	%rcx,%rax
	pslldq	$3,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	# Nibbles 7 (bit offset 28) and 8 (4 bytes).
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$28,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$36,%rbx
	xorq	%rcx,%rax
	pslldq	$4,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	# Nibbles 9 (bit offset 36) and 10 (5 bytes).
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$36,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$28,%rbx
	xorq	%rcx,%rax
	pslldq	$5,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	# Nibbles 11 (bit offset 44) and 12 (6 bytes).
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$44,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$20,%rbx
	xorq	%rcx,%rax
	pslldq	$6,%xmm1
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	# Nibbles 13 (bit offset 52) and 14 (7 bytes).
	movq	(%rsp,%rdi,8),%rcx
	movq	%r8,%rdi
	movq	%rcx,%rbx
	shlq	$52,%rcx
	andq	%rbp,%rdi
	movq	(%rsp,%rsi,8),%xmm1
	shrq	$12,%rbx
	xorq	%rcx,%rax
	pslldq	$7,%xmm1
	# The %rsi value computed next is never used as an index: %rsi is
	# overwritten with the low half of %xmm0 before its next use.
	movq	%r8,%rsi
	shrq	$4,%rbp
	xorq	%rbx,%rdx
	andq	%rbp,%rsi
	shrq	$4,%rbp
	pxor	%xmm1,%xmm0
	# Nibble 15 (bit offset 60), then merge the xmm accumulator.
	movq	(%rsp,%rdi,8),%rcx
	movq	%rcx,%rbx
	shlq	$60,%rcx
.byte	102,72,15,126,198		# movq %xmm0,%rsi (low 64 bits)
	shrq	$4,%rbx
	xorq	%rcx,%rax
	psrldq	$8,%xmm0
	xorq	%rbx,%rdx
.byte	102,72,15,126,199		# movq %xmm0,%rdi (former high 64 bits)
	xorq	%rsi,%rax
	xorq	%rdi,%rdx

	addq	$128+8,%rsp
	.byte	0xf3,0xc3		# repz ret
.Lend_mul_1x1:
.size	_mul_1x1,.-_mul_1x1

# bn_GF2m_mul_2x2 -- 128x128 -> 256-bit carry-less multiply (Karatsuba).
#
# Register use on entry follows the System V AMD64 integer argument order:
#   %rdi = r, destination for four 64-bit words, r[0] least significant
#   %rsi = a1, %rdx = a0   (first operand, high and low word)
#   %rcx = b1, %r8  = b0   (second operand, high and low word)
# NOTE(review): presumably declared in C as
#   void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
#                        BN_ULONG b1, BN_ULONG b0)
# -- confirm against the C prototype, which is not part of this file.
#
# Both paths form three 64x64 products, a1*b1, a0*b0 and (a1^a0)*(b1^b0),
# and combine them as
#   r[3]:r[2]:r[1]:r[0] = (a1*b1 << 128) ^ (mid << 64) ^ (a0*b0)
#   mid = (a1^a0)*(b1^b0) ^ a1*b1 ^ a0*b0
#
# Bit 33 of OPENSSL_ia32cap_P selects the path: when set, the hand-encoded
# pclmulqdq sequence runs; otherwise the table-driven _mul_1x1 helper is
# called three times.
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
	movq	OPENSSL_ia32cap_P(%rip),%rax
	btq	$33,%rax
	jnc	.Lvanilla_mul_2x2

	# pclmulqdq path; the instructions are emitted as raw bytes.
.byte	102,72,15,110,198		# movq %rsi,%xmm0   (a1)
.byte	102,72,15,110,201		# movq %rcx,%xmm1   (b1)
.byte	102,72,15,110,210		# movq %rdx,%xmm2   (a0)
.byte	102,73,15,110,216		# movq %r8,%xmm3    (b0)
	movdqa	%xmm0,%xmm4
	movdqa	%xmm1,%xmm5
.byte	102,15,58,68,193,0		# pclmulqdq $0,%xmm1,%xmm0 : a1*b1
	pxor	%xmm2,%xmm4		# xmm4 = a1 ^ a0
	pxor	%xmm3,%xmm5		# xmm5 = b1 ^ b0
.byte	102,15,58,68,211,0		# pclmulqdq $0,%xmm3,%xmm2 : a0*b0
.byte	102,15,58,68,229,0		# pclmulqdq $0,%xmm5,%xmm4 : (a1^a0)*(b1^b0)
	xorps	%xmm0,%xmm4
	xorps	%xmm2,%xmm4		# xmm4 = mid
	movdqa	%xmm4,%xmm5
	pslldq	$8,%xmm4		# low half of mid moved to bits 64..127
	psrldq	$8,%xmm5		# high half of mid moved to bits 0..63
	pxor	%xmm4,%xmm2		# low 128 bits of the result
	pxor	%xmm5,%xmm0		# high 128 bits of the result
	movdqu	%xmm2,0(%rdi)		# r[0], r[1]
	movdqu	%xmm0,16(%rdi)		# r[2], r[3]
	.byte	0xf3,0xc3		# repz ret

	# Table-driven path. Frame layout relative to the new %rsp:
	#   0,8     a0*b0 (low, high)      16,24   a1*b1 (low, high)
	#   32      r                      40,48   a1, a0
	#   56,64   b1, b0                 80..112 saved r14,r13,r12,rbp,rbx
.align	16
.Lvanilla_mul_2x2:
	leaq	-136(%rsp),%rsp
	# _mul_1x1 overwrites these callee-saved registers.
	movq	%r14,80(%rsp)
	movq	%r13,88(%rsp)
	movq	%r12,96(%rsp)
	movq	%rbp,104(%rsp)
	movq	%rbx,112(%rsp)
.Lbody_mul_2x2:
	# Spill the arguments; _mul_1x1 overwrites all of their registers
	# except %r8, which is repurposed as the nibble mask.
	movq	%rdi,32(%rsp)
	movq	%rsi,40(%rsp)
	movq	%rdx,48(%rsp)
	movq	%rcx,56(%rsp)
	movq	%r8,64(%rsp)

	movq	$15,%r8			# nibble mask, kept across all three calls
	movq	%rsi,%rax
	movq	%rcx,%rbp
	call	_mul_1x1		# a1*b1
	movq	%rax,16(%rsp)
	movq	%rdx,24(%rsp)

	movq	48(%rsp),%rax
	movq	64(%rsp),%rbp
	call	_mul_1x1		# a0*b0
	movq	%rax,0(%rsp)
	movq	%rdx,8(%rsp)

	movq	40(%rsp),%rax
	movq	56(%rsp),%rbp
	xorq	48(%rsp),%rax
	xorq	64(%rsp),%rbp
	call	_mul_1x1		# m = (a1^a0)*(b1^b0), in rdx:rax
	movq	0(%rsp),%rbx		# lo(a0*b0)
	movq	8(%rsp),%rcx		# hi(a0*b0)
	movq	16(%rsp),%rdi		# lo(a1*b1)
	movq	24(%rsp),%rsi		# hi(a1*b1)
	movq	32(%rsp),%rbp		# r

	xorq	%rdx,%rax		# rax = m_lo ^ m_hi
	xorq	%rcx,%rdx		# rdx = m_hi ^ hi(a0*b0)
	xorq	%rbx,%rax
	movq	%rbx,0(%rbp)		# r[0] = lo(a0*b0)
	xorq	%rdi,%rdx
	movq	%rsi,24(%rbp)		# r[3] = hi(a1*b1)
	xorq	%rsi,%rax
	xorq	%rsi,%rdx		# rdx = m_hi ^ hi(a0*b0) ^ lo(a1*b1) ^ hi(a1*b1)
	xorq	%rdx,%rax		# rax = m_lo ^ lo(a0*b0) ^ hi(a0*b0) ^ lo(a1*b1)
	movq	%rdx,16(%rbp)		# r[2]
	movq	%rax,8(%rbp)		# r[1]

	movq	80(%rsp),%r14
	movq	88(%rsp),%r13
	movq	96(%rsp),%r12
	movq	104(%rsp),%rbp
	movq	112(%rsp),%rbx
	leaq	136(%rsp),%rsp
	.byte	0xf3,0xc3		# repz ret
.Lend_mul_2x2:
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
# ASCII, NUL-terminated: "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
