1	# $FreeBSD$
2.file	"x86-gf2m.s"
3.text
# _mul_1x1_mmx: 32x32 -> 64-bit carry-less multiply (partial products are
# combined with XOR, never ADD), using MMX registers for the accumulator.
#
# File-local helper with a register-based convention (not stack arguments):
#   In:       %eax = a, %ebx = b
#   Out:      %mm0 = 64-bit product
#   Clobbers: %ebx, %ecx, %edx, %ebp, %esi, %edi, %mm1-%mm5, flags
#   Stack:    36 bytes of scratch; the low 32 bytes hold an 8-entry dword table
#   MMX state is left live: no emms is executed here, the caller must do it.
#
# Method:
#   Let a1 = a & 0x3fffffff, a2 = (2*a) & 0x7fffffff, a4 = 4*a (mod 2^32).
#   tab[i] (at i*4(%esp)) is the XOR of a1/a2/a4 selected by the bits of i:
#     tab[0]=0      tab[1]=a1      tab[2]=a2      tab[3]=a1^a2
#     tab[4]=a4     tab[5]=a1^a4   tab[6]=a2^a4   tab[7]=a1^a2^a4
#   b is consumed in eleven 3-bit windows (shift amounts 0,3,...,30); each
#   window indexes the table and the entry is XORed in, shifted left by the
#   window position.  The table only covers the low 30 bits of a, so bits
#   30 and 31 of a are handled separately as (b << 30) and (b << 31),
#   selected by masks derived from those two bits (%mm4 and %mm5).
#   The instruction stream is software-pipelined: index computation for a
#   later window is interleaved with the load/shift/XOR of an earlier one.
4.type	_mul_1x1_mmx,@function
5.align	16
6_mul_1x1_mmx:
# Reserve the scratch area that holds tab[0..7].
7	subl	$36,%esp
# Table construction (integer unit), interleaved with the top-bit masks (MMX).
8	movl	%eax,%ecx
9	leal	(%eax,%eax,1),%edx
# ecx = a1
10	andl	$1073741823,%ecx
# ebp = a4
11	leal	(%edx,%edx,1),%ebp
12	movl	$0,(%esp)
# edx = a2
13	andl	$2147483647,%edx
# mm2 = a, mm3 = b (zero-extended to 64 bits by movd)
14	movd	%eax,%mm2
15	movd	%ebx,%mm3
16	movl	%ecx,4(%esp)
17	xorl	%edx,%ecx
18	pxor	%mm5,%mm5
19	pxor	%mm4,%mm4
20	movl	%edx,8(%esp)
21	xorl	%ebp,%edx
22	movl	%ecx,12(%esp)
# mm5 low dword = all-ones when a is negative as a signed dword (bit 31 set).
23	pcmpgtd	%mm2,%mm5
# Double a so that its original bit 30 becomes the sign bit.
24	paddd	%mm2,%mm2
25	xorl	%edx,%ecx
26	movl	%ebp,16(%esp)
27	xorl	%edx,%ebp
# mm5 = bit 31 of a ? b : 0
28	pand	%mm3,%mm5
# mm4 low dword = all-ones when the original bit 30 of a is set.
29	pcmpgtd	%mm2,%mm4
30	movl	%ecx,20(%esp)
31	xorl	%ecx,%ebp
# mm5 = bit 31 of a ? (b << 31) : 0
32	psllq	$31,%mm5
# mm4 = bit 30 of a ? b : 0
33	pand	%mm3,%mm4
34	movl	%edx,24(%esp)
35	movl	$7,%esi
36	movl	%ebp,28(%esp)
# Table complete.  From here %ebp holds the constant window mask (7) and
# %esi / %edi alternate as the table index of successive windows of b.
37	movl	%esi,%ebp
# esi = window at shift 0
38	andl	%ebx,%esi
39	shrl	$3,%ebx
40	movl	%ebp,%edi
# mm4 = bit 30 of a ? (b << 30) : 0
41	psllq	$30,%mm4
# edi = window at shift 3
42	andl	%ebx,%edi
43	shrl	$3,%ebx
# Accumulator starts as the table entry for the window at shift 0.
44	movd	(%esp,%esi,4),%mm0
45	movl	%ebp,%esi
46	andl	%ebx,%esi
47	shrl	$3,%ebx
# Window at shift 3.
48	movd	(%esp,%edi,4),%mm2
49	movl	%ebp,%edi
50	psllq	$3,%mm2
51	andl	%ebx,%edi
52	shrl	$3,%ebx
53	pxor	%mm2,%mm0
# Window at shift 6.
54	movd	(%esp,%esi,4),%mm1
55	movl	%ebp,%esi
56	psllq	$6,%mm1
57	andl	%ebx,%esi
58	shrl	$3,%ebx
59	pxor	%mm1,%mm0
# Window at shift 9.
60	movd	(%esp,%edi,4),%mm2
61	movl	%ebp,%edi
62	psllq	$9,%mm2
63	andl	%ebx,%edi
64	shrl	$3,%ebx
65	pxor	%mm2,%mm0
# Window at shift 12.
66	movd	(%esp,%esi,4),%mm1
67	movl	%ebp,%esi
68	psllq	$12,%mm1
69	andl	%ebx,%esi
70	shrl	$3,%ebx
71	pxor	%mm1,%mm0
# Window at shift 15.
72	movd	(%esp,%edi,4),%mm2
73	movl	%ebp,%edi
74	psllq	$15,%mm2
75	andl	%ebx,%edi
76	shrl	$3,%ebx
77	pxor	%mm2,%mm0
# Window at shift 18.
78	movd	(%esp,%esi,4),%mm1
79	movl	%ebp,%esi
80	psllq	$18,%mm1
81	andl	%ebx,%esi
82	shrl	$3,%ebx
83	pxor	%mm1,%mm0
# Window at shift 21.
84	movd	(%esp,%edi,4),%mm2
85	movl	%ebp,%edi
86	psllq	$21,%mm2
87	andl	%ebx,%edi
88	shrl	$3,%ebx
89	pxor	%mm2,%mm0
# Window at shift 24.
90	movd	(%esp,%esi,4),%mm1
91	movl	%ebp,%esi
92	psllq	$24,%mm1
93	andl	%ebx,%esi
94	shrl	$3,%ebx
95	pxor	%mm1,%mm0
# Window at shift 27; the bit-30 correction term is folded in alongside it.
96	movd	(%esp,%edi,4),%mm2
97	pxor	%mm4,%mm0
98	psllq	$27,%mm2
99	pxor	%mm2,%mm0
# Last window (shift 30; only two bits of b remain) plus the bit-31 correction.
100	movd	(%esp,%esi,4),%mm1
101	pxor	%mm5,%mm0
102	psllq	$30,%mm1
# The table is no longer read after the load above, so the frame can go now.
103	addl	$36,%esp
104	pxor	%mm1,%mm0
105	ret
106.size	_mul_1x1_mmx,.-_mul_1x1_mmx
# _mul_1x1_ialu: 32x32 -> 64-bit carry-less multiply (partial products are
# combined with XOR), using only the integer registers.
#
# File-local helper with a register-based convention (not stack arguments):
#   In:       %eax = a, %ebx = b
#   Out:      %edx:%eax = 64-bit product (%eax = low dword, %edx = high dword)
#   Clobbers: %ebx, %ecx, %ebp, %esi, %edi, flags
#   Stack:    36 bytes of scratch; the low 32 bytes hold an 8-entry dword table
#
# Method:
#   Let a1 = a & 0x3fffffff, a2 = (2*a) & 0x7fffffff, a4 = 4*a (mod 2^32).
#   tab[i] (at i*4(%esp)) is the XOR of a1/a2/a4 selected by the bits of i:
#     tab[0]=0      tab[1]=a1      tab[2]=a2      tab[3]=a1^a2
#     tab[4]=a4     tab[5]=a1^a4   tab[6]=a2^a4   tab[7]=a1^a2^a4
#   b is consumed in eleven 3-bit windows (shift amounts 0,3,...,30).  For a
#   window at shift k the table entry t contributes (t << k) to %eax and
#   (t >> (32-k)) to %edx, i.e. a 64-bit left shift split over two dwords.
#   The table only covers the low 30 bits of a; bits 30 and 31 of a are
#   handled up front by seeding %edx:%eax with (b << 30) and (b << 31),
#   each gated by an arithmetic-shift mask of the corresponding bit.
#   %esi / %edi alternate as window indices, %ebp / %ecx as entry temporaries.
107.type	_mul_1x1_ialu,@function
108.align	16
109_mul_1x1_ialu:
# Reserve the scratch area that holds tab[0..7].
110	subl	$36,%esp
111	movl	%eax,%ecx
112	leal	(%eax,%eax,1),%edx
# ebp = a4
113	leal	(,%eax,4),%ebp
# ecx = a1
114	andl	$1073741823,%ecx
# edi = 2*a, so the original bit 30 of a sits in the sign position.
115	leal	(%eax,%eax,1),%edi
# eax = all-ones when bit 31 of a is set, else 0.
116	sarl	$31,%eax
117	movl	$0,(%esp)
# edx = a2
118	andl	$2147483647,%edx
119	movl	%ecx,4(%esp)
120	xorl	%edx,%ecx
121	movl	%edx,8(%esp)
122	xorl	%ebp,%edx
123	movl	%ecx,12(%esp)
124	xorl	%edx,%ecx
125	movl	%ebp,16(%esp)
126	xorl	%edx,%ebp
127	movl	%ecx,20(%esp)
128	xorl	%ecx,%ebp
# edi = all-ones when bit 30 of a is set, else 0.
129	sarl	$31,%edi
# eax = bit 31 of a ? b : 0
130	andl	%ebx,%eax
131	movl	%edx,24(%esp)
# edi = bit 30 of a ? b : 0
132	andl	%ebx,%edi
133	movl	%ebp,28(%esp)
# Table complete.  Seed the accumulator with the two top-bit terms:
#   %edx:%eax = (bit 31 ? b << 31 : 0) ^ (bit 30 ? b << 30 : 0)
134	movl	%eax,%edx
135	shll	$31,%eax
136	movl	%edi,%ecx
137	shrl	$1,%edx
138	movl	$7,%esi
139	shll	$30,%edi
# esi = window at shift 0
140	andl	%ebx,%esi
141	shrl	$2,%ecx
142	xorl	%edi,%eax
143	shrl	$3,%ebx
144	movl	$7,%edi
# edi = window at shift 3
145	andl	%ebx,%edi
146	shrl	$3,%ebx
147	xorl	%ecx,%edx
# Window at shift 0 only affects the low dword.
148	xorl	(%esp,%esi,4),%eax
149	movl	$7,%esi
150	andl	%ebx,%esi
151	shrl	$3,%ebx
# Window at shift 3.
152	movl	(%esp,%edi,4),%ebp
153	movl	$7,%edi
154	movl	%ebp,%ecx
155	shll	$3,%ebp
156	andl	%ebx,%edi
157	shrl	$29,%ecx
158	xorl	%ebp,%eax
159	shrl	$3,%ebx
160	xorl	%ecx,%edx
# Window at shift 6.
161	movl	(%esp,%esi,4),%ecx
162	movl	$7,%esi
163	movl	%ecx,%ebp
164	shll	$6,%ecx
165	andl	%ebx,%esi
166	shrl	$26,%ebp
167	xorl	%ecx,%eax
168	shrl	$3,%ebx
169	xorl	%ebp,%edx
# Window at shift 9.
170	movl	(%esp,%edi,4),%ebp
171	movl	$7,%edi
172	movl	%ebp,%ecx
173	shll	$9,%ebp
174	andl	%ebx,%edi
175	shrl	$23,%ecx
176	xorl	%ebp,%eax
177	shrl	$3,%ebx
178	xorl	%ecx,%edx
# Window at shift 12.
179	movl	(%esp,%esi,4),%ecx
180	movl	$7,%esi
181	movl	%ecx,%ebp
182	shll	$12,%ecx
183	andl	%ebx,%esi
184	shrl	$20,%ebp
185	xorl	%ecx,%eax
186	shrl	$3,%ebx
187	xorl	%ebp,%edx
# Window at shift 15.
188	movl	(%esp,%edi,4),%ebp
189	movl	$7,%edi
190	movl	%ebp,%ecx
191	shll	$15,%ebp
192	andl	%ebx,%edi
193	shrl	$17,%ecx
194	xorl	%ebp,%eax
195	shrl	$3,%ebx
196	xorl	%ecx,%edx
# Window at shift 18.
197	movl	(%esp,%esi,4),%ecx
198	movl	$7,%esi
199	movl	%ecx,%ebp
200	shll	$18,%ecx
201	andl	%ebx,%esi
202	shrl	$14,%ebp
203	xorl	%ecx,%eax
204	shrl	$3,%ebx
205	xorl	%ebp,%edx
# Window at shift 21.
206	movl	(%esp,%edi,4),%ebp
207	movl	$7,%edi
208	movl	%ebp,%ecx
209	shll	$21,%ebp
210	andl	%ebx,%edi
211	shrl	$11,%ecx
212	xorl	%ebp,%eax
213	shrl	$3,%ebx
214	xorl	%ecx,%edx
# Window at shift 24.
215	movl	(%esp,%esi,4),%ecx
216	movl	$7,%esi
217	movl	%ecx,%ebp
218	shll	$24,%ecx
219	andl	%ebx,%esi
220	shrl	$8,%ebp
221	xorl	%ecx,%eax
222	shrl	$3,%ebx
223	xorl	%ebp,%edx
# Window at shift 27, interleaved with the final window at shift 30
# (only two bits of b remain for the latter).
224	movl	(%esp,%edi,4),%ebp
225	movl	%ebp,%ecx
226	shll	$27,%ebp
227	movl	(%esp,%esi,4),%edi
228	shrl	$5,%ecx
229	movl	%edi,%esi
230	xorl	%ebp,%eax
231	shll	$30,%edi
232	xorl	%ecx,%edx
233	shrl	$2,%esi
234	xorl	%edi,%eax
235	xorl	%esi,%edx
236	addl	$36,%esp
237	ret
238.size	_mul_1x1_ialu,.-_mul_1x1_ialu
# bn_GF2m_mul_2x2: 64x64 -> 128-bit carry-less multiply; exported entry point.
#
# Arguments are read from the stack (offsets as seen on entry):
#    4(%esp)  r   pointer to four result dwords, r[0] least significant
#    8(%esp)  high dword of the first operand
#   12(%esp)  low  dword of the first operand
#   16(%esp)  high dword of the second operand
#   20(%esp)  low  dword of the second operand
# NOTE(review): the C prototype is not visible in this file; the high/low
# roles above are read off how the partial products are placed into r[].
#
# Three implementations, chosen from the two dwords of OPENSSL_ia32cap_P:
#   - bit 23 of word 0 clear                      -> integer path (.L000ialu)
#   - bit 24 of word 0 or bit 1 of word 1 clear   -> MMX path     (.L001mmx)
#   - otherwise                                   -> single carry-less
#     multiply instruction on %xmm0 (fall-through path)
# NOTE(review): those bit positions look like the CPUID.1 MMX, FXSR and
# PCLMULQDQ flags -- confirm against the OPENSSL_ia32cap documentation.
#
# Both fallback paths save and restore %ebp, %ebx, %esi and %edi and use a
# Karatsuba-style split with three 32x32 multiplies:
#   HI  = a_hi * b_hi,  LO = a_lo * b_lo,  MID = (a_hi^a_lo) * (b_hi^b_lo)
#   result = (HI << 64) ^ ((MID ^ HI ^ LO) << 32) ^ LO
# The address is taken absolutely (non-position-independent form).
239.globl	bn_GF2m_mul_2x2
240.type	bn_GF2m_mul_2x2,@function
241.align	16
242bn_GF2m_mul_2x2:
243.L_bn_GF2m_mul_2x2_begin:
# eax = capability word 0, edx = capability word 1.
244	leal	OPENSSL_ia32cap_P,%edx
245	movl	(%edx),%eax
246	movl	4(%edx),%edx
# Bit 23 of word 0 clear: MMX helper not usable, take the integer path.
247	testl	$8388608,%eax
248	jz	.L000ialu
# Bit 24 of word 0 clear: skip the XMM path.
249	testl	$16777216,%eax
250	jz	.L001mmx
# Bit 1 of word 1 clear: skip the XMM path.
251	testl	$2,%edx
252	jz	.L001mmx
# XMM path: load the four operand dwords at once (unaligned load).
253	movups	8(%esp),%xmm0
# Swap the dwords inside each 64-bit half so every half reads low:high;
# afterwards the low quadword is the first operand, the high quadword the second.
254	shufps	$177,%xmm0,%xmm0
# Hand-encoded bytes 66 0F 3A 44 C0 01 = pclmulqdq $1,%xmm0,%xmm0, which
# multiplies the two quadwords of %xmm0 into a 128-bit product.
# (Presumably emitted as raw bytes for assemblers lacking the mnemonic.)
255.byte	102,15,58,68,192,1
256	movl	4(%esp),%eax
# Store all 128 result bits to r[0..3]; r is not assumed to be aligned.
257	movups	%xmm0,(%eax)
258	ret
259.align	16
260.L001mmx:
# MMX path.  After the four pushes every argument offset grows by 16.
261	pushl	%ebp
262	pushl	%ebx
263	pushl	%esi
264	pushl	%edi
# mm7 = HI = a_hi * b_hi
265	movl	24(%esp),%eax
266	movl	32(%esp),%ebx
267	call	_mul_1x1_mmx
268	movq	%mm0,%mm7
# mm6 = LO = a_lo * b_lo
269	movl	28(%esp),%eax
270	movl	36(%esp),%ebx
271	call	_mul_1x1_mmx
272	movq	%mm0,%mm6
# mm0 = MID = (a_hi ^ a_lo) * (b_hi ^ b_lo)
273	movl	24(%esp),%eax
274	movl	32(%esp),%ebx
275	xorl	28(%esp),%eax
276	xorl	36(%esp),%ebx
277	call	_mul_1x1_mmx
# mm0 = MID ^ HI ^ LO (the middle term); eax = r, read before any pop
# changes the stack offsets.
278	pxor	%mm7,%mm0
279	movl	20(%esp),%eax
280	pxor	%mm6,%mm0
# Split the middle term across the two result quadwords; register
# restores are interleaved with the MMX work.
281	movq	%mm0,%mm2
282	psllq	$32,%mm0
283	popl	%edi
284	psrlq	$32,%mm2
285	popl	%esi
# Low quadword = (middle << 32) ^ LO
286	pxor	%mm6,%mm0
287	popl	%ebx
# High quadword = (middle >> 32) ^ HI
288	pxor	%mm7,%mm2
289	movq	%mm0,(%eax)
290	popl	%ebp
291	movq	%mm2,8(%eax)
# Leave MMX state so the x87 register file is usable by the caller again.
292	emms
293	ret
294.align	16
295.L000ialu:
# Integer path.  Four pushes plus 20 bytes of locals shift every argument
# offset by 36.  Locals: 0/4(%esp) = LO low/high, 8/12(%esp) = HI low/high.
296	pushl	%ebp
297	pushl	%ebx
298	pushl	%esi
299	pushl	%edi
300	subl	$20,%esp
# HI = a_hi * b_hi
301	movl	44(%esp),%eax
302	movl	52(%esp),%ebx
303	call	_mul_1x1_ialu
304	movl	%eax,8(%esp)
305	movl	%edx,12(%esp)
# LO = a_lo * b_lo
306	movl	48(%esp),%eax
307	movl	56(%esp),%ebx
308	call	_mul_1x1_ialu
309	movl	%eax,(%esp)
310	movl	%edx,4(%esp)
# MID = (a_hi ^ a_lo) * (b_hi ^ b_lo), left in %edx:%eax
311	movl	44(%esp),%eax
312	movl	52(%esp),%ebx
313	xorl	48(%esp),%eax
314	xorl	56(%esp),%ebx
315	call	_mul_1x1_ialu
# ebp = r; ebx/ecx = LO low/high; edi/esi = HI low/high.
316	movl	40(%esp),%ebp
317	movl	(%esp),%ebx
318	movl	4(%esp),%ecx
319	movl	8(%esp),%edi
320	movl	12(%esp),%esi
# Fold the partial products together.  On completion:
#   edx = MID.high ^ LO.high ^ HI.low ^ HI.high   -> r[2]
#   eax = MID.low  ^ LO.low  ^ LO.high ^ HI.low   -> r[1]
# (terms XORed in twice along the way cancel out).
321	xorl	%edx,%eax
322	xorl	%ecx,%edx
323	xorl	%ebx,%eax
# r[0] = LO.low
324	movl	%ebx,(%ebp)
325	xorl	%edi,%edx
# r[3] = HI.high
326	movl	%esi,12(%ebp)
327	xorl	%esi,%eax
328	addl	$20,%esp
329	xorl	%esi,%edx
330	popl	%edi
331	xorl	%edx,%eax
332	popl	%esi
333	movl	%edx,8(%ebp)
334	popl	%ebx
335	movl	%eax,4(%ebp)
336	popl	%ebp
337	ret
338.size	bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin
# NUL-terminated ASCII identification string embedded in the object:
#   "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
339.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
340.byte	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
341.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
342.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
343.byte	62,0
# Common-symbol declaration of the capability vector read by bn_GF2m_mul_2x2:
# eight bytes (two dwords), aligned to 4 bytes.
344.comm	OPENSSL_ia32cap_P,8,4
345