x86-mont.S revision 299966
1# $FreeBSD: stable/10/secure/lib/libcrypto/i386/x86-mont.S 299966 2016-05-16 19:30:27Z jkim $
2# Do not modify. This file is auto-generated from x86-mont.pl.
3#ifdef PIC
# bn_mul_mont (PIC build): entry, scratch-frame allocation and stack page probe.
# The ASCII tag emitted after the function reads: Montgomery Multiplication
# for x86, CRYPTOGAMS by <appro@openssl.org>.
# Stack arguments after the four pushes below: arg0..arg5 at 20..40(%esp).
# NOTE(review): presumably (rp, ap, bp, np, n0, num) as in the OpenSSL
# prototype - confirm against the C declaration.
# Returns %eax = 0 without doing any work when arg5 < 4 (signed compare).
4.file	"x86-mont.S"
5.text
6.globl	bn_mul_mont
7.type	bn_mul_mont,@function
8.align	16
9bn_mul_mont:
10.L_bn_mul_mont_begin:
11	pushl	%ebp
12	pushl	%ebx
13	pushl	%esi
14	pushl	%edi
15	xorl	%eax,%eax	# default return value 0
16	movl	40(%esp),%edi	# %edi = arg5
17	cmpl	$4,%edi
18	jl	.L000just_leave	# arg5 < 4: pop the saved registers and return 0
19	leal	20(%esp),%esi	# %esi = address of the arg0 slot
20	leal	24(%esp),%edx	# %edx = address of the arg1 slot
21	movl	%esp,%ebp	# %ebp = %esp as it is right after the pushes
22	addl	$2,%edi
23	negl	%edi
24	leal	-32(%esp,%edi,4),%esp	# %esp -= 32 + 4*(arg5+2)
25	negl	%edi	# %edi = arg5 + 2 again
# Make (%esp - %edx) a multiple of 2048 ...
26	movl	%esp,%eax
27	subl	%edx,%eax
28	andl	$2047,%eax
29	subl	%eax,%esp
# ... then drop %esp by another 2048 when bit 11 of %esp and %edx agree,
# so the two addresses end up differing in bit 11.
30	xorl	%esp,%edx
31	andl	$2048,%edx
32	xorl	$2048,%edx
33	subl	%edx,%esp
34	andl	$-64,%esp	# round %esp down to a multiple of 64
# Page probe: read one dword at every 4096-byte step between the new %esp
# and the old one, starting from the highest offset and ending at offset 0.
35	movl	%ebp,%eax
36	subl	%esp,%eax
37	andl	$-4096,%eax	# %eax = distance to the old %esp, rounded down to 4096
38.L001page_walk:
39	movl	(%esp,%eax,1),%edx	# touch the page; the loaded value is not used
40	subl	$4096,%eax
# Raw byte 0x2E placed directly in front of the jnc encoding, so it acts as
# an instruction prefix. NOTE(review): looks like a branch hint - confirm.
41.byte	46
42	jnc	.L001page_walk	# loop until the subtraction borrows
# Copy the arguments into the new frame and pick the code path.
# On entry %esi = address of the arg0 slot, %edi = arg5 + 2 and
# %ebp = the %esp value from before the frame was carved out.
# Frame layout written here: 4(%esp)=arg0, 8(%esp)=arg1, 12(%esp)=arg2,
# 16(%esp)=arg3, 20(%esp)=dword loaded through arg4, 24(%esp)=old %esp.
43	movl	(%esi),%eax
44	movl	4(%esi),%ebx
45	movl	8(%esi),%ecx
46	movl	12(%esi),%edx
47	movl	16(%esi),%esi	# %esi = arg4 (a pointer) ...
48	movl	(%esi),%esi	# ... replaced by the dword it points at
49	movl	%eax,4(%esp)
50	movl	%ebx,8(%esp)
51	movl	%ecx,12(%esp)
52	movl	%edx,16(%esp)
53	movl	%esi,20(%esp)
54	leal	-3(%edi),%ebx	# %ebx = arg5 - 1
55	movl	%ebp,24(%esp)	# kept so the epilogue can restore %esp
# Position-independent lookup of OPENSSL_ia32cap_P: call/pop yields the
# current address, which is turned into the GOT base and then used to
# fetch the address of the variable from its GOT entry.
56	call	.L002PIC_me_up
57.L002PIC_me_up:
58	popl	%eax
59	leal	_GLOBAL_OFFSET_TABLE_+[.-.L002PIC_me_up](%eax),%eax
60	movl	OPENSSL_ia32cap_P@GOT(%eax),%eax
61	btl	$26,(%eax)	# CF = bit 26 of the first dword of OPENSSL_ia32cap_P
62	jnc	.L003non_sse2	# bit clear: take the integer-only path
# SSE2/MMX path, first pass (i = 0). Notation used in these comments:
# a[], b[], n[] are the dword arrays addressed by the saved arg1, arg2 and
# arg3 pointers, and t[] is the scratch dword array starting at 32(%esp).
# On entry %ebx = arg5 - 1.
63	movl	$-1,%eax
64	movd	%eax,%mm7	# %mm7 = 0x00000000ffffffff, low-dword mask
65	movl	8(%esp),%esi	# %esi = a
66	movl	12(%esp),%edi	# %edi = b
67	movl	16(%esp),%ebp	# %ebp = n
68	xorl	%edx,%edx	# outer index i = 0
69	xorl	%ecx,%ecx	# inner index j = 0
70	movd	(%edi),%mm4	# %mm4 = b[0]
71	movd	(%esi),%mm5	# %mm5 = a[0]
72	movd	(%ebp),%mm3	# %mm3 = n[0]
73	pmuludq	%mm4,%mm5	# %mm5 = a[0]*b[0]
74	movq	%mm5,%mm2
75	movq	%mm5,%mm0
76	pand	%mm7,%mm0	# %mm0 = low dword of a[0]*b[0]
77	pmuludq	20(%esp),%mm5	# low dword of %mm5 = m = low(a[0]*b[0]) * dword saved at 20(%esp)
78	pmuludq	%mm5,%mm3	# %mm3 = n[0]*m
79	paddq	%mm0,%mm3
80	movd	4(%ebp),%mm1	# preload n[1]
81	movd	4(%esi),%mm0	# preload a[1]
82	psrlq	$32,%mm2	# %mm2 = carry of the a*b chain
83	psrlq	$32,%mm3	# %mm3 = carry of the n*m chain
84	incl	%ecx	# j = 1
85.align	16
# Loop over j = 1 .. arg5-2: accumulate a[j]*b[0] in %mm2 and n[j]*m plus
# the low dword of %mm2 in %mm3, then store the low dword of %mm3 to t[j-1].
86.L0041st:
87	pmuludq	%mm4,%mm0	# a[j]*b[0]
88	pmuludq	%mm5,%mm1	# n[j]*m
89	paddq	%mm0,%mm2
90	paddq	%mm1,%mm3
91	movq	%mm2,%mm0
92	pand	%mm7,%mm0
93	movd	4(%ebp,%ecx,4),%mm1	# preload n[j+1]
94	paddq	%mm0,%mm3
95	movd	4(%esi,%ecx,4),%mm0	# preload a[j+1]
96	psrlq	$32,%mm2
97	movd	%mm3,28(%esp,%ecx,4)	# t[j-1]
98	psrlq	$32,%mm3
99	leal	1(%ecx),%ecx
100	cmpl	%ebx,%ecx
101	jl	.L0041st
# Final column j = arg5-1, using the operands preloaded by the last iteration.
102	pmuludq	%mm4,%mm0
103	pmuludq	%mm5,%mm1
104	paddq	%mm0,%mm2
105	paddq	%mm1,%mm3
106	movq	%mm2,%mm0
107	pand	%mm7,%mm0
108	paddq	%mm0,%mm3
109	movd	%mm3,28(%esp,%ecx,4)	# t[arg5-2]
110	psrlq	$32,%mm2
111	psrlq	$32,%mm3
112	paddq	%mm2,%mm3	# sum of the two remaining carries
113	movq	%mm3,32(%esp,%ebx,4)	# 64-bit store covering t[arg5-1] and t[arg5]
114	incl	%edx	# i = 1
# SSE2/MMX path, passes i = 1 .. arg5-1. Notation: a[], b[], n[] are the
# dword arrays addressed by %esi, %edi, %ebp; t[] is the scratch dword array
# starting at 32(%esp); %mm7 holds the low-dword mask; %edx = i.
# Each pass adds a[]*b[i] and n[]*m to t[] and writes the sum back shifted
# down by one dword.
115.L005outer:
116	xorl	%ecx,%ecx	# j = 0
117	movd	(%edi,%edx,4),%mm4	# %mm4 = b[i]
118	movd	(%esi),%mm5	# %mm5 = a[0]
119	movd	32(%esp),%mm6	# %mm6 = t[0]
120	movd	(%ebp),%mm3	# %mm3 = n[0]
121	pmuludq	%mm4,%mm5
122	paddq	%mm6,%mm5	# %mm5 = a[0]*b[i] + t[0]
123	movq	%mm5,%mm0
124	movq	%mm5,%mm2
125	pand	%mm7,%mm0
126	pmuludq	20(%esp),%mm5	# low dword of %mm5 = m for this pass
127	pmuludq	%mm5,%mm3	# n[0]*m
128	paddq	%mm0,%mm3
129	movd	36(%esp),%mm6	# %mm6 = t[1]
130	movd	4(%ebp),%mm1	# preload n[1]
131	movd	4(%esi),%mm0	# preload a[1]
132	psrlq	$32,%mm2
133	psrlq	$32,%mm3
134	paddq	%mm6,%mm2
135	incl	%ecx	# j = 1
136	decl	%ebx	# %ebx doubles as the inner-loop down counter
137.L006inner:
138	pmuludq	%mm4,%mm0	# a[j]*b[i]
139	pmuludq	%mm5,%mm1	# n[j]*m
140	paddq	%mm0,%mm2
141	paddq	%mm1,%mm3
142	movq	%mm2,%mm0
143	movd	36(%esp,%ecx,4),%mm6	# %mm6 = t[j+1]
144	pand	%mm7,%mm0
145	movd	4(%ebp,%ecx,4),%mm1	# preload n[j+1]
146	paddq	%mm0,%mm3
147	movd	4(%esi,%ecx,4),%mm0	# preload a[j+1]
148	psrlq	$32,%mm2
149	movd	%mm3,28(%esp,%ecx,4)	# t[j-1]
150	psrlq	$32,%mm3
151	paddq	%mm6,%mm2
152	decl	%ebx
153	leal	1(%ecx),%ecx	# j++ through lea so the flags from decl survive
154	jnz	.L006inner
155	movl	%ecx,%ebx	# restore %ebx from the final j
# Final column of this pass, using the operands preloaded by the loop.
156	pmuludq	%mm4,%mm0
157	pmuludq	%mm5,%mm1
158	paddq	%mm0,%mm2
159	paddq	%mm1,%mm3
160	movq	%mm2,%mm0
161	pand	%mm7,%mm0
162	paddq	%mm0,%mm3
163	movd	%mm3,28(%esp,%ecx,4)
164	psrlq	$32,%mm2
165	psrlq	$32,%mm3
166	movd	36(%esp,%ebx,4),%mm6	# previous top dword of t[]
167	paddq	%mm2,%mm3
168	paddq	%mm6,%mm3
169	movq	%mm3,32(%esp,%ebx,4)	# 64-bit store of the two top dwords
170	leal	1(%edx),%edx	# i++
171	cmpl	%ebx,%edx
172	jle	.L005outer	# continue while i <= %ebx (signed)
173	emms	# reset the MMX state before leaving this path
174	jmp	.L007common_tail
# Integer-only path: setup and first multiply pass. Notation: a[], b[], n[]
# are the dword arrays addressed by the saved arg1, arg2, arg3 pointers;
# t[] is the scratch dword array starting at 32(%esp).
# On entry %ebx = arg5 - 1.
175.align	16
176.L003non_sse2:
177	movl	8(%esp),%esi	# %esi = a
178	leal	1(%ebx),%ebp	# %ebp = arg5
179	movl	12(%esp),%edi	# %edi = b
180	xorl	%ecx,%ecx	# j = 0
181	movl	%esi,%edx
182	andl	$1,%ebp	# %ebp = arg5 & 1
183	subl	%edi,%edx	# %edx = a - b (pointer difference)
184	leal	4(%edi,%ebx,4),%eax	# %eax = &b[arg5]
185	orl	%edx,%ebp	# ZF set only when a == b and arg5 is even
186	movl	(%edi),%edi	# %edi = b[0]; mov leaves ZF from the orl intact
187	jz	.L008bn_sqr_mont	# same pointer twice with even length: squaring code
188	movl	%eax,28(%esp)	# keep &b[arg5] as the end marker for the walk over b[]
189	movl	(%esi),%eax	# %eax = a[0]
190	xorl	%edx,%edx	# carry = 0
191.align	16
# Loop over j = 0 .. arg5-2: t[j] = low(a[j]*b[0] + carry).
192.L009mull:
193	movl	%edx,%ebp	# %ebp = incoming carry
194	mull	%edi	# %edx:%eax = a[j]*b[0]
195	addl	%eax,%ebp
196	leal	1(%ecx),%ecx	# j++ without touching CF
197	adcl	$0,%edx
198	movl	(%esi,%ecx,4),%eax	# next a[j]
199	cmpl	%ebx,%ecx
200	movl	%ebp,28(%esp,%ecx,4)	# mov keeps the flags of the cmpl for the jl
201	jl	.L009mull
# Last column, then set up the reduction by n[].
202	movl	%edx,%ebp
203	mull	%edi	# a[arg5-1]*b[0]
204	movl	20(%esp),%edi	# dword saved at 20(%esp)
205	addl	%ebp,%eax
206	movl	16(%esp),%esi	# %esi = n
207	adcl	$0,%edx
208	imull	32(%esp),%edi	# %edi = m = t[0] * dword saved at 20(%esp)
209	movl	%eax,32(%esp,%ebx,4)	# t[arg5-1]
210	xorl	%ecx,%ecx
211	movl	%edx,36(%esp,%ebx,4)	# t[arg5]
212	movl	%ecx,40(%esp,%ebx,4)	# t[arg5+1] = 0
213	movl	(%esi),%eax
214	mull	%edi	# n[0]*m
215	addl	32(%esp),%eax	# only the carry out of t[0] + low(n[0]*m) is kept
216	movl	4(%esi),%eax	# %eax = n[1]
217	adcl	$0,%edx
218	incl	%ecx	# j = 1
219	jmp	.L0102ndmadd
# Integer-only path, multiply-accumulate for the current word of b[].
# Notation: a[], n[] are the dword arrays addressed by the saved arg1 and
# arg3 pointers; t[] is the scratch dword array starting at 32(%esp).
# On entry %esi = a, %edi = current b word, %eax = a[0], %ecx = 0,
# %edx = 0 and %ebx = arg5 - 1.
220.align	16
# Loop over j = 0 .. arg5-2: t[j] = low(t[j] + a[j]*%edi + carry).
221.L0111stmadd:
222	movl	%edx,%ebp	# %ebp = incoming carry
223	mull	%edi
224	addl	32(%esp,%ecx,4),%ebp	# + t[j]
225	leal	1(%ecx),%ecx	# j++ without touching CF
226	adcl	$0,%edx
227	addl	%eax,%ebp
228	movl	(%esi,%ecx,4),%eax	# next a[j]
229	adcl	$0,%edx
230	cmpl	%ebx,%ecx
231	movl	%ebp,28(%esp,%ecx,4)	# store t[j]; mov keeps the flags for the jl
232	jl	.L0111stmadd
# Last column, update of the two top dwords, then set up the reduction.
233	movl	%edx,%ebp
234	mull	%edi
235	addl	32(%esp,%ebx,4),%eax	# + t[arg5-1]
236	movl	20(%esp),%edi	# dword saved at 20(%esp)
237	adcl	$0,%edx
238	movl	16(%esp),%esi	# %esi = n
239	addl	%eax,%ebp
240	adcl	$0,%edx
241	imull	32(%esp),%edi	# %edi = m = t[0] * dword saved at 20(%esp)
242	xorl	%ecx,%ecx
243	addl	36(%esp,%ebx,4),%edx	# + t[arg5]
244	movl	%ebp,32(%esp,%ebx,4)	# t[arg5-1]
245	adcl	$0,%ecx	# %ecx = carry out of the addl (the mov between does not touch CF)
246	movl	(%esi),%eax
247	movl	%edx,36(%esp,%ebx,4)	# t[arg5]
248	movl	%ecx,40(%esp,%ebx,4)	# t[arg5+1]
249	mull	%edi	# n[0]*m
250	addl	32(%esp),%eax	# only the carry out of t[0] + low(n[0]*m) is kept
251	movl	4(%esi),%eax	# %eax = n[1]
252	adcl	$0,%edx
253	movl	$1,%ecx	# j = 1, then fall through into the reduction loop
# Integer-only path, reduction step and outer-loop control.
# Notation: n[] is the dword array addressed by %esi; t[] is the scratch
# dword array starting at 32(%esp). On entry %edi = m, %eax = n[1],
# %ecx = 1, %edx = carry and %ebx = arg5 - 1.
254.align	16
# Loop over j = 1 .. arg5-2: t[j-1] = low(t[j] + n[j]*m + carry).
255.L0102ndmadd:
256	movl	%edx,%ebp	# %ebp = incoming carry
257	mull	%edi
258	addl	32(%esp,%ecx,4),%ebp	# + t[j]
259	leal	1(%ecx),%ecx	# j++ without touching CF
260	adcl	$0,%edx
261	addl	%eax,%ebp
262	movl	(%esi,%ecx,4),%eax	# next n[j]
263	adcl	$0,%edx
264	cmpl	%ebx,%ecx
265	movl	%ebp,24(%esp,%ecx,4)	# store one dword lower than it was read
266	jl	.L0102ndmadd
# Last column and the two top dwords, also shifted down by one.
267	movl	%edx,%ebp
268	mull	%edi
269	addl	32(%esp,%ebx,4),%ebp
270	adcl	$0,%edx
271	addl	%eax,%ebp
272	adcl	$0,%edx
273	movl	%ebp,28(%esp,%ebx,4)	# t[arg5-2]
274	xorl	%eax,%eax
275	movl	12(%esp),%ecx	# current position in b[]
276	addl	36(%esp,%ebx,4),%edx
277	adcl	40(%esp,%ebx,4),%eax
278	leal	4(%ecx),%ecx	# advance to the next word of b[]
279	movl	%edx,32(%esp,%ebx,4)	# t[arg5-1]
280	cmpl	28(%esp),%ecx	# compare with the end marker kept at 28(%esp)
281	movl	%eax,36(%esp,%ebx,4)	# t[arg5]
282	je	.L007common_tail	# all words of b[] consumed
# Set up the next multiply-accumulate pass.
283	movl	(%ecx),%edi	# %edi = next word of b[]
284	movl	8(%esp),%esi	# %esi = a
285	movl	%ecx,12(%esp)	# remember the new position in b[]
286	xorl	%ecx,%ecx
287	xorl	%edx,%edx
288	movl	(%esi),%eax	# %eax = a[0]
289	jmp	.L0111stmadd
# Squaring path, first pass. Reached when the saved arg1 and arg2 pointers
# are equal and arg5 is even. Notation: a[], n[] are the dword arrays
# addressed by the saved arg1 and arg3 pointers; t[] is the scratch dword
# array starting at 32(%esp).
# On entry %esi = a, %edi = a[0], %ecx = 0 and %ebx = arg5 - 1.
290.align	16
291.L008bn_sqr_mont:
292	movl	%ebx,(%esp)	# (%esp) = arg5 - 1, loop bound kept in memory
293	movl	%ecx,12(%esp)	# 12(%esp) now holds the outer index i = 0
294	movl	%edi,%eax
295	mull	%edi	# %edx:%eax = a[0]*a[0]
296	movl	%eax,32(%esp)	# t[0]
297	movl	%edx,%ebx
298	shrl	$1,%edx	# halve the high word; it is doubled again below
299	andl	$1,%ebx	# %ebx = the bit dropped by the shift
300	incl	%ecx	# j = 1
301.align	16
# Loop: t[j] = %ebx + 2*low(a[j]*a[0] + carry); the bit shifted out by the
# doubling becomes the next %ebx.
302.L012sqr:
303	movl	(%esi,%ecx,4),%eax	# a[j]
304	movl	%edx,%ebp
305	mull	%edi
306	addl	%ebp,%eax
307	leal	1(%ecx),%ecx	# j++ without touching CF
308	adcl	$0,%edx
309	leal	(%ebx,%eax,2),%ebp
310	shrl	$31,%eax
311	cmpl	(%esp),%ecx
312	movl	%eax,%ebx
313	movl	%ebp,28(%esp,%ecx,4)	# mov keeps the flags of the cmpl for the jl
314	jl	.L012sqr
# Last column, top dwords, then set up the reduction by n[].
315	movl	(%esi,%ecx,4),%eax
316	movl	%edx,%ebp
317	mull	%edi
318	addl	%ebp,%eax
319	movl	20(%esp),%edi	# dword saved at 20(%esp)
320	adcl	$0,%edx
321	movl	16(%esp),%esi	# %esi = n
322	leal	(%ebx,%eax,2),%ebp
323	imull	32(%esp),%edi	# %edi = m = t[0] * dword saved at 20(%esp)
324	shrl	$31,%eax
325	movl	%ebp,32(%esp,%ecx,4)
326	leal	(%eax,%edx,2),%ebp
327	movl	(%esi),%eax
328	shrl	$31,%edx
329	movl	%ebp,36(%esp,%ecx,4)
330	movl	%edx,40(%esp,%ecx,4)
331	mull	%edi	# n[0]*m
332	addl	32(%esp),%eax	# only the carry out of t[0] + low(n[0]*m) is kept
333	movl	%ecx,%ebx	# %ebx = arg5 - 1 again
334	adcl	$0,%edx
335	movl	4(%esi),%eax	# %eax = n[1]
336	movl	$1,%ecx	# j = 1, then fall through into the reduction loop
# Squaring path, reduction step (two columns per iteration) and outer-loop
# control. Notation: n[] is the dword array addressed by %esi; t[] is the
# scratch dword array starting at 32(%esp). On entry %edi = m, %eax = n[1],
# %ecx = 1, %edx = carry and %ebx = arg5 - 1.
337.align	16
338.L0133rdmadd:
# First column of the pair: t[j-1] = low(t[j] + n[j]*m + carry).
339	movl	%edx,%ebp
340	mull	%edi
341	addl	32(%esp,%ecx,4),%ebp
342	adcl	$0,%edx
343	addl	%eax,%ebp
344	movl	4(%esi,%ecx,4),%eax	# n[j+1]
345	adcl	$0,%edx
346	movl	%ebp,28(%esp,%ecx,4)
# Second column of the pair, same recipe for j+1.
347	movl	%edx,%ebp
348	mull	%edi
349	addl	36(%esp,%ecx,4),%ebp
350	leal	2(%ecx),%ecx	# j += 2 without touching CF
351	adcl	$0,%edx
352	addl	%eax,%ebp
353	movl	(%esi,%ecx,4),%eax	# next n[j]
354	adcl	$0,%edx
355	cmpl	%ebx,%ecx
356	movl	%ebp,24(%esp,%ecx,4)	# mov keeps the flags of the cmpl for the jl
357	jl	.L0133rdmadd
# Last column and the two top dwords, shifted down by one like the rest.
358	movl	%edx,%ebp
359	mull	%edi
360	addl	32(%esp,%ebx,4),%ebp
361	adcl	$0,%edx
362	addl	%eax,%ebp
363	adcl	$0,%edx
364	movl	%ebp,28(%esp,%ebx,4)
365	movl	12(%esp),%ecx	# %ecx = outer index i
366	xorl	%eax,%eax
367	movl	8(%esp),%esi	# %esi = a
368	addl	36(%esp,%ebx,4),%edx
369	adcl	40(%esp,%ebx,4),%eax
370	movl	%edx,32(%esp,%ebx,4)
371	cmpl	%ebx,%ecx
372	movl	%eax,36(%esp,%ebx,4)
373	je	.L007common_tail	# i == arg5 - 1: all passes done
# Squaring path, accumulate pass for the next outer index.
# Notation: a[], n[] are the dword arrays addressed by the saved arg1 and
# arg3 pointers; t[] is the scratch dword array starting at 32(%esp).
# On entry %esi = a, %ecx = previous outer index i, %ebx = arg5 - 1 and
# (%esp) = arg5 - 1.
374	movl	4(%esi,%ecx,4),%edi	# %edi = a[i+1]
375	leal	1(%ecx),%ecx	# i++
376	movl	%edi,%eax
377	movl	%ecx,12(%esp)	# store the new i
378	mull	%edi	# %edx:%eax = a[i]*a[i]
379	addl	32(%esp,%ecx,4),%eax	# + t[i]
380	adcl	$0,%edx
381	movl	%eax,32(%esp,%ecx,4)	# t[i]
382	xorl	%ebp,%ebp
383	cmpl	%ebx,%ecx
384	leal	1(%ecx),%ecx	# j = i + 1; lea keeps the flags of the cmpl
385	je	.L014sqrlast	# i == arg5 - 1: no cross products left
386	movl	%edx,%ebx
387	shrl	$1,%edx	# halve the high word; it is doubled again below
388	andl	$1,%ebx	# %ebx = the bit dropped by the shift
389.align	16
# Loop while j <= arg5-1: t[j] += %ebx + 2*low(a[j]*a[i] + carry); the
# overflow of that sum becomes the next %ebx.
390.L015sqradd:
391	movl	(%esi,%ecx,4),%eax	# a[j]
392	movl	%edx,%ebp
393	mull	%edi
394	addl	%ebp,%eax
395	leal	(%eax,%eax,1),%ebp	# doubled low word; lea keeps CF for the adcl
396	adcl	$0,%edx
397	shrl	$31,%eax	# bit shifted out by the doubling
398	addl	32(%esp,%ecx,4),%ebp	# + t[j]
399	leal	1(%ecx),%ecx	# j++ without touching CF
400	adcl	$0,%eax
401	addl	%ebx,%ebp
402	adcl	$0,%eax
403	cmpl	(%esp),%ecx
404	movl	%ebp,28(%esp,%ecx,4)	# store t[j]; mov keeps the flags for the jle
405	movl	%eax,%ebx
406	jle	.L015sqradd
# Double the remaining high word and fold in %ebx: result in %ebp:%edx.
407	movl	%edx,%ebp
408	addl	%edx,%edx
409	shrl	$31,%ebp
410	addl	%ebx,%edx
411	adcl	$0,%ebp
# Add %ebp:%edx into the two top dwords and set up the reduction by n[].
412.L014sqrlast:
413	movl	20(%esp),%edi	# dword saved at 20(%esp)
414	movl	16(%esp),%esi	# %esi = n
415	imull	32(%esp),%edi	# %edi = m = t[0] * dword saved at 20(%esp)
416	addl	32(%esp,%ecx,4),%edx
417	movl	(%esi),%eax
418	adcl	$0,%ebp
419	movl	%edx,32(%esp,%ecx,4)
420	movl	%ebp,36(%esp,%ecx,4)
421	mull	%edi	# n[0]*m
422	addl	32(%esp),%eax	# only the carry out of t[0] + low(n[0]*m) is kept
423	leal	-1(%ecx),%ebx	# %ebx = %ecx - 1, without touching CF
424	adcl	$0,%edx
425	movl	$1,%ecx	# j = 1
426	movl	4(%esi),%eax	# %eax = n[1]
427	jmp	.L0133rdmadd
# Shared tail: conditional final subtraction, copy-out, scratch scrub and
# return. Notation: r[] and n[] are the dword arrays addressed by the saved
# arg0 and arg3 pointers; t[] is the scratch dword array starting at
# 32(%esp). On entry %ebx = arg5 - 1.
428.align	16
429.L007common_tail:
430	movl	16(%esp),%ebp	# %ebp = n
431	movl	4(%esp),%edi	# %edi = r
432	leal	32(%esp),%esi	# %esi = &t[0]
433	movl	(%esi),%eax	# %eax = t[0]
434	movl	%ebx,%ecx	# down counter
435	xorl	%edx,%edx	# index = 0; also clears CF for the first sbbl
436.align	16
# r[k] = t[k] - n[k] - borrow for k = 0 .. arg5-1. The decl and the lea do
# not modify CF, so the borrow travels from one sbbl to the next.
437.L016sub:
438	sbbl	(%ebp,%edx,4),%eax
439	movl	%eax,(%edi,%edx,4)
440	decl	%ecx
441	movl	4(%esi,%edx,4),%eax	# next t[k]
442	leal	1(%edx),%edx
443	jge	.L016sub	# loop while the counter is still >= 0 (signed)
# %eax = t[arg5] minus the final borrow, used as a select mask.
# NOTE(review): the mask logic expects this to be 0 or all-ones - confirm.
444	sbbl	$0,%eax
445	andl	%eax,%esi	# t when the mask is all-ones, else 0
446	notl	%eax
447	movl	%edi,%ebp
448	andl	%eax,%ebp	# r when the mask is 0, else 0
449	orl	%ebp,%esi	# %esi = copy source, picked without a branch
450.align	16
# Copy the selected source into r[] from the top index down and overwrite
# each t[] dword with %ecx, which is -1 once the subtraction loop has ended.
451.L017copy:
452	movl	(%esi,%ebx,4),%eax
453	movl	%eax,(%edi,%ebx,4)
454	movl	%ecx,32(%esp,%ebx,4)
455	decl	%ebx
456	jge	.L017copy
457	movl	24(%esp),%esp	# release the frame: reload the %esp saved at 24(%esp)
458	movl	$1,%eax	# return value 1
459.L000just_leave:
460	popl	%edi
461	popl	%esi
462	popl	%ebx
463	popl	%ebp
464	ret
465.size	bn_mul_mont,.-.L_bn_mul_mont_begin
# NUL-terminated ASCII tag: Montgomery Multiplication for x86, CRYPTOGAMS by
# <appro@openssl.org>
466.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
467.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
468.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
469.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
470.byte	111,114,103,62,0
# Common symbol for the capability vector: 8 bytes, 4-byte alignment.
471.comm	OPENSSL_ia32cap_P,8,4
472#else
# bn_mul_mont (non-PIC build): entry, scratch-frame allocation and stack
# page probe. The ASCII tag emitted after the function reads: Montgomery
# Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>.
# Stack arguments after the four pushes below: arg0..arg5 at 20..40(%esp).
# NOTE(review): presumably (rp, ap, bp, np, n0, num) as in the OpenSSL
# prototype - confirm against the C declaration.
# Returns %eax = 0 without doing any work when arg5 < 4 (signed compare).
473.file	"x86-mont.S"
474.text
475.globl	bn_mul_mont
476.type	bn_mul_mont,@function
477.align	16
478bn_mul_mont:
479.L_bn_mul_mont_begin:
480	pushl	%ebp
481	pushl	%ebx
482	pushl	%esi
483	pushl	%edi
484	xorl	%eax,%eax	# default return value 0
485	movl	40(%esp),%edi	# %edi = arg5
486	cmpl	$4,%edi
487	jl	.L000just_leave	# arg5 < 4: pop the saved registers and return 0
488	leal	20(%esp),%esi	# %esi = address of the arg0 slot
489	leal	24(%esp),%edx	# %edx = address of the arg1 slot
490	movl	%esp,%ebp	# %ebp = %esp as it is right after the pushes
491	addl	$2,%edi
492	negl	%edi
493	leal	-32(%esp,%edi,4),%esp	# %esp -= 32 + 4*(arg5+2)
494	negl	%edi	# %edi = arg5 + 2 again
# Make (%esp - %edx) a multiple of 2048 ...
495	movl	%esp,%eax
496	subl	%edx,%eax
497	andl	$2047,%eax
498	subl	%eax,%esp
# ... then drop %esp by another 2048 when bit 11 of %esp and %edx agree,
# so the two addresses end up differing in bit 11.
499	xorl	%esp,%edx
500	andl	$2048,%edx
501	xorl	$2048,%edx
502	subl	%edx,%esp
503	andl	$-64,%esp	# round %esp down to a multiple of 64
# Page probe: read one dword at every 4096-byte step between the new %esp
# and the old one, starting from the highest offset and ending at offset 0.
504	movl	%ebp,%eax
505	subl	%esp,%eax
506	andl	$-4096,%eax	# %eax = distance to the old %esp, rounded down to 4096
507.L001page_walk:
508	movl	(%esp,%eax,1),%edx	# touch the page; the loaded value is not used
509	subl	$4096,%eax
# Raw byte 0x2E placed directly in front of the jnc encoding, so it acts as
# an instruction prefix. NOTE(review): looks like a branch hint - confirm.
510.byte	46
511	jnc	.L001page_walk	# loop until the subtraction borrows
# Copy the arguments into the new frame and pick the code path.
# On entry %esi = address of the arg0 slot, %edi = arg5 + 2 and
# %ebp = the %esp value from before the frame was carved out.
# Frame layout written here: 4(%esp)=arg0, 8(%esp)=arg1, 12(%esp)=arg2,
# 16(%esp)=arg3, 20(%esp)=dword loaded through arg4, 24(%esp)=old %esp.
512	movl	(%esi),%eax
513	movl	4(%esi),%ebx
514	movl	8(%esi),%ecx
515	movl	12(%esi),%edx
516	movl	16(%esi),%esi	# %esi = arg4 (a pointer) ...
517	movl	(%esi),%esi	# ... replaced by the dword it points at
518	movl	%eax,4(%esp)
519	movl	%ebx,8(%esp)
520	movl	%ecx,12(%esp)
521	movl	%edx,16(%esp)
522	movl	%esi,20(%esp)
523	leal	-3(%edi),%ebx	# %ebx = arg5 - 1
524	movl	%ebp,24(%esp)	# kept so the epilogue can restore %esp
525	leal	OPENSSL_ia32cap_P,%eax	# absolute address of the capability vector
526	btl	$26,(%eax)	# CF = bit 26 of the first dword of OPENSSL_ia32cap_P
527	jnc	.L002non_sse2	# bit clear: take the integer-only path
# SSE2/MMX path, first pass (i = 0). Notation used in these comments:
# a[], b[], n[] are the dword arrays addressed by the saved arg1, arg2 and
# arg3 pointers, and t[] is the scratch dword array starting at 32(%esp).
# On entry %ebx = arg5 - 1.
528	movl	$-1,%eax
529	movd	%eax,%mm7	# %mm7 = 0x00000000ffffffff, low-dword mask
530	movl	8(%esp),%esi	# %esi = a
531	movl	12(%esp),%edi	# %edi = b
532	movl	16(%esp),%ebp	# %ebp = n
533	xorl	%edx,%edx	# outer index i = 0
534	xorl	%ecx,%ecx	# inner index j = 0
535	movd	(%edi),%mm4	# %mm4 = b[0]
536	movd	(%esi),%mm5	# %mm5 = a[0]
537	movd	(%ebp),%mm3	# %mm3 = n[0]
538	pmuludq	%mm4,%mm5	# %mm5 = a[0]*b[0]
539	movq	%mm5,%mm2
540	movq	%mm5,%mm0
541	pand	%mm7,%mm0	# %mm0 = low dword of a[0]*b[0]
542	pmuludq	20(%esp),%mm5	# low dword of %mm5 = m = low(a[0]*b[0]) * dword saved at 20(%esp)
543	pmuludq	%mm5,%mm3	# %mm3 = n[0]*m
544	paddq	%mm0,%mm3
545	movd	4(%ebp),%mm1	# preload n[1]
546	movd	4(%esi),%mm0	# preload a[1]
547	psrlq	$32,%mm2	# %mm2 = carry of the a*b chain
548	psrlq	$32,%mm3	# %mm3 = carry of the n*m chain
549	incl	%ecx	# j = 1
550.align	16
# Loop over j = 1 .. arg5-2: accumulate a[j]*b[0] in %mm2 and n[j]*m plus
# the low dword of %mm2 in %mm3, then store the low dword of %mm3 to t[j-1].
551.L0031st:
552	pmuludq	%mm4,%mm0	# a[j]*b[0]
553	pmuludq	%mm5,%mm1	# n[j]*m
554	paddq	%mm0,%mm2
555	paddq	%mm1,%mm3
556	movq	%mm2,%mm0
557	pand	%mm7,%mm0
558	movd	4(%ebp,%ecx,4),%mm1	# preload n[j+1]
559	paddq	%mm0,%mm3
560	movd	4(%esi,%ecx,4),%mm0	# preload a[j+1]
561	psrlq	$32,%mm2
562	movd	%mm3,28(%esp,%ecx,4)	# t[j-1]
563	psrlq	$32,%mm3
564	leal	1(%ecx),%ecx
565	cmpl	%ebx,%ecx
566	jl	.L0031st
# Final column j = arg5-1, using the operands preloaded by the last iteration.
567	pmuludq	%mm4,%mm0
568	pmuludq	%mm5,%mm1
569	paddq	%mm0,%mm2
570	paddq	%mm1,%mm3
571	movq	%mm2,%mm0
572	pand	%mm7,%mm0
573	paddq	%mm0,%mm3
574	movd	%mm3,28(%esp,%ecx,4)	# t[arg5-2]
575	psrlq	$32,%mm2
576	psrlq	$32,%mm3
577	paddq	%mm2,%mm3	# sum of the two remaining carries
578	movq	%mm3,32(%esp,%ebx,4)	# 64-bit store covering t[arg5-1] and t[arg5]
579	incl	%edx	# i = 1
# SSE2/MMX path, passes i = 1 .. arg5-1. Notation: a[], b[], n[] are the
# dword arrays addressed by %esi, %edi, %ebp; t[] is the scratch dword array
# starting at 32(%esp); %mm7 holds the low-dword mask; %edx = i.
# Each pass adds a[]*b[i] and n[]*m to t[] and writes the sum back shifted
# down by one dword.
580.L004outer:
581	xorl	%ecx,%ecx	# j = 0
582	movd	(%edi,%edx,4),%mm4	# %mm4 = b[i]
583	movd	(%esi),%mm5	# %mm5 = a[0]
584	movd	32(%esp),%mm6	# %mm6 = t[0]
585	movd	(%ebp),%mm3	# %mm3 = n[0]
586	pmuludq	%mm4,%mm5
587	paddq	%mm6,%mm5	# %mm5 = a[0]*b[i] + t[0]
588	movq	%mm5,%mm0
589	movq	%mm5,%mm2
590	pand	%mm7,%mm0
591	pmuludq	20(%esp),%mm5	# low dword of %mm5 = m for this pass
592	pmuludq	%mm5,%mm3	# n[0]*m
593	paddq	%mm0,%mm3
594	movd	36(%esp),%mm6	# %mm6 = t[1]
595	movd	4(%ebp),%mm1	# preload n[1]
596	movd	4(%esi),%mm0	# preload a[1]
597	psrlq	$32,%mm2
598	psrlq	$32,%mm3
599	paddq	%mm6,%mm2
600	incl	%ecx	# j = 1
601	decl	%ebx	# %ebx doubles as the inner-loop down counter
602.L005inner:
603	pmuludq	%mm4,%mm0	# a[j]*b[i]
604	pmuludq	%mm5,%mm1	# n[j]*m
605	paddq	%mm0,%mm2
606	paddq	%mm1,%mm3
607	movq	%mm2,%mm0
608	movd	36(%esp,%ecx,4),%mm6	# %mm6 = t[j+1]
609	pand	%mm7,%mm0
610	movd	4(%ebp,%ecx,4),%mm1	# preload n[j+1]
611	paddq	%mm0,%mm3
612	movd	4(%esi,%ecx,4),%mm0	# preload a[j+1]
613	psrlq	$32,%mm2
614	movd	%mm3,28(%esp,%ecx,4)	# t[j-1]
615	psrlq	$32,%mm3
616	paddq	%mm6,%mm2
617	decl	%ebx
618	leal	1(%ecx),%ecx	# j++ through lea so the flags from decl survive
619	jnz	.L005inner
620	movl	%ecx,%ebx	# restore %ebx from the final j
# Final column of this pass, using the operands preloaded by the loop.
621	pmuludq	%mm4,%mm0
622	pmuludq	%mm5,%mm1
623	paddq	%mm0,%mm2
624	paddq	%mm1,%mm3
625	movq	%mm2,%mm0
626	pand	%mm7,%mm0
627	paddq	%mm0,%mm3
628	movd	%mm3,28(%esp,%ecx,4)
629	psrlq	$32,%mm2
630	psrlq	$32,%mm3
631	movd	36(%esp,%ebx,4),%mm6	# previous top dword of t[]
632	paddq	%mm2,%mm3
633	paddq	%mm6,%mm3
634	movq	%mm3,32(%esp,%ebx,4)	# 64-bit store of the two top dwords
635	leal	1(%edx),%edx	# i++
636	cmpl	%ebx,%edx
637	jle	.L004outer	# continue while i <= %ebx (signed)
638	emms	# reset the MMX state before leaving this path
639	jmp	.L006common_tail
# Integer-only path: setup and first multiply pass. Notation: a[], b[], n[]
# are the dword arrays addressed by the saved arg1, arg2, arg3 pointers;
# t[] is the scratch dword array starting at 32(%esp).
# On entry %ebx = arg5 - 1.
640.align	16
641.L002non_sse2:
642	movl	8(%esp),%esi	# %esi = a
643	leal	1(%ebx),%ebp	# %ebp = arg5
644	movl	12(%esp),%edi	# %edi = b
645	xorl	%ecx,%ecx	# j = 0
646	movl	%esi,%edx
647	andl	$1,%ebp	# %ebp = arg5 & 1
648	subl	%edi,%edx	# %edx = a - b (pointer difference)
649	leal	4(%edi,%ebx,4),%eax	# %eax = &b[arg5]
650	orl	%edx,%ebp	# ZF set only when a == b and arg5 is even
651	movl	(%edi),%edi	# %edi = b[0]; mov leaves ZF from the orl intact
652	jz	.L007bn_sqr_mont	# same pointer twice with even length: squaring code
653	movl	%eax,28(%esp)	# keep &b[arg5] as the end marker for the walk over b[]
654	movl	(%esi),%eax	# %eax = a[0]
655	xorl	%edx,%edx	# carry = 0
656.align	16
# Loop over j = 0 .. arg5-2: t[j] = low(a[j]*b[0] + carry).
657.L008mull:
658	movl	%edx,%ebp	# %ebp = incoming carry
659	mull	%edi	# %edx:%eax = a[j]*b[0]
660	addl	%eax,%ebp
661	leal	1(%ecx),%ecx	# j++ without touching CF
662	adcl	$0,%edx
663	movl	(%esi,%ecx,4),%eax	# next a[j]
664	cmpl	%ebx,%ecx
665	movl	%ebp,28(%esp,%ecx,4)	# mov keeps the flags of the cmpl for the jl
666	jl	.L008mull
# Last column, then set up the reduction by n[].
667	movl	%edx,%ebp
668	mull	%edi	# a[arg5-1]*b[0]
669	movl	20(%esp),%edi	# dword saved at 20(%esp)
670	addl	%ebp,%eax
671	movl	16(%esp),%esi	# %esi = n
672	adcl	$0,%edx
673	imull	32(%esp),%edi	# %edi = m = t[0] * dword saved at 20(%esp)
674	movl	%eax,32(%esp,%ebx,4)	# t[arg5-1]
675	xorl	%ecx,%ecx
676	movl	%edx,36(%esp,%ebx,4)	# t[arg5]
677	movl	%ecx,40(%esp,%ebx,4)	# t[arg5+1] = 0
678	movl	(%esi),%eax
679	mull	%edi	# n[0]*m
680	addl	32(%esp),%eax	# only the carry out of t[0] + low(n[0]*m) is kept
681	movl	4(%esi),%eax	# %eax = n[1]
682	adcl	$0,%edx
683	incl	%ecx	# j = 1
684	jmp	.L0092ndmadd
# Integer-only path, multiply-accumulate for the current word of b[].
# Notation: a[], n[] are the dword arrays addressed by the saved arg1 and
# arg3 pointers; t[] is the scratch dword array starting at 32(%esp).
# On entry %esi = a, %edi = current b word, %eax = a[0], %ecx = 0,
# %edx = 0 and %ebx = arg5 - 1.
685.align	16
# Loop over j = 0 .. arg5-2: t[j] = low(t[j] + a[j]*%edi + carry).
686.L0101stmadd:
687	movl	%edx,%ebp	# %ebp = incoming carry
688	mull	%edi
689	addl	32(%esp,%ecx,4),%ebp	# + t[j]
690	leal	1(%ecx),%ecx	# j++ without touching CF
691	adcl	$0,%edx
692	addl	%eax,%ebp
693	movl	(%esi,%ecx,4),%eax	# next a[j]
694	adcl	$0,%edx
695	cmpl	%ebx,%ecx
696	movl	%ebp,28(%esp,%ecx,4)	# store t[j]; mov keeps the flags for the jl
697	jl	.L0101stmadd
# Last column, update of the two top dwords, then set up the reduction.
698	movl	%edx,%ebp
699	mull	%edi
700	addl	32(%esp,%ebx,4),%eax	# + t[arg5-1]
701	movl	20(%esp),%edi	# dword saved at 20(%esp)
702	adcl	$0,%edx
703	movl	16(%esp),%esi	# %esi = n
704	addl	%eax,%ebp
705	adcl	$0,%edx
706	imull	32(%esp),%edi	# %edi = m = t[0] * dword saved at 20(%esp)
707	xorl	%ecx,%ecx
708	addl	36(%esp,%ebx,4),%edx	# + t[arg5]
709	movl	%ebp,32(%esp,%ebx,4)	# t[arg5-1]
710	adcl	$0,%ecx	# %ecx = carry out of the addl (the mov between does not touch CF)
711	movl	(%esi),%eax
712	movl	%edx,36(%esp,%ebx,4)	# t[arg5]
713	movl	%ecx,40(%esp,%ebx,4)	# t[arg5+1]
714	mull	%edi	# n[0]*m
715	addl	32(%esp),%eax	# only the carry out of t[0] + low(n[0]*m) is kept
716	movl	4(%esi),%eax	# %eax = n[1]
717	adcl	$0,%edx
718	movl	$1,%ecx	# j = 1, then fall through into the reduction loop
# Integer-only path, reduction step and outer-loop control.
# Notation: n[] is the dword array addressed by %esi; t[] is the scratch
# dword array starting at 32(%esp). On entry %edi = m, %eax = n[1],
# %ecx = 1, %edx = carry and %ebx = arg5 - 1.
719.align	16
# Loop over j = 1 .. arg5-2: t[j-1] = low(t[j] + n[j]*m + carry).
720.L0092ndmadd:
721	movl	%edx,%ebp	# %ebp = incoming carry
722	mull	%edi
723	addl	32(%esp,%ecx,4),%ebp	# + t[j]
724	leal	1(%ecx),%ecx	# j++ without touching CF
725	adcl	$0,%edx
726	addl	%eax,%ebp
727	movl	(%esi,%ecx,4),%eax	# next n[j]
728	adcl	$0,%edx
729	cmpl	%ebx,%ecx
730	movl	%ebp,24(%esp,%ecx,4)	# store one dword lower than it was read
731	jl	.L0092ndmadd
# Last column and the two top dwords, also shifted down by one.
732	movl	%edx,%ebp
733	mull	%edi
734	addl	32(%esp,%ebx,4),%ebp
735	adcl	$0,%edx
736	addl	%eax,%ebp
737	adcl	$0,%edx
738	movl	%ebp,28(%esp,%ebx,4)	# t[arg5-2]
739	xorl	%eax,%eax
740	movl	12(%esp),%ecx	# current position in b[]
741	addl	36(%esp,%ebx,4),%edx
742	adcl	40(%esp,%ebx,4),%eax
743	leal	4(%ecx),%ecx	# advance to the next word of b[]
744	movl	%edx,32(%esp,%ebx,4)	# t[arg5-1]
745	cmpl	28(%esp),%ecx	# compare with the end marker kept at 28(%esp)
746	movl	%eax,36(%esp,%ebx,4)	# t[arg5]
747	je	.L006common_tail	# all words of b[] consumed
# Set up the next multiply-accumulate pass.
748	movl	(%ecx),%edi	# %edi = next word of b[]
749	movl	8(%esp),%esi	# %esi = a
750	movl	%ecx,12(%esp)	# remember the new position in b[]
751	xorl	%ecx,%ecx
752	xorl	%edx,%edx
753	movl	(%esi),%eax	# %eax = a[0]
754	jmp	.L0101stmadd
# Squaring path, first pass. Reached when the saved arg1 and arg2 pointers
# are equal and arg5 is even. Notation: a[], n[] are the dword arrays
# addressed by the saved arg1 and arg3 pointers; t[] is the scratch dword
# array starting at 32(%esp).
# On entry %esi = a, %edi = a[0], %ecx = 0 and %ebx = arg5 - 1.
755.align	16
756.L007bn_sqr_mont:
757	movl	%ebx,(%esp)	# (%esp) = arg5 - 1, loop bound kept in memory
758	movl	%ecx,12(%esp)	# 12(%esp) now holds the outer index i = 0
759	movl	%edi,%eax
760	mull	%edi	# %edx:%eax = a[0]*a[0]
761	movl	%eax,32(%esp)	# t[0]
762	movl	%edx,%ebx
763	shrl	$1,%edx	# halve the high word; it is doubled again below
764	andl	$1,%ebx	# %ebx = the bit dropped by the shift
765	incl	%ecx	# j = 1
766.align	16
# Loop: t[j] = %ebx + 2*low(a[j]*a[0] + carry); the bit shifted out by the
# doubling becomes the next %ebx.
767.L011sqr:
768	movl	(%esi,%ecx,4),%eax	# a[j]
769	movl	%edx,%ebp
770	mull	%edi
771	addl	%ebp,%eax
772	leal	1(%ecx),%ecx	# j++ without touching CF
773	adcl	$0,%edx
774	leal	(%ebx,%eax,2),%ebp
775	shrl	$31,%eax
776	cmpl	(%esp),%ecx
777	movl	%eax,%ebx
778	movl	%ebp,28(%esp,%ecx,4)	# mov keeps the flags of the cmpl for the jl
779	jl	.L011sqr
# Last column, top dwords, then set up the reduction by n[].
780	movl	(%esi,%ecx,4),%eax
781	movl	%edx,%ebp
782	mull	%edi
783	addl	%ebp,%eax
784	movl	20(%esp),%edi	# dword saved at 20(%esp)
785	adcl	$0,%edx
786	movl	16(%esp),%esi	# %esi = n
787	leal	(%ebx,%eax,2),%ebp
788	imull	32(%esp),%edi	# %edi = m = t[0] * dword saved at 20(%esp)
789	shrl	$31,%eax
790	movl	%ebp,32(%esp,%ecx,4)
791	leal	(%eax,%edx,2),%ebp
792	movl	(%esi),%eax
793	shrl	$31,%edx
794	movl	%ebp,36(%esp,%ecx,4)
795	movl	%edx,40(%esp,%ecx,4)
796	mull	%edi	# n[0]*m
797	addl	32(%esp),%eax	# only the carry out of t[0] + low(n[0]*m) is kept
798	movl	%ecx,%ebx	# %ebx = arg5 - 1 again
799	adcl	$0,%edx
800	movl	4(%esi),%eax	# %eax = n[1]
801	movl	$1,%ecx	# j = 1, then fall through into the reduction loop
# Squaring path, reduction step (two columns per iteration) and outer-loop
# control. Notation: n[] is the dword array addressed by %esi; t[] is the
# scratch dword array starting at 32(%esp). On entry %edi = m, %eax = n[1],
# %ecx = 1, %edx = carry and %ebx = arg5 - 1.
802.align	16
803.L0123rdmadd:
# First column of the pair: t[j-1] = low(t[j] + n[j]*m + carry).
804	movl	%edx,%ebp
805	mull	%edi
806	addl	32(%esp,%ecx,4),%ebp
807	adcl	$0,%edx
808	addl	%eax,%ebp
809	movl	4(%esi,%ecx,4),%eax	# n[j+1]
810	adcl	$0,%edx
811	movl	%ebp,28(%esp,%ecx,4)
# Second column of the pair, same recipe for j+1.
812	movl	%edx,%ebp
813	mull	%edi
814	addl	36(%esp,%ecx,4),%ebp
815	leal	2(%ecx),%ecx	# j += 2 without touching CF
816	adcl	$0,%edx
817	addl	%eax,%ebp
818	movl	(%esi,%ecx,4),%eax	# next n[j]
819	adcl	$0,%edx
820	cmpl	%ebx,%ecx
821	movl	%ebp,24(%esp,%ecx,4)	# mov keeps the flags of the cmpl for the jl
822	jl	.L0123rdmadd
# Last column and the two top dwords, shifted down by one like the rest.
823	movl	%edx,%ebp
824	mull	%edi
825	addl	32(%esp,%ebx,4),%ebp
826	adcl	$0,%edx
827	addl	%eax,%ebp
828	adcl	$0,%edx
829	movl	%ebp,28(%esp,%ebx,4)
830	movl	12(%esp),%ecx	# %ecx = outer index i
831	xorl	%eax,%eax
832	movl	8(%esp),%esi	# %esi = a
833	addl	36(%esp,%ebx,4),%edx
834	adcl	40(%esp,%ebx,4),%eax
835	movl	%edx,32(%esp,%ebx,4)
836	cmpl	%ebx,%ecx
837	movl	%eax,36(%esp,%ebx,4)
838	je	.L006common_tail	# i == arg5 - 1: all passes done
# Squaring path, accumulate pass for the next outer index.
# Notation: a[], n[] are the dword arrays addressed by the saved arg1 and
# arg3 pointers; t[] is the scratch dword array starting at 32(%esp).
# On entry %esi = a, %ecx = previous outer index i, %ebx = arg5 - 1 and
# (%esp) = arg5 - 1.
839	movl	4(%esi,%ecx,4),%edi	# %edi = a[i+1]
840	leal	1(%ecx),%ecx	# i++
841	movl	%edi,%eax
842	movl	%ecx,12(%esp)	# store the new i
843	mull	%edi	# %edx:%eax = a[i]*a[i]
844	addl	32(%esp,%ecx,4),%eax	# + t[i]
845	adcl	$0,%edx
846	movl	%eax,32(%esp,%ecx,4)	# t[i]
847	xorl	%ebp,%ebp
848	cmpl	%ebx,%ecx
849	leal	1(%ecx),%ecx	# j = i + 1; lea keeps the flags of the cmpl
850	je	.L013sqrlast	# i == arg5 - 1: no cross products left
851	movl	%edx,%ebx
852	shrl	$1,%edx	# halve the high word; it is doubled again below
853	andl	$1,%ebx	# %ebx = the bit dropped by the shift
854.align	16
# Loop while j <= arg5-1: t[j] += %ebx + 2*low(a[j]*a[i] + carry); the
# overflow of that sum becomes the next %ebx.
855.L014sqradd:
856	movl	(%esi,%ecx,4),%eax	# a[j]
857	movl	%edx,%ebp
858	mull	%edi
859	addl	%ebp,%eax
860	leal	(%eax,%eax,1),%ebp	# doubled low word; lea keeps CF for the adcl
861	adcl	$0,%edx
862	shrl	$31,%eax	# bit shifted out by the doubling
863	addl	32(%esp,%ecx,4),%ebp	# + t[j]
864	leal	1(%ecx),%ecx	# j++ without touching CF
865	adcl	$0,%eax
866	addl	%ebx,%ebp
867	adcl	$0,%eax
868	cmpl	(%esp),%ecx
869	movl	%ebp,28(%esp,%ecx,4)	# store t[j]; mov keeps the flags for the jle
870	movl	%eax,%ebx
871	jle	.L014sqradd
# Double the remaining high word and fold in %ebx: result in %ebp:%edx.
872	movl	%edx,%ebp
873	addl	%edx,%edx
874	shrl	$31,%ebp
875	addl	%ebx,%edx
876	adcl	$0,%ebp
# Add %ebp:%edx into the two top dwords and set up the reduction by n[].
877.L013sqrlast:
878	movl	20(%esp),%edi	# dword saved at 20(%esp)
879	movl	16(%esp),%esi	# %esi = n
880	imull	32(%esp),%edi	# %edi = m = t[0] * dword saved at 20(%esp)
881	addl	32(%esp,%ecx,4),%edx
882	movl	(%esi),%eax
883	adcl	$0,%ebp
884	movl	%edx,32(%esp,%ecx,4)
885	movl	%ebp,36(%esp,%ecx,4)
886	mull	%edi	# n[0]*m
887	addl	32(%esp),%eax	# only the carry out of t[0] + low(n[0]*m) is kept
888	leal	-1(%ecx),%ebx	# %ebx = %ecx - 1, without touching CF
889	adcl	$0,%edx
890	movl	$1,%ecx	# j = 1
891	movl	4(%esi),%eax	# %eax = n[1]
892	jmp	.L0123rdmadd
# Shared tail: conditional final subtraction, copy-out, scratch scrub and
# return. Notation: r[] and n[] are the dword arrays addressed by the saved
# arg0 and arg3 pointers; t[] is the scratch dword array starting at
# 32(%esp). On entry %ebx = arg5 - 1.
893.align	16
894.L006common_tail:
895	movl	16(%esp),%ebp	# %ebp = n
896	movl	4(%esp),%edi	# %edi = r
897	leal	32(%esp),%esi	# %esi = &t[0]
898	movl	(%esi),%eax	# %eax = t[0]
899	movl	%ebx,%ecx	# down counter
900	xorl	%edx,%edx	# index = 0; also clears CF for the first sbbl
901.align	16
# r[k] = t[k] - n[k] - borrow for k = 0 .. arg5-1. The decl and the lea do
# not modify CF, so the borrow travels from one sbbl to the next.
902.L015sub:
903	sbbl	(%ebp,%edx,4),%eax
904	movl	%eax,(%edi,%edx,4)
905	decl	%ecx
906	movl	4(%esi,%edx,4),%eax	# next t[k]
907	leal	1(%edx),%edx
908	jge	.L015sub	# loop while the counter is still >= 0 (signed)
# %eax = t[arg5] minus the final borrow, used as a select mask.
# NOTE(review): the mask logic expects this to be 0 or all-ones - confirm.
909	sbbl	$0,%eax
910	andl	%eax,%esi	# t when the mask is all-ones, else 0
911	notl	%eax
912	movl	%edi,%ebp
913	andl	%eax,%ebp	# r when the mask is 0, else 0
914	orl	%ebp,%esi	# %esi = copy source, picked without a branch
915.align	16
# Copy the selected source into r[] from the top index down and overwrite
# each t[] dword with %ecx, which is -1 once the subtraction loop has ended.
916.L016copy:
917	movl	(%esi,%ebx,4),%eax
918	movl	%eax,(%edi,%ebx,4)
919	movl	%ecx,32(%esp,%ebx,4)
920	decl	%ebx
921	jge	.L016copy
922	movl	24(%esp),%esp	# release the frame: reload the %esp saved at 24(%esp)
923	movl	$1,%eax	# return value 1
924.L000just_leave:
925	popl	%edi
926	popl	%esi
927	popl	%ebx
928	popl	%ebp
929	ret
930.size	bn_mul_mont,.-.L_bn_mul_mont_begin
# NUL-terminated ASCII tag: Montgomery Multiplication for x86, CRYPTOGAMS by
# <appro@openssl.org>
931.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
932.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
933.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
934.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
935.byte	111,114,103,62,0
# Common symbol for the capability vector: 8 bytes, 4-byte alignment.
936.comm	OPENSSL_ia32cap_P,8,4
937#endif
938