1238405Sjkim	# $FreeBSD$
2238405Sjkim.text
3238405Sjkim
4238405Sjkim.globl	bn_mul_mont
5238405Sjkim.type	bn_mul_mont,@function
6238405Sjkim.align	16
#-----------------------------------------------------------------------
# bn_mul_mont -- entry point, size dispatch, and the generic code path
# that processes one 64-bit word per inner-loop step.
#
# Registers on entry, as the code below uses them:
#   %rdi  destination array; num 64-bit words are stored through it
#   %rsi  first source array   (a[] in these comments)
#   %rdx  second source array  (b[]; copied to %r12 because mulq
#         overwrites %rdx)
#   %rcx  third source array   (n[]); multiplied by the per-row factor m
#         and subtracted from the result in .Lsub
#   %r8   pointer to one 64-bit word (n0 in these comments)
#   %r9d  word count num; zero-extended to 64 bits before use
# Return value: %rax = 1.
#
# NOTE(review): the symbol name and the loop structure suggest word-by-
# word Montgomery multiplication, with n0 a precomputed constant derived
# from n[0] -- confirm against the C prototype and the callers.
#
# Dispatch:
#   num % 4 != 0, or num < 8  -> .Lmul_enter   (generic path, this block)
#   otherwise, %rdx != %rsi   -> .Lmul4x_enter
#   otherwise                 -> .Lsqr4x_enter
#
# Scratch frame of the generic path (tp[] in these comments), carved out
# below the six saved registers with %rsp rounded down to a multiple of
# 1024:
#   tp[0..num-1]  running result words
#   tp[num]       top carry word
#   tp[num+1]     %rsp as it was right after the six pushes
#
# %rbx, %rbp and %r12-%r15 are pushed on entry and reloaded from the
# saved %rsp before returning.
#-----------------------------------------------------------------------
7238405Sjkimbn_mul_mont:
8238405Sjkim	testl	$3,%r9d	# any of the two low bits of num set?
9238405Sjkim	jnz	.Lmul_enter	#   yes: num is not a multiple of 4 -> generic path
10238405Sjkim	cmpl	$8,%r9d
11238405Sjkim	jb	.Lmul_enter	# num < 8 (unsigned) -> generic path
12238405Sjkim	cmpq	%rsi,%rdx
13238405Sjkim	jne	.Lmul4x_enter	# different source pointers -> 4-way unrolled multiply
14238405Sjkim	jmp	.Lsqr4x_enter	# both sources are the same array -> squaring code
15238405Sjkim
16238405Sjkim.align	16
17238405Sjkim.Lmul_enter:
18238405Sjkim	pushq	%rbx	# save the callee-saved registers used below
19238405Sjkim	pushq	%rbp
20238405Sjkim	pushq	%r12
21238405Sjkim	pushq	%r13
22238405Sjkim	pushq	%r14
23238405Sjkim	pushq	%r15
24238405Sjkim
25238405Sjkim	movl	%r9d,%r9d	# 32-bit move zero-extends num into all of %r9
26238405Sjkim	leaq	2(%r9),%r10	# num+2 words: tp[0..num] plus the saved %rsp slot
27238405Sjkim	movq	%rsp,%r11	# remember %rsp (points at the six saved registers)
28238405Sjkim	negq	%r10
29238405Sjkim	leaq	(%rsp,%r10,8),%rsp	# %rsp -= (num+2)*8
30238405Sjkim	andq	$-1024,%rsp	# round the frame base down to a multiple of 1024
31238405Sjkim
32238405Sjkim	movq	%r11,8(%rsp,%r9,8)	# tp[num+1] = saved %rsp
33238405Sjkim.Lmul_body:
34238405Sjkim	movq	%rdx,%r12	# %r12 = b
35238405Sjkim	movq	(%r8),%r8	# %r8 = n0 (the word itself, no longer the pointer)
36238405Sjkim	movq	(%r12),%rbx	# %rbx = b[0]
37238405Sjkim	movq	(%rsi),%rax	# %rax = a[0]
38238405Sjkim
39238405Sjkim	xorq	%r14,%r14	# i = 0 (outer index, selects b[i])
40238405Sjkim	xorq	%r15,%r15	# j = 0 (inner index, selects a[j] / n[j])
41238405Sjkim
42238405Sjkim	movq	%r8,%rbp
43238405Sjkim	mulq	%rbx	# %rdx:%rax = a[0]*b[0]
44238405Sjkim	movq	%rax,%r10	# %r10 = low word of a[0]*b[0]
45238405Sjkim	movq	(%rcx),%rax	# %rax = n[0]
46238405Sjkim
47238405Sjkim	imulq	%r10,%rbp	# m = low(a[0]*b[0]) * n0  (mod 2^64), kept in %rbp
48238405Sjkim	movq	%rdx,%r11	# %r11 = high word of a[0]*b[0]
49238405Sjkim
50238405Sjkim	mulq	%rbp	# %rdx:%rax = n[0]*m
51238405Sjkim	addq	%rax,%r10	# column-0 sum: the low word is never stored, only its carry is used
52238405Sjkim	movq	8(%rsi),%rax	# %rax = a[1]
53238405Sjkim	adcq	$0,%rdx
54238405Sjkim	movq	%rdx,%r13	# %r13 = carry of the n[]*m chain
55238405Sjkim
56238405Sjkim	leaq	1(%r15),%r15	# j = 1
57238405Sjkim	jmp	.L1st_enter
58238405Sjkim
# First pass (i = 0).  Two carry chains run side by side:
#   %r11/%r10  a[j]*b[0] chain (current column / carry into the next)
#   %r13       n[j]*m chain
# The finished word of column j is stored at tp[j-1]; j has already been
# incremented when the store executes, hence the -16 displacement.
59238405Sjkim.align	16
60238405Sjkim.L1st:
61238405Sjkim	addq	%rax,%r13	# += low(n[j]*m)
62238405Sjkim	movq	(%rsi,%r15,8),%rax	# preload the next a[] word
63238405Sjkim	adcq	$0,%rdx
64238405Sjkim	addq	%r11,%r13	# += a[]*b[0] chain value for this column
65238405Sjkim	movq	%r10,%r11	# carry of the a[] chain becomes the next column's start
66238405Sjkim	adcq	$0,%rdx
67238405Sjkim	movq	%r13,-16(%rsp,%r15,8)	# store the finished word
68238405Sjkim	movq	%rdx,%r13	# carry of the n[] chain
69238405Sjkim
70238405Sjkim.L1st_enter:
71238405Sjkim	mulq	%rbx	# a[j]*b[0]
72238405Sjkim	addq	%rax,%r11
73238405Sjkim	movq	(%rcx,%r15,8),%rax	# %rax = n[j]
74238405Sjkim	adcq	$0,%rdx
75238405Sjkim	leaq	1(%r15),%r15	# j++ (leaq leaves the flags alone)
76238405Sjkim	movq	%rdx,%r10
77238405Sjkim
78238405Sjkim	mulq	%rbp	# n[j]*m (for the j before the increment)
79238405Sjkim	cmpq	%r9,%r15
80238405Sjkim	jne	.L1st	# loop until j == num
81238405Sjkim
# Last column of the first pass: same steps as the loop body, except that
# %rax is reloaded with a[0] for the next outer iteration.
82238405Sjkim	addq	%rax,%r13
83238405Sjkim	movq	(%rsi),%rax	# %rax = a[0]
84238405Sjkim	adcq	$0,%rdx
85238405Sjkim	addq	%r11,%r13
86238405Sjkim	adcq	$0,%rdx
87238405Sjkim	movq	%r13,-16(%rsp,%r15,8)	# tp[num-2]
88238405Sjkim	movq	%rdx,%r13
89238405Sjkim	movq	%r10,%r11
90238405Sjkim
91238405Sjkim	xorq	%rdx,%rdx
92238405Sjkim	addq	%r11,%r13	# add the two remaining carries together
93238405Sjkim	adcq	$0,%rdx
94238405Sjkim	movq	%r13,-8(%rsp,%r9,8)	# tp[num-1]
95238405Sjkim	movq	%rdx,(%rsp,%r9,8)	# tp[num] = top carry
96238405Sjkim
97238405Sjkim	leaq	1(%r14),%r14	# i = 1
98238405Sjkim	jmp	.Louter
# Remaining passes (i = 1 .. num-1): same as the first pass, but the
# previous contents of tp[] are added into every column.
99238405Sjkim.align	16
100238405Sjkim.Louter:
101238405Sjkim	movq	(%r12,%r14,8),%rbx	# %rbx = b[i]
102238405Sjkim	xorq	%r15,%r15	# j = 0
103238405Sjkim	movq	%r8,%rbp
104238405Sjkim	movq	(%rsp),%r10	# %r10 = tp[0]
105238405Sjkim	mulq	%rbx	# a[0]*b[i]  (%rax was left holding a[0])
106238405Sjkim	addq	%rax,%r10	# %r10 = tp[0] + low(a[0]*b[i])
107238405Sjkim	movq	(%rcx),%rax	# %rax = n[0]
108238405Sjkim	adcq	$0,%rdx
109238405Sjkim
110238405Sjkim	imulq	%r10,%rbp	# m = %r10 * n0  (mod 2^64)
111238405Sjkim	movq	%rdx,%r11
112238405Sjkim
113238405Sjkim	mulq	%rbp	# n[0]*m
114238405Sjkim	addq	%rax,%r10	# column 0: only the carry is kept
115238405Sjkim	movq	8(%rsi),%rax	# %rax = a[1]
116238405Sjkim	adcq	$0,%rdx
117238405Sjkim	movq	8(%rsp),%r10	# %r10 = tp[1]
118238405Sjkim	movq	%rdx,%r13
119238405Sjkim
120238405Sjkim	leaq	1(%r15),%r15	# j = 1
121238405Sjkim	jmp	.Linner_enter
122238405Sjkim
123238405Sjkim.align	16
124238405Sjkim.Linner:
125238405Sjkim	addq	%rax,%r13	# += low(n[j]*m)
126238405Sjkim	movq	(%rsi,%r15,8),%rax	# preload the next a[] word
127238405Sjkim	adcq	$0,%rdx
128238405Sjkim	addq	%r10,%r13	# += (old tp word + a[]*b[i] chain) for this column
129238405Sjkim	movq	(%rsp,%r15,8),%r10	# preload the next old tp word
130238405Sjkim	adcq	$0,%rdx
131238405Sjkim	movq	%r13,-16(%rsp,%r15,8)	# store the finished word one slot lower
132238405Sjkim	movq	%rdx,%r13
133238405Sjkim
134238405Sjkim.Linner_enter:
135238405Sjkim	mulq	%rbx	# a[j]*b[i]
136238405Sjkim	addq	%rax,%r11
137238405Sjkim	movq	(%rcx,%r15,8),%rax	# %rax = n[j]
138238405Sjkim	adcq	$0,%rdx
139238405Sjkim	addq	%r11,%r10	# fold the a[] chain into the old tp word
140238405Sjkim	movq	%rdx,%r11
141238405Sjkim	adcq	$0,%r11	# %r11 = carry of the a[] chain into the next column
142238405Sjkim	leaq	1(%r15),%r15	# j++
143238405Sjkim
144238405Sjkim	mulq	%rbp	# n[j]*m (for the j before the increment)
145238405Sjkim	cmpq	%r9,%r15
146238405Sjkim	jne	.Linner	# loop until j == num
147238405Sjkim
148238405Sjkim	addq	%rax,%r13
149238405Sjkim	movq	(%rsi),%rax	# %rax = a[0] for the next outer iteration
150238405Sjkim	adcq	$0,%rdx
151238405Sjkim	addq	%r10,%r13
152238405Sjkim	movq	(%rsp,%r15,8),%r10	# %r10 = old tp[num] (j == num here)
153238405Sjkim	adcq	$0,%rdx
154238405Sjkim	movq	%r13,-16(%rsp,%r15,8)	# tp[num-2]
155238405Sjkim	movq	%rdx,%r13
156238405Sjkim
157238405Sjkim	xorq	%rdx,%rdx
158238405Sjkim	addq	%r11,%r13	# both remaining carries ...
159238405Sjkim	adcq	$0,%rdx
160238405Sjkim	addq	%r10,%r13	# ... plus the previous top carry word
161238405Sjkim	adcq	$0,%rdx
162238405Sjkim	movq	%r13,-8(%rsp,%r9,8)	# tp[num-1]
163238405Sjkim	movq	%rdx,(%rsp,%r9,8)	# tp[num]
164238405Sjkim
165238405Sjkim	leaq	1(%r14),%r14	# i++
166238405Sjkim	cmpq	%r9,%r14
167238405Sjkim	jl	.Louter	# loop while i < num
168238405Sjkim
# Subtraction: rp[k] = tp[k] - n[k] for k = 0 .. num-1, with the borrow
# carried in CF.  The xorq clears CF before the first sbbq; inside the
# loop only movq, leaq and decq follow the sbbq, none of which modify CF.
169238405Sjkim	xorq	%r14,%r14	# k = 0, and CF = 0
170238405Sjkim	movq	(%rsp),%rax	# %rax = tp[0]
171238405Sjkim	leaq	(%rsp),%rsi	# %rsi = tp (a[] is no longer needed)
172238405Sjkim	movq	%r9,%r15	# loop counter = num
173238405Sjkim	jmp	.Lsub
174238405Sjkim.align	16
175238405Sjkim.Lsub:	sbbq	(%rcx,%r14,8),%rax
176238405Sjkim	movq	%rax,(%rdi,%r14,8)	# rp[k] = tp[k] - n[k] - borrow
177238405Sjkim	movq	8(%rsi,%r14,8),%rax	# %rax = tp[k+1]
178238405Sjkim	leaq	1(%r14),%r14
179238405Sjkim	decq	%r15
180238405Sjkim	jnz	.Lsub
181238405Sjkim
# Branch-free selection of the copy source.  %rax holds tp[num] here;
# after subtracting the final borrow it is used as a bit mask:
#   source = (tp & mask) | (rp & ~mask)
# so an all-ones mask selects tp[] (the unsubtracted value) and a zero
# mask selects rp[] (the difference just written).
182238405Sjkim	sbbq	$0,%rax	# mask = tp[num] - borrow
183238405Sjkim	xorq	%r14,%r14	# k = 0
184238405Sjkim	andq	%rax,%rsi
185238405Sjkim	notq	%rax
186238405Sjkim	movq	%rdi,%rcx
187238405Sjkim	andq	%rax,%rcx
188238405Sjkim	movq	%r9,%r15	# loop counter = num
189238405Sjkim	orq	%rcx,%rsi	# %rsi = selected source pointer
190238405Sjkim.align	16
191238405Sjkim.Lcopy:
192238405Sjkim	movq	(%rsi,%r14,8),%rax
193238405Sjkim	movq	%r14,(%rsp,%r14,8)	# overwrite tp[k] with the value k (presumably to scrub the temporary)
194238405Sjkim	movq	%rax,(%rdi,%r14,8)	# rp[k] = source[k]
195238405Sjkim	leaq	1(%r14),%r14
196238405Sjkim	subq	$1,%r15
197238405Sjkim	jnz	.Lcopy
198238405Sjkim
199238405Sjkim	movq	8(%rsp,%r9,8),%rsi	# %rsi = saved %rsp from tp[num+1]
200238405Sjkim	movq	$1,%rax	# return value
201238405Sjkim	movq	(%rsi),%r15	# reload the saved registers, last pushed first
202238405Sjkim	movq	8(%rsi),%r14
203238405Sjkim	movq	16(%rsi),%r13
204238405Sjkim	movq	24(%rsi),%r12
205238405Sjkim	movq	32(%rsi),%rbp
206238405Sjkim	movq	40(%rsi),%rbx
207238405Sjkim	leaq	48(%rsi),%rsp	# drop the frame and the six saved slots
208238405Sjkim.Lmul_epilogue:
209238405Sjkim	.byte	0xf3,0xc3	# encoding of "rep ret"
210238405Sjkim.size	bn_mul_mont,.-bn_mul_mont
211238405Sjkim.type	bn_mul4x_mont,@function
212238405Sjkim.align	16
#-----------------------------------------------------------------------
# bn_mul4x_mont -- multiply path with the inner loops unrolled to four
# words per iteration.  It mirrors the multiply / subtract / select /
# copy sequence of .Lmul_enter in bn_mul_mont.
#
# No .globl is emitted for this symbol in the visible source; it is
# reached through .Lmul4x_enter from bn_mul_mont, whose dispatch only
# jumps here when num is a multiple of 4, num >= 8 and %rdx != %rsi.
# The loop bounds below rely on num being a multiple of 4.
#
# Registers on entry (same as bn_mul_mont):
#   %rdi  destination array (rp[]); the pointer is parked in the frame
#         because %rdi is reused as an accumulator
#   %rsi  a[]      %rdx  b[] (kept in %r12)      %rcx  n[]
#   %r8   pointer to the n0 word                 %r9d  num
# Return value: %rax = 1.
#
# Scratch frame: num+4 words are reserved and %rsp is rounded down to a
# multiple of 1024:
#   tp[0..num-1]  running result words
#   tp[num]       top carry word
#   tp[num+1]     %rsp as it was right after the six pushes
#   tp[num+2]     saved destination pointer
#
# Accumulators inside the loops:
#   %r10 / %r11   alternate as the a[j]*b[i] carry chain
#   %rdi / %r13   alternate as the n[j]*m carry chain
#   %rbp          m, the per-row multiplier;  %rbx = b[i]
#   %r15          j (word index);  %r14 = i
#-----------------------------------------------------------------------
213238405Sjkimbn_mul4x_mont:
214238405Sjkim.Lmul4x_enter:
215238405Sjkim	pushq	%rbx	# save the callee-saved registers used below
216238405Sjkim	pushq	%rbp
217238405Sjkim	pushq	%r12
218238405Sjkim	pushq	%r13
219238405Sjkim	pushq	%r14
220238405Sjkim	pushq	%r15
221238405Sjkim
222238405Sjkim	movl	%r9d,%r9d	# zero-extend num into all of %r9
223238405Sjkim	leaq	4(%r9),%r10	# reserve num+4 words
224238405Sjkim	movq	%rsp,%r11	# remember %rsp (points at the six saved registers)
225238405Sjkim	negq	%r10
226238405Sjkim	leaq	(%rsp,%r10,8),%rsp	# %rsp -= (num+4)*8
227238405Sjkim	andq	$-1024,%rsp	# round down to a multiple of 1024 (also makes the movdqa stores below aligned)
228238405Sjkim
229238405Sjkim	movq	%r11,8(%rsp,%r9,8)	# tp[num+1] = saved %rsp
230238405Sjkim.Lmul4x_body:
231238405Sjkim	movq	%rdi,16(%rsp,%r9,8)	# tp[num+2] = rp; frees %rdi for use as an accumulator
232238405Sjkim	movq	%rdx,%r12	# %r12 = b
233238405Sjkim	movq	(%r8),%r8	# %r8 = n0 (the word itself)
234238405Sjkim	movq	(%r12),%rbx	# %rbx = b[0]
235238405Sjkim	movq	(%rsi),%rax	# %rax = a[0]
236238405Sjkim
237238405Sjkim	xorq	%r14,%r14	# i = 0
238238405Sjkim	xorq	%r15,%r15	# j = 0
239238405Sjkim
240238405Sjkim	movq	%r8,%rbp
241238405Sjkim	mulq	%rbx	# a[0]*b[0]
242238405Sjkim	movq	%rax,%r10
243238405Sjkim	movq	(%rcx),%rax	# %rax = n[0]
244238405Sjkim
245238405Sjkim	imulq	%r10,%rbp	# m = low(a[0]*b[0]) * n0  (mod 2^64)
246238405Sjkim	movq	%rdx,%r11
247238405Sjkim
248238405Sjkim	mulq	%rbp	# n[0]*m
249238405Sjkim	addq	%rax,%r10	# column 0: the low word is not stored, only the carry is used
250238405Sjkim	movq	8(%rsi),%rax	# %rax = a[1]
251238405Sjkim	adcq	$0,%rdx
252238405Sjkim	movq	%rdx,%rdi
253238405Sjkim
254238405Sjkim	mulq	%rbx	# a[1]*b[0]
255238405Sjkim	addq	%rax,%r11
256238405Sjkim	movq	8(%rcx),%rax	# %rax = n[1]
257238405Sjkim	adcq	$0,%rdx
258238405Sjkim	movq	%rdx,%r10
259238405Sjkim
260238405Sjkim	mulq	%rbp	# n[1]*m
261238405Sjkim	addq	%rax,%rdi
262238405Sjkim	movq	16(%rsi),%rax	# %rax = a[2]
263238405Sjkim	adcq	$0,%rdx
264238405Sjkim	addq	%r11,%rdi
265238405Sjkim	leaq	4(%r15),%r15	# j = 4
266238405Sjkim	adcq	$0,%rdx
267238405Sjkim	movq	%rdi,(%rsp)	# tp[0] = finished word of column 1
268238405Sjkim	movq	%rdx,%r13
269238405Sjkim	jmp	.L1st4x
# First pass (i = 0), four columns per iteration.  With j the value of
# %r15 at the top of the loop, an iteration consumes a[j-2..j+1] and
# n[j-2..j+1] and stores tp[j-3..j].  j is advanced by 4 before the last
# store, which is why that store uses a -32 displacement.
270238405Sjkim.align	16
271238405Sjkim.L1st4x:
272238405Sjkim	mulq	%rbx	# a[j-2]*b[0]
273238405Sjkim	addq	%rax,%r10
274238405Sjkim	movq	-16(%rcx,%r15,8),%rax	# n[j-2]
275238405Sjkim	adcq	$0,%rdx
276238405Sjkim	movq	%rdx,%r11
277238405Sjkim
278238405Sjkim	mulq	%rbp	# n[j-2]*m
279238405Sjkim	addq	%rax,%r13
280238405Sjkim	movq	-8(%rsi,%r15,8),%rax	# a[j-1]
281238405Sjkim	adcq	$0,%rdx
282238405Sjkim	addq	%r10,%r13
283238405Sjkim	adcq	$0,%rdx
284238405Sjkim	movq	%r13,-24(%rsp,%r15,8)	# tp[j-3]
285238405Sjkim	movq	%rdx,%rdi
286238405Sjkim
287238405Sjkim	mulq	%rbx	# a[j-1]*b[0]
288238405Sjkim	addq	%rax,%r11
289238405Sjkim	movq	-8(%rcx,%r15,8),%rax	# n[j-1]
290238405Sjkim	adcq	$0,%rdx
291238405Sjkim	movq	%rdx,%r10
292238405Sjkim
293238405Sjkim	mulq	%rbp	# n[j-1]*m
294238405Sjkim	addq	%rax,%rdi
295238405Sjkim	movq	(%rsi,%r15,8),%rax	# a[j]
296238405Sjkim	adcq	$0,%rdx
297238405Sjkim	addq	%r11,%rdi
298238405Sjkim	adcq	$0,%rdx
299238405Sjkim	movq	%rdi,-16(%rsp,%r15,8)	# tp[j-2]
300238405Sjkim	movq	%rdx,%r13
301238405Sjkim
302238405Sjkim	mulq	%rbx	# a[j]*b[0]
303238405Sjkim	addq	%rax,%r10
304238405Sjkim	movq	(%rcx,%r15,8),%rax	# n[j]
305238405Sjkim	adcq	$0,%rdx
306238405Sjkim	movq	%rdx,%r11
307238405Sjkim
308238405Sjkim	mulq	%rbp	# n[j]*m
309238405Sjkim	addq	%rax,%r13
310238405Sjkim	movq	8(%rsi,%r15,8),%rax	# a[j+1]
311238405Sjkim	adcq	$0,%rdx
312238405Sjkim	addq	%r10,%r13
313238405Sjkim	adcq	$0,%rdx
314238405Sjkim	movq	%r13,-8(%rsp,%r15,8)	# tp[j-1]
315238405Sjkim	movq	%rdx,%rdi
316238405Sjkim
317238405Sjkim	mulq	%rbx	# a[j+1]*b[0]
318238405Sjkim	addq	%rax,%r11
319238405Sjkim	movq	8(%rcx,%r15,8),%rax	# n[j+1]
320238405Sjkim	adcq	$0,%rdx
321238405Sjkim	leaq	4(%r15),%r15	# j += 4 (leaq leaves the flags alone)
322238405Sjkim	movq	%rdx,%r10
323238405Sjkim
324238405Sjkim	mulq	%rbp	# n[j+1]*m (old j)
325238405Sjkim	addq	%rax,%rdi
326238405Sjkim	movq	-16(%rsi,%r15,8),%rax	# a[j-2] for the new j
327238405Sjkim	adcq	$0,%rdx
328238405Sjkim	addq	%r11,%rdi
329238405Sjkim	adcq	$0,%rdx
330238405Sjkim	movq	%rdi,-32(%rsp,%r15,8)	# tp[j] for the old j
331238405Sjkim	movq	%rdx,%r13
332238405Sjkim	cmpq	%r9,%r15
333238405Sjkim	jl	.L1st4x	# loop while j < num
334238405Sjkim
# Tail of the first pass: the last two columns (num-2 and num-1), then
# the combined carries go to tp[num-1] and tp[num].
335238405Sjkim	mulq	%rbx
336238405Sjkim	addq	%rax,%r10
337238405Sjkim	movq	-16(%rcx,%r15,8),%rax
338238405Sjkim	adcq	$0,%rdx
339238405Sjkim	movq	%rdx,%r11
340238405Sjkim
341238405Sjkim	mulq	%rbp
342238405Sjkim	addq	%rax,%r13
343238405Sjkim	movq	-8(%rsi,%r15,8),%rax
344238405Sjkim	adcq	$0,%rdx
345238405Sjkim	addq	%r10,%r13
346238405Sjkim	adcq	$0,%rdx
347238405Sjkim	movq	%r13,-24(%rsp,%r15,8)
348238405Sjkim	movq	%rdx,%rdi
349238405Sjkim
350238405Sjkim	mulq	%rbx
351238405Sjkim	addq	%rax,%r11
352238405Sjkim	movq	-8(%rcx,%r15,8),%rax
353238405Sjkim	adcq	$0,%rdx
354238405Sjkim	movq	%rdx,%r10
355238405Sjkim
356238405Sjkim	mulq	%rbp
357238405Sjkim	addq	%rax,%rdi
358238405Sjkim	movq	(%rsi),%rax	# %rax = a[0] for the next outer iteration
359238405Sjkim	adcq	$0,%rdx
360238405Sjkim	addq	%r11,%rdi
361238405Sjkim	adcq	$0,%rdx
362238405Sjkim	movq	%rdi,-16(%rsp,%r15,8)
363238405Sjkim	movq	%rdx,%r13
364238405Sjkim
365238405Sjkim	xorq	%rdi,%rdi
366238405Sjkim	addq	%r10,%r13	# add the two remaining carries together
367238405Sjkim	adcq	$0,%rdi
368238405Sjkim	movq	%r13,-8(%rsp,%r15,8)	# second-to-top word
369238405Sjkim	movq	%rdi,(%rsp,%r15,8)	# top carry word
370238405Sjkim
371238405Sjkim	leaq	1(%r14),%r14	# i = 1
# Remaining passes: same column schedule as the first pass, but every
# a[]*b[i] chain value also absorbs the previous tp[] word of its column.
372238405Sjkim.align	4
373238405Sjkim.Louter4x:
374238405Sjkim	movq	(%r12,%r14,8),%rbx	# %rbx = b[i]
375238405Sjkim	xorq	%r15,%r15	# j = 0
376238405Sjkim	movq	(%rsp),%r10	# %r10 = tp[0]
377238405Sjkim	movq	%r8,%rbp
378238405Sjkim	mulq	%rbx	# a[0]*b[i]
379238405Sjkim	addq	%rax,%r10
380238405Sjkim	movq	(%rcx),%rax	# %rax = n[0]
381238405Sjkim	adcq	$0,%rdx
382238405Sjkim
383238405Sjkim	imulq	%r10,%rbp	# m = (tp[0] + low(a[0]*b[i])) * n0  (mod 2^64)
384238405Sjkim	movq	%rdx,%r11
385238405Sjkim
386238405Sjkim	mulq	%rbp	# n[0]*m
387238405Sjkim	addq	%rax,%r10	# column 0: only the carry is kept
388238405Sjkim	movq	8(%rsi),%rax	# %rax = a[1]
389238405Sjkim	adcq	$0,%rdx
390238405Sjkim	movq	%rdx,%rdi
391238405Sjkim
392238405Sjkim	mulq	%rbx	# a[1]*b[i]
393238405Sjkim	addq	%rax,%r11
394238405Sjkim	movq	8(%rcx),%rax	# %rax = n[1]
395238405Sjkim	adcq	$0,%rdx
396238405Sjkim	addq	8(%rsp),%r11	# += tp[1]
397238405Sjkim	adcq	$0,%rdx
398238405Sjkim	movq	%rdx,%r10
399238405Sjkim
400238405Sjkim	mulq	%rbp	# n[1]*m
401238405Sjkim	addq	%rax,%rdi
402238405Sjkim	movq	16(%rsi),%rax	# %rax = a[2]
403238405Sjkim	adcq	$0,%rdx
404238405Sjkim	addq	%r11,%rdi
405238405Sjkim	leaq	4(%r15),%r15	# j = 4
406238405Sjkim	adcq	$0,%rdx
407238405Sjkim	movq	%rdi,(%rsp)	# tp[0]
408238405Sjkim	movq	%rdx,%r13
409238405Sjkim	jmp	.Linner4x
410238405Sjkim.align	16
411238405Sjkim.Linner4x:
412238405Sjkim	mulq	%rbx	# a[j-2]*b[i]
413238405Sjkim	addq	%rax,%r10
414238405Sjkim	movq	-16(%rcx,%r15,8),%rax	# n[j-2]
415238405Sjkim	adcq	$0,%rdx
416238405Sjkim	addq	-16(%rsp,%r15,8),%r10	# += old tp[j-2]
417238405Sjkim	adcq	$0,%rdx
418238405Sjkim	movq	%rdx,%r11
419238405Sjkim
420238405Sjkim	mulq	%rbp	# n[j-2]*m
421238405Sjkim	addq	%rax,%r13
422238405Sjkim	movq	-8(%rsi,%r15,8),%rax	# a[j-1]
423238405Sjkim	adcq	$0,%rdx
424238405Sjkim	addq	%r10,%r13
425238405Sjkim	adcq	$0,%rdx
426238405Sjkim	movq	%r13,-24(%rsp,%r15,8)	# tp[j-3]
427238405Sjkim	movq	%rdx,%rdi
428238405Sjkim
429238405Sjkim	mulq	%rbx	# a[j-1]*b[i]
430238405Sjkim	addq	%rax,%r11
431238405Sjkim	movq	-8(%rcx,%r15,8),%rax	# n[j-1]
432238405Sjkim	adcq	$0,%rdx
433238405Sjkim	addq	-8(%rsp,%r15,8),%r11	# += old tp[j-1]
434238405Sjkim	adcq	$0,%rdx
435238405Sjkim	movq	%rdx,%r10
436238405Sjkim
437238405Sjkim	mulq	%rbp	# n[j-1]*m
438238405Sjkim	addq	%rax,%rdi
439238405Sjkim	movq	(%rsi,%r15,8),%rax	# a[j]
440238405Sjkim	adcq	$0,%rdx
441238405Sjkim	addq	%r11,%rdi
442238405Sjkim	adcq	$0,%rdx
443238405Sjkim	movq	%rdi,-16(%rsp,%r15,8)	# tp[j-2]
444238405Sjkim	movq	%rdx,%r13
445238405Sjkim
446238405Sjkim	mulq	%rbx	# a[j]*b[i]
447238405Sjkim	addq	%rax,%r10
448238405Sjkim	movq	(%rcx,%r15,8),%rax	# n[j]
449238405Sjkim	adcq	$0,%rdx
450238405Sjkim	addq	(%rsp,%r15,8),%r10	# += old tp[j]
451238405Sjkim	adcq	$0,%rdx
452238405Sjkim	movq	%rdx,%r11
453238405Sjkim
454238405Sjkim	mulq	%rbp	# n[j]*m
455238405Sjkim	addq	%rax,%r13
456238405Sjkim	movq	8(%rsi,%r15,8),%rax	# a[j+1]
457238405Sjkim	adcq	$0,%rdx
458238405Sjkim	addq	%r10,%r13
459238405Sjkim	adcq	$0,%rdx
460238405Sjkim	movq	%r13,-8(%rsp,%r15,8)	# tp[j-1]
461238405Sjkim	movq	%rdx,%rdi
462238405Sjkim
463238405Sjkim	mulq	%rbx	# a[j+1]*b[i]
464238405Sjkim	addq	%rax,%r11
465238405Sjkim	movq	8(%rcx,%r15,8),%rax	# n[j+1]
466238405Sjkim	adcq	$0,%rdx
467238405Sjkim	addq	8(%rsp,%r15,8),%r11	# += old tp[j+1]
468238405Sjkim	adcq	$0,%rdx
469238405Sjkim	leaq	4(%r15),%r15	# j += 4
470238405Sjkim	movq	%rdx,%r10
471238405Sjkim
472238405Sjkim	mulq	%rbp	# n[j+1]*m (old j)
473238405Sjkim	addq	%rax,%rdi
474238405Sjkim	movq	-16(%rsi,%r15,8),%rax	# a[j-2] for the new j
475238405Sjkim	adcq	$0,%rdx
476238405Sjkim	addq	%r11,%rdi
477238405Sjkim	adcq	$0,%rdx
478238405Sjkim	movq	%rdi,-32(%rsp,%r15,8)	# tp[j] for the old j
479238405Sjkim	movq	%rdx,%r13
480238405Sjkim	cmpq	%r9,%r15
481238405Sjkim	jl	.Linner4x	# loop while j < num
482238405Sjkim
# Tail of an outer pass: last two columns, then the carries plus the
# previous top carry word (tp[num]) form the new tp[num-1] and tp[num].
483238405Sjkim	mulq	%rbx
484238405Sjkim	addq	%rax,%r10
485238405Sjkim	movq	-16(%rcx,%r15,8),%rax
486238405Sjkim	adcq	$0,%rdx
487238405Sjkim	addq	-16(%rsp,%r15,8),%r10
488238405Sjkim	adcq	$0,%rdx
489238405Sjkim	movq	%rdx,%r11
490238405Sjkim
491238405Sjkim	mulq	%rbp
492238405Sjkim	addq	%rax,%r13
493238405Sjkim	movq	-8(%rsi,%r15,8),%rax
494238405Sjkim	adcq	$0,%rdx
495238405Sjkim	addq	%r10,%r13
496238405Sjkim	adcq	$0,%rdx
497238405Sjkim	movq	%r13,-24(%rsp,%r15,8)
498238405Sjkim	movq	%rdx,%rdi
499238405Sjkim
500238405Sjkim	mulq	%rbx
501238405Sjkim	addq	%rax,%r11
502238405Sjkim	movq	-8(%rcx,%r15,8),%rax
503238405Sjkim	adcq	$0,%rdx
504238405Sjkim	addq	-8(%rsp,%r15,8),%r11
505238405Sjkim	adcq	$0,%rdx
506238405Sjkim	leaq	1(%r14),%r14	# i++ (done here; compared against num at the bottom)
507238405Sjkim	movq	%rdx,%r10
508238405Sjkim
509238405Sjkim	mulq	%rbp
510238405Sjkim	addq	%rax,%rdi
511238405Sjkim	movq	(%rsi),%rax	# %rax = a[0] for the next outer iteration
512238405Sjkim	adcq	$0,%rdx
513238405Sjkim	addq	%r11,%rdi
514238405Sjkim	adcq	$0,%rdx
515238405Sjkim	movq	%rdi,-16(%rsp,%r15,8)
516238405Sjkim	movq	%rdx,%r13
517238405Sjkim
518238405Sjkim	xorq	%rdi,%rdi
519238405Sjkim	addq	%r10,%r13
520238405Sjkim	adcq	$0,%rdi
521238405Sjkim	addq	(%rsp,%r9,8),%r13	# += previous tp[num]
522238405Sjkim	adcq	$0,%rdi
523238405Sjkim	movq	%r13,-8(%rsp,%r15,8)	# second-to-top word
524238405Sjkim	movq	%rdi,(%rsp,%r15,8)	# top carry word
525238405Sjkim
526238405Sjkim	cmpq	%r9,%r14
527238405Sjkim	jl	.Louter4x	# loop while i < num
# Subtraction rp[] = tp[] - n[], four words per iteration and software-
# pipelined by two words.  The borrow lives in CF from the subq onwards;
# only movq, leaq, decq and sbbq execute until the final sbbq, and of
# those only sbbq changes CF.
528238405Sjkim	movq	16(%rsp,%r9,8),%rdi	# %rdi = rp again (from tp[num+2])
529238405Sjkim	movq	0(%rsp),%rax	# %rax = tp[0]
530238405Sjkim	pxor	%xmm0,%xmm0	# %xmm0 = 0, stored over tp[] in the copy loop
531238405Sjkim	movq	8(%rsp),%rdx	# %rdx = tp[1]
532238405Sjkim	shrq	$2,%r9	# %r9 = num/4 (restored by the shlq below)
533238405Sjkim	leaq	(%rsp),%rsi	# %rsi = tp
534238405Sjkim	xorq	%r14,%r14	# k = 0
535238405Sjkim
536238405Sjkim	subq	0(%rcx),%rax	# tp[0] - n[0], starts the borrow chain
537238405Sjkim	movq	16(%rsi),%rbx	# %rbx = tp[2]
538238405Sjkim	movq	24(%rsi),%rbp	# %rbp = tp[3]
539238405Sjkim	sbbq	8(%rcx),%rdx	# tp[1] - n[1] - borrow
540238405Sjkim	leaq	-1(%r9),%r15	# loop counter = num/4 - 1
541238405Sjkim	jmp	.Lsub4x
542238405Sjkim.align	16
543238405Sjkim.Lsub4x:
544238405Sjkim	movq	%rax,0(%rdi,%r14,8)	# rp[k]
545238405Sjkim	movq	%rdx,8(%rdi,%r14,8)	# rp[k+1]
546238405Sjkim	sbbq	16(%rcx,%r14,8),%rbx
547238405Sjkim	movq	32(%rsi,%r14,8),%rax	# preload tp[k+4]
548238405Sjkim	movq	40(%rsi,%r14,8),%rdx	# preload tp[k+5]
549238405Sjkim	sbbq	24(%rcx,%r14,8),%rbp
550238405Sjkim	movq	%rbx,16(%rdi,%r14,8)	# rp[k+2]
551238405Sjkim	movq	%rbp,24(%rdi,%r14,8)	# rp[k+3]
552238405Sjkim	sbbq	32(%rcx,%r14,8),%rax
553238405Sjkim	movq	48(%rsi,%r14,8),%rbx	# preload tp[k+6]
554238405Sjkim	movq	56(%rsi,%r14,8),%rbp	# preload tp[k+7]
555238405Sjkim	sbbq	40(%rcx,%r14,8),%rdx
556238405Sjkim	leaq	4(%r14),%r14	# k += 4
557238405Sjkim	decq	%r15
558238405Sjkim	jnz	.Lsub4x
559238405Sjkim
# Last four words, then the same branch-free source selection as in
# bn_mul_mont:  source = (tp & mask) | (rp & ~mask),
# with mask = tp[num] - final borrow.
560238405Sjkim	movq	%rax,0(%rdi,%r14,8)
561238405Sjkim	movq	32(%rsi,%r14,8),%rax	# %rax = tp[num] (k == num-4 here)
562238405Sjkim	sbbq	16(%rcx,%r14,8),%rbx
563238405Sjkim	movq	%rdx,8(%rdi,%r14,8)
564238405Sjkim	sbbq	24(%rcx,%r14,8),%rbp
565238405Sjkim	movq	%rbx,16(%rdi,%r14,8)
566238405Sjkim
567238405Sjkim	sbbq	$0,%rax	# mask = tp[num] - borrow
568238405Sjkim	movq	%rbp,24(%rdi,%r14,8)
569238405Sjkim	xorq	%r14,%r14	# byte offset = 0 for the copy loop
570238405Sjkim	andq	%rax,%rsi
571238405Sjkim	notq	%rax
572238405Sjkim	movq	%rdi,%rcx
573238405Sjkim	andq	%rax,%rcx
574238405Sjkim	leaq	-1(%r9),%r15	# loop counter = num/4 - 1
575238405Sjkim	orq	%rcx,%rsi	# %rsi = selected source pointer
576238405Sjkim
# Copy the selected source to rp[] 16 bytes at a time while storing
# zeros over tp[].  %r14 is a byte offset here (scale factor 1).  Total
# copied: 16 + 32*(num/4 - 1) + 16 = 8*num bytes.
577238405Sjkim	movdqu	(%rsi),%xmm1
578238405Sjkim	movdqa	%xmm0,(%rsp)	# zero tp[0..1]
579238405Sjkim	movdqu	%xmm1,(%rdi)	# rp[0..1]
580238405Sjkim	jmp	.Lcopy4x
581238405Sjkim.align	16
582238405Sjkim.Lcopy4x:
583238405Sjkim	movdqu	16(%rsi,%r14,1),%xmm2
584238405Sjkim	movdqu	32(%rsi,%r14,1),%xmm1
585238405Sjkim	movdqa	%xmm0,16(%rsp,%r14,1)
586238405Sjkim	movdqu	%xmm2,16(%rdi,%r14,1)
587238405Sjkim	movdqa	%xmm0,32(%rsp,%r14,1)
588238405Sjkim	movdqu	%xmm1,32(%rdi,%r14,1)
589238405Sjkim	leaq	32(%r14),%r14	# advance 32 bytes (four words)
590238405Sjkim	decq	%r15
591238405Sjkim	jnz	.Lcopy4x
592238405Sjkim
593238405Sjkim	shlq	$2,%r9	# %r9 = num again (undo the shrq)
594238405Sjkim	movdqu	16(%rsi,%r14,1),%xmm2	# final 16 bytes
595238405Sjkim	movdqa	%xmm0,16(%rsp,%r14,1)
596238405Sjkim	movdqu	%xmm2,16(%rdi,%r14,1)
597238405Sjkim	movq	8(%rsp,%r9,8),%rsi	# %rsi = saved %rsp from tp[num+1]
598238405Sjkim	movq	$1,%rax	# return value
599238405Sjkim	movq	(%rsi),%r15	# reload the saved registers, last pushed first
600238405Sjkim	movq	8(%rsi),%r14
601238405Sjkim	movq	16(%rsi),%r13
602238405Sjkim	movq	24(%rsi),%r12
603238405Sjkim	movq	32(%rsi),%rbp
604238405Sjkim	movq	40(%rsi),%rbx
605238405Sjkim	leaq	48(%rsi),%rsp	# drop the frame and the six saved slots
606238405Sjkim.Lmul4x_epilogue:
607238405Sjkim	.byte	0xf3,0xc3	# encoding of "rep ret"
608238405Sjkim.size	bn_mul4x_mont,.-bn_mul4x_mont
609238405Sjkim.type	bn_sqr4x_mont,@function
610238405Sjkim.align	16
611238405Sjkimbn_sqr4x_mont:
612238405Sjkim.Lsqr4x_enter:
613238405Sjkim	pushq	%rbx
614238405Sjkim	pushq	%rbp
615238405Sjkim	pushq	%r12
616238405Sjkim	pushq	%r13
617238405Sjkim	pushq	%r14
618238405Sjkim	pushq	%r15
619238405Sjkim
620238405Sjkim	shll	$3,%r9d
621238405Sjkim	xorq	%r10,%r10
622238405Sjkim	movq	%rsp,%r11
623238405Sjkim	subq	%r9,%r10
624238405Sjkim	movq	(%r8),%r8
625238405Sjkim	leaq	-72(%rsp,%r10,2),%rsp
626238405Sjkim	andq	$-1024,%rsp
627238405Sjkim
628238405Sjkim
629238405Sjkim
630238405Sjkim
631238405Sjkim
632238405Sjkim
633238405Sjkim
634238405Sjkim
635238405Sjkim
636238405Sjkim
637238405Sjkim
638238405Sjkim	movq	%rdi,32(%rsp)
639238405Sjkim	movq	%rcx,40(%rsp)
640238405Sjkim	movq	%r8,48(%rsp)
641238405Sjkim	movq	%r11,56(%rsp)
642238405Sjkim.Lsqr4x_body:
643238405Sjkim
644238405Sjkim
645238405Sjkim
646238405Sjkim
647238405Sjkim
648238405Sjkim
649238405Sjkim
650238405Sjkim	leaq	32(%r10),%rbp
651238405Sjkim	leaq	(%rsi,%r9,1),%rsi
652238405Sjkim
653238405Sjkim	movq	%r9,%rcx
654238405Sjkim
655238405Sjkim
656238405Sjkim	movq	-32(%rsi,%rbp,1),%r14
657238405Sjkim	leaq	64(%rsp,%r9,2),%rdi
658238405Sjkim	movq	-24(%rsi,%rbp,1),%rax
659238405Sjkim	leaq	-32(%rdi,%rbp,1),%rdi
660238405Sjkim	movq	-16(%rsi,%rbp,1),%rbx
661238405Sjkim	movq	%rax,%r15
662238405Sjkim
663238405Sjkim	mulq	%r14
664238405Sjkim	movq	%rax,%r10
665238405Sjkim	movq	%rbx,%rax
666238405Sjkim	movq	%rdx,%r11
667238405Sjkim	movq	%r10,-24(%rdi,%rbp,1)
668238405Sjkim
669238405Sjkim	xorq	%r10,%r10
670238405Sjkim	mulq	%r14
671238405Sjkim	addq	%rax,%r11
672238405Sjkim	movq	%rbx,%rax
673238405Sjkim	adcq	%rdx,%r10
674238405Sjkim	movq	%r11,-16(%rdi,%rbp,1)
675238405Sjkim
676238405Sjkim	leaq	-16(%rbp),%rcx
677238405Sjkim
678238405Sjkim
679238405Sjkim	movq	8(%rsi,%rcx,1),%rbx
680238405Sjkim	mulq	%r15
681238405Sjkim	movq	%rax,%r12
682238405Sjkim	movq	%rbx,%rax
683238405Sjkim	movq	%rdx,%r13
684238405Sjkim
685238405Sjkim	xorq	%r11,%r11
686238405Sjkim	addq	%r12,%r10
687238405Sjkim	leaq	16(%rcx),%rcx
688238405Sjkim	adcq	$0,%r11
689238405Sjkim	mulq	%r14
690238405Sjkim	addq	%rax,%r10
691238405Sjkim	movq	%rbx,%rax
692238405Sjkim	adcq	%rdx,%r11
693238405Sjkim	movq	%r10,-8(%rdi,%rcx,1)
694238405Sjkim	jmp	.Lsqr4x_1st
695238405Sjkim
696238405Sjkim.align	16
697238405Sjkim.Lsqr4x_1st:
698238405Sjkim	movq	(%rsi,%rcx,1),%rbx
699238405Sjkim	xorq	%r12,%r12
700238405Sjkim	mulq	%r15
701238405Sjkim	addq	%rax,%r13
702238405Sjkim	movq	%rbx,%rax
703238405Sjkim	adcq	%rdx,%r12
704238405Sjkim
705238405Sjkim	xorq	%r10,%r10
706238405Sjkim	addq	%r13,%r11
707238405Sjkim	adcq	$0,%r10
708238405Sjkim	mulq	%r14
709238405Sjkim	addq	%rax,%r11
710238405Sjkim	movq	%rbx,%rax
711238405Sjkim	adcq	%rdx,%r10
712238405Sjkim	movq	%r11,(%rdi,%rcx,1)
713238405Sjkim
714238405Sjkim
715238405Sjkim	movq	8(%rsi,%rcx,1),%rbx
716238405Sjkim	xorq	%r13,%r13
717238405Sjkim	mulq	%r15
718238405Sjkim	addq	%rax,%r12
719238405Sjkim	movq	%rbx,%rax
720238405Sjkim	adcq	%rdx,%r13
721238405Sjkim
722238405Sjkim	xorq	%r11,%r11
723238405Sjkim	addq	%r12,%r10
724238405Sjkim	adcq	$0,%r11
725238405Sjkim	mulq	%r14
726238405Sjkim	addq	%rax,%r10
727238405Sjkim	movq	%rbx,%rax
728238405Sjkim	adcq	%rdx,%r11
729238405Sjkim	movq	%r10,8(%rdi,%rcx,1)
730238405Sjkim
731238405Sjkim	movq	16(%rsi,%rcx,1),%rbx
732238405Sjkim	xorq	%r12,%r12
733238405Sjkim	mulq	%r15
734238405Sjkim	addq	%rax,%r13
735238405Sjkim	movq	%rbx,%rax
736238405Sjkim	adcq	%rdx,%r12
737238405Sjkim
738238405Sjkim	xorq	%r10,%r10
739238405Sjkim	addq	%r13,%r11
740238405Sjkim	adcq	$0,%r10
741238405Sjkim	mulq	%r14
742238405Sjkim	addq	%rax,%r11
743238405Sjkim	movq	%rbx,%rax
744238405Sjkim	adcq	%rdx,%r10
745238405Sjkim	movq	%r11,16(%rdi,%rcx,1)
746238405Sjkim
747238405Sjkim
748238405Sjkim	movq	24(%rsi,%rcx,1),%rbx
749238405Sjkim	xorq	%r13,%r13
750238405Sjkim	mulq	%r15
751238405Sjkim	addq	%rax,%r12
752238405Sjkim	movq	%rbx,%rax
753238405Sjkim	adcq	%rdx,%r13
754238405Sjkim
755238405Sjkim	xorq	%r11,%r11
756238405Sjkim	addq	%r12,%r10
757238405Sjkim	leaq	32(%rcx),%rcx
758238405Sjkim	adcq	$0,%r11
759238405Sjkim	mulq	%r14
760238405Sjkim	addq	%rax,%r10
761238405Sjkim	movq	%rbx,%rax
762238405Sjkim	adcq	%rdx,%r11
763238405Sjkim	movq	%r10,-8(%rdi,%rcx,1)
764238405Sjkim
765238405Sjkim	cmpq	$0,%rcx
766238405Sjkim	jne	.Lsqr4x_1st
767238405Sjkim
768238405Sjkim	xorq	%r12,%r12
769238405Sjkim	addq	%r11,%r13
770238405Sjkim	adcq	$0,%r12
771238405Sjkim	mulq	%r15
772238405Sjkim	addq	%rax,%r13
773238405Sjkim	adcq	%rdx,%r12
774238405Sjkim
775238405Sjkim	movq	%r13,(%rdi)
776238405Sjkim	leaq	16(%rbp),%rbp
777238405Sjkim	movq	%r12,8(%rdi)
778238405Sjkim	jmp	.Lsqr4x_outer
779238405Sjkim
780238405Sjkim.align	16
781238405Sjkim.Lsqr4x_outer:
782238405Sjkim	movq	-32(%rsi,%rbp,1),%r14
783238405Sjkim	leaq	64(%rsp,%r9,2),%rdi
784238405Sjkim	movq	-24(%rsi,%rbp,1),%rax
785238405Sjkim	leaq	-32(%rdi,%rbp,1),%rdi
786238405Sjkim	movq	-16(%rsi,%rbp,1),%rbx
787238405Sjkim	movq	%rax,%r15
788238405Sjkim
789238405Sjkim	movq	-24(%rdi,%rbp,1),%r10
790238405Sjkim	xorq	%r11,%r11
791238405Sjkim	mulq	%r14
792238405Sjkim	addq	%rax,%r10
793238405Sjkim	movq	%rbx,%rax
794238405Sjkim	adcq	%rdx,%r11
795238405Sjkim	movq	%r10,-24(%rdi,%rbp,1)
796238405Sjkim
797238405Sjkim	xorq	%r10,%r10
798238405Sjkim	addq	-16(%rdi,%rbp,1),%r11
799238405Sjkim	adcq	$0,%r10
800238405Sjkim	mulq	%r14
801238405Sjkim	addq	%rax,%r11
802238405Sjkim	movq	%rbx,%rax
803238405Sjkim	adcq	%rdx,%r10
804238405Sjkim	movq	%r11,-16(%rdi,%rbp,1)
805238405Sjkim
806238405Sjkim	leaq	-16(%rbp),%rcx
807238405Sjkim	xorq	%r12,%r12
808238405Sjkim
809238405Sjkim
810238405Sjkim	movq	8(%rsi,%rcx,1),%rbx
811238405Sjkim	xorq	%r13,%r13
812238405Sjkim	addq	8(%rdi,%rcx,1),%r12
813238405Sjkim	adcq	$0,%r13
814238405Sjkim	mulq	%r15
815238405Sjkim	addq	%rax,%r12
816238405Sjkim	movq	%rbx,%rax
817238405Sjkim	adcq	%rdx,%r13
818238405Sjkim
819238405Sjkim	xorq	%r11,%r11
820238405Sjkim	addq	%r12,%r10
821238405Sjkim	adcq	$0,%r11
822238405Sjkim	mulq	%r14
823238405Sjkim	addq	%rax,%r10
824238405Sjkim	movq	%rbx,%rax
825238405Sjkim	adcq	%rdx,%r11
826238405Sjkim	movq	%r10,8(%rdi,%rcx,1)
827238405Sjkim
828238405Sjkim	leaq	16(%rcx),%rcx
829238405Sjkim	jmp	.Lsqr4x_inner
830238405Sjkim
831238405Sjkim.align	16
832238405Sjkim.Lsqr4x_inner:
833238405Sjkim	movq	(%rsi,%rcx,1),%rbx
834238405Sjkim	xorq	%r12,%r12
835238405Sjkim	addq	(%rdi,%rcx,1),%r13
836238405Sjkim	adcq	$0,%r12
837238405Sjkim	mulq	%r15
838238405Sjkim	addq	%rax,%r13
839238405Sjkim	movq	%rbx,%rax
840238405Sjkim	adcq	%rdx,%r12
841238405Sjkim
842238405Sjkim	xorq	%r10,%r10
843238405Sjkim	addq	%r13,%r11
844238405Sjkim	adcq	$0,%r10
845238405Sjkim	mulq	%r14
846238405Sjkim	addq	%rax,%r11
847238405Sjkim	movq	%rbx,%rax
848238405Sjkim	adcq	%rdx,%r10
849238405Sjkim	movq	%r11,(%rdi,%rcx,1)
850238405Sjkim
851238405Sjkim	movq	8(%rsi,%rcx,1),%rbx
852238405Sjkim	xorq	%r13,%r13
853238405Sjkim	addq	8(%rdi,%rcx,1),%r12
854238405Sjkim	adcq	$0,%r13
855238405Sjkim	mulq	%r15
856238405Sjkim	addq	%rax,%r12
857238405Sjkim	movq	%rbx,%rax
858238405Sjkim	adcq	%rdx,%r13
859238405Sjkim
860238405Sjkim	xorq	%r11,%r11
861238405Sjkim	addq	%r12,%r10
862238405Sjkim	leaq	16(%rcx),%rcx
863238405Sjkim	adcq	$0,%r11
864238405Sjkim	mulq	%r14
865238405Sjkim	addq	%rax,%r10
866238405Sjkim	movq	%rbx,%rax
867238405Sjkim	adcq	%rdx,%r11
868238405Sjkim	movq	%r10,-8(%rdi,%rcx,1)
869238405Sjkim
870238405Sjkim	cmpq	$0,%rcx
871238405Sjkim	jne	.Lsqr4x_inner
872238405Sjkim
873238405Sjkim	xorq	%r12,%r12
874238405Sjkim	addq	%r11,%r13
875238405Sjkim	adcq	$0,%r12
876238405Sjkim	mulq	%r15
877238405Sjkim	addq	%rax,%r13
878238405Sjkim	adcq	%rdx,%r12
879238405Sjkim
880238405Sjkim	movq	%r13,(%rdi)
881238405Sjkim	movq	%r12,8(%rdi)
882238405Sjkim
883238405Sjkim	addq	$16,%rbp
884238405Sjkim	jnz	.Lsqr4x_outer
885238405Sjkim
886238405Sjkim
887238405Sjkim	movq	-32(%rsi),%r14
888238405Sjkim	leaq	64(%rsp,%r9,2),%rdi
889238405Sjkim	movq	-24(%rsi),%rax
890238405Sjkim	leaq	-32(%rdi,%rbp,1),%rdi
891238405Sjkim	movq	-16(%rsi),%rbx
892238405Sjkim	movq	%rax,%r15
893238405Sjkim
894238405Sjkim	xorq	%r11,%r11
895238405Sjkim	mulq	%r14
896238405Sjkim	addq	%rax,%r10
897238405Sjkim	movq	%rbx,%rax
898238405Sjkim	adcq	%rdx,%r11
899238405Sjkim	movq	%r10,-24(%rdi)
900238405Sjkim
901238405Sjkim	xorq	%r10,%r10
902238405Sjkim	addq	%r13,%r11
903238405Sjkim	adcq	$0,%r10
904238405Sjkim	mulq	%r14
905238405Sjkim	addq	%rax,%r11
906238405Sjkim	movq	%rbx,%rax
907238405Sjkim	adcq	%rdx,%r10
908238405Sjkim	movq	%r11,-16(%rdi)
909238405Sjkim
910238405Sjkim	movq	-8(%rsi),%rbx
911238405Sjkim	mulq	%r15
912238405Sjkim	addq	%rax,%r12
913238405Sjkim	movq	%rbx,%rax
914238405Sjkim	adcq	$0,%rdx
915238405Sjkim
916238405Sjkim	xorq	%r11,%r11
917238405Sjkim	addq	%r12,%r10
918238405Sjkim	movq	%rdx,%r13
919238405Sjkim	adcq	$0,%r11
920238405Sjkim	mulq	%r14
921238405Sjkim	addq	%rax,%r10
922238405Sjkim	movq	%rbx,%rax
923238405Sjkim	adcq	%rdx,%r11
924238405Sjkim	movq	%r10,-8(%rdi)
925238405Sjkim
926238405Sjkim	xorq	%r12,%r12
927238405Sjkim	addq	%r11,%r13
928238405Sjkim	adcq	$0,%r12
929238405Sjkim	mulq	%r15
930238405Sjkim	addq	%rax,%r13
931238405Sjkim	movq	-16(%rsi),%rax
932238405Sjkim	adcq	%rdx,%r12
933238405Sjkim
934238405Sjkim	movq	%r13,(%rdi)
935238405Sjkim	movq	%r12,8(%rdi)
936238405Sjkim
937238405Sjkim	mulq	%rbx
938238405Sjkim	addq	$16,%rbp
939238405Sjkim	xorq	%r14,%r14
940238405Sjkim	subq	%r9,%rbp
941238405Sjkim	xorq	%r15,%r15
942238405Sjkim
943238405Sjkim	addq	%r12,%rax
944238405Sjkim	adcq	$0,%rdx
945238405Sjkim	movq	%rax,8(%rdi)
946238405Sjkim	movq	%rdx,16(%rdi)
947238405Sjkim	movq	%r15,24(%rdi)
948238405Sjkim
949238405Sjkim	movq	-16(%rsi,%rbp,1),%rax
950238405Sjkim	leaq	64(%rsp,%r9,2),%rdi
951238405Sjkim	xorq	%r10,%r10
952238405Sjkim	movq	-24(%rdi,%rbp,2),%r11
953238405Sjkim
954238405Sjkim	leaq	(%r14,%r10,2),%r12
955238405Sjkim	shrq	$63,%r10
956238405Sjkim	leaq	(%rcx,%r11,2),%r13
957238405Sjkim	shrq	$63,%r11
958238405Sjkim	orq	%r10,%r13
959238405Sjkim	movq	-16(%rdi,%rbp,2),%r10
960238405Sjkim	movq	%r11,%r14
961238405Sjkim	mulq	%rax
962238405Sjkim	negq	%r15
963238405Sjkim	movq	-8(%rdi,%rbp,2),%r11
964238405Sjkim	adcq	%rax,%r12
965238405Sjkim	movq	-8(%rsi,%rbp,1),%rax
966238405Sjkim	movq	%r12,-32(%rdi,%rbp,2)
967238405Sjkim	adcq	%rdx,%r13
968238405Sjkim
969238405Sjkim	leaq	(%r14,%r10,2),%rbx
970238405Sjkim	movq	%r13,-24(%rdi,%rbp,2)
971238405Sjkim	sbbq	%r15,%r15
972238405Sjkim	shrq	$63,%r10
973238405Sjkim	leaq	(%rcx,%r11,2),%r8
974238405Sjkim	shrq	$63,%r11
975238405Sjkim	orq	%r10,%r8
976238405Sjkim	movq	0(%rdi,%rbp,2),%r10
977238405Sjkim	movq	%r11,%r14
978238405Sjkim	mulq	%rax
979238405Sjkim	negq	%r15
980238405Sjkim	movq	8(%rdi,%rbp,2),%r11
981238405Sjkim	adcq	%rax,%rbx
982238405Sjkim	movq	0(%rsi,%rbp,1),%rax
983238405Sjkim	movq	%rbx,-16(%rdi,%rbp,2)
984238405Sjkim	adcq	%rdx,%r8
985238405Sjkim	leaq	16(%rbp),%rbp
986238405Sjkim	movq	%r8,-40(%rdi,%rbp,2)
987238405Sjkim	sbbq	%r15,%r15
988238405Sjkim	jmp	.Lsqr4x_shift_n_add
989238405Sjkim
990238405Sjkim.align	16
991238405Sjkim.Lsqr4x_shift_n_add:
992238405Sjkim	leaq	(%r14,%r10,2),%r12
993238405Sjkim	shrq	$63,%r10
994238405Sjkim	leaq	(%rcx,%r11,2),%r13
995238405Sjkim	shrq	$63,%r11
996238405Sjkim	orq	%r10,%r13
997238405Sjkim	movq	-16(%rdi,%rbp,2),%r10
998238405Sjkim	movq	%r11,%r14
999238405Sjkim	mulq	%rax
1000238405Sjkim	negq	%r15
1001238405Sjkim	movq	-8(%rdi,%rbp,2),%r11
1002238405Sjkim	adcq	%rax,%r12
1003238405Sjkim	movq	-8(%rsi,%rbp,1),%rax
1004238405Sjkim	movq	%r12,-32(%rdi,%rbp,2)
1005238405Sjkim	adcq	%rdx,%r13
1006238405Sjkim
1007238405Sjkim	leaq	(%r14,%r10,2),%rbx
1008238405Sjkim	movq	%r13,-24(%rdi,%rbp,2)
1009238405Sjkim	sbbq	%r15,%r15
1010238405Sjkim	shrq	$63,%r10
1011238405Sjkim	leaq	(%rcx,%r11,2),%r8
1012238405Sjkim	shrq	$63,%r11
1013238405Sjkim	orq	%r10,%r8
1014238405Sjkim	movq	0(%rdi,%rbp,2),%r10
1015238405Sjkim	movq	%r11,%r14
1016238405Sjkim	mulq	%rax
1017238405Sjkim	negq	%r15
1018238405Sjkim	movq	8(%rdi,%rbp,2),%r11
1019238405Sjkim	adcq	%rax,%rbx
1020238405Sjkim	movq	0(%rsi,%rbp,1),%rax
1021238405Sjkim	movq	%rbx,-16(%rdi,%rbp,2)
1022238405Sjkim	adcq	%rdx,%r8
1023238405Sjkim
1024238405Sjkim	leaq	(%r14,%r10,2),%r12
1025238405Sjkim	movq	%r8,-8(%rdi,%rbp,2)
1026238405Sjkim	sbbq	%r15,%r15
1027238405Sjkim	shrq	$63,%r10
1028238405Sjkim	leaq	(%rcx,%r11,2),%r13
1029238405Sjkim	shrq	$63,%r11
1030238405Sjkim	orq	%r10,%r13
1031238405Sjkim	movq	16(%rdi,%rbp,2),%r10
1032238405Sjkim	movq	%r11,%r14
1033238405Sjkim	mulq	%rax
1034238405Sjkim	negq	%r15
1035238405Sjkim	movq	24(%rdi,%rbp,2),%r11
1036238405Sjkim	adcq	%rax,%r12
1037238405Sjkim	movq	8(%rsi,%rbp,1),%rax
1038238405Sjkim	movq	%r12,0(%rdi,%rbp,2)
1039238405Sjkim	adcq	%rdx,%r13
1040238405Sjkim
1041238405Sjkim	leaq	(%r14,%r10,2),%rbx
1042238405Sjkim	movq	%r13,8(%rdi,%rbp,2)
1043238405Sjkim	sbbq	%r15,%r15
1044238405Sjkim	shrq	$63,%r10
1045238405Sjkim	leaq	(%rcx,%r11,2),%r8
1046238405Sjkim	shrq	$63,%r11
1047238405Sjkim	orq	%r10,%r8
1048238405Sjkim	movq	32(%rdi,%rbp,2),%r10
1049238405Sjkim	movq	%r11,%r14
1050238405Sjkim	mulq	%rax
1051238405Sjkim	negq	%r15
1052238405Sjkim	movq	40(%rdi,%rbp,2),%r11
1053238405Sjkim	adcq	%rax,%rbx
1054238405Sjkim	movq	16(%rsi,%rbp,1),%rax
1055238405Sjkim	movq	%rbx,16(%rdi,%rbp,2)
1056238405Sjkim	adcq	%rdx,%r8
1057238405Sjkim	movq	%r8,24(%rdi,%rbp,2)
1058238405Sjkim	sbbq	%r15,%r15
1059238405Sjkim	addq	$32,%rbp
1060238405Sjkim	jnz	.Lsqr4x_shift_n_add
1061238405Sjkim
1062238405Sjkim	leaq	(%r14,%r10,2),%r12
1063238405Sjkim	shrq	$63,%r10
1064238405Sjkim	leaq	(%rcx,%r11,2),%r13
1065238405Sjkim	shrq	$63,%r11
1066238405Sjkim	orq	%r10,%r13
1067238405Sjkim	movq	-16(%rdi),%r10
1068238405Sjkim	movq	%r11,%r14
1069238405Sjkim	mulq	%rax
1070238405Sjkim	negq	%r15
1071238405Sjkim	movq	-8(%rdi),%r11
1072238405Sjkim	adcq	%rax,%r12
1073238405Sjkim	movq	-8(%rsi),%rax
1074238405Sjkim	movq	%r12,-32(%rdi)
1075238405Sjkim	adcq	%rdx,%r13
1076238405Sjkim
1077238405Sjkim	leaq	(%r14,%r10,2),%rbx
1078238405Sjkim	movq	%r13,-24(%rdi)
1079238405Sjkim	sbbq	%r15,%r15
1080238405Sjkim	shrq	$63,%r10
1081238405Sjkim	leaq	(%rcx,%r11,2),%r8
1082238405Sjkim	shrq	$63,%r11
1083238405Sjkim	orq	%r10,%r8
1084238405Sjkim	mulq	%rax
1085238405Sjkim	negq	%r15
1086238405Sjkim	adcq	%rax,%rbx
1087238405Sjkim	adcq	%rdx,%r8
1088238405Sjkim	movq	%rbx,-16(%rdi)
1089238405Sjkim	movq	%r8,-8(%rdi)
1090238405Sjkim	movq	40(%rsp),%rsi
1091238405Sjkim	movq	48(%rsp),%r8
1092238405Sjkim	xorq	%rcx,%rcx
1093238405Sjkim	movq	%r9,0(%rsp)
1094238405Sjkim	subq	%r9,%rcx
1095238405Sjkim	movq	64(%rsp),%r10
1096238405Sjkim	movq	%r8,%r14
1097238405Sjkim	leaq	64(%rsp,%r9,2),%rax
1098238405Sjkim	leaq	64(%rsp,%r9,1),%rdi
1099238405Sjkim	movq	%rax,8(%rsp)
1100238405Sjkim	leaq	(%rsi,%r9,1),%rsi
1101238405Sjkim	xorq	%rbp,%rbp
1102238405Sjkim
1103238405Sjkim	movq	0(%rsi,%rcx,1),%rax
1104238405Sjkim	movq	8(%rsi,%rcx,1),%r9
1105238405Sjkim	imulq	%r10,%r14
1106238405Sjkim	movq	%rax,%rbx
1107238405Sjkim	jmp	.Lsqr4x_mont_outer
1108238405Sjkim
1109238405Sjkim.align	16
1110238405Sjkim.Lsqr4x_mont_outer:
1111238405Sjkim	xorq	%r11,%r11
1112238405Sjkim	mulq	%r14
1113238405Sjkim	addq	%rax,%r10
1114238405Sjkim	movq	%r9,%rax
1115238405Sjkim	adcq	%rdx,%r11
1116238405Sjkim	movq	%r8,%r15
1117238405Sjkim
1118238405Sjkim	xorq	%r10,%r10
1119238405Sjkim	addq	8(%rdi,%rcx,1),%r11
1120238405Sjkim	adcq	$0,%r10
1121238405Sjkim	mulq	%r14
1122238405Sjkim	addq	%rax,%r11
1123238405Sjkim	movq	%rbx,%rax
1124238405Sjkim	adcq	%rdx,%r10
1125238405Sjkim
1126238405Sjkim	imulq	%r11,%r15
1127238405Sjkim
1128238405Sjkim	movq	16(%rsi,%rcx,1),%rbx
1129238405Sjkim	xorq	%r13,%r13
1130238405Sjkim	addq	%r11,%r12
1131238405Sjkim	adcq	$0,%r13
1132238405Sjkim	mulq	%r15
1133238405Sjkim	addq	%rax,%r12
1134238405Sjkim	movq	%rbx,%rax
1135238405Sjkim	adcq	%rdx,%r13
1136238405Sjkim	movq	%r12,8(%rdi,%rcx,1)
1137238405Sjkim
1138238405Sjkim	xorq	%r11,%r11
1139238405Sjkim	addq	16(%rdi,%rcx,1),%r10
1140238405Sjkim	adcq	$0,%r11
1141238405Sjkim	mulq	%r14
1142238405Sjkim	addq	%rax,%r10
1143238405Sjkim	movq	%r9,%rax
1144238405Sjkim	adcq	%rdx,%r11
1145238405Sjkim
1146238405Sjkim	movq	24(%rsi,%rcx,1),%r9
1147238405Sjkim	xorq	%r12,%r12
1148238405Sjkim	addq	%r10,%r13
1149238405Sjkim	adcq	$0,%r12
1150238405Sjkim	mulq	%r15
1151238405Sjkim	addq	%rax,%r13
1152238405Sjkim	movq	%r9,%rax
1153238405Sjkim	adcq	%rdx,%r12
1154238405Sjkim	movq	%r13,16(%rdi,%rcx,1)
1155238405Sjkim
1156238405Sjkim	xorq	%r10,%r10
1157238405Sjkim	addq	24(%rdi,%rcx,1),%r11
1158238405Sjkim	leaq	32(%rcx),%rcx
1159238405Sjkim	adcq	$0,%r10
1160238405Sjkim	mulq	%r14
1161238405Sjkim	addq	%rax,%r11
1162238405Sjkim	movq	%rbx,%rax
1163238405Sjkim	adcq	%rdx,%r10
1164238405Sjkim	jmp	.Lsqr4x_mont_inner
1165238405Sjkim
1166238405Sjkim.align	16
1167238405Sjkim.Lsqr4x_mont_inner:
1168238405Sjkim	movq	(%rsi,%rcx,1),%rbx
1169238405Sjkim	xorq	%r13,%r13
1170238405Sjkim	addq	%r11,%r12
1171238405Sjkim	adcq	$0,%r13
1172238405Sjkim	mulq	%r15
1173238405Sjkim	addq	%rax,%r12
1174238405Sjkim	movq	%rbx,%rax
1175238405Sjkim	adcq	%rdx,%r13
1176238405Sjkim	movq	%r12,-8(%rdi,%rcx,1)
1177238405Sjkim
1178238405Sjkim	xorq	%r11,%r11
1179238405Sjkim	addq	(%rdi,%rcx,1),%r10
1180238405Sjkim	adcq	$0,%r11
1181238405Sjkim	mulq	%r14
1182238405Sjkim	addq	%rax,%r10
1183238405Sjkim	movq	%r9,%rax
1184238405Sjkim	adcq	%rdx,%r11
1185238405Sjkim
1186238405Sjkim	movq	8(%rsi,%rcx,1),%r9
1187238405Sjkim	xorq	%r12,%r12
1188238405Sjkim	addq	%r10,%r13
1189238405Sjkim	adcq	$0,%r12
1190238405Sjkim	mulq	%r15
1191238405Sjkim	addq	%rax,%r13
1192238405Sjkim	movq	%r9,%rax
1193238405Sjkim	adcq	%rdx,%r12
1194238405Sjkim	movq	%r13,(%rdi,%rcx,1)
1195238405Sjkim
1196238405Sjkim	xorq	%r10,%r10
1197238405Sjkim	addq	8(%rdi,%rcx,1),%r11
1198238405Sjkim	adcq	$0,%r10
1199238405Sjkim	mulq	%r14
1200238405Sjkim	addq	%rax,%r11
1201238405Sjkim	movq	%rbx,%rax
1202238405Sjkim	adcq	%rdx,%r10
1203238405Sjkim
1204238405Sjkim
1205238405Sjkim	movq	16(%rsi,%rcx,1),%rbx
1206238405Sjkim	xorq	%r13,%r13
1207238405Sjkim	addq	%r11,%r12
1208238405Sjkim	adcq	$0,%r13
1209238405Sjkim	mulq	%r15
1210238405Sjkim	addq	%rax,%r12
1211238405Sjkim	movq	%rbx,%rax
1212238405Sjkim	adcq	%rdx,%r13
1213238405Sjkim	movq	%r12,8(%rdi,%rcx,1)
1214238405Sjkim
1215238405Sjkim	xorq	%r11,%r11
1216238405Sjkim	addq	16(%rdi,%rcx,1),%r10
1217238405Sjkim	adcq	$0,%r11
1218238405Sjkim	mulq	%r14
1219238405Sjkim	addq	%rax,%r10
1220238405Sjkim	movq	%r9,%rax
1221238405Sjkim	adcq	%rdx,%r11
1222238405Sjkim
1223238405Sjkim	movq	24(%rsi,%rcx,1),%r9
1224238405Sjkim	xorq	%r12,%r12
1225238405Sjkim	addq	%r10,%r13
1226238405Sjkim	adcq	$0,%r12
1227238405Sjkim	mulq	%r15
1228238405Sjkim	addq	%rax,%r13
1229238405Sjkim	movq	%r9,%rax
1230238405Sjkim	adcq	%rdx,%r12
1231238405Sjkim	movq	%r13,16(%rdi,%rcx,1)
1232238405Sjkim
1233238405Sjkim	xorq	%r10,%r10
1234238405Sjkim	addq	24(%rdi,%rcx,1),%r11
1235238405Sjkim	leaq	32(%rcx),%rcx
1236238405Sjkim	adcq	$0,%r10
1237238405Sjkim	mulq	%r14
1238238405Sjkim	addq	%rax,%r11
1239238405Sjkim	movq	%rbx,%rax
1240238405Sjkim	adcq	%rdx,%r10
1241238405Sjkim	cmpq	$0,%rcx
1242238405Sjkim	jne	.Lsqr4x_mont_inner
1243238405Sjkim
1244238405Sjkim	subq	0(%rsp),%rcx
1245238405Sjkim	movq	%r8,%r14
1246238405Sjkim
1247238405Sjkim	xorq	%r13,%r13
1248238405Sjkim	addq	%r11,%r12
1249238405Sjkim	adcq	$0,%r13
1250238405Sjkim	mulq	%r15
1251238405Sjkim	addq	%rax,%r12
1252238405Sjkim	movq	%r9,%rax
1253238405Sjkim	adcq	%rdx,%r13
1254238405Sjkim	movq	%r12,-8(%rdi)
1255238405Sjkim
1256238405Sjkim	xorq	%r11,%r11
1257238405Sjkim	addq	(%rdi),%r10
1258238405Sjkim	adcq	$0,%r11
1259238405Sjkim	movq	0(%rsi,%rcx,1),%rbx
1260238405Sjkim	addq	%rbp,%r10
1261238405Sjkim	adcq	$0,%r11
1262238405Sjkim
1263238405Sjkim	imulq	16(%rdi,%rcx,1),%r14
1264238405Sjkim	xorq	%r12,%r12
1265238405Sjkim	movq	8(%rsi,%rcx,1),%r9
1266238405Sjkim	addq	%r10,%r13
1267238405Sjkim	movq	16(%rdi,%rcx,1),%r10
1268238405Sjkim	adcq	$0,%r12
1269238405Sjkim	mulq	%r15
1270238405Sjkim	addq	%rax,%r13
1271238405Sjkim	movq	%rbx,%rax
1272238405Sjkim	adcq	%rdx,%r12
1273238405Sjkim	movq	%r13,(%rdi)
1274238405Sjkim
1275238405Sjkim	xorq	%rbp,%rbp
1276238405Sjkim	addq	8(%rdi),%r12
1277238405Sjkim	adcq	%rbp,%rbp
1278238405Sjkim	addq	%r11,%r12
1279238405Sjkim	leaq	16(%rdi),%rdi
1280238405Sjkim	adcq	$0,%rbp
1281238405Sjkim	movq	%r12,-8(%rdi)
1282238405Sjkim	cmpq	8(%rsp),%rdi
1283238405Sjkim	jb	.Lsqr4x_mont_outer
1284238405Sjkim
1285238405Sjkim	movq	0(%rsp),%r9
1286238405Sjkim	movq	%rbp,(%rdi)
1287238405Sjkim	movq	64(%rsp,%r9,1),%rax
1288238405Sjkim	leaq	64(%rsp,%r9,1),%rbx
1289238405Sjkim	movq	40(%rsp),%rsi
1290238405Sjkim	shrq	$5,%r9
1291238405Sjkim	movq	8(%rbx),%rdx
1292238405Sjkim	xorq	%rbp,%rbp
1293238405Sjkim
1294238405Sjkim	movq	32(%rsp),%rdi
1295238405Sjkim	subq	0(%rsi),%rax
1296238405Sjkim	movq	16(%rbx),%r10
1297238405Sjkim	movq	24(%rbx),%r11
1298238405Sjkim	sbbq	8(%rsi),%rdx
1299238405Sjkim	leaq	-1(%r9),%rcx
1300238405Sjkim	jmp	.Lsqr4x_sub
1301238405Sjkim.align	16
1302238405Sjkim.Lsqr4x_sub:
1303238405Sjkim	movq	%rax,0(%rdi,%rbp,8)
1304238405Sjkim	movq	%rdx,8(%rdi,%rbp,8)
1305238405Sjkim	sbbq	16(%rsi,%rbp,8),%r10
1306238405Sjkim	movq	32(%rbx,%rbp,8),%rax
1307238405Sjkim	movq	40(%rbx,%rbp,8),%rdx
1308238405Sjkim	sbbq	24(%rsi,%rbp,8),%r11
1309238405Sjkim	movq	%r10,16(%rdi,%rbp,8)
1310238405Sjkim	movq	%r11,24(%rdi,%rbp,8)
1311238405Sjkim	sbbq	32(%rsi,%rbp,8),%rax
1312238405Sjkim	movq	48(%rbx,%rbp,8),%r10
1313238405Sjkim	movq	56(%rbx,%rbp,8),%r11
1314238405Sjkim	sbbq	40(%rsi,%rbp,8),%rdx
1315238405Sjkim	leaq	4(%rbp),%rbp
1316238405Sjkim	decq	%rcx
1317238405Sjkim	jnz	.Lsqr4x_sub
1318238405Sjkim
1319238405Sjkim	movq	%rax,0(%rdi,%rbp,8)
1320238405Sjkim	movq	32(%rbx,%rbp,8),%rax
1321238405Sjkim	sbbq	16(%rsi,%rbp,8),%r10
1322238405Sjkim	movq	%rdx,8(%rdi,%rbp,8)
1323238405Sjkim	sbbq	24(%rsi,%rbp,8),%r11
1324238405Sjkim	movq	%r10,16(%rdi,%rbp,8)
1325238405Sjkim
1326238405Sjkim	sbbq	$0,%rax
1327238405Sjkim	movq	%r11,24(%rdi,%rbp,8)
1328238405Sjkim	xorq	%rbp,%rbp
1329238405Sjkim	andq	%rax,%rbx
1330238405Sjkim	notq	%rax
1331238405Sjkim	movq	%rdi,%rsi
1332238405Sjkim	andq	%rax,%rsi
1333238405Sjkim	leaq	-1(%r9),%rcx
1334238405Sjkim	orq	%rsi,%rbx
1335238405Sjkim
1336238405Sjkim	pxor	%xmm0,%xmm0
1337238405Sjkim	leaq	64(%rsp,%r9,8),%rsi
1338238405Sjkim	movdqu	(%rbx),%xmm1
1339238405Sjkim	leaq	(%rsi,%r9,8),%rsi
1340238405Sjkim	movdqa	%xmm0,64(%rsp)
1341238405Sjkim	movdqa	%xmm0,(%rsi)
1342238405Sjkim	movdqu	%xmm1,(%rdi)
1343238405Sjkim	jmp	.Lsqr4x_copy
1344238405Sjkim.align	16
1345238405Sjkim.Lsqr4x_copy:
1346238405Sjkim	movdqu	16(%rbx,%rbp,1),%xmm2
1347238405Sjkim	movdqu	32(%rbx,%rbp,1),%xmm1
1348238405Sjkim	movdqa	%xmm0,80(%rsp,%rbp,1)
1349238405Sjkim	movdqa	%xmm0,96(%rsp,%rbp,1)
1350238405Sjkim	movdqa	%xmm0,16(%rsi,%rbp,1)
1351238405Sjkim	movdqa	%xmm0,32(%rsi,%rbp,1)
1352238405Sjkim	movdqu	%xmm2,16(%rdi,%rbp,1)
1353238405Sjkim	movdqu	%xmm1,32(%rdi,%rbp,1)
1354238405Sjkim	leaq	32(%rbp),%rbp
1355238405Sjkim	decq	%rcx
1356238405Sjkim	jnz	.Lsqr4x_copy
1357238405Sjkim
1358238405Sjkim	movdqu	16(%rbx,%rbp,1),%xmm2
1359238405Sjkim	movdqa	%xmm0,80(%rsp,%rbp,1)
1360238405Sjkim	movdqa	%xmm0,16(%rsi,%rbp,1)
1361238405Sjkim	movdqu	%xmm2,16(%rdi,%rbp,1)
1362238405Sjkim	movq	56(%rsp),%rsi
1363238405Sjkim	movq	$1,%rax
1364238405Sjkim	movq	0(%rsi),%r15
1365238405Sjkim	movq	8(%rsi),%r14
1366238405Sjkim	movq	16(%rsi),%r13
1367238405Sjkim	movq	24(%rsi),%r12
1368238405Sjkim	movq	32(%rsi),%rbp
1369238405Sjkim	movq	40(%rsi),%rbx
1370238405Sjkim	leaq	48(%rsi),%rsp
1371238405Sjkim.Lsqr4x_epilogue:
1372238405Sjkim	.byte	0xf3,0xc3
1373238405Sjkim.size	bn_sqr4x_mont,.-bn_sqr4x_mont
# Identification string embedded in the object, stored as a NUL-terminated
# run of ASCII codes. Decoded, the bytes below read:
#   "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
# The .align that follows pads the location counter to a 16-byte boundary.
1374238405Sjkim.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1375238405Sjkim.align	16
1376