ghash-x86.S revision 299966
1# $FreeBSD: stable/10/secure/lib/libcrypto/i386/ghash-x86.S 299966 2016-05-16 19:30:27Z jkim $
2# Do not modify. This file is auto-generated from ghash-x86.pl.
3#ifdef PIC
4.file	"ghash-x86.S"
5.text
6.globl	gcm_gmult_4bit_x86
7.type	gcm_gmult_4bit_x86,@function
8.align	16
9gcm_gmult_4bit_x86:
10.L_gcm_gmult_4bit_x86_begin:
11	pushl	%ebp
12	pushl	%ebx
13	pushl	%esi
14	pushl	%edi
15	subl	$84,%esp
16	movl	104(%esp),%edi
17	movl	108(%esp),%esi
18	movl	(%edi),%ebp
19	movl	4(%edi),%edx
20	movl	8(%edi),%ecx
21	movl	12(%edi),%ebx
22	movl	$0,16(%esp)
23	movl	$471859200,20(%esp)
24	movl	$943718400,24(%esp)
25	movl	$610271232,28(%esp)
26	movl	$1887436800,32(%esp)
27	movl	$1822425088,36(%esp)
28	movl	$1220542464,40(%esp)
29	movl	$1423966208,44(%esp)
30	movl	$3774873600,48(%esp)
31	movl	$4246732800,52(%esp)
32	movl	$3644850176,56(%esp)
33	movl	$3311403008,60(%esp)
34	movl	$2441084928,64(%esp)
35	movl	$2376073216,68(%esp)
36	movl	$2847932416,72(%esp)
37	movl	$3051356160,76(%esp)
38	movl	%ebp,(%esp)
39	movl	%edx,4(%esp)
40	movl	%ecx,8(%esp)
41	movl	%ebx,12(%esp)
42	shrl	$20,%ebx
43	andl	$240,%ebx
44	movl	4(%esi,%ebx,1),%ebp
45	movl	(%esi,%ebx,1),%edx
46	movl	12(%esi,%ebx,1),%ecx
47	movl	8(%esi,%ebx,1),%ebx
48	xorl	%eax,%eax
49	movl	$15,%edi
50	jmp	.L000x86_loop
51.align	16
52.L000x86_loop:
53	movb	%bl,%al
54	shrdl	$4,%ecx,%ebx
55	andb	$15,%al
56	shrdl	$4,%edx,%ecx
57	shrdl	$4,%ebp,%edx
58	shrl	$4,%ebp
59	xorl	16(%esp,%eax,4),%ebp
60	movb	(%esp,%edi,1),%al
61	andb	$240,%al
62	xorl	8(%esi,%eax,1),%ebx
63	xorl	12(%esi,%eax,1),%ecx
64	xorl	(%esi,%eax,1),%edx
65	xorl	4(%esi,%eax,1),%ebp
66	decl	%edi
67	js	.L001x86_break
68	movb	%bl,%al
69	shrdl	$4,%ecx,%ebx
70	andb	$15,%al
71	shrdl	$4,%edx,%ecx
72	shrdl	$4,%ebp,%edx
73	shrl	$4,%ebp
74	xorl	16(%esp,%eax,4),%ebp
75	movb	(%esp,%edi,1),%al
76	shlb	$4,%al
77	xorl	8(%esi,%eax,1),%ebx
78	xorl	12(%esi,%eax,1),%ecx
79	xorl	(%esi,%eax,1),%edx
80	xorl	4(%esi,%eax,1),%ebp
81	jmp	.L000x86_loop
82.align	16
83.L001x86_break:
84	bswap	%ebx
85	bswap	%ecx
86	bswap	%edx
87	bswap	%ebp
88	movl	104(%esp),%edi
89	movl	%ebx,12(%edi)
90	movl	%ecx,8(%edi)
91	movl	%edx,4(%edi)
92	movl	%ebp,(%edi)
93	addl	$84,%esp
94	popl	%edi
95	popl	%esi
96	popl	%ebx
97	popl	%ebp
98	ret
99.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
/*
 * gcm_ghash_4bit_x86 — plain-integer GHASH over a buffer of 16-byte blocks.
 * Generated from ghash-x86.pl; do not hand-edit logic.
 *
 * cdecl args (after 4 pushes + 84-byte frame, at 104..116(%esp)):
 *   arg1: Xi state pointer (updated in place)
 *   arg2: lookup table pointer (as in gcm_gmult_4bit_x86)
 *   arg3: input pointer
 *   arg4: input length in bytes (added to arg3 to form the end pointer;
 *         presumably a multiple of 16 — caller contract, confirm in ghash-x86.pl)
 * Per block: Xi ^= input-block, then the same 4-bit multiply loop as
 * gcm_gmult_4bit_x86.
 */
100.globl	gcm_ghash_4bit_x86
101.type	gcm_ghash_4bit_x86,@function
102.align	16
103gcm_ghash_4bit_x86:
104.L_gcm_ghash_4bit_x86_begin:
105	pushl	%ebp
106	pushl	%ebx
107	pushl	%esi
108	pushl	%edi
109	subl	$84,%esp
/* ebx = Xi, esi = table, edi = input, 116(%esp) := input end pointer */
110	movl	104(%esp),%ebx
111	movl	108(%esp),%esi
112	movl	112(%esp),%edi
113	movl	116(%esp),%ecx
114	addl	%edi,%ecx
115	movl	%ecx,116(%esp)
/* load state into ebp:edx:ecx:ebx */
116	movl	(%ebx),%ebp
117	movl	4(%ebx),%edx
118	movl	8(%ebx),%ecx
119	movl	12(%ebx),%ebx
/* on-stack copy of the 4-bit reduction table (matches .Lrem_4bit) */
120	movl	$0,16(%esp)
121	movl	$471859200,20(%esp)
122	movl	$943718400,24(%esp)
123	movl	$610271232,28(%esp)
124	movl	$1887436800,32(%esp)
125	movl	$1822425088,36(%esp)
126	movl	$1220542464,40(%esp)
127	movl	$1423966208,44(%esp)
128	movl	$3774873600,48(%esp)
129	movl	$4246732800,52(%esp)
130	movl	$3644850176,56(%esp)
131	movl	$3311403008,60(%esp)
132	movl	$2441084928,64(%esp)
133	movl	$2376073216,68(%esp)
134	movl	$2847932416,72(%esp)
135	movl	$3051356160,76(%esp)
136.align	16
/* one iteration per 16-byte input block */
137.L002x86_outer_loop:
138	xorl	12(%edi),%ebx
139	xorl	8(%edi),%ecx
140	xorl	4(%edi),%edx
141	xorl	(%edi),%ebp
/* stash the xored state where the inner loop reads it byte-wise */
142	movl	%ebx,12(%esp)
143	movl	%ecx,8(%esp)
144	movl	%edx,4(%esp)
145	movl	%ebp,(%esp)
146	shrl	$20,%ebx
147	andl	$240,%ebx
148	movl	4(%esi,%ebx,1),%ebp
149	movl	(%esi,%ebx,1),%edx
150	movl	12(%esi,%ebx,1),%ecx
151	movl	8(%esi,%ebx,1),%ebx
152	xorl	%eax,%eax
153	movl	$15,%edi
154	jmp	.L003x86_loop
155.align	16
/* identical nibble-at-a-time multiply loop as in gcm_gmult_4bit_x86 */
156.L003x86_loop:
157	movb	%bl,%al
158	shrdl	$4,%ecx,%ebx
159	andb	$15,%al
160	shrdl	$4,%edx,%ecx
161	shrdl	$4,%ebp,%edx
162	shrl	$4,%ebp
163	xorl	16(%esp,%eax,4),%ebp
164	movb	(%esp,%edi,1),%al
165	andb	$240,%al
166	xorl	8(%esi,%eax,1),%ebx
167	xorl	12(%esi,%eax,1),%ecx
168	xorl	(%esi,%eax,1),%edx
169	xorl	4(%esi,%eax,1),%ebp
170	decl	%edi
171	js	.L004x86_break
172	movb	%bl,%al
173	shrdl	$4,%ecx,%ebx
174	andb	$15,%al
175	shrdl	$4,%edx,%ecx
176	shrdl	$4,%ebp,%edx
177	shrl	$4,%ebp
178	xorl	16(%esp,%eax,4),%ebp
179	movb	(%esp,%edi,1),%al
180	shlb	$4,%al
181	xorl	8(%esi,%eax,1),%ebx
182	xorl	12(%esi,%eax,1),%ecx
183	xorl	(%esi,%eax,1),%edx
184	xorl	4(%esi,%eax,1),%ebp
185	jmp	.L003x86_loop
186.align	16
187.L004x86_break:
188	bswap	%ebx
189	bswap	%ecx
190	bswap	%edx
191	bswap	%ebp
/* advance input pointer; loop while below the end pointer */
192	movl	112(%esp),%edi
193	leal	16(%edi),%edi
194	cmpl	116(%esp),%edi
195	movl	%edi,112(%esp)
196	jb	.L002x86_outer_loop
/* write the final state back to Xi */
197	movl	104(%esp),%edi
198	movl	%ebx,12(%edi)
199	movl	%ecx,8(%edi)
200	movl	%edx,4(%edi)
201	movl	%ebp,(%edi)
202	addl	$84,%esp
203	popl	%edi
204	popl	%esi
205	popl	%ebx
206	popl	%ebp
207	ret
208.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
/*
 * gcm_gmult_4bit_mmx — single-block GHASH multiply using MMX registers.
 * Generated from ghash-x86.pl; do not hand-edit logic.
 *
 * cdecl args (after 4 pushes, at 20/24(%esp)):
 *   arg1 (edi): 16-byte state Xi, read and rewritten
 *   arg2 (esi): 16-byte-entry lookup table, indexed by nibble*16
 * PIC: .Lrem_4bit's address is formed via the call/pop trick (no GOT).
 * Uses mm0-mm2; ends with emms so the caller's x87 state is clean.
 */
209.globl	gcm_gmult_4bit_mmx
210.type	gcm_gmult_4bit_mmx,@function
211.align	16
212gcm_gmult_4bit_mmx:
213.L_gcm_gmult_4bit_mmx_begin:
214	pushl	%ebp
215	pushl	%ebx
216	pushl	%esi
217	pushl	%edi
218	movl	20(%esp),%edi
219	movl	24(%esp),%esi
/* call/pop to get EIP, then eax = &.Lrem_4bit (position independent) */
220	call	.L005pic_point
221.L005pic_point:
222	popl	%eax
223	leal	.Lrem_4bit-.L005pic_point(%eax),%eax
/* seed with the last byte of Xi: cl = low nibble << 4, edx = high nibble*16 */
224	movzbl	15(%edi),%ebx
225	xorl	%ecx,%ecx
226	movl	%ebx,%edx
227	movb	%dl,%cl
228	movl	$14,%ebp
229	shlb	$4,%cl
230	andl	$240,%edx
231	movq	8(%esi,%ecx,1),%mm0
232	movq	(%esi,%ecx,1),%mm1
233	movd	%mm0,%ebx
234	jmp	.L006mmx_loop
235.align	16
/* two nibbles per iteration: shift mm1:mm0 right 4, fold the spilled
   low nibble through .Lrem_4bit, xor in the next table entry */
236.L006mmx_loop:
237	psrlq	$4,%mm0
238	andl	$15,%ebx
239	movq	%mm1,%mm2
240	psrlq	$4,%mm1
241	pxor	8(%esi,%edx,1),%mm0
242	movb	(%edi,%ebp,1),%cl
243	psllq	$60,%mm2
244	pxor	(%eax,%ebx,8),%mm1
245	decl	%ebp
246	movd	%mm0,%ebx
247	pxor	(%esi,%edx,1),%mm1
248	movl	%ecx,%edx
249	pxor	%mm2,%mm0
250	js	.L007mmx_break
251	shlb	$4,%cl
252	andl	$15,%ebx
253	psrlq	$4,%mm0
254	andl	$240,%edx
255	movq	%mm1,%mm2
256	psrlq	$4,%mm1
257	pxor	8(%esi,%ecx,1),%mm0
258	psllq	$60,%mm2
259	pxor	(%eax,%ebx,8),%mm1
260	movd	%mm0,%ebx
261	pxor	(%esi,%ecx,1),%mm1
262	pxor	%mm2,%mm0
263	jmp	.L006mmx_loop
264.align	16
/* final two nibble steps after the counter goes negative */
265.L007mmx_break:
266	shlb	$4,%cl
267	andl	$15,%ebx
268	psrlq	$4,%mm0
269	andl	$240,%edx
270	movq	%mm1,%mm2
271	psrlq	$4,%mm1
272	pxor	8(%esi,%ecx,1),%mm0
273	psllq	$60,%mm2
274	pxor	(%eax,%ebx,8),%mm1
275	movd	%mm0,%ebx
276	pxor	(%esi,%ecx,1),%mm1
277	pxor	%mm2,%mm0
278	psrlq	$4,%mm0
279	andl	$15,%ebx
280	movq	%mm1,%mm2
281	psrlq	$4,%mm1
282	pxor	8(%esi,%edx,1),%mm0
283	psllq	$60,%mm2
284	pxor	(%eax,%ebx,8),%mm1
285	movd	%mm0,%ebx
286	pxor	(%esi,%edx,1),%mm1
287	pxor	%mm2,%mm0
/* unpack mm1:mm0 into four dwords, byte-swap, store to Xi */
288	psrlq	$32,%mm0
289	movd	%mm1,%edx
290	psrlq	$32,%mm1
291	movd	%mm0,%ecx
292	movd	%mm1,%ebp
293	bswap	%ebx
294	bswap	%edx
295	bswap	%ecx
296	bswap	%ebp
297	emms
298	movl	%ebx,12(%edi)
299	movl	%edx,4(%edi)
300	movl	%ecx,8(%edi)
301	movl	%ebp,(%edi)
302	popl	%edi
303	popl	%esi
304	popl	%ebx
305	popl	%ebp
306	ret
307.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
/*
 * gcm_ghash_4bit_mmx — buffer GHASH, MMX path with larger on-stack tables.
 * Generated from ghash-x86.pl; do not hand-edit logic.
 *
 * cdecl args (after 4 pushes, at 20..32(%esp)):
 *   arg1 (eax): Xi state pointer (updated in place)
 *   arg2 (ebx): per-key table (read at offsets -128..+120 after ebx += 128)
 *   arg3 (ecx): input pointer
 *   arg4 (edx): input byte count (ecx+edx forms the end pointer)
 * PIC: esi = &.Lrem_8bit via call/pop.
 * Stack: 544+ bytes, realigned to 64 (original esp kept in ebp and saved
 *   at 556(%esp), restored before return). Layout built below:
 *   0(%esp)   : 16 precomputed nibble bytes
 *   16(%esp)  : low-half table copy   (edi = 144(%esp)-128 window)
 *   144(%esp) : high-half table copy
 *   272/400   : right-shifted-by-4 variants (ebp = 400(%esp)-128 window)
 *   528/536   : spilled state words, 544/548/552/556: Xi ptr, in ptr, end, old esp
 */
308.globl	gcm_ghash_4bit_mmx
309.type	gcm_ghash_4bit_mmx,@function
310.align	16
311gcm_ghash_4bit_mmx:
312.L_gcm_ghash_4bit_mmx_begin:
313	pushl	%ebp
314	pushl	%ebx
315	pushl	%esi
316	pushl	%edi
317	movl	20(%esp),%eax
318	movl	24(%esp),%ebx
319	movl	28(%esp),%ecx
320	movl	32(%esp),%edx
/* remember the un-aligned esp; realign the frame to 64 bytes */
321	movl	%esp,%ebp
322	call	.L008pic_point
323.L008pic_point:
324	popl	%esi
325	leal	.Lrem_8bit-.L008pic_point(%esi),%esi
326	subl	$544,%esp
327	andl	$-64,%esp
328	subl	$16,%esp
329	addl	%ecx,%edx
330	movl	%eax,544(%esp)
331	movl	%edx,552(%esp)
332	movl	%ebp,556(%esp)
/* ---- unrolled table expansion: copy the 16 key-table entries onto the
   stack and also store each entry shifted right by 4 bits (with the
   spilled nibble recorded at 0..15(%esp)) ---- */
333	addl	$128,%ebx
334	leal	144(%esp),%edi
335	leal	400(%esp),%ebp
336	movl	-120(%ebx),%edx
337	movq	-120(%ebx),%mm0
338	movq	-128(%ebx),%mm3
339	shll	$4,%edx
340	movb	%dl,(%esp)
341	movl	-104(%ebx),%edx
342	movq	-104(%ebx),%mm2
343	movq	-112(%ebx),%mm5
344	movq	%mm0,-128(%edi)
345	psrlq	$4,%mm0
346	movq	%mm3,(%edi)
347	movq	%mm3,%mm7
348	psrlq	$4,%mm3
349	shll	$4,%edx
350	movb	%dl,1(%esp)
351	movl	-88(%ebx),%edx
352	movq	-88(%ebx),%mm1
353	psllq	$60,%mm7
354	movq	-96(%ebx),%mm4
355	por	%mm7,%mm0
356	movq	%mm2,-120(%edi)
357	psrlq	$4,%mm2
358	movq	%mm5,8(%edi)
359	movq	%mm5,%mm6
360	movq	%mm0,-128(%ebp)
361	psrlq	$4,%mm5
362	movq	%mm3,(%ebp)
363	shll	$4,%edx
364	movb	%dl,2(%esp)
365	movl	-72(%ebx),%edx
366	movq	-72(%ebx),%mm0
367	psllq	$60,%mm6
368	movq	-80(%ebx),%mm3
369	por	%mm6,%mm2
370	movq	%mm1,-112(%edi)
371	psrlq	$4,%mm1
372	movq	%mm4,16(%edi)
373	movq	%mm4,%mm7
374	movq	%mm2,-120(%ebp)
375	psrlq	$4,%mm4
376	movq	%mm5,8(%ebp)
377	shll	$4,%edx
378	movb	%dl,3(%esp)
379	movl	-56(%ebx),%edx
380	movq	-56(%ebx),%mm2
381	psllq	$60,%mm7
382	movq	-64(%ebx),%mm5
383	por	%mm7,%mm1
384	movq	%mm0,-104(%edi)
385	psrlq	$4,%mm0
386	movq	%mm3,24(%edi)
387	movq	%mm3,%mm6
388	movq	%mm1,-112(%ebp)
389	psrlq	$4,%mm3
390	movq	%mm4,16(%ebp)
391	shll	$4,%edx
392	movb	%dl,4(%esp)
393	movl	-40(%ebx),%edx
394	movq	-40(%ebx),%mm1
395	psllq	$60,%mm6
396	movq	-48(%ebx),%mm4
397	por	%mm6,%mm0
398	movq	%mm2,-96(%edi)
399	psrlq	$4,%mm2
400	movq	%mm5,32(%edi)
401	movq	%mm5,%mm7
402	movq	%mm0,-104(%ebp)
403	psrlq	$4,%mm5
404	movq	%mm3,24(%ebp)
405	shll	$4,%edx
406	movb	%dl,5(%esp)
407	movl	-24(%ebx),%edx
408	movq	-24(%ebx),%mm0
409	psllq	$60,%mm7
410	movq	-32(%ebx),%mm3
411	por	%mm7,%mm2
412	movq	%mm1,-88(%edi)
413	psrlq	$4,%mm1
414	movq	%mm4,40(%edi)
415	movq	%mm4,%mm6
416	movq	%mm2,-96(%ebp)
417	psrlq	$4,%mm4
418	movq	%mm5,32(%ebp)
419	shll	$4,%edx
420	movb	%dl,6(%esp)
421	movl	-8(%ebx),%edx
422	movq	-8(%ebx),%mm2
423	psllq	$60,%mm6
424	movq	-16(%ebx),%mm5
425	por	%mm6,%mm1
426	movq	%mm0,-80(%edi)
427	psrlq	$4,%mm0
428	movq	%mm3,48(%edi)
429	movq	%mm3,%mm7
430	movq	%mm1,-88(%ebp)
431	psrlq	$4,%mm3
432	movq	%mm4,40(%ebp)
433	shll	$4,%edx
434	movb	%dl,7(%esp)
435	movl	8(%ebx),%edx
436	movq	8(%ebx),%mm1
437	psllq	$60,%mm7
438	movq	(%ebx),%mm4
439	por	%mm7,%mm0
440	movq	%mm2,-72(%edi)
441	psrlq	$4,%mm2
442	movq	%mm5,56(%edi)
443	movq	%mm5,%mm6
444	movq	%mm0,-80(%ebp)
445	psrlq	$4,%mm5
446	movq	%mm3,48(%ebp)
447	shll	$4,%edx
448	movb	%dl,8(%esp)
449	movl	24(%ebx),%edx
450	movq	24(%ebx),%mm0
451	psllq	$60,%mm6
452	movq	16(%ebx),%mm3
453	por	%mm6,%mm2
454	movq	%mm1,-64(%edi)
455	psrlq	$4,%mm1
456	movq	%mm4,64(%edi)
457	movq	%mm4,%mm7
458	movq	%mm2,-72(%ebp)
459	psrlq	$4,%mm4
460	movq	%mm5,56(%ebp)
461	shll	$4,%edx
462	movb	%dl,9(%esp)
463	movl	40(%ebx),%edx
464	movq	40(%ebx),%mm2
465	psllq	$60,%mm7
466	movq	32(%ebx),%mm5
467	por	%mm7,%mm1
468	movq	%mm0,-56(%edi)
469	psrlq	$4,%mm0
470	movq	%mm3,72(%edi)
471	movq	%mm3,%mm6
472	movq	%mm1,-64(%ebp)
473	psrlq	$4,%mm3
474	movq	%mm4,64(%ebp)
475	shll	$4,%edx
476	movb	%dl,10(%esp)
477	movl	56(%ebx),%edx
478	movq	56(%ebx),%mm1
479	psllq	$60,%mm6
480	movq	48(%ebx),%mm4
481	por	%mm6,%mm0
482	movq	%mm2,-48(%edi)
483	psrlq	$4,%mm2
484	movq	%mm5,80(%edi)
485	movq	%mm5,%mm7
486	movq	%mm0,-56(%ebp)
487	psrlq	$4,%mm5
488	movq	%mm3,72(%ebp)
489	shll	$4,%edx
490	movb	%dl,11(%esp)
491	movl	72(%ebx),%edx
492	movq	72(%ebx),%mm0
493	psllq	$60,%mm7
494	movq	64(%ebx),%mm3
495	por	%mm7,%mm2
496	movq	%mm1,-40(%edi)
497	psrlq	$4,%mm1
498	movq	%mm4,88(%edi)
499	movq	%mm4,%mm6
500	movq	%mm2,-48(%ebp)
501	psrlq	$4,%mm4
502	movq	%mm5,80(%ebp)
503	shll	$4,%edx
504	movb	%dl,12(%esp)
505	movl	88(%ebx),%edx
506	movq	88(%ebx),%mm2
507	psllq	$60,%mm6
508	movq	80(%ebx),%mm5
509	por	%mm6,%mm1
510	movq	%mm0,-32(%edi)
511	psrlq	$4,%mm0
512	movq	%mm3,96(%edi)
513	movq	%mm3,%mm7
514	movq	%mm1,-40(%ebp)
515	psrlq	$4,%mm3
516	movq	%mm4,88(%ebp)
517	shll	$4,%edx
518	movb	%dl,13(%esp)
519	movl	104(%ebx),%edx
520	movq	104(%ebx),%mm1
521	psllq	$60,%mm7
522	movq	96(%ebx),%mm4
523	por	%mm7,%mm0
524	movq	%mm2,-24(%edi)
525	psrlq	$4,%mm2
526	movq	%mm5,104(%edi)
527	movq	%mm5,%mm6
528	movq	%mm0,-32(%ebp)
529	psrlq	$4,%mm5
530	movq	%mm3,96(%ebp)
531	shll	$4,%edx
532	movb	%dl,14(%esp)
533	movl	120(%ebx),%edx
534	movq	120(%ebx),%mm0
535	psllq	$60,%mm6
536	movq	112(%ebx),%mm3
537	por	%mm6,%mm2
538	movq	%mm1,-16(%edi)
539	psrlq	$4,%mm1
540	movq	%mm4,112(%edi)
541	movq	%mm4,%mm7
542	movq	%mm2,-24(%ebp)
543	psrlq	$4,%mm4
544	movq	%mm5,104(%ebp)
545	shll	$4,%edx
546	movb	%dl,15(%esp)
547	psllq	$60,%mm7
548	por	%mm7,%mm1
549	movq	%mm0,-8(%edi)
550	psrlq	$4,%mm0
551	movq	%mm3,120(%edi)
552	movq	%mm3,%mm6
553	movq	%mm1,-16(%ebp)
554	psrlq	$4,%mm3
555	movq	%mm4,112(%ebp)
556	psllq	$60,%mm6
557	por	%mm6,%mm0
558	movq	%mm0,-8(%ebp)
559	movq	%mm3,120(%ebp)
/* load current state: mm6 = low 8 bytes of Xi, ebx/edx = high words */
560	movq	(%eax),%mm6
561	movl	8(%eax),%ebx
562	movl	12(%eax),%edx
563.align	16
/* ---- per-16-byte-block outer loop: xor the input block into the state,
   then an unrolled byte-at-a-time multiply using the stack tables and
   the .Lrem_8bit reduction table (pinsrw gathers the 16-bit remainders) ---- */
564.L009outer:
565	xorl	12(%ecx),%edx
566	xorl	8(%ecx),%ebx
567	pxor	(%ecx),%mm6
568	leal	16(%ecx),%ecx
569	movl	%ebx,536(%esp)
570	movq	%mm6,528(%esp)
571	movl	%ecx,548(%esp)
572	xorl	%eax,%eax
573	roll	$8,%edx
574	movb	%dl,%al
575	movl	%eax,%ebp
576	andb	$15,%al
577	shrl	$4,%ebp
578	pxor	%mm0,%mm0
579	roll	$8,%edx
580	pxor	%mm1,%mm1
581	pxor	%mm2,%mm2
582	movq	16(%esp,%eax,8),%mm7
583	movq	144(%esp,%eax,8),%mm6
584	movb	%dl,%al
585	movd	%mm7,%ebx
586	psrlq	$8,%mm7
587	movq	%mm6,%mm3
588	movl	%eax,%edi
589	psrlq	$8,%mm6
590	pxor	272(%esp,%ebp,8),%mm7
591	andb	$15,%al
592	psllq	$56,%mm3
593	shrl	$4,%edi
594	pxor	16(%esp,%eax,8),%mm7
595	roll	$8,%edx
596	pxor	144(%esp,%eax,8),%mm6
597	pxor	%mm3,%mm7
598	pxor	400(%esp,%ebp,8),%mm6
599	xorb	(%esp,%ebp,1),%bl
600	movb	%dl,%al
601	movd	%mm7,%ecx
602	movzbl	%bl,%ebx
603	psrlq	$8,%mm7
604	movq	%mm6,%mm3
605	movl	%eax,%ebp
606	psrlq	$8,%mm6
607	pxor	272(%esp,%edi,8),%mm7
608	andb	$15,%al
609	psllq	$56,%mm3
610	shrl	$4,%ebp
611	pinsrw	$2,(%esi,%ebx,2),%mm2
612	pxor	16(%esp,%eax,8),%mm7
613	roll	$8,%edx
614	pxor	144(%esp,%eax,8),%mm6
615	pxor	%mm3,%mm7
616	pxor	400(%esp,%edi,8),%mm6
617	xorb	(%esp,%edi,1),%cl
618	movb	%dl,%al
/* next state word (spilled at 536(%esp)) */
619	movl	536(%esp),%edx
620	movd	%mm7,%ebx
621	movzbl	%cl,%ecx
622	psrlq	$8,%mm7
623	movq	%mm6,%mm3
624	movl	%eax,%edi
625	psrlq	$8,%mm6
626	pxor	272(%esp,%ebp,8),%mm7
627	andb	$15,%al
628	psllq	$56,%mm3
629	pxor	%mm2,%mm6
630	shrl	$4,%edi
631	pinsrw	$2,(%esi,%ecx,2),%mm1
632	pxor	16(%esp,%eax,8),%mm7
633	roll	$8,%edx
634	pxor	144(%esp,%eax,8),%mm6
635	pxor	%mm3,%mm7
636	pxor	400(%esp,%ebp,8),%mm6
637	xorb	(%esp,%ebp,1),%bl
638	movb	%dl,%al
639	movd	%mm7,%ecx
640	movzbl	%bl,%ebx
641	psrlq	$8,%mm7
642	movq	%mm6,%mm3
643	movl	%eax,%ebp
644	psrlq	$8,%mm6
645	pxor	272(%esp,%edi,8),%mm7
646	andb	$15,%al
647	psllq	$56,%mm3
648	pxor	%mm1,%mm6
649	shrl	$4,%ebp
650	pinsrw	$2,(%esi,%ebx,2),%mm0
651	pxor	16(%esp,%eax,8),%mm7
652	roll	$8,%edx
653	pxor	144(%esp,%eax,8),%mm6
654	pxor	%mm3,%mm7
655	pxor	400(%esp,%edi,8),%mm6
656	xorb	(%esp,%edi,1),%cl
657	movb	%dl,%al
658	movd	%mm7,%ebx
659	movzbl	%cl,%ecx
660	psrlq	$8,%mm7
661	movq	%mm6,%mm3
662	movl	%eax,%edi
663	psrlq	$8,%mm6
664	pxor	272(%esp,%ebp,8),%mm7
665	andb	$15,%al
666	psllq	$56,%mm3
667	pxor	%mm0,%mm6
668	shrl	$4,%edi
669	pinsrw	$2,(%esi,%ecx,2),%mm2
670	pxor	16(%esp,%eax,8),%mm7
671	roll	$8,%edx
672	pxor	144(%esp,%eax,8),%mm6
673	pxor	%mm3,%mm7
674	pxor	400(%esp,%ebp,8),%mm6
675	xorb	(%esp,%ebp,1),%bl
676	movb	%dl,%al
677	movd	%mm7,%ecx
678	movzbl	%bl,%ebx
679	psrlq	$8,%mm7
680	movq	%mm6,%mm3
681	movl	%eax,%ebp
682	psrlq	$8,%mm6
683	pxor	272(%esp,%edi,8),%mm7
684	andb	$15,%al
685	psllq	$56,%mm3
686	pxor	%mm2,%mm6
687	shrl	$4,%ebp
688	pinsrw	$2,(%esi,%ebx,2),%mm1
689	pxor	16(%esp,%eax,8),%mm7
690	roll	$8,%edx
691	pxor	144(%esp,%eax,8),%mm6
692	pxor	%mm3,%mm7
693	pxor	400(%esp,%edi,8),%mm6
694	xorb	(%esp,%edi,1),%cl
695	movb	%dl,%al
/* next state word (532(%esp) = high half of the movq at 528) */
696	movl	532(%esp),%edx
697	movd	%mm7,%ebx
698	movzbl	%cl,%ecx
699	psrlq	$8,%mm7
700	movq	%mm6,%mm3
701	movl	%eax,%edi
702	psrlq	$8,%mm6
703	pxor	272(%esp,%ebp,8),%mm7
704	andb	$15,%al
705	psllq	$56,%mm3
706	pxor	%mm1,%mm6
707	shrl	$4,%edi
708	pinsrw	$2,(%esi,%ecx,2),%mm0
709	pxor	16(%esp,%eax,8),%mm7
710	roll	$8,%edx
711	pxor	144(%esp,%eax,8),%mm6
712	pxor	%mm3,%mm7
713	pxor	400(%esp,%ebp,8),%mm6
714	xorb	(%esp,%ebp,1),%bl
715	movb	%dl,%al
716	movd	%mm7,%ecx
717	movzbl	%bl,%ebx
718	psrlq	$8,%mm7
719	movq	%mm6,%mm3
720	movl	%eax,%ebp
721	psrlq	$8,%mm6
722	pxor	272(%esp,%edi,8),%mm7
723	andb	$15,%al
724	psllq	$56,%mm3
725	pxor	%mm0,%mm6
726	shrl	$4,%ebp
727	pinsrw	$2,(%esi,%ebx,2),%mm2
728	pxor	16(%esp,%eax,8),%mm7
729	roll	$8,%edx
730	pxor	144(%esp,%eax,8),%mm6
731	pxor	%mm3,%mm7
732	pxor	400(%esp,%edi,8),%mm6
733	xorb	(%esp,%edi,1),%cl
734	movb	%dl,%al
735	movd	%mm7,%ebx
736	movzbl	%cl,%ecx
737	psrlq	$8,%mm7
738	movq	%mm6,%mm3
739	movl	%eax,%edi
740	psrlq	$8,%mm6
741	pxor	272(%esp,%ebp,8),%mm7
742	andb	$15,%al
743	psllq	$56,%mm3
744	pxor	%mm2,%mm6
745	shrl	$4,%edi
746	pinsrw	$2,(%esi,%ecx,2),%mm1
747	pxor	16(%esp,%eax,8),%mm7
748	roll	$8,%edx
749	pxor	144(%esp,%eax,8),%mm6
750	pxor	%mm3,%mm7
751	pxor	400(%esp,%ebp,8),%mm6
752	xorb	(%esp,%ebp,1),%bl
753	movb	%dl,%al
754	movd	%mm7,%ecx
755	movzbl	%bl,%ebx
756	psrlq	$8,%mm7
757	movq	%mm6,%mm3
758	movl	%eax,%ebp
759	psrlq	$8,%mm6
760	pxor	272(%esp,%edi,8),%mm7
761	andb	$15,%al
762	psllq	$56,%mm3
763	pxor	%mm1,%mm6
764	shrl	$4,%ebp
765	pinsrw	$2,(%esi,%ebx,2),%mm0
766	pxor	16(%esp,%eax,8),%mm7
767	roll	$8,%edx
768	pxor	144(%esp,%eax,8),%mm6
769	pxor	%mm3,%mm7
770	pxor	400(%esp,%edi,8),%mm6
771	xorb	(%esp,%edi,1),%cl
772	movb	%dl,%al
/* last state word (528(%esp) = low half of mm6 spill) */
773	movl	528(%esp),%edx
774	movd	%mm7,%ebx
775	movzbl	%cl,%ecx
776	psrlq	$8,%mm7
777	movq	%mm6,%mm3
778	movl	%eax,%edi
779	psrlq	$8,%mm6
780	pxor	272(%esp,%ebp,8),%mm7
781	andb	$15,%al
782	psllq	$56,%mm3
783	pxor	%mm0,%mm6
784	shrl	$4,%edi
785	pinsrw	$2,(%esi,%ecx,2),%mm2
786	pxor	16(%esp,%eax,8),%mm7
787	roll	$8,%edx
788	pxor	144(%esp,%eax,8),%mm6
789	pxor	%mm3,%mm7
790	pxor	400(%esp,%ebp,8),%mm6
791	xorb	(%esp,%ebp,1),%bl
792	movb	%dl,%al
793	movd	%mm7,%ecx
794	movzbl	%bl,%ebx
795	psrlq	$8,%mm7
796	movq	%mm6,%mm3
797	movl	%eax,%ebp
798	psrlq	$8,%mm6
799	pxor	272(%esp,%edi,8),%mm7
800	andb	$15,%al
801	psllq	$56,%mm3
802	pxor	%mm2,%mm6
803	shrl	$4,%ebp
804	pinsrw	$2,(%esi,%ebx,2),%mm1
805	pxor	16(%esp,%eax,8),%mm7
806	roll	$8,%edx
807	pxor	144(%esp,%eax,8),%mm6
808	pxor	%mm3,%mm7
809	pxor	400(%esp,%edi,8),%mm6
810	xorb	(%esp,%edi,1),%cl
811	movb	%dl,%al
812	movd	%mm7,%ebx
813	movzbl	%cl,%ecx
814	psrlq	$8,%mm7
815	movq	%mm6,%mm3
816	movl	%eax,%edi
817	psrlq	$8,%mm6
818	pxor	272(%esp,%ebp,8),%mm7
819	andb	$15,%al
820	psllq	$56,%mm3
821	pxor	%mm1,%mm6
822	shrl	$4,%edi
823	pinsrw	$2,(%esi,%ecx,2),%mm0
824	pxor	16(%esp,%eax,8),%mm7
825	roll	$8,%edx
826	pxor	144(%esp,%eax,8),%mm6
827	pxor	%mm3,%mm7
828	pxor	400(%esp,%ebp,8),%mm6
829	xorb	(%esp,%ebp,1),%bl
830	movb	%dl,%al
831	movd	%mm7,%ecx
832	movzbl	%bl,%ebx
833	psrlq	$8,%mm7
834	movq	%mm6,%mm3
835	movl	%eax,%ebp
836	psrlq	$8,%mm6
837	pxor	272(%esp,%edi,8),%mm7
838	andb	$15,%al
839	psllq	$56,%mm3
840	pxor	%mm0,%mm6
841	shrl	$4,%ebp
842	pinsrw	$2,(%esi,%ebx,2),%mm2
843	pxor	16(%esp,%eax,8),%mm7
844	roll	$8,%edx
845	pxor	144(%esp,%eax,8),%mm6
846	pxor	%mm3,%mm7
847	pxor	400(%esp,%edi,8),%mm6
848	xorb	(%esp,%edi,1),%cl
849	movb	%dl,%al
850	movl	524(%esp),%edx
851	movd	%mm7,%ebx
852	movzbl	%cl,%ecx
853	psrlq	$8,%mm7
854	movq	%mm6,%mm3
855	movl	%eax,%edi
856	psrlq	$8,%mm6
857	pxor	272(%esp,%ebp,8),%mm7
858	andb	$15,%al
859	psllq	$56,%mm3
860	pxor	%mm2,%mm6
861	shrl	$4,%edi
862	pinsrw	$2,(%esi,%ecx,2),%mm1
863	pxor	16(%esp,%eax,8),%mm7
864	pxor	144(%esp,%eax,8),%mm6
865	xorb	(%esp,%ebp,1),%bl
866	pxor	%mm3,%mm7
867	pxor	400(%esp,%ebp,8),%mm6
868	movzbl	%bl,%ebx
/* ---- final reduction/merge of the accumulated remainders ---- */
869	pxor	%mm2,%mm2
870	psllq	$4,%mm1
871	movd	%mm7,%ecx
872	psrlq	$4,%mm7
873	movq	%mm6,%mm3
874	psrlq	$4,%mm6
875	shll	$4,%ecx
876	pxor	16(%esp,%edi,8),%mm7
877	psllq	$60,%mm3
878	movzbl	%cl,%ecx
879	pxor	%mm3,%mm7
880	pxor	144(%esp,%edi,8),%mm6
881	pinsrw	$2,(%esi,%ebx,2),%mm0
882	pxor	%mm1,%mm6
883	movd	%mm7,%edx
884	pinsrw	$3,(%esi,%ecx,2),%mm2
885	psllq	$12,%mm0
886	pxor	%mm0,%mm6
887	psrlq	$32,%mm7
888	pxor	%mm2,%mm6
889	movl	548(%esp),%ecx
890	movd	%mm7,%ebx
/* byte-swap mm6 in-register (psllw/psrlw/por + pshufw) and ebx/edx */
891	movq	%mm6,%mm3
892	psllw	$8,%mm6
893	psrlw	$8,%mm3
894	por	%mm3,%mm6
895	bswap	%edx
896	pshufw	$27,%mm6,%mm6
897	bswap	%ebx
898	cmpl	552(%esp),%ecx
899	jne	.L009outer
/* store the final state and restore the caller's esp */
900	movl	544(%esp),%eax
901	movl	%edx,12(%eax)
902	movl	%ebx,8(%eax)
903	movq	%mm6,(%eax)
904	movl	556(%esp),%esp
905	emms
906	popl	%edi
907	popl	%esi
908	popl	%ebx
909	popl	%ebp
910	ret
911.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
/*
 * gcm_init_clmul — PCLMULQDQ-path key setup.
 * Generated from ghash-x86.pl; do not hand-edit logic.
 *
 * cdecl args: 4(%esp) = output table (32 bytes written: H then H^2-style
 * precomputation — exact meaning defined by ghash-x86.pl), 8(%esp) = 16-byte
 * hash key H. The .byte 102,15,58,68,... sequences encode pclmulqdq
 * (0x66 0x0F 0x3A 0x44) for assemblers lacking the mnemonic.
 * No callee-saved GPRs used; only eax/ecx/edx and xmm0-xmm5 touched.
 */
912.globl	gcm_init_clmul
913.type	gcm_init_clmul,@function
914.align	16
915gcm_init_clmul:
916.L_gcm_init_clmul_begin:
917	movl	4(%esp),%edx
918	movl	8(%esp),%eax
/* PIC address of the .Lbswap constant block via call/pop */
919	call	.L010pic
920.L010pic:
921	popl	%ecx
922	leal	.Lbswap-.L010pic(%ecx),%ecx
923	movdqu	(%eax),%xmm2
924	pshufd	$78,%xmm2,%xmm2
/* broadcast the top dword; use a signed compare to build a carry mask */
925	pshufd	$255,%xmm2,%xmm4
926	movdqa	%xmm2,%xmm3
927	psllq	$1,%xmm2
928	pxor	%xmm5,%xmm5
929	psrlq	$63,%xmm3
930	pcmpgtd	%xmm4,%xmm5
931	pslldq	$8,%xmm3
932	por	%xmm3,%xmm2
/* conditionally xor the constant at .Lbswap+16 (field polynomial word) */
933	pand	16(%ecx),%xmm5
934	pxor	%xmm5,%xmm2
935	movdqa	%xmm2,%xmm0
/* Karatsuba-style carry-less multiply: H * H */
936	movdqa	%xmm0,%xmm1
937	pshufd	$78,%xmm0,%xmm3
938	pshufd	$78,%xmm2,%xmm4
939	pxor	%xmm0,%xmm3
940	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
944	xorps	%xmm0,%xmm3
945	xorps	%xmm1,%xmm3
946	movdqa	%xmm3,%xmm4
947	psrldq	$8,%xmm3
948	pslldq	$8,%xmm4
949	pxor	%xmm3,%xmm1
950	pxor	%xmm4,%xmm0
/* reduction: shift-and-xor folding of the 256-bit product */
951	movdqa	%xmm0,%xmm3
952	psllq	$1,%xmm0
953	pxor	%xmm3,%xmm0
954	psllq	$5,%xmm0
955	pxor	%xmm3,%xmm0
956	psllq	$57,%xmm0
957	movdqa	%xmm0,%xmm4
958	pslldq	$8,%xmm0
959	psrldq	$8,%xmm4
960	pxor	%xmm3,%xmm0
961	pxor	%xmm4,%xmm1
962	movdqa	%xmm0,%xmm4
963	psrlq	$5,%xmm0
964	pxor	%xmm4,%xmm0
965	psrlq	$1,%xmm0
966	pxor	%xmm4,%xmm0
967	pxor	%xmm1,%xmm4
968	psrlq	$1,%xmm0
969	pxor	%xmm4,%xmm0
/* store processed H and the squared value */
970	movdqu	%xmm2,(%edx)
971	movdqu	%xmm0,16(%edx)
972	ret
973.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
/*
 * gcm_gmult_clmul — single-block GHASH multiply via PCLMULQDQ.
 * Generated from ghash-x86.pl; do not hand-edit logic.
 *
 * cdecl args: 4(%esp) = 16-byte Xi state (updated in place),
 *             8(%esp) = table produced by gcm_init_clmul.
 * .byte 102,15,56,0,197 encodes pshufb %xmm5,%xmm0 (byte-swap via the
 * .Lbswap mask); .byte 102,15,58,68,... encodes pclmulqdq.
 */
974.globl	gcm_gmult_clmul
975.type	gcm_gmult_clmul,@function
976.align	16
977gcm_gmult_clmul:
978.L_gcm_gmult_clmul_begin:
979	movl	4(%esp),%eax
980	movl	8(%esp),%edx
981	call	.L011pic
982.L011pic:
983	popl	%ecx
984	leal	.Lbswap-.L011pic(%ecx),%ecx
985	movdqu	(%eax),%xmm0
986	movdqa	(%ecx),%xmm5
987	movups	(%edx),%xmm2
/* pshufb: byte-reverse Xi into computation order */
.byte	102,15,56,0,197
/* Karatsuba carry-less multiply Xi * H */
989	movdqa	%xmm0,%xmm1
990	pshufd	$78,%xmm0,%xmm3
991	pshufd	$78,%xmm2,%xmm4
992	pxor	%xmm0,%xmm3
993	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
997	xorps	%xmm0,%xmm3
998	xorps	%xmm1,%xmm3
999	movdqa	%xmm3,%xmm4
1000	psrldq	$8,%xmm3
1001	pslldq	$8,%xmm4
1002	pxor	%xmm3,%xmm1
1003	pxor	%xmm4,%xmm0
/* shift-and-xor reduction (same sequence as gcm_init_clmul) */
1004	movdqa	%xmm0,%xmm3
1005	psllq	$1,%xmm0
1006	pxor	%xmm3,%xmm0
1007	psllq	$5,%xmm0
1008	pxor	%xmm3,%xmm0
1009	psllq	$57,%xmm0
1010	movdqa	%xmm0,%xmm4
1011	pslldq	$8,%xmm0
1012	psrldq	$8,%xmm4
1013	pxor	%xmm3,%xmm0
1014	pxor	%xmm4,%xmm1
1015	movdqa	%xmm0,%xmm4
1016	psrlq	$5,%xmm0
1017	pxor	%xmm4,%xmm0
1018	psrlq	$1,%xmm0
1019	pxor	%xmm4,%xmm0
1020	pxor	%xmm1,%xmm4
1021	psrlq	$1,%xmm0
1022	pxor	%xmm4,%xmm0
/* byte-swap back and store */
.byte	102,15,56,0,197
1024	movdqu	%xmm0,(%eax)
1025	ret
1026.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
/*
 * gcm_ghash_clmul — buffer GHASH via PCLMULQDQ, processing two blocks per
 * main-loop iteration. Generated from ghash-x86.pl; do not hand-edit logic.
 *
 * cdecl args (after 4 pushes, at 20..32(%esp)):
 *   arg1 (eax): Xi state (updated in place)
 *   arg2 (edx): table from gcm_init_clmul (two 16-byte entries read)
 *   arg3 (esi): input pointer
 *   arg4 (ebx): input length in bytes (16-byte granularity assumed —
 *               confirm caller contract in ghash-x86.pl)
 * .byte 102,15,56,0,xx = pshufb (byte swap); 102,15,58,68,xx,imm = pclmulqdq.
 */
1027.globl	gcm_ghash_clmul
1028.type	gcm_ghash_clmul,@function
1029.align	16
1030gcm_ghash_clmul:
1031.L_gcm_ghash_clmul_begin:
1032	pushl	%ebp
1033	pushl	%ebx
1034	pushl	%esi
1035	pushl	%edi
1036	movl	20(%esp),%eax
1037	movl	24(%esp),%edx
1038	movl	28(%esp),%esi
1039	movl	32(%esp),%ebx
1040	call	.L012pic
1041.L012pic:
1042	popl	%ecx
1043	leal	.Lbswap-.L012pic(%ecx),%ecx
1044	movdqu	(%eax),%xmm0
1045	movdqa	(%ecx),%xmm5
1046	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197
/* exactly one block? take the single-block tail path */
1048	subl	$16,%ebx
1049	jz	.L013odd_tail
/* prologue: fold first block into Xi, multiply second block by H */
1050	movdqu	(%esi),%xmm3
1051	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245
1054	pxor	%xmm3,%xmm0
1055	movdqa	%xmm6,%xmm7
1056	pshufd	$78,%xmm6,%xmm3
1057	pshufd	$78,%xmm2,%xmm4
1058	pxor	%xmm6,%xmm3
1059	pxor	%xmm2,%xmm4
.byte	102,15,58,68,242,0
.byte	102,15,58,68,250,17
.byte	102,15,58,68,220,0
1063	xorps	%xmm6,%xmm3
1064	xorps	%xmm7,%xmm3
1065	movdqa	%xmm3,%xmm4
1066	psrldq	$8,%xmm3
1067	pslldq	$8,%xmm4
1068	pxor	%xmm3,%xmm7
1069	pxor	%xmm4,%xmm6
/* switch to the second table entry for the two-block stride */
1070	movups	16(%edx),%xmm2
1071	leal	32(%esi),%esi
1072	subl	$32,%ebx
1073	jbe	.L014even_tail
/* main loop: multiply-accumulate two blocks per pass, interleaving the
   reduction of the previous product with the next pclmulqdq pair */
.L015mod_loop:
1075	movdqa	%xmm0,%xmm1
1076	pshufd	$78,%xmm0,%xmm3
1077	pshufd	$78,%xmm2,%xmm4
1078	pxor	%xmm0,%xmm3
1079	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
1083	xorps	%xmm0,%xmm3
1084	xorps	%xmm1,%xmm3
1085	movdqa	%xmm3,%xmm4
1086	psrldq	$8,%xmm3
1087	pslldq	$8,%xmm4
1088	pxor	%xmm3,%xmm1
1089	pxor	%xmm4,%xmm0
1090	movdqu	(%esi),%xmm3
1091	movups	(%edx),%xmm2
1092	pxor	%xmm6,%xmm0
1093	pxor	%xmm7,%xmm1
1094	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221
.byte	102,15,56,0,245
/* note: xmm5 is temporarily reused as scratch; reloaded from .Lbswap below */
1097	movdqa	%xmm6,%xmm5
1098	movdqa	%xmm6,%xmm7
1099	pxor	%xmm3,%xmm1
1100	movdqa	%xmm0,%xmm3
1101	psllq	$1,%xmm0
1102	pxor	%xmm3,%xmm0
1103	psllq	$5,%xmm0
1104	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0
1106	psllq	$57,%xmm0
1107	movdqa	%xmm0,%xmm4
1108	pslldq	$8,%xmm0
1109	psrldq	$8,%xmm4
1110	pxor	%xmm3,%xmm0
1111	pshufd	$78,%xmm5,%xmm3
1112	pxor	%xmm4,%xmm1
1113	pxor	%xmm5,%xmm3
1114	pshufd	$78,%xmm2,%xmm5
1115	pxor	%xmm2,%xmm5
.byte	102,15,58,68,250,17
1117	movdqa	%xmm0,%xmm4
1118	psrlq	$5,%xmm0
1119	pxor	%xmm4,%xmm0
1120	psrlq	$1,%xmm0
1121	pxor	%xmm4,%xmm0
1122	pxor	%xmm1,%xmm4
1123	psrlq	$1,%xmm0
1124	pxor	%xmm4,%xmm0
.byte	102,15,58,68,221,0
1126	movups	16(%edx),%xmm2
1127	xorps	%xmm6,%xmm3
1128	xorps	%xmm7,%xmm3
1129	movdqa	%xmm3,%xmm5
1130	psrldq	$8,%xmm3
1131	pslldq	$8,%xmm5
1132	pxor	%xmm3,%xmm7
1133	pxor	%xmm5,%xmm6
/* restore the byte-swap mask clobbered above */
1134	movdqa	(%ecx),%xmm5
1135	leal	32(%esi),%esi
1136	subl	$32,%ebx
1137	ja	.L015mod_loop
/* even remainder: combine the pending two-block product and reduce */
.L014even_tail:
1139	movdqa	%xmm0,%xmm1
1140	pshufd	$78,%xmm0,%xmm3
1141	pshufd	$78,%xmm2,%xmm4
1142	pxor	%xmm0,%xmm3
1143	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
1147	xorps	%xmm0,%xmm3
1148	xorps	%xmm1,%xmm3
1149	movdqa	%xmm3,%xmm4
1150	psrldq	$8,%xmm3
1151	pslldq	$8,%xmm4
1152	pxor	%xmm3,%xmm1
1153	pxor	%xmm4,%xmm0
1154	pxor	%xmm6,%xmm0
1155	pxor	%xmm7,%xmm1
1156	movdqa	%xmm0,%xmm3
1157	psllq	$1,%xmm0
1158	pxor	%xmm3,%xmm0
1159	psllq	$5,%xmm0
1160	pxor	%xmm3,%xmm0
1161	psllq	$57,%xmm0
1162	movdqa	%xmm0,%xmm4
1163	pslldq	$8,%xmm0
1164	psrldq	$8,%xmm4
1165	pxor	%xmm3,%xmm0
1166	pxor	%xmm4,%xmm1
1167	movdqa	%xmm0,%xmm4
1168	psrlq	$5,%xmm0
1169	pxor	%xmm4,%xmm0
1170	psrlq	$1,%xmm0
1171	pxor	%xmm4,%xmm0
1172	pxor	%xmm1,%xmm4
1173	psrlq	$1,%xmm0
1174	pxor	%xmm4,%xmm0
1175	testl	%ebx,%ebx
1176	jnz	.L016done
1177	movups	(%edx),%xmm2
/* odd tail: one remaining block, classic multiply + reduce */
.L013odd_tail:
1179	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221
1181	pxor	%xmm3,%xmm0
1182	movdqa	%xmm0,%xmm1
1183	pshufd	$78,%xmm0,%xmm3
1184	pshufd	$78,%xmm2,%xmm4
1185	pxor	%xmm0,%xmm3
1186	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,220,0
1190	xorps	%xmm0,%xmm3
1191	xorps	%xmm1,%xmm3
1192	movdqa	%xmm3,%xmm4
1193	psrldq	$8,%xmm3
1194	pslldq	$8,%xmm4
1195	pxor	%xmm3,%xmm1
1196	pxor	%xmm4,%xmm0
1197	movdqa	%xmm0,%xmm3
1198	psllq	$1,%xmm0
1199	pxor	%xmm3,%xmm0
1200	psllq	$5,%xmm0
1201	pxor	%xmm3,%xmm0
1202	psllq	$57,%xmm0
1203	movdqa	%xmm0,%xmm4
1204	pslldq	$8,%xmm0
1205	psrldq	$8,%xmm4
1206	pxor	%xmm3,%xmm0
1207	pxor	%xmm4,%xmm1
1208	movdqa	%xmm0,%xmm4
1209	psrlq	$5,%xmm0
1210	pxor	%xmm4,%xmm0
1211	psrlq	$1,%xmm0
1212	pxor	%xmm4,%xmm0
1213	pxor	%xmm1,%xmm4
1214	psrlq	$1,%xmm0
1215	pxor	%xmm4,%xmm0
.L016done:
/* byte-swap back to memory order and store Xi */
.byte	102,15,56,0,197
1218	movdqu	%xmm0,(%eax)
1219	popl	%edi
1220	popl	%esi
1221	popl	%ebx
1222	popl	%ebp
1223	ret
1224.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
/*
 * Constant data for the routines above.
 * .Lbswap+0 : 16-byte pshufb mask that reverses byte order (15..0).
 * .Lbswap+16: constant selected via pand/pxor in gcm_init_clmul
 *             (presumably the GHASH field polynomial — per ghash-x86.pl).
 * .Lrem_4bit: 16 x 8-byte reduction entries used (via PIC leal) by the
 *             MMX routines; the same 32-bit constants are materialized on
 *             the stack by the plain-x86 routines.
 * .Lrem_8bit: 256 x 16-bit reduction table used by gcm_ghash_4bit_mmx.
 * Trailing .byte strings: ASCII identification tag
 *             ("GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"), NUL-terminated.
 */
1225.align	64
1226.Lbswap:
1227.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1228.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
1229.align	64
1230.Lrem_4bit:
1231.long	0,0,0,471859200,0,943718400,0,610271232
1232.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
1233.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
1234.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
1235.align	64
1236.Lrem_8bit:
1237.value	0,450,900,582,1800,1738,1164,1358
1238.value	3600,4050,3476,3158,2328,2266,2716,2910
1239.value	7200,7650,8100,7782,6952,6890,6316,6510
1240.value	4656,5106,4532,4214,5432,5370,5820,6014
1241.value	14400,14722,15300,14854,16200,16010,15564,15630
1242.value	13904,14226,13780,13334,12632,12442,13020,13086
1243.value	9312,9634,10212,9766,9064,8874,8428,8494
1244.value	10864,11186,10740,10294,11640,11450,12028,12094
1245.value	28800,28994,29444,29382,30600,30282,29708,30158
1246.value	32400,32594,32020,31958,31128,30810,31260,31710
1247.value	27808,28002,28452,28390,27560,27242,26668,27118
1248.value	25264,25458,24884,24822,26040,25722,26172,26622
1249.value	18624,18690,19268,19078,20424,19978,19532,19854
1250.value	18128,18194,17748,17558,16856,16410,16988,17310
1251.value	21728,21794,22372,22182,21480,21034,20588,20910
1252.value	23280,23346,22900,22710,24056,23610,24188,24510
1253.value	57600,57538,57988,58182,58888,59338,58764,58446
1254.value	61200,61138,60564,60758,59416,59866,60316,59998
1255.value	64800,64738,65188,65382,64040,64490,63916,63598
1256.value	62256,62194,61620,61814,62520,62970,63420,63102
1257.value	55616,55426,56004,56070,56904,57226,56780,56334
1258.value	55120,54930,54484,54550,53336,53658,54236,53790
1259.value	50528,50338,50916,50982,49768,50090,49644,49198
1260.value	52080,51890,51444,51510,52344,52666,53244,52798
1261.value	37248,36930,37380,37830,38536,38730,38156,38094
1262.value	40848,40530,39956,40406,39064,39258,39708,39646
1263.value	36256,35938,36388,36838,35496,35690,35116,35054
1264.value	33712,33394,32820,33270,33976,34170,34620,34558
1265.value	43456,43010,43588,43910,44744,44810,44364,44174
1266.value	42960,42514,42068,42390,41176,41242,41820,41630
1267.value	46560,46114,46692,47014,45800,45866,45420,45230
1268.value	48112,47666,47220,47542,48376,48442,49020,48830
1269.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
1270.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
1271.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
1272.byte	0
1273#else
1274.file	"ghash-x86.S"
1275.text
/*
 * gcm_gmult_4bit_x86 — non-PIC (#else) build of the same routine emitted
 * earlier in the #ifdef PIC branch; the bodies are identical because this
 * function takes no code-position-dependent references.
 * Generated from ghash-x86.pl; do not hand-edit logic.
 *
 * cdecl args at 104/108(%esp) after the prologue:
 *   arg1: 16-byte state Xi (read, then rewritten byte-swapped)
 *   arg2: 256-byte nibble lookup table (16 x 16-byte entries).
 */
1276.globl	gcm_gmult_4bit_x86
1277.type	gcm_gmult_4bit_x86,@function
1278.align	16
1279gcm_gmult_4bit_x86:
1280.L_gcm_gmult_4bit_x86_begin:
1281	pushl	%ebp
1282	pushl	%ebx
1283	pushl	%esi
1284	pushl	%edi
1285	subl	$84,%esp
1286	movl	104(%esp),%edi
1287	movl	108(%esp),%esi
1288	movl	(%edi),%ebp
1289	movl	4(%edi),%edx
1290	movl	8(%edi),%ecx
1291	movl	12(%edi),%ebx
/* on-stack 4-bit reduction table (same values as .Lrem_4bit) */
1292	movl	$0,16(%esp)
1293	movl	$471859200,20(%esp)
1294	movl	$943718400,24(%esp)
1295	movl	$610271232,28(%esp)
1296	movl	$1887436800,32(%esp)
1297	movl	$1822425088,36(%esp)
1298	movl	$1220542464,40(%esp)
1299	movl	$1423966208,44(%esp)
1300	movl	$3774873600,48(%esp)
1301	movl	$4246732800,52(%esp)
1302	movl	$3644850176,56(%esp)
1303	movl	$3311403008,60(%esp)
1304	movl	$2441084928,64(%esp)
1305	movl	$2376073216,68(%esp)
1306	movl	$2847932416,72(%esp)
1307	movl	$3051356160,76(%esp)
1308	movl	%ebp,(%esp)
1309	movl	%edx,4(%esp)
1310	movl	%ecx,8(%esp)
1311	movl	%ebx,12(%esp)
/* first lookup from the high nibble of the last state byte */
1312	shrl	$20,%ebx
1313	andl	$240,%ebx
1314	movl	4(%esi,%ebx,1),%ebp
1315	movl	(%esi,%ebx,1),%edx
1316	movl	12(%esi,%ebx,1),%ecx
1317	movl	8(%esi,%ebx,1),%ebx
1318	xorl	%eax,%eax
1319	movl	$15,%edi
1320	jmp	.L000x86_loop
1321.align	16
/* nibble-at-a-time multiply loop (two nibbles per iteration) */
1322.L000x86_loop:
1323	movb	%bl,%al
1324	shrdl	$4,%ecx,%ebx
1325	andb	$15,%al
1326	shrdl	$4,%edx,%ecx
1327	shrdl	$4,%ebp,%edx
1328	shrl	$4,%ebp
1329	xorl	16(%esp,%eax,4),%ebp
1330	movb	(%esp,%edi,1),%al
1331	andb	$240,%al
1332	xorl	8(%esi,%eax,1),%ebx
1333	xorl	12(%esi,%eax,1),%ecx
1334	xorl	(%esi,%eax,1),%edx
1335	xorl	4(%esi,%eax,1),%ebp
1336	decl	%edi
1337	js	.L001x86_break
1338	movb	%bl,%al
1339	shrdl	$4,%ecx,%ebx
1340	andb	$15,%al
1341	shrdl	$4,%edx,%ecx
1342	shrdl	$4,%ebp,%edx
1343	shrl	$4,%ebp
1344	xorl	16(%esp,%eax,4),%ebp
1345	movb	(%esp,%edi,1),%al
1346	shlb	$4,%al
1347	xorl	8(%esi,%eax,1),%ebx
1348	xorl	12(%esi,%eax,1),%ecx
1349	xorl	(%esi,%eax,1),%edx
1350	xorl	4(%esi,%eax,1),%ebp
1351	jmp	.L000x86_loop
1352.align	16
1353.L001x86_break:
/* byte-swap result and write it back to Xi */
1354	bswap	%ebx
1355	bswap	%ecx
1356	bswap	%edx
1357	bswap	%ebp
1358	movl	104(%esp),%edi
1359	movl	%ebx,12(%edi)
1360	movl	%ecx,8(%edi)
1361	movl	%edx,4(%edi)
1362	movl	%ebp,(%edi)
1363	addl	$84,%esp
1364	popl	%edi
1365	popl	%esi
1366	popl	%ebx
1367	popl	%ebp
1368	ret
1369.size	gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
/*
 * void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
 *                         const u8 *inp, size_t len);
 *
 * GHASH over `len` bytes of input using the 4-bit table ("Shoup")
 * method in pure IA-32 integer code (no MMX/SSE).  For each 16-byte
 * block: Xi ^= block, then Xi = Xi * H in GF(2^128), via 32 nibble
 * lookups into Htable with a rem_4bit reduction table built on the
 * stack.  Xi is updated in place.
 * NOTE(review): argument offsets 104..116(%esp) = 4 saved registers
 * + 84-byte frame + return address; layout matches the other GHASH
 * entry points in this file — confirm against ghash-x86.pl.
 */
.globl	gcm_ghash_4bit_x86
.type	gcm_ghash_4bit_x86,@function
.align	16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$84,%esp		/* 16-byte Xi scratch + 16x4-byte rem_4bit table */
	movl	104(%esp),%ebx		/* %ebx = Xi */
	movl	108(%esp),%esi		/* %esi = Htable */
	movl	112(%esp),%edi		/* %edi = inp */
	movl	116(%esp),%ecx		/* %ecx = len */
	addl	%edi,%ecx
	movl	%ecx,116(%esp)		/* 116(%esp) = end-of-input pointer */
	movl	(%ebx),%ebp		/* load Xi as four 32-bit words */
	movl	4(%ebx),%edx
	movl	8(%ebx),%ecx
	movl	12(%ebx),%ebx
	/* materialize rem_4bit[16] at 16(%esp): pre-shifted remainders of
	 * the GHASH polynomial (e.g. 471859200 = 0x1c200000) used to fold
	 * the 4 bits shifted out of Z back in */
	movl	$0,16(%esp)
	movl	$471859200,20(%esp)
	movl	$943718400,24(%esp)
	movl	$610271232,28(%esp)
	movl	$1887436800,32(%esp)
	movl	$1822425088,36(%esp)
	movl	$1220542464,40(%esp)
	movl	$1423966208,44(%esp)
	movl	$3774873600,48(%esp)
	movl	$4246732800,52(%esp)
	movl	$3644850176,56(%esp)
	movl	$3311403008,60(%esp)
	movl	$2441084928,64(%esp)
	movl	$2376073216,68(%esp)
	movl	$2847932416,72(%esp)
	movl	$3051356160,76(%esp)
.align	16
.L002x86_outer_loop:
	/* Xi ^= next 16-byte input block, stash the result at 0(%esp)
	 * so single bytes can be re-read during the nibble walk */
	xorl	12(%edi),%ebx
	xorl	8(%edi),%ecx
	xorl	4(%edi),%edx
	xorl	(%edi),%ebp
	movl	%ebx,12(%esp)
	movl	%ecx,8(%esp)
	movl	%edx,4(%esp)
	movl	%ebp,(%esp)
	shrl	$20,%ebx
	andl	$240,%ebx		/* first nibble index, pre-scaled by 16 */
	movl	4(%esi,%ebx,1),%ebp	/* Z = Htable[nibble] */
	movl	(%esi,%ebx,1),%edx
	movl	12(%esi,%ebx,1),%ecx
	movl	8(%esi,%ebx,1),%ebx
	xorl	%eax,%eax
	movl	$15,%edi		/* %edi = byte index 15..0 of stashed Xi */
	jmp	.L000x86_loop
.align	16
.L003x86_loop:
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx		/* 128-bit Z >>= 4 via shrd chain */
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp	/* fold shifted-out nibble: rem_4bit[n] */
	movb	(%esp,%edi,1),%al
	andb	$240,%al		/* high nibble of current Xi byte */
	xorl	8(%esi,%eax,1),%ebx	/* Z ^= Htable[nibble] */
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	decl	%edi
	js	.L004x86_break
	/* second (unrolled) iteration: low nibble of the same byte */
	movb	%bl,%al
	shrdl	$4,%ecx,%ebx
	andb	$15,%al
	shrdl	$4,%edx,%ecx
	shrdl	$4,%ebp,%edx
	shrl	$4,%ebp
	xorl	16(%esp,%eax,4),%ebp
	movb	(%esp,%edi,1),%al
	shlb	$4,%al			/* low nibble, pre-scaled by 16 */
	xorl	8(%esi,%eax,1),%ebx
	xorl	12(%esi,%eax,1),%ecx
	xorl	(%esi,%eax,1),%edx
	xorl	4(%esi,%eax,1),%ebp
	jmp	.L003x86_loop
.align	16
.L004x86_break:
	bswap	%ebx			/* Z back to big-endian byte order */
	bswap	%ecx
	bswap	%edx
	bswap	%ebp
	movl	112(%esp),%edi
	leal	16(%edi),%edi		/* advance input pointer one block */
	cmpl	116(%esp),%edi
	movl	%edi,112(%esp)
	jb	.L002x86_outer_loop	/* more input? */
	movl	104(%esp),%edi
	movl	%ebx,12(%edi)		/* store updated Xi */
	movl	%ecx,8(%edi)
	movl	%edx,4(%edi)
	movl	%ebp,(%edi)
	addl	$84,%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
/*
 * void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
 *
 * Single GHASH multiplication Xi = Xi * H in GF(2^128) using the
 * 4-bit table method with MMX registers: Z lives in %mm0/%mm1 (low/
 * high 64 bits), shifted-out nibbles are reduced through the static
 * .Lrem_4bit table.  Xi is updated in place.  Clobbers MMX state
 * (emms is executed before return).
 */
.globl	gcm_gmult_4bit_mmx
.type	gcm_gmult_4bit_mmx,@function
.align	16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		/* %edi = Xi */
	movl	24(%esp),%esi		/* %esi = Htable */
	call	.L005pic_point		/* PIC: get own address off the stack */
.L005pic_point:
	popl	%eax
	leal	.Lrem_4bit-.L005pic_point(%eax),%eax	/* %eax = &rem_4bit */
	movzbl	15(%edi),%ebx		/* last byte of Xi starts the walk */
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp		/* remaining byte index 14..0 */
	shlb	$4,%cl			/* low nibble *16 */
	andl	$240,%edx		/* high nibble *16 */
	movq	8(%esi,%ecx,1),%mm0	/* Z = Htable[low nibble] */
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	.L006mmx_loop
.align	16
.L006mmx_loop:
	/* one nibble step: Z >>= 4, fold remainder, Z ^= Htable[next] */
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl	/* fetch next Xi byte */
	psllq	$60,%mm2		/* bits carried from high to low half */
	pxor	(%eax,%ebx,8),%mm1	/* rem_4bit reduction */
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	.L007mmx_break		/* all 16 bytes consumed */
	/* unrolled second step for the other nibble of the byte */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	.L006mmx_loop
.align	16
.L007mmx_break:
	/* drain the last two nibble steps (low then high of last byte) */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	/* unpack Z from %mm0/%mm1 into four 32-bit words, byte-swap,
	 * and store back to Xi */
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms				/* leave FPU usable for callers */
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
/*
 * void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16],
 *                         const u8 *inp, size_t len);
 *
 * GHASH over `len` bytes using MMX with an 8-bit remainder table
 * (.Lrem_8bit, looked up via pinsrw).  The function first expands
 * Htable into nibble-shifted copies in a large 64-byte-aligned stack
 * area, then processes input one 16-byte block per .L009outer
 * iteration.  Xi is updated in place; original %esp is preserved at
 * 556(%esp) because the frame is realigned.  Clobbers MMX state
 * (emms before return).
 * NOTE(review): stack-area layout (tables at 16/144/272/400(%esp),
 * saved args at 544+) inferred from the stores below — confirm
 * against ghash-x86.pl when modifying.
 */
.globl	gcm_ghash_4bit_mmx
.type	gcm_ghash_4bit_mmx,@function
.align	16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax		/* %eax = Xi */
	movl	24(%esp),%ebx		/* %ebx = Htable */
	movl	28(%esp),%ecx		/* %ecx = inp */
	movl	32(%esp),%edx		/* %edx = len */
	movl	%esp,%ebp		/* keep original %esp for epilogue */
	call	.L008pic_point		/* PIC: load own address */
.L008pic_point:
	popl	%esi
	leal	.Lrem_8bit-.L008pic_point(%esi),%esi	/* %esi = &rem_8bit */
	subl	$544,%esp
	andl	$-64,%esp		/* 64-byte-align the table area */
	subl	$16,%esp
	addl	%ecx,%edx		/* %edx = end-of-input */
	movl	%eax,544(%esp)		/* spill Xi, end, original %esp */
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	/*
	 * Precompute: for all 16 Htable entries, store the entry and its
	 * 4-bit-shifted variant into the stack tables, and save each
	 * entry's low nibble (<<4) at 0..15(%esp).  The following is 16
	 * unrolled, software-pipelined copies of the same pattern.
	 */
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	movq	(%eax),%mm6		/* load Xi: low half in %mm6 ... */
	movl	8(%eax),%ebx		/* ... high half in %ebx:%edx */
	movl	12(%eax),%edx
.align	16
.L009outer:
	/*
	 * Per-block loop: Xi ^= block, then multiply by H one byte at a
	 * time.  %mm6/%mm7 accumulate Z; %mm0-%mm2 stage pinsrw lookups
	 * into rem_8bit; %eax/%ebp/%edi hold the current byte's nibble
	 * indices.  The body is 16 unrolled byte steps, reloading the
	 * next Xi word from the 524..536(%esp) spills as needed.
	 */
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx		/* advance input pointer */
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2	/* rem_8bit reduction lookup */
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx		/* next word of Xi^block */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	/* final half-step and merge of the staged rem_8bit corrections */
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx		/* reload saved input pointer */
	movd	%mm7,%ebx
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6		/* byte-swap %mm6 (no bswap for MMX) */
	bswap	%edx
	pshufw	$27,%mm6,%mm6		/* finish 64-bit byte reversal */
	bswap	%ebx
	cmpl	552(%esp),%ecx		/* reached end of input? */
	jne	.L009outer
	movl	544(%esp),%eax
	movl	%edx,12(%eax)		/* store updated Xi */
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp		/* restore original stack pointer */
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
/*
 * void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
 *
 * PCLMULQDQ-based table setup: computes H<<1 (mod the GHASH
 * polynomial) from the raw hash key and stores it together with
 * H^2 for use by gcm_gmult_clmul/gcm_ghash_clmul.  Uses the .Lbswap
 * constants block (shuffle mask + 0xc2... polynomial word).
 * The .byte sequences below are pclmulqdq opcodes spelled out for
 * old assemblers (102,15,58,68 = 66 0F 3A 44).
 */
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
	movl	4(%esp),%edx		/* %edx = Htable (output) */
	movl	8(%esp),%eax		/* %eax = Xi (input key H) */
	call	.L010pic		/* PIC: locate .Lbswap constants */
.L010pic:
	popl	%ecx
	leal	.Lbswap-.L010pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2		/* swap 64-bit halves */
	pshufd	$255,%xmm2,%xmm4	/* broadcast top word for sign test */
	/* H = H<<1, conditionally xor-ing in the polynomial if the top
	 * bit was set (pcmpgtd produces the all-ones mask) */
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2		/* carry bit across the 64-bit halves */
	pand	16(%ecx),%xmm5		/* 0xc2...01 polynomial constant */
	pxor	%xmm5,%xmm2		/* %xmm2 = H<<1 mod poly */
	movdqa	%xmm2,%xmm0
	/* compute H^2 by squaring: Karatsuba with three pclmulqdq */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0,%xmm2,%xmm0 (lo*lo) */
.byte	102,15,58,68,202,17		/* pclmulqdq $17,%xmm2,%xmm1 (hi*hi) */
.byte	102,15,58,68,220,0		/* pclmulqdq $0,%xmm4,%xmm3 (mid) */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0		/* %xmm1:%xmm0 = 256-bit product */
	/* reduce the 256-bit product modulo the GHASH polynomial */
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0		/* %xmm0 = H^2 */
	movdqu	%xmm2,(%edx)		/* Htable[0] = H */
	movdqu	%xmm0,16(%edx)		/* Htable[1] = H^2 */
	ret
.size	gcm_init_clmul,.-.L_gcm_init_clmul_begin
/*
 * void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
 *
 * Single GHASH multiplication Xi = Xi * H using PCLMULQDQ with
 * Karatsuba (three carry-less multiplies) followed by the standard
 * shift-based reduction modulo the GHASH polynomial.  Xi is byte-
 * swapped on load and store with the .Lbswap pshufb mask.
 * .byte sequences: 102,15,56,0 = pshufb; 102,15,58,68 = pclmulqdq.
 */
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax		/* %eax = Xi */
	movl	8(%esp),%edx		/* %edx = Htable */
	call	.L011pic		/* PIC: locate .Lbswap constants */
.L011pic:
	popl	%ecx
	leal	.Lbswap-.L011pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5		/* byte-swap shuffle mask */
	movups	(%edx),%xmm2		/* H */
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	/* Karatsuba: lo*lo, hi*hi, (lo^hi)*(lo^hi) */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $17,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0		/* %xmm1:%xmm0 = 256-bit product */
	/* reduction modulo x^128 + x^7 + x^2 + x + 1 */
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 — back to BE */
	movdqu	%xmm0,(%eax)		/* store updated Xi */
	ret
.size	gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
/*
 * void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16],
 *                      const u8 *inp, size_t len);
 *
 * PCLMULQDQ GHASH over `len` bytes (len is a multiple of 16).
 * Processes two blocks per .L015mod_loop iteration using H and H^2
 * from Htable (aggregated reduction), with .L013odd_tail /
 * .L014even_tail handling the leftover block(s).  Xi is updated in
 * place.  .byte sequences: 102,15,56,0 = pshufb;
 * 102,15,58,68 = pclmulqdq.
 */
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax		/* %eax = Xi */
	movl	24(%esp),%edx		/* %edx = Htable */
	movl	28(%esp),%esi		/* %esi = inp */
	movl	32(%esp),%ebx		/* %ebx = len */
	call	.L012pic		/* PIC: locate .Lbswap constants */
.L012pic:
	popl	%ecx
	leal	.Lbswap-.L012pic(%ecx),%ecx
	movdqu	(%eax),%xmm0		/* %xmm0 = Xi */
	movdqa	(%ecx),%xmm5		/* byte-swap mask */
	movdqu	(%edx),%xmm2		/* %xmm2 = H */
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 */
	subl	$16,%ebx
	jz	.L013odd_tail		/* exactly one block */
	/* prime the 2-block pipeline: (Xi^blk0)*H^2 and blk1*H */
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245			/* pshufb %xmm5,%xmm6 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm6,%xmm7
	pshufd	$78,%xmm6,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm6,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,242,0		/* pclmulqdq $0,%xmm2,%xmm6 */
.byte	102,15,58,68,250,17		/* pclmulqdq $17,%xmm2,%xmm7 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0,%xmm4,%xmm3 */
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm7
	pxor	%xmm4,%xmm6		/* %xmm7:%xmm6 = blk1 * H */
	movups	16(%edx),%xmm2		/* switch to H^2 */
	leal	32(%esi),%esi
	subl	$32,%ebx
	jbe	.L014even_tail
.L015mod_loop:
	/* multiply accumulated Xi by H^2 and reduce, while folding in
	 * the next two input blocks (interleaved for throughput) */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $17,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqu	(%esi),%xmm3
	movups	(%edx),%xmm2		/* reload H */
	pxor	%xmm6,%xmm0		/* merge previous odd-block product */
	pxor	%xmm7,%xmm1
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245			/* pshufb %xmm5,%xmm6 */
	movdqa	%xmm6,%xmm5		/* NB: %xmm5 reused; mask reloaded below */
	movdqa	%xmm6,%xmm7
	pxor	%xmm3,%xmm1
	/* reduction of %xmm1:%xmm0, interleaved with blk*H multiply */
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0		/* pclmulqdq $0,%xmm2,%xmm6 */
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm5,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm5,%xmm3
	pshufd	$78,%xmm2,%xmm5
	pxor	%xmm2,%xmm5
.byte	102,15,58,68,250,17		/* pclmulqdq $17,%xmm2,%xmm7 */
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.byte	102,15,58,68,221,0		/* pclmulqdq $0,%xmm5,%xmm3 */
	movups	16(%edx),%xmm2		/* back to H^2 */
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm3
	movdqa	%xmm3,%xmm5
	psrldq	$8,%xmm3
	pslldq	$8,%xmm5
	pxor	%xmm3,%xmm7
	pxor	%xmm5,%xmm6
	movdqa	(%ecx),%xmm5		/* restore byte-swap mask */
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	.L015mod_loop
.L014even_tail:
	/* final Xi*H^2 multiply, merge pipelined product, reduce */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $17,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm1
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	testl	%ebx,%ebx
	jnz	.L016done		/* len was even in blocks — finished */
	movups	(%edx),%xmm2		/* one block left: use H */
.L013odd_tail:
	/* single trailing block: Xi = (Xi ^ blk) * H */
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221			/* pshufb %xmm5,%xmm3 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0		/* pclmulqdq $0,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $17,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	movdqa	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$5,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm4
	pslldq	$8,%xmm0
	psrldq	$8,%xmm4
	pxor	%xmm3,%xmm0
	pxor	%xmm4,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm1,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm0
.L016done:
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0 — back to BE */
	movdqu	%xmm0,(%eax)		/* store updated Xi */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
/*
 * Constant data shared by the GHASH implementations above.
 * .Lbswap:    16-byte pshufb mask reversing byte order, followed by
 *             the 0xc2...01 GHASH polynomial constant (bytes 1,..,194).
 * .Lrem_4bit: 16 pre-shifted remainders for the 4-bit table method.
 * .Lrem_8bit: 256 16-bit remainders for the 8-bit (MMX) method.
 * Trailing .byte run is the ASCII CRYPTOGAMS attribution string.
 */
.align	64
.Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align	64
.Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
.align	64
.Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
2543#endif
2544