# $FreeBSD$
.file	"aesni-x86.s"
.text
# void aesni_encrypt(const void *in, void *out, const AES_KEY *key)
# Encrypt a single 16-byte block with AES-NI.
# cdecl stack args: 4(%esp)=in, 8(%esp)=out, 12(%esp)=key schedule.
# 240(key) is used as the round-loop counter (value stored by the
# matching aesni_set_encrypt_key — NOTE(review): not the generic
# AES_KEY->rounds semantics; confirm against the key-setup routine).
# Clobbers: eax, ecx, edx, xmm0-xmm2, flags.
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
.L_aesni_encrypt_begin:
	movl	4(%esp),%eax
	movl	12(%esp),%edx
	movups	(%eax),%xmm2		# xmm2 = input block
	movl	240(%edx),%ecx		# ecx = round counter
	movl	8(%esp),%eax		# eax = output pointer
	movups	(%edx),%xmm0		# round key 0 (whitening)
	movups	16(%edx),%xmm1		# round key 1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		# whiten
.L000enc1_loop_1:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1		# next round key
	leal	16(%edx),%edx
	jnz	.L000enc1_loop_1
.byte	102,15,56,221,209	# aesenclast	%xmm1,%xmm2
	movups	%xmm2,(%eax)		# store ciphertext
	ret
.size	aesni_encrypt,.-.L_aesni_encrypt_begin
# void aesni_decrypt(const void *in, void *out, const AES_KEY *key)
# Decrypt a single 16-byte block with AES-NI.
# cdecl stack args: 4(%esp)=in, 8(%esp)=out, 12(%esp)=key schedule.
# Mirror image of aesni_encrypt using aesdec/aesdeclast.
# Clobbers: eax, ecx, edx, xmm0-xmm2, flags.
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
.L_aesni_decrypt_begin:
	movl	4(%esp),%eax
	movl	12(%esp),%edx
	movups	(%eax),%xmm2		# xmm2 = input block
	movl	240(%edx),%ecx		# ecx = round counter
	movl	8(%esp),%eax		# eax = output pointer
	movups	(%edx),%xmm0		# round key 0 (whitening)
	movups	16(%edx),%xmm1		# round key 1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2		# whiten
.L001dec1_loop_2:
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1		# next round key
	leal	16(%edx),%edx
	jnz	.L001dec1_loop_2
.byte	102,15,56,223,209	# aesdeclast	%xmm1,%xmm2
	movups	%xmm2,(%eax)		# store plaintext
	ret
.size	aesni_decrypt,.-.L_aesni_decrypt_begin
# _aesni_encrypt3 — internal helper: encrypt 3 blocks in parallel.
# In:  edx = key schedule, ecx = round counter, xmm2-xmm4 = blocks.
# Out: xmm2-xmm4 encrypted.  Clobbers ecx, edx, xmm0, xmm1, flags.
# ecx is halved because the loop body applies two rounds per iteration
# (keys alternately staged in xmm1 and xmm0 to hide load latency).
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
	movups	(%edx),%xmm0		# key 0 (whitening)
	shrl	$1,%ecx			# two rounds per loop iteration
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	(%edx),%xmm0
.L002enc3_loop:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
	decl	%ecx
.byte	102,15,56,220,225	# aesenc	%xmm1,%xmm4
	movups	16(%edx),%xmm1
.byte	102,15,56,220,208	# aesenc	%xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc	%xmm0,%xmm3
	leal	32(%edx),%edx
.byte	102,15,56,220,224	# aesenc	%xmm0,%xmm4
	movups	(%edx),%xmm0
	jnz	.L002enc3_loop
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc	%xmm1,%xmm4
.byte	102,15,56,221,208	# aesenclast	%xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast	%xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast	%xmm0,%xmm4
	ret
.size	_aesni_encrypt3,.-_aesni_encrypt3
# _aesni_decrypt3 — internal helper: decrypt 3 blocks in parallel.
# In:  edx = key schedule, ecx = round counter, xmm2-xmm4 = blocks.
# Out: xmm2-xmm4 decrypted.  Clobbers ecx, edx, xmm0, xmm1, flags.
# Same two-rounds-per-iteration structure as _aesni_encrypt3.
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
	movups	(%edx),%xmm0		# key 0 (whitening)
	shrl	$1,%ecx			# two rounds per loop iteration
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	movups	(%edx),%xmm0
.L003dec3_loop:
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec	%xmm1,%xmm3
	decl	%ecx
.byte	102,15,56,222,225	# aesdec	%xmm1,%xmm4
	movups	16(%edx),%xmm1
.byte	102,15,56,222,208	# aesdec	%xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec	%xmm0,%xmm3
	leal	32(%edx),%edx
.byte	102,15,56,222,224	# aesdec	%xmm0,%xmm4
	movups	(%edx),%xmm0
	jnz	.L003dec3_loop
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec	%xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec	%xmm1,%xmm4
.byte	102,15,56,223,208	# aesdeclast	%xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast	%xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast	%xmm0,%xmm4
	ret
.size	_aesni_decrypt3,.-_aesni_decrypt3
# _aesni_encrypt4 — internal helper: encrypt 4 blocks in parallel.
# In:  edx = key schedule, ecx = round counter, xmm2-xmm5 = blocks.
# Out: xmm2-xmm5 encrypted.  Clobbers ecx, edx, xmm0, xmm1, flags.
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
	movups	(%edx),%xmm0		# key 0 (whitening)
	movups	16(%edx),%xmm1
	shrl	$1,%ecx			# two rounds per loop iteration
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	(%edx),%xmm0
.L004enc4_loop:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
	decl	%ecx
.byte	102,15,56,220,225	# aesenc	%xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc	%xmm1,%xmm5
	movups	16(%edx),%xmm1
.byte	102,15,56,220,208	# aesenc	%xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc	%xmm0,%xmm3
	leal	32(%edx),%edx
.byte	102,15,56,220,224	# aesenc	%xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc	%xmm0,%xmm5
	movups	(%edx),%xmm0
	jnz	.L004enc4_loop
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc	%xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc	%xmm1,%xmm5
.byte	102,15,56,221,208	# aesenclast	%xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast	%xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast	%xmm0,%xmm4
.byte	102,15,56,221,232	# aesenclast	%xmm0,%xmm5
	ret
.size	_aesni_encrypt4,.-_aesni_encrypt4
# _aesni_decrypt4 — internal helper: decrypt 4 blocks in parallel.
# In:  edx = key schedule, ecx = round counter, xmm2-xmm5 = blocks.
# Out: xmm2-xmm5 decrypted.  Clobbers ecx, edx, xmm0, xmm1, flags.
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
	movups	(%edx),%xmm0		# key 0 (whitening)
	movups	16(%edx),%xmm1
	shrl	$1,%ecx			# two rounds per loop iteration
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	movups	(%edx),%xmm0
.L005dec4_loop:
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec	%xmm1,%xmm3
	decl	%ecx
.byte	102,15,56,222,225	# aesdec	%xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec	%xmm1,%xmm5
	movups	16(%edx),%xmm1
.byte	102,15,56,222,208	# aesdec	%xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec	%xmm0,%xmm3
	leal	32(%edx),%edx
.byte	102,15,56,222,224	# aesdec	%xmm0,%xmm4
.byte	102,15,56,222,232	# aesdec	%xmm0,%xmm5
	movups	(%edx),%xmm0
	jnz	.L005dec4_loop
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec	%xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec	%xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec	%xmm1,%xmm5
.byte	102,15,56,223,208	# aesdeclast	%xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast	%xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast	%xmm0,%xmm4
.byte	102,15,56,223,232	# aesdeclast	%xmm0,%xmm5
	ret
.size	_aesni_decrypt4,.-_aesni_decrypt4
# _aesni_encrypt6 — internal helper: encrypt 6 blocks in parallel.
# In:  edx = key schedule, ecx = round counter, xmm2-xmm7 = blocks.
# Out: xmm2-xmm7 encrypted.  Clobbers ecx, edx, xmm0, xmm1, flags.
# The first round is software-pipelined before the loop; callers that
# pre-xor the whitening key themselves enter at .L_aesni_encrypt6_enter.
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
	movups	(%edx),%xmm0		# key 0 (whitening)
	shrl	$1,%ecx			# two rounds per loop iteration
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
	pxor	%xmm0,%xmm5
	decl	%ecx
.byte	102,15,56,220,225	# aesenc	%xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233	# aesenc	%xmm1,%xmm5
	pxor	%xmm0,%xmm7
.byte	102,15,56,220,241	# aesenc	%xmm1,%xmm6
	movups	(%edx),%xmm0
.byte	102,15,56,220,249	# aesenc	%xmm1,%xmm7
	jmp	.L_aesni_encrypt6_enter
.align	16
.L006enc6_loop:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
	decl	%ecx
.byte	102,15,56,220,225	# aesenc	%xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc	%xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc	%xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc	%xmm1,%xmm7
.align	16
.L_aesni_encrypt6_enter:
	movups	16(%edx),%xmm1
.byte	102,15,56,220,208	# aesenc	%xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc	%xmm0,%xmm3
	leal	32(%edx),%edx
.byte	102,15,56,220,224	# aesenc	%xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc	%xmm0,%xmm5
.byte	102,15,56,220,240	# aesenc	%xmm0,%xmm6
.byte	102,15,56,220,248	# aesenc	%xmm0,%xmm7
	movups	(%edx),%xmm0
	jnz	.L006enc6_loop
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc	%xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc	%xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc	%xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc	%xmm1,%xmm7
.byte	102,15,56,221,208	# aesenclast	%xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast	%xmm0,%xmm3
.byte	102,15,56,221,224	# aesenclast	%xmm0,%xmm4
.byte	102,15,56,221,232	# aesenclast	%xmm0,%xmm5
.byte	102,15,56,221,240	# aesenclast	%xmm0,%xmm6
.byte	102,15,56,221,248	# aesenclast	%xmm0,%xmm7
	ret
.size	_aesni_encrypt6,.-_aesni_encrypt6
# _aesni_decrypt6 — internal helper: decrypt 6 blocks in parallel.
# In:  edx = key schedule, ecx = round counter, xmm2-xmm7 = blocks.
# Out: xmm2-xmm7 decrypted.  Clobbers ecx, edx, xmm0, xmm1, flags.
# Mirror of _aesni_encrypt6, with .L_aesni_decrypt6_enter for callers
# that pre-xor the whitening key themselves.
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
	movups	(%edx),%xmm0		# key 0 (whitening)
	shrl	$1,%ecx			# two rounds per loop iteration
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,217	# aesdec	%xmm1,%xmm3
	pxor	%xmm0,%xmm5
	decl	%ecx
.byte	102,15,56,222,225	# aesdec	%xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,233	# aesdec	%xmm1,%xmm5
	pxor	%xmm0,%xmm7
.byte	102,15,56,222,241	# aesdec	%xmm1,%xmm6
	movups	(%edx),%xmm0
.byte	102,15,56,222,249	# aesdec	%xmm1,%xmm7
	jmp	.L_aesni_decrypt6_enter
.align	16
.L007dec6_loop:
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec	%xmm1,%xmm3
	decl	%ecx
.byte	102,15,56,222,225	# aesdec	%xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec	%xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec	%xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec	%xmm1,%xmm7
.align	16
.L_aesni_decrypt6_enter:
	movups	16(%edx),%xmm1
.byte	102,15,56,222,208	# aesdec	%xmm0,%xmm2
.byte	102,15,56,222,216	# aesdec	%xmm0,%xmm3
	leal	32(%edx),%edx
.byte	102,15,56,222,224	# aesdec	%xmm0,%xmm4
.byte	102,15,56,222,232	# aesdec	%xmm0,%xmm5
.byte	102,15,56,222,240	# aesdec	%xmm0,%xmm6
.byte	102,15,56,222,248	# aesdec	%xmm0,%xmm7
	movups	(%edx),%xmm0
	jnz	.L007dec6_loop
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
.byte	102,15,56,222,217	# aesdec	%xmm1,%xmm3
.byte	102,15,56,222,225	# aesdec	%xmm1,%xmm4
.byte	102,15,56,222,233	# aesdec	%xmm1,%xmm5
.byte	102,15,56,222,241	# aesdec	%xmm1,%xmm6
.byte	102,15,56,222,249	# aesdec	%xmm1,%xmm7
.byte	102,15,56,223,208	# aesdeclast	%xmm0,%xmm2
.byte	102,15,56,223,216	# aesdeclast	%xmm0,%xmm3
.byte	102,15,56,223,224	# aesdeclast	%xmm0,%xmm4
.byte	102,15,56,223,232	# aesdeclast	%xmm0,%xmm5
.byte	102,15,56,223,240	# aesdeclast	%xmm0,%xmm6
.byte	102,15,56,223,248	# aesdeclast	%xmm0,%xmm7
	ret
.size	_aesni_decrypt6,.-_aesni_decrypt6
# void aesni_ecb_encrypt(const void *in, void *out, size_t len,
#                        const AES_KEY *key, int enc)
# ECB mode: processes len&~15 bytes, 6 blocks at a time where possible,
# with 1/2/3/4/5-block tail paths.  enc!=0 selects encryption.
# Stack args after the four pushes: 20=in, 24=out, 28=len, 32=key, 36=enc.
# ebp preserves the key pointer, ebx the round counter, across helper calls
# (the helpers clobber ecx/edx).
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
.L_aesni_ecb_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi		# esi = in
	movl	24(%esp),%edi		# edi = out
	movl	28(%esp),%eax		# eax = len
	movl	32(%esp),%edx		# edx = key
	movl	36(%esp),%ebx		# ebx = enc flag
	andl	$-16,%eax		# whole blocks only
	jz	.L008ecb_ret
	movl	240(%edx),%ecx		# round counter
	testl	%ebx,%ebx
	jz	.L009ecb_decrypt
	movl	%edx,%ebp		# save key
	movl	%ecx,%ebx		# save rounds
	cmpl	$96,%eax
	jb	.L010ecb_enc_tail
	# Prime the 6-block pipeline: load first 6 blocks.
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	.L011ecb_enc_loop6_enter
.align	16
.L012ecb_enc_loop6:
	# Store previous 6 results while loading the next 6 inputs.
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
.L011ecb_enc_loop6_enter:
	call	_aesni_encrypt6
	movl	%ebp,%edx		# restore key
	movl	%ebx,%ecx		# restore rounds
	subl	$96,%eax
	jnc	.L012ecb_enc_loop6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax		# eax = remaining bytes (0..80)
	jz	.L008ecb_ret
.L010ecb_enc_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	.L013ecb_enc_one
	movups	16(%esi),%xmm3
	je	.L014ecb_enc_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	.L015ecb_enc_three
	movups	48(%esi),%xmm5
	je	.L016ecb_enc_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7		# 5 blocks: 6th lane is dummy
	call	_aesni_encrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L008ecb_ret
.align	16
.L013ecb_enc_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L017enc1_loop_3:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L017enc1_loop_3
.byte	102,15,56,221,209	# aesenclast	%xmm1,%xmm2
	movups	%xmm2,(%edi)
	jmp	.L008ecb_ret
.align	16
.L014ecb_enc_two:
	xorps	%xmm4,%xmm4		# 3rd lane is dummy
	call	_aesni_encrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L008ecb_ret
.align	16
.L015ecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L008ecb_ret
.align	16
.L016ecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	jmp	.L008ecb_ret
.align	16
.L009ecb_decrypt:
	# Decryption side: identical structure using the decrypt helpers.
	movl	%edx,%ebp		# save key
	movl	%ecx,%ebx		# save rounds
	cmpl	$96,%eax
	jb	.L018ecb_dec_tail
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
	subl	$96,%eax
	jmp	.L019ecb_dec_loop6_enter
.align	16
.L020ecb_dec_loop6:
	movups	%xmm2,(%edi)
	movdqu	(%esi),%xmm2
	movups	%xmm3,16(%edi)
	movdqu	16(%esi),%xmm3
	movups	%xmm4,32(%edi)
	movdqu	32(%esi),%xmm4
	movups	%xmm5,48(%edi)
	movdqu	48(%esi),%xmm5
	movups	%xmm6,64(%edi)
	movdqu	64(%esi),%xmm6
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqu	80(%esi),%xmm7
	leal	96(%esi),%esi
.L019ecb_dec_loop6_enter:
	call	_aesni_decrypt6
	movl	%ebp,%edx		# restore key
	movl	%ebx,%ecx		# restore rounds
	subl	$96,%eax
	jnc	.L020ecb_dec_loop6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	addl	$96,%eax
	jz	.L008ecb_ret
.L018ecb_dec_tail:
	movups	(%esi),%xmm2
	cmpl	$32,%eax
	jb	.L021ecb_dec_one
	movups	16(%esi),%xmm3
	je	.L022ecb_dec_two
	movups	32(%esi),%xmm4
	cmpl	$64,%eax
	jb	.L023ecb_dec_three
	movups	48(%esi),%xmm5
	je	.L024ecb_dec_four
	movups	64(%esi),%xmm6
	xorps	%xmm7,%xmm7		# 5 blocks: 6th lane is dummy
	call	_aesni_decrypt6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L008ecb_ret
.align	16
.L021ecb_dec_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L025dec1_loop_4:
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L025dec1_loop_4
.byte	102,15,56,223,209	# aesdeclast	%xmm1,%xmm2
	movups	%xmm2,(%edi)
	jmp	.L008ecb_ret
.align	16
.L022ecb_dec_two:
	xorps	%xmm4,%xmm4		# 3rd lane is dummy
	call	_aesni_decrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L008ecb_ret
.align	16
.L023ecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L008ecb_ret
.align	16
.L024ecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
.L008ecb_ret:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
# void aesni_ccm64_encrypt_blocks(const void *in, void *out, size_t blocks,
#                                 const AES_KEY *key, const char *ivec,
#                                 char *cmac)
# CCM with 64-bit counter: per block, encrypts the counter (xmm2) and
# updates the CMAC (xmm3) in a single interleaved 2-stream AES pass,
# then XORs the keystream into the plaintext.
# Stack args after the four pushes: 20=in, 24=out, 28=blocks, 32=key,
# 36=ivec, 40=cmac.  An aligned scratch frame holds a byte-swap mask at
# (%esp) and the counter increment (1) at 16(%esp); 48(%esp) saves the
# original %esp.
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
aesni_ccm64_encrypt_blocks:
.L_aesni_ccm64_encrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	40(%esp),%ecx
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp		# 16-byte align scratch frame
	movl	%ebp,48(%esp)		# save original esp
	movdqu	(%ebx),%xmm7		# xmm7 = counter block (ivec)
	movdqu	(%ecx),%xmm3		# xmm3 = running CMAC
	movl	240(%edx),%ecx		# round counter
	# Byte-swap mask {0x0c0d0e0f,0x08090a0b,0x04050607,0x00010203}.
	movl	$202182159,(%esp)
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx			# counter increment = 1
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	shrl	$1,%ecx			# 2 rounds per loop iteration below
	leal	(%edx),%ebp		# ebp = key (preserved)
	movdqa	(%esp),%xmm5		# xmm5 = bswap mask
	movdqa	%xmm7,%xmm2
	movl	%ecx,%ebx		# ebx = halved round counter
.byte	102,15,56,0,253		# pshufb	%xmm5,%xmm7 (counter -> LE)
.L026ccm64_enc_outer:
	movups	(%ebp),%xmm0		# key 0
	movl	%ebx,%ecx
	movups	(%esi),%xmm6		# xmm6 = plaintext block
	xorps	%xmm0,%xmm2		# whiten counter
	movups	16(%ebp),%xmm1
	xorps	%xmm6,%xmm0
	leal	32(%ebp),%edx
	xorps	%xmm0,%xmm3		# CMAC ^= plaintext (^key0, folded)
	movups	(%edx),%xmm0
.L027ccm64_enc2_loop:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2 (counter stream)
	decl	%ecx
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3 (CMAC stream)
	movups	16(%edx),%xmm1
.byte	102,15,56,220,208	# aesenc	%xmm0,%xmm2
	leal	32(%edx),%edx
.byte	102,15,56,220,216	# aesenc	%xmm0,%xmm3
	movups	(%edx),%xmm0
	jnz	.L027ccm64_enc2_loop
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
	paddq	16(%esp),%xmm7		# bump 64-bit counter
.byte	102,15,56,221,208	# aesenclast	%xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast	%xmm0,%xmm3
	decl	%eax
	leal	16(%esi),%esi
	xorps	%xmm2,%xmm6		# ciphertext = pt ^ E(counter)
	movdqa	%xmm7,%xmm2
	movups	%xmm6,(%edi)
	leal	16(%edi),%edi
.byte	102,15,56,0,213		# pshufb	%xmm5,%xmm2 (next counter)
	jnz	.L026ccm64_enc_outer
	movl	48(%esp),%esp		# unwind scratch frame
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)		# write back CMAC
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
# void aesni_ccm64_decrypt_blocks(const void *in, void *out, size_t blocks,
#                                 const AES_KEY *key, const char *ivec,
#                                 char *cmac)
# CCM decryption with 64-bit counter.  The first counter block is
# encrypted up front; each iteration then XORs the keystream into the
# ciphertext, and the recovered plaintext is fed into the CMAC while the
# next counter block is encrypted (2-stream interleave).
# Stack/frame layout matches aesni_ccm64_encrypt_blocks.
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,@function
.align	16
aesni_ccm64_decrypt_blocks:
.L_aesni_ccm64_decrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	40(%esp),%ecx
	movl	%esp,%ebp
	subl	$60,%esp
	andl	$-16,%esp		# 16-byte align scratch frame
	movl	%ebp,48(%esp)		# save original esp
	movdqu	(%ebx),%xmm7		# xmm7 = counter block (ivec)
	movdqu	(%ecx),%xmm3		# xmm3 = running CMAC
	movl	240(%edx),%ecx		# round counter
	# Byte-swap mask {0x0c0d0e0f,0x08090a0b,0x04050607,0x00010203}.
	movl	$202182159,(%esp)
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$1,%ebx			# counter increment = 1
	xorl	%ebp,%ebp
	movl	%ebx,16(%esp)
	movl	%ebp,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ebp,28(%esp)
	movdqa	(%esp),%xmm5		# xmm5 = bswap mask
	movdqa	%xmm7,%xmm2
	movl	%edx,%ebp		# ebp = key (preserved)
	movl	%ecx,%ebx		# ebx = round counter
.byte	102,15,56,0,253		# pshufb	%xmm5,%xmm7
	# Encrypt the first counter block (single-stream loop).
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L028enc1_loop_5:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L028enc1_loop_5
.byte	102,15,56,221,209	# aesenclast	%xmm1,%xmm2
	movups	(%esi),%xmm6		# first ciphertext block
	paddq	16(%esp),%xmm7		# bump counter
	leal	16(%esi),%esi
	jmp	.L029ccm64_dec_outer
.align	16
.L029ccm64_dec_outer:
	xorps	%xmm2,%xmm6		# plaintext = ct ^ E(counter)
	movdqa	%xmm7,%xmm2
	movl	%ebx,%ecx
	movups	%xmm6,(%edi)
	leal	16(%edi),%edi
.byte	102,15,56,0,213		# pshufb	%xmm5,%xmm2 (next counter)
	subl	$1,%eax
	jz	.L030ccm64_dec_break
	movups	(%ebp),%xmm0		# key 0
	shrl	$1,%ecx			# 2 rounds per iteration below
	movups	16(%ebp),%xmm1
	xorps	%xmm0,%xmm6
	leal	32(%ebp),%edx
	xorps	%xmm0,%xmm2		# whiten counter
	xorps	%xmm6,%xmm3		# CMAC ^= plaintext (^key0, folded)
	movups	(%edx),%xmm0
.L031ccm64_dec2_loop:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2 (counter stream)
	decl	%ecx
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3 (CMAC stream)
	movups	16(%edx),%xmm1
.byte	102,15,56,220,208	# aesenc	%xmm0,%xmm2
	leal	32(%edx),%edx
.byte	102,15,56,220,216	# aesenc	%xmm0,%xmm3
	movups	(%edx),%xmm0
	jnz	.L031ccm64_dec2_loop
	movups	(%esi),%xmm6		# prefetch next ciphertext block
	paddq	16(%esp),%xmm7		# bump counter
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
	leal	16(%esi),%esi
.byte	102,15,56,221,208	# aesenclast	%xmm0,%xmm2
.byte	102,15,56,221,216	# aesenclast	%xmm0,%xmm3
	jmp	.L029ccm64_dec_outer
.align	16
.L030ccm64_dec_break:
	# Final CMAC update for the last plaintext block (single stream).
	movl	%ebp,%edx
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm6
	leal	32(%edx),%edx
	xorps	%xmm6,%xmm3
.L032enc1_loop_6:
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L032enc1_loop_6
.byte	102,15,56,221,217	# aesenclast	%xmm1,%xmm3
	movl	48(%esp),%esp		# unwind scratch frame
	movl	40(%esp),%edi
	movups	%xmm3,(%edi)		# write back CMAC
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
# void aesni_ctr32_encrypt_blocks(const void *in, void *out, size_t blocks,
#                                 const AES_KEY *key, const char *ivec)
# CTR mode with a 32-bit big-endian counter in the last word of ivec.
# Six counter blocks are generated per iteration: two dword-interleaved
# counter vectors live in 48(%esp)/64(%esp) (little-endian), are bumped
# by 6 with paddd, byte-swapped with pshufb, and merged with the static
# high 96 bits of the IV kept in 32(%esp).
# Stack args after the four pushes: 20=in, 24=out, 28=blocks, 32=key,
# 36=ivec.  80(%esp) of the aligned scratch frame saves the original esp.
.globl	aesni_ctr32_encrypt_blocks
.type	aesni_ctr32_encrypt_blocks,@function
.align	16
aesni_ctr32_encrypt_blocks:
.L_aesni_ctr32_encrypt_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$88,%esp
	andl	$-16,%esp		# 16-byte align scratch frame
	movl	%ebp,80(%esp)		# save original esp
	cmpl	$1,%eax
	je	.L033ctr32_one_shortcut
	movdqu	(%ebx),%xmm7		# xmm7 = IV/counter block
	# Byte-swap mask {0x0c0d0e0f,0x08090a0b,0x04050607,0x00010203}.
	movl	$202182159,(%esp)
	movl	$134810123,4(%esp)
	movl	$67438087,8(%esp)
	movl	$66051,12(%esp)
	movl	$6,%ecx			# counter stride = 6 blocks/iteration
	xorl	%ebp,%ebp
	movl	%ecx,16(%esp)
	movl	%ecx,20(%esp)
	movl	%ecx,24(%esp)
	movl	%ebp,28(%esp)
.byte	102,15,58,22,251,3	# pextrd	$3,%xmm7,%ebx (BE counter word)
.byte	102,15,58,34,253,3	# pinsrd	$3,%ebp,%xmm7 (clear it in IV)
	movl	240(%edx),%ecx		# round counter
	bswap	%ebx			# counter -> little-endian arithmetic
	pxor	%xmm1,%xmm1
	pxor	%xmm0,%xmm0
	movdqa	(%esp),%xmm2		# bswap mask
	# Build counters n..n+2 in xmm1 and n+3..n+5 in xmm0.
.byte	102,15,58,34,203,0	# pinsrd	$0,%ebx,%xmm1
	leal	3(%ebx),%ebp
.byte	102,15,58,34,197,0	# pinsrd	$0,%ebp,%xmm0
	incl	%ebx
.byte	102,15,58,34,203,1	# pinsrd	$1,%ebx,%xmm1
	incl	%ebp
.byte	102,15,58,34,197,1	# pinsrd	$1,%ebp,%xmm0
	incl	%ebx
.byte	102,15,58,34,203,2	# pinsrd	$2,%ebx,%xmm1
	incl	%ebp
.byte	102,15,58,34,197,2	# pinsrd	$2,%ebp,%xmm0
	movdqa	%xmm1,48(%esp)		# LE counters 0-2
.byte	102,15,56,0,202		# pshufb	%xmm2,%xmm1 (-> BE)
	movdqa	%xmm0,64(%esp)		# LE counters 3-5
.byte	102,15,56,0,194		# pshufb	%xmm2,%xmm0 (-> BE)
	pshufd	$192,%xmm1,%xmm2	# broadcast counter words per block
	pshufd	$128,%xmm1,%xmm3
	cmpl	$6,%eax
	jb	.L034ctr32_tail
	movdqa	%xmm7,32(%esp)		# stash IV high bits
	shrl	$1,%ecx
	movl	%edx,%ebp		# save key
	movl	%ecx,%ebx		# save halved round counter
	subl	$6,%eax
	jmp	.L035ctr32_loop6
.align	16
.L035ctr32_loop6:
	pshufd	$64,%xmm1,%xmm4
	movdqa	32(%esp),%xmm1		# IV high bits
	pshufd	$192,%xmm0,%xmm5
	por	%xmm1,%xmm2		# assemble full counter blocks
	pshufd	$128,%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufd	$64,%xmm0,%xmm7
	por	%xmm1,%xmm4
	por	%xmm1,%xmm5
	por	%xmm1,%xmm6
	por	%xmm1,%xmm7
	# Start the 6-stream AES pass inline, then jump into the shared
	# round loop of _aesni_encrypt6.
	movups	(%ebp),%xmm0
	movups	16(%ebp),%xmm1
	leal	32(%ebp),%edx
	decl	%ecx
	pxor	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,225	# aesenc	%xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233	# aesenc	%xmm1,%xmm5
	pxor	%xmm0,%xmm7
.byte	102,15,56,220,241	# aesenc	%xmm1,%xmm6
	movups	(%edx),%xmm0
.byte	102,15,56,220,249	# aesenc	%xmm1,%xmm7
	call	.L_aesni_encrypt6_enter
	# XOR keystream with input; regenerate next 6 counters meanwhile.
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	%xmm2,(%edi)
	movdqa	16(%esp),%xmm0		# stride (6,6,6,0)
	xorps	%xmm1,%xmm4
	movdqa	48(%esp),%xmm1
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	paddd	%xmm0,%xmm1		# counters 0-2 += 6
	paddd	64(%esp),%xmm0		# counters 3-5 += 6
	movdqa	(%esp),%xmm2		# bswap mask
	movups	48(%esi),%xmm3
	movups	64(%esi),%xmm4
	xorps	%xmm3,%xmm5
	movups	80(%esi),%xmm3
	leal	96(%esi),%esi
	movdqa	%xmm1,48(%esp)
.byte	102,15,56,0,202		# pshufb	%xmm2,%xmm1
	xorps	%xmm4,%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm3,%xmm7
	movdqa	%xmm0,64(%esp)
.byte	102,15,56,0,194		# pshufb	%xmm2,%xmm0
	movups	%xmm6,64(%edi)
	pshufd	$192,%xmm1,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movl	%ebx,%ecx		# restore round counter
	pshufd	$128,%xmm1,%xmm3
	subl	$6,%eax
	jnc	.L035ctr32_loop6
	addl	$6,%eax			# eax = remaining blocks (0..5)
	jz	.L036ctr32_ret
	movl	%ebp,%edx		# restore key
	leal	1(,%ecx,2),%ecx		# un-halve round counter
	movdqa	32(%esp),%xmm7		# IV high bits
.L034ctr32_tail:
	por	%xmm7,%xmm2
	cmpl	$2,%eax
	jb	.L037ctr32_one
	pshufd	$64,%xmm1,%xmm4
	por	%xmm7,%xmm3
	je	.L038ctr32_two
	pshufd	$192,%xmm0,%xmm5
	por	%xmm7,%xmm4
	cmpl	$4,%eax
	jb	.L039ctr32_three
	pshufd	$128,%xmm0,%xmm6
	por	%xmm7,%xmm5
	je	.L040ctr32_four
	por	%xmm7,%xmm6		# 5 remaining blocks
	call	_aesni_encrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	%xmm1,%xmm2
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm3
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm4
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm5
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	jmp	.L036ctr32_ret
.align	16
.L033ctr32_one_shortcut:
	movups	(%ebx),%xmm2		# single block: use IV directly
	movl	240(%edx),%ecx
.L037ctr32_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L041enc1_loop_7:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L041enc1_loop_7
.byte	102,15,56,221,209	# aesenclast	%xmm1,%xmm2
	movups	(%esi),%xmm6
	xorps	%xmm2,%xmm6
	movups	%xmm6,(%edi)
	jmp	.L036ctr32_ret
.align	16
.L038ctr32_two:
	call	_aesni_encrypt3
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	jmp	.L036ctr32_ret
.align	16
.L039ctr32_three:
	call	_aesni_encrypt3
	movups	(%esi),%xmm5
	movups	16(%esi),%xmm6
	xorps	%xmm5,%xmm2
	movups	32(%esi),%xmm7
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm7,%xmm4
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	jmp	.L036ctr32_ret
.align	16
.L040ctr32_four:
	call	_aesni_encrypt4
	movups	(%esi),%xmm6
	movups	16(%esi),%xmm7
	movups	32(%esi),%xmm1
	xorps	%xmm6,%xmm2
	movups	48(%esi),%xmm0
	xorps	%xmm7,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
.L036ctr32_ret:
	movl	80(%esp),%esp		# unwind scratch frame
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
# void aesni_xts_encrypt(const void *in, void *out, size_t len,
#                        const AES_KEY *key1, const AES_KEY *key2,
#                        const char *iv)
# XTS-AES encryption.  The IV is first encrypted with key2 to form the
# initial tweak (xmm1); each subsequent tweak is derived by doubling in
# GF(2^128) with the 0x87 reduction polynomial (the pshufd/pcmpgtd/pand/
# paddq/pxor sequences).  Six blocks are processed per main-loop pass;
# tweaks are staged at 0..80(%esp).  A trailing partial block is handled
# with ciphertext stealing (.L053xts_enc_steal).
# Stack args after the four pushes: 20=in, 24=out, 28=len, 32=key1,
# 36=key2, 40=iv.  Scratch frame: 96(%esp)=GF poly const {0x87,0,1,0},
# 112(%esp)=original len (for the tail), 116(%esp)=saved esp.
.globl	aesni_xts_encrypt
.type	aesni_xts_encrypt,@function
.align	16
aesni_xts_encrypt:
.L_aesni_xts_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	36(%esp),%edx		# edx = key2 (tweak key)
	movl	40(%esp),%esi		# esi = iv
	movl	240(%edx),%ecx
	movups	(%esi),%xmm2
	# Encrypt the IV with key2 to produce the initial tweak.
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L042enc1_loop_8:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L042enc1_loop_8
.byte	102,15,56,221,209	# aesenclast	%xmm1,%xmm2
	movl	20(%esp),%esi		# esi = in
	movl	24(%esp),%edi		# edi = out
	movl	28(%esp),%eax		# eax = len
	movl	32(%esp),%edx		# edx = key1 (data key)
	movl	%esp,%ebp
	subl	$120,%esp
	movl	240(%edx),%ecx
	andl	$-16,%esp		# 16-byte align scratch frame
	# GF(2^128) doubling constant: low dword 0x87, dword2 = 1.
	movl	$135,96(%esp)
	movl	$0,100(%esp)
	movl	$1,104(%esp)
	movl	$0,108(%esp)
	movl	%eax,112(%esp)		# original len (tail bytes live here)
	movl	%ebp,116(%esp)		# saved esp
	movdqa	%xmm2,%xmm1		# xmm1 = current tweak
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3		# xmm3 = GF poly constant
	pcmpgtd	%xmm1,%xmm0		# carry mask for tweak doubling
	andl	$-16,%eax
	movl	%edx,%ebp		# save key1
	movl	%ecx,%ebx		# save rounds
	subl	$96,%eax
	jc	.L043xts_enc_short
	shrl	$1,%ecx
	movl	%ecx,%ebx
	jmp	.L044xts_enc_loop6
.align	16
.L044xts_enc_loop6:
	# Generate tweaks for 6 blocks; each step is one GF(2^128) doubling:
	# propagate the top-bit carry (pshufd $19 of the pcmpgtd mask),
	# shift left (paddq), and conditionally xor 0x87 (pand+pxor).
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,(%esp)		# tweak 0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,16(%esp)		# tweak 1
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,32(%esp)		# tweak 2
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,48(%esp)		# tweak 3
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,64(%esp)		# tweak 4
	paddq	%xmm1,%xmm1
	movups	(%ebp),%xmm0		# key1 round 0
	pand	%xmm3,%xmm7
	movups	(%esi),%xmm2		# load 6 input blocks
	pxor	%xmm1,%xmm7		# tweak 5
	movdqu	16(%esi),%xmm3
	xorps	%xmm0,%xmm2		# pre-whiten with key0
	movdqu	32(%esi),%xmm4
	pxor	%xmm0,%xmm3
	movdqu	48(%esi),%xmm5
	pxor	%xmm0,%xmm4
	movdqu	64(%esi),%xmm6
	pxor	%xmm0,%xmm5
	movdqu	80(%esi),%xmm1
	pxor	%xmm0,%xmm6
	leal	96(%esi),%esi
	pxor	(%esp),%xmm2		# xor in the tweaks
	movdqa	%xmm7,80(%esp)		# tweak 5
	pxor	%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	leal	32(%ebp),%edx
	pxor	16(%esp),%xmm3
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	pxor	32(%esp),%xmm4
.byte	102,15,56,220,217	# aesenc	%xmm1,%xmm3
	pxor	48(%esp),%xmm5
	decl	%ecx
.byte	102,15,56,220,225	# aesenc	%xmm1,%xmm4
	pxor	64(%esp),%xmm6
.byte	102,15,56,220,233	# aesenc	%xmm1,%xmm5
	pxor	%xmm0,%xmm7
.byte	102,15,56,220,241	# aesenc	%xmm1,%xmm6
	movups	(%edx),%xmm0
.byte	102,15,56,220,249	# aesenc	%xmm1,%xmm7
	call	.L_aesni_encrypt6_enter
	# Post-whiten with the tweaks and store; start next tweak doubling.
	movdqa	80(%esp),%xmm1
	pxor	%xmm0,%xmm0
	xorps	(%esp),%xmm2
	pcmpgtd	%xmm1,%xmm0
	xorps	16(%esp),%xmm3
	movups	%xmm2,(%edi)
	xorps	32(%esp),%xmm4
	movups	%xmm3,16(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm4,32(%edi)
	xorps	64(%esp),%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm1,%xmm7
	movups	%xmm6,64(%edi)
	pshufd	$19,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
	movdqa	96(%esp),%xmm3		# reload GF poly constant
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	movl	%ebx,%ecx		# restore halved rounds
	pxor	%xmm2,%xmm1
	subl	$96,%eax
	jnc	.L044xts_enc_loop6
	leal	1(,%ecx,2),%ecx		# un-halve round counter
	movl	%ebp,%edx
	movl	%ecx,%ebx
.L043xts_enc_short:
	addl	$96,%eax		# eax = remaining whole-block bytes
	jz	.L045xts_enc_done6x
	movdqa	%xmm1,%xmm5		# tweak for block 0
	cmpl	$32,%eax
	jb	.L046xts_enc_one
	pshufd	$19,%xmm0,%xmm2		# double tweak
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	je	.L047xts_enc_two
	pshufd	$19,%xmm0,%xmm2		# double tweak
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm6
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	cmpl	$64,%eax
	jb	.L048xts_enc_three
	pshufd	$19,%xmm0,%xmm2		# double tweak
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm7
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	movdqa	%xmm5,(%esp)
	movdqa	%xmm6,16(%esp)
	je	.L049xts_enc_four
	# Five blocks remain.
	movdqa	%xmm7,32(%esp)
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm7
	pxor	%xmm1,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	pxor	(%esp),%xmm2
	movdqu	48(%esi),%xmm5
	pxor	16(%esp),%xmm3
	movdqu	64(%esi),%xmm6
	pxor	32(%esp),%xmm4
	leal	80(%esi),%esi
	pxor	48(%esp),%xmm5
	movdqa	%xmm7,64(%esp)
	pxor	%xmm7,%xmm6
	call	_aesni_encrypt6
	movaps	64(%esp),%xmm1
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	32(%esp),%xmm4
	movups	%xmm2,(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm3,16(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	jmp	.L050xts_enc_done
.align	16
.L046xts_enc_one:
	movups	(%esi),%xmm2
	leal	16(%esi),%esi
	xorps	%xmm5,%xmm2		# tweak in
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L051enc1_loop_9:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L051enc1_loop_9
.byte	102,15,56,221,209	# aesenclast	%xmm1,%xmm2
	xorps	%xmm5,%xmm2		# tweak out
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	movdqa	%xmm5,%xmm1		# last tweak used
	jmp	.L050xts_enc_done
.align	16
.L047xts_enc_two:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	leal	32(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm4,%xmm4		# 3rd lane is dummy
	call	_aesni_encrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L050xts_enc_done
.align	16
.L048xts_enc_three:
	movaps	%xmm1,%xmm7
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	leal	48(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	call	_aesni_encrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movdqa	%xmm7,%xmm1
	jmp	.L050xts_enc_done
.align	16
.L049xts_enc_four:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	xorps	(%esp),%xmm2
	movups	48(%esi),%xmm5
	leal	64(%esi),%esi
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	xorps	%xmm6,%xmm5
	call	_aesni_encrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	xorps	%xmm6,%xmm5
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L050xts_enc_done
.align	16
.L045xts_enc_done6x:
	movl	112(%esp),%eax		# original len
	andl	$15,%eax		# tail bytes for stealing
	jz	.L052xts_enc_ret
	movdqa	%xmm1,%xmm5
	movl	%eax,112(%esp)
	jmp	.L053xts_enc_steal
.align	16
.L050xts_enc_done:
	movl	112(%esp),%eax
	pxor	%xmm0,%xmm0
	andl	$15,%eax
	jz	.L052xts_enc_ret
	pcmpgtd	%xmm1,%xmm0		# one more tweak doubling for stealing
	movl	%eax,112(%esp)
	pshufd	$19,%xmm0,%xmm5
	paddq	%xmm1,%xmm1
	pand	96(%esp),%xmm5
	pxor	%xmm1,%xmm5
.L053xts_enc_steal:
	# Ciphertext stealing: swap the tail bytes with the end of the
	# previous ciphertext block, byte by byte.
	movzbl	(%esi),%ecx
	movzbl	-16(%edi),%edx
	leal	1(%esi),%esi
	movb	%cl,-16(%edi)
	movb	%dl,(%edi)
	leal	1(%edi),%edi
	subl	$1,%eax
	jnz	.L053xts_enc_steal
	subl	112(%esp),%edi		# rewind to the stolen block
	movl	%ebp,%edx		# restore key1
	movl	%ebx,%ecx		# restore rounds
	# Re-encrypt the merged final block with the stealing tweak.
	movups	-16(%edi),%xmm2
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L054enc1_loop_10:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L054enc1_loop_10
.byte	102,15,56,221,209	# aesenclast	%xmm1,%xmm2
	xorps	%xmm5,%xmm2
	movups	%xmm2,-16(%edi)
.L052xts_enc_ret:
	movl	116(%esp),%esp		# unwind scratch frame
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
.globl	aesni_xts_decrypt
.type	aesni_xts_decrypt,@function
.align	16
# AES-XTS decryption, i386 cdecl.  Stack arguments (offsets counted after
# the four register pushes below):
#   20(%esp) input ptr, 24(%esp) output ptr, 28(%esp) length in bytes,
#   32(%esp) AES decryption key schedule (round count at offset 240),
#   36(%esp) tweak-key schedule, 40(%esp) 16-byte initial tweak value.
# The .byte sequences are AES-NI opcodes spelled out for old assemblers:
#   102,15,56,220,xx = aesenc      102,15,56,221,xx = aesenclast
#   102,15,56,222,xx = aesdec      102,15,56,223,xx = aesdeclast
aesni_xts_decrypt:
.L_aesni_xts_decrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
# First encrypt the initial tweak with the tweak key: xmm2 = E_key2(iv).
	movl	36(%esp),%edx
	movl	40(%esp),%esi
	movl	240(%edx),%ecx
	movups	(%esi),%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L055enc1_loop_11:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L055enc1_loop_11
.byte	102,15,56,221,209
	movl	20(%esp),%esi
	movl	24(%esp),%edi
	movl	28(%esp),%eax
	movl	32(%esp),%edx
# Build a 16-byte-aligned 120-byte scratch frame; the caller's %esp is
# preserved at 116(%esp) and restored at .L065xts_dec_ret.
	movl	%esp,%ebp
	subl	$120,%esp
	andl	$-16,%esp
# If the length is not a multiple of 16, hold back one extra full block
# for ciphertext stealing: %ebx = 16 in that case, else 0.
	xorl	%ebx,%ebx
	testl	$15,%eax
	setnz	%bl
	shll	$4,%ebx
	subl	%ebx,%eax
# 96..111(%esp) = constant {0x87,0,1,0}: used by the pcmpgtd/pshufd/pand
# sequences below to double the tweak in GF(2^128) (reduction poly 0x87).
	movl	$135,96(%esp)
	movl	$0,100(%esp)
	movl	$1,104(%esp)
	movl	$0,108(%esp)
	movl	%eax,112(%esp)          # rounded-down byte count (tail size later)
	movl	%ebp,116(%esp)          # saved original %esp
	movl	240(%edx),%ecx
	movl	%edx,%ebp               # %ebp = key schedule, survives the loops
	movl	%ecx,%ebx               # %ebx = round count, reloaded into %ecx
	movdqa	%xmm2,%xmm1             # xmm1 = current tweak
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3
	pcmpgtd	%xmm1,%xmm0             # xmm0 = carry mask for tweak doubling
	andl	$-16,%eax
	subl	$96,%eax
	jc	.L056xts_dec_short      # fewer than 6 full blocks remain
	shrl	$1,%ecx
	movl	%ecx,%ebx
	jmp	.L057xts_dec_loop6
.align	16
# Main loop: derive six consecutive tweaks (saved at 0..80(%esp)),
# decrypt six blocks with interleaved rounds via _aesni_decrypt6, then
# xor the saved tweaks back into the results.
.L057xts_dec_loop6:
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,(%esp)            # tweak #0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,16(%esp)          # tweak #1
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,32(%esp)          # tweak #2
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,48(%esp)          # tweak #3
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,64(%esp)          # tweak #4
	paddq	%xmm1,%xmm1
	movups	(%ebp),%xmm0            # round key 0
	pand	%xmm3,%xmm7
	movups	(%esi),%xmm2            # load 6 input blocks, xor round key 0
	pxor	%xmm1,%xmm7             # xmm7 = tweak #5
	movdqu	16(%esi),%xmm3
	xorps	%xmm0,%xmm2
	movdqu	32(%esi),%xmm4
	pxor	%xmm0,%xmm3
	movdqu	48(%esi),%xmm5
	pxor	%xmm0,%xmm4
	movdqu	64(%esi),%xmm6
	pxor	%xmm0,%xmm5
	movdqu	80(%esi),%xmm1
	pxor	%xmm0,%xmm6
	leal	96(%esi),%esi
	pxor	(%esp),%xmm2            # xor each block with its tweak
	movdqa	%xmm7,80(%esp)          # tweak #5 saved for the output xor
	pxor	%xmm1,%xmm7
	movups	16(%ebp),%xmm1
	leal	32(%ebp),%edx
	pxor	16(%esp),%xmm3
.byte	102,15,56,222,209
	pxor	32(%esp),%xmm4
.byte	102,15,56,222,217
	pxor	48(%esp),%xmm5
	decl	%ecx
.byte	102,15,56,222,225
	pxor	64(%esp),%xmm6
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm7
.byte	102,15,56,222,241
	movups	(%edx),%xmm0
.byte	102,15,56,222,249
# Round 1 was issued above; jump into the shared 6-way round loop.
	call	.L_aesni_decrypt6_enter
	movdqa	80(%esp),%xmm1
	pxor	%xmm0,%xmm0
	xorps	(%esp),%xmm2            # un-tweak and store the 6 results
	pcmpgtd	%xmm1,%xmm0
	xorps	16(%esp),%xmm3
	movups	%xmm2,(%edi)
	xorps	32(%esp),%xmm4
	movups	%xmm3,16(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm4,32(%edi)
	xorps	64(%esp),%xmm6
	movups	%xmm5,48(%edi)
	xorps	%xmm1,%xmm7
	movups	%xmm6,64(%edi)
	pshufd	$19,%xmm0,%xmm2
	movups	%xmm7,80(%edi)
	leal	96(%edi),%edi
# Advance the tweak once more for the next iteration.
	movdqa	96(%esp),%xmm3
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	movl	%ebx,%ecx
	pxor	%xmm2,%xmm1
	subl	$96,%eax
	jnc	.L057xts_dec_loop6
	leal	1(,%ecx,2),%ecx         # restore full round count from half
	movl	%ebp,%edx
	movl	%ecx,%ebx
# Tail: 0..5 remaining full blocks, dispatched by size below.
.L056xts_dec_short:
	addl	$96,%eax
	jz	.L058xts_dec_done6x
	movdqa	%xmm1,%xmm5             # xmm5 = tweak for block #0
	cmpl	$32,%eax
	jb	.L059xts_dec_one
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	je	.L060xts_dec_two
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm6             # xmm6 = tweak for block #1
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	cmpl	$64,%eax
	jb	.L061xts_dec_three
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	%xmm1,%xmm7             # xmm7 = tweak for block #2
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
	movdqa	%xmm5,(%esp)
	movdqa	%xmm6,16(%esp)
	je	.L062xts_dec_four
# Five blocks: tweaks #2..#4 go to the stack, #4 derived in xmm7.
	movdqa	%xmm7,32(%esp)
	pshufd	$19,%xmm0,%xmm7
	movdqa	%xmm1,48(%esp)
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm7
	pxor	%xmm1,%xmm7
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	pxor	(%esp),%xmm2
	movdqu	48(%esi),%xmm5
	pxor	16(%esp),%xmm3
	movdqu	64(%esi),%xmm6
	pxor	32(%esp),%xmm4
	leal	80(%esi),%esi
	pxor	48(%esp),%xmm5
	movdqa	%xmm7,64(%esp)
	pxor	%xmm7,%xmm6
	call	_aesni_decrypt6
	movaps	64(%esp),%xmm1
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	32(%esp),%xmm4
	movups	%xmm2,(%edi)
	xorps	48(%esp),%xmm5
	movups	%xmm3,16(%edi)
	xorps	%xmm1,%xmm6
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	jmp	.L063xts_dec_done
.align	16
# Single block: inline one-block aesdec loop.
.L059xts_dec_one:
	movups	(%esi),%xmm2
	leal	16(%esi),%esi
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L064dec1_loop_12:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L064dec1_loop_12
.byte	102,15,56,223,209
	xorps	%xmm5,%xmm2
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	movdqa	%xmm5,%xmm1             # keep last tweak for the tail logic
	jmp	.L063xts_dec_done
.align	16
.L060xts_dec_two:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	leal	32(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	call	_aesni_decrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L063xts_dec_done
.align	16
.L061xts_dec_three:
	movaps	%xmm1,%xmm7
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	leal	48(%esi),%esi
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	call	_aesni_decrypt3
	xorps	%xmm5,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movdqa	%xmm7,%xmm1
	jmp	.L063xts_dec_done
.align	16
.L062xts_dec_four:
	movaps	%xmm1,%xmm6
	movups	(%esi),%xmm2
	movups	16(%esi),%xmm3
	movups	32(%esi),%xmm4
	xorps	(%esp),%xmm2
	movups	48(%esi),%xmm5
	leal	64(%esi),%esi
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	xorps	%xmm6,%xmm5
	call	_aesni_decrypt4
	xorps	(%esp),%xmm2
	xorps	16(%esp),%xmm3
	xorps	%xmm7,%xmm4
	movups	%xmm2,(%edi)
	xorps	%xmm6,%xmm5
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movdqa	%xmm6,%xmm1
	jmp	.L063xts_dec_done
.align	16
# Length was an exact multiple of 96: check for a stolen-cipher tail.
.L058xts_dec_done6x:
	movl	112(%esp),%eax
	andl	$15,%eax
	jz	.L065xts_dec_ret
	movl	%eax,112(%esp)
	jmp	.L066xts_dec_only_one_more
.align	16
.L063xts_dec_done:
	movl	112(%esp),%eax
	pxor	%xmm0,%xmm0
	andl	$15,%eax                # tail bytes for ciphertext stealing
	jz	.L065xts_dec_ret
	pcmpgtd	%xmm1,%xmm0
	movl	%eax,112(%esp)
	pshufd	$19,%xmm0,%xmm2
	pxor	%xmm0,%xmm0
	movdqa	96(%esp),%xmm3
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm2
	pcmpgtd	%xmm1,%xmm0
	pxor	%xmm2,%xmm1
# Ciphertext stealing: decrypt the held-back block with the SECOND-next
# tweak (xmm5), splice its tail with the final partial block, then
# re-decrypt the spliced block with the next tweak (xmm6).
.L066xts_dec_only_one_more:
	pshufd	$19,%xmm0,%xmm5
	movdqa	%xmm1,%xmm6
	paddq	%xmm1,%xmm1
	pand	%xmm3,%xmm5
	pxor	%xmm1,%xmm5
	movl	%ebp,%edx
	movl	%ebx,%ecx
	movups	(%esi),%xmm2
	xorps	%xmm5,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L067dec1_loop_13:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L067dec1_loop_13
.byte	102,15,56,223,209
	xorps	%xmm5,%xmm2
	movups	%xmm2,(%edi)
# Byte-swap loop: copy input tail over the output block's head while
# moving the displaced plaintext bytes up by 16.
.L068xts_dec_steal:
	movzbl	16(%esi),%ecx
	movzbl	(%edi),%edx
	leal	1(%esi),%esi
	movb	%cl,(%edi)
	movb	%dl,16(%edi)
	leal	1(%edi),%edi
	subl	$1,%eax
	jnz	.L068xts_dec_steal
	subl	112(%esp),%edi          # rewind to the spliced block
	movl	%ebp,%edx
	movl	%ebx,%ecx
	movups	(%edi),%xmm2
	xorps	%xmm6,%xmm2
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L069dec1_loop_14:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L069dec1_loop_14
.byte	102,15,56,223,209
	xorps	%xmm6,%xmm2
	movups	%xmm2,(%edi)
.L065xts_dec_ret:
	movl	116(%esp),%esp          # restore caller's stack pointer
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
.globl	aesni_cbc_encrypt
.type	aesni_cbc_encrypt,@function
.align	16
# AES-CBC encryption/decryption, i386 cdecl.  Stack arguments (offsets
# counted after the four register pushes below):
#   20(%esp) input ptr, 24(%esp) output ptr, 28(%esp) length in bytes,
#   32(%esp) key schedule (round count at offset 240), 36(%esp) IV ptr,
#   40(%esp) enc flag: 0 selects the decrypt path at .L071cbc_decrypt.
# The IV buffer is updated with the last block on return.
# .byte opcode decodes: 102,15,56,220,xx = aesenc; 221 = aesenclast;
# 222 = aesdec; 223 = aesdeclast.
aesni_cbc_encrypt:
.L_aesni_cbc_encrypt_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%esi
# Build a private 16-byte-aligned frame in %ebx; swapped in via xchgl
# below, with the caller's %esp kept at 16(%esp).
	movl	%esp,%ebx
	movl	24(%esp),%edi
	subl	$24,%ebx
	movl	28(%esp),%eax
	andl	$-16,%ebx
	movl	32(%esp),%edx
	movl	36(%esp),%ebp
	testl	%eax,%eax
	jz	.L070cbc_abort          # zero length: nothing to do
	cmpl	$0,40(%esp)
	xchgl	%esp,%ebx
	movups	(%ebp),%xmm7            # xmm7 = chaining value (IV)
	movl	240(%edx),%ecx
	movl	%edx,%ebp               # %ebp = key, %ebx = rounds (reloaded per block)
	movl	%ebx,16(%esp)
	movl	%ecx,%ebx
	je	.L071cbc_decrypt
# --- Encrypt path: one block at a time (CBC encrypt is serial). ---
	movaps	%xmm7,%xmm2
	cmpl	$16,%eax
	jb	.L072cbc_enc_tail
	subl	$16,%eax
	jmp	.L073cbc_enc_loop
.align	16
.L073cbc_enc_loop:
	movups	(%esi),%xmm7            # xmm2 = E(key, plaintext ^ chain)
	leal	16(%esi),%esi
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	xorps	%xmm0,%xmm7
	leal	32(%edx),%edx
	xorps	%xmm7,%xmm2
.L074enc1_loop_15:
.byte	102,15,56,220,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L074enc1_loop_15
.byte	102,15,56,221,209
	movl	%ebx,%ecx
	movl	%ebp,%edx
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
	subl	$16,%eax
	jnc	.L073cbc_enc_loop
	addl	$16,%eax
	jnz	.L072cbc_enc_tail
	movaps	%xmm2,%xmm7             # final ciphertext becomes the new IV
	jmp	.L075cbc_ret
# Partial final block: copy the tail in place, zero-pad to 16 bytes,
# then loop once more over the padded block.
.L072cbc_enc_tail:
	movl	%eax,%ecx
.long	2767451785
	movl	$16,%ecx
	subl	%eax,%ecx
	xorl	%eax,%eax
.long	2868115081
	leal	-16(%edi),%edi
	movl	%ebx,%ecx
	movl	%edi,%esi
	movl	%ebp,%edx
	jmp	.L073cbc_enc_loop
.align	16
# --- Decrypt path: 6 blocks per iteration where possible. ---
.L071cbc_decrypt:
	cmpl	$80,%eax
	jbe	.L076cbc_dec_tail
	movaps	%xmm7,(%esp)            # (%esp) holds the chaining block
	subl	$80,%eax
	jmp	.L077cbc_dec_loop6_enter
.align	16
.L078cbc_dec_loop6:
	movaps	%xmm0,(%esp)
	movups	%xmm7,(%edi)
	leal	16(%edi),%edi
.L077cbc_dec_loop6_enter:
	movdqu	(%esi),%xmm2
	movdqu	16(%esi),%xmm3
	movdqu	32(%esi),%xmm4
	movdqu	48(%esi),%xmm5
	movdqu	64(%esi),%xmm6
	movdqu	80(%esi),%xmm7
	call	_aesni_decrypt6
# Un-chain: xor each decrypted block with the previous ciphertext block.
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	(%esp),%xmm2
	xorps	%xmm1,%xmm3
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm4
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm5
	movups	64(%esi),%xmm1
	xorps	%xmm0,%xmm6
	movups	80(%esi),%xmm0
	xorps	%xmm1,%xmm7
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	leal	96(%esi),%esi
	movups	%xmm4,32(%edi)
	movl	%ebx,%ecx
	movups	%xmm5,48(%edi)
	movl	%ebp,%edx
	movups	%xmm6,64(%edi)
	leal	80(%edi),%edi
	subl	$96,%eax
	ja	.L078cbc_dec_loop6
	movaps	%xmm7,%xmm2
	movaps	%xmm0,%xmm7
	addl	$80,%eax
	jle	.L079cbc_dec_tail_collected
	movups	%xmm2,(%edi)
	leal	16(%edi),%edi
# Tail: 1..5 blocks remain, dispatched by size.
.L076cbc_dec_tail:
	movups	(%esi),%xmm2
	movaps	%xmm2,%xmm6
	cmpl	$16,%eax
	jbe	.L080cbc_dec_one
	movups	16(%esi),%xmm3
	movaps	%xmm3,%xmm5
	cmpl	$32,%eax
	jbe	.L081cbc_dec_two
	movups	32(%esi),%xmm4
	cmpl	$48,%eax
	jbe	.L082cbc_dec_three
	movups	48(%esi),%xmm5
	cmpl	$64,%eax
	jbe	.L083cbc_dec_four
	movups	64(%esi),%xmm6
	movaps	%xmm7,(%esp)
	movups	(%esi),%xmm2
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	movups	(%esi),%xmm1
	movups	16(%esi),%xmm0
	xorps	(%esp),%xmm2
	xorps	%xmm1,%xmm3
	movups	32(%esi),%xmm1
	xorps	%xmm0,%xmm4
	movups	48(%esi),%xmm0
	xorps	%xmm1,%xmm5
	movups	64(%esi),%xmm7          # last ciphertext = next IV
	xorps	%xmm0,%xmm6
	movups	%xmm2,(%edi)
	movups	%xmm3,16(%edi)
	movups	%xmm4,32(%edi)
	movups	%xmm5,48(%edi)
	leal	64(%edi),%edi
	movaps	%xmm6,%xmm2
	subl	$80,%eax
	jmp	.L079cbc_dec_tail_collected
.align	16
.L080cbc_dec_one:
	movups	(%edx),%xmm0
	movups	16(%edx),%xmm1
	leal	32(%edx),%edx
	xorps	%xmm0,%xmm2
.L084dec1_loop_16:
.byte	102,15,56,222,209
	decl	%ecx
	movups	(%edx),%xmm1
	leal	16(%edx),%edx
	jnz	.L084dec1_loop_16
.byte	102,15,56,223,209
	xorps	%xmm7,%xmm2
	movaps	%xmm6,%xmm7
	subl	$16,%eax
	jmp	.L079cbc_dec_tail_collected
.align	16
.L081cbc_dec_two:
	xorps	%xmm4,%xmm4             # unused third lane of _aesni_decrypt3
	call	_aesni_decrypt3
	xorps	%xmm7,%xmm2
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	movaps	%xmm3,%xmm2
	leal	16(%edi),%edi
	movaps	%xmm5,%xmm7
	subl	$32,%eax
	jmp	.L079cbc_dec_tail_collected
.align	16
.L082cbc_dec_three:
	call	_aesni_decrypt3
	xorps	%xmm7,%xmm2
	xorps	%xmm6,%xmm3
	xorps	%xmm5,%xmm4
	movups	%xmm2,(%edi)
	movaps	%xmm4,%xmm2
	movups	%xmm3,16(%edi)
	leal	32(%edi),%edi
	movups	32(%esi),%xmm7
	subl	$48,%eax
	jmp	.L079cbc_dec_tail_collected
.align	16
.L083cbc_dec_four:
	call	_aesni_decrypt4
	movups	16(%esi),%xmm1
	movups	32(%esi),%xmm0
	xorps	%xmm7,%xmm2
	movups	48(%esi),%xmm7
	xorps	%xmm6,%xmm3
	movups	%xmm2,(%edi)
	xorps	%xmm1,%xmm4
	movups	%xmm3,16(%edi)
	xorps	%xmm0,%xmm5
	movups	%xmm4,32(%edi)
	leal	48(%edi),%edi
	movaps	%xmm5,%xmm2
	subl	$64,%eax
# Last decrypted block is in xmm2; write it whole or partially.
.L079cbc_dec_tail_collected:
	andl	$15,%eax
	jnz	.L085cbc_dec_tail_partial
	movups	%xmm2,(%edi)
	jmp	.L075cbc_ret
.align	16
# Partial output: park the block on the stack, copy only %ecx = 16-%eax
# bytes.
.L085cbc_dec_tail_partial:
	movaps	%xmm2,(%esp)
	movl	$16,%ecx
	movl	%esp,%esi
	subl	%eax,%ecx
.long	2767451785
.L075cbc_ret:
	movl	16(%esp),%esp           # restore caller's stack pointer
	movl	36(%esp),%ebp
	movups	%xmm7,(%ebp)            # write back the updated IV
.L070cbc_abort:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
.type	_aesni_set_encrypt_key,@function
.align	16
# Internal key-schedule builder (register contract, not cdecl):
#   %eax = user key ptr, %ecx = key bits (128/192/256), %edx = AES_KEY ptr.
# Returns in %eax: 0 on success, -1 on NULL pointer, -2 on bad key size.
# Stores the round count (9/11/13) at the end of the schedule.
# .byte decode: 102,15,58,223,reg,imm = aeskeygenassist $imm (the 200
# forms read %xmm0, the 202 forms read %xmm2, result in %xmm1).
_aesni_set_encrypt_key:
	testl	%eax,%eax
	jz	.L086bad_pointer
	testl	%edx,%edx
	jz	.L086bad_pointer
	movups	(%eax),%xmm0            # xmm0 = first 16 key bytes
	xorps	%xmm4,%xmm4             # xmm4 = scratch zero for the shufps trick
	leal	16(%edx),%edx
	cmpl	$256,%ecx
	je	.L08714rounds
	cmpl	$192,%ecx
	je	.L08812rounds
	cmpl	$128,%ecx
	jne	.L089bad_keybits
.align	16
# AES-128: 10 round keys; rcon = 1,2,4,...,0x36.
.L09010rounds:
	movl	$9,%ecx
	movups	%xmm0,-16(%edx)
.byte	102,15,58,223,200,1
	call	.L091key_128_cold
.byte	102,15,58,223,200,2
	call	.L092key_128
.byte	102,15,58,223,200,4
	call	.L092key_128
.byte	102,15,58,223,200,8
	call	.L092key_128
.byte	102,15,58,223,200,16
	call	.L092key_128
.byte	102,15,58,223,200,32
	call	.L092key_128
.byte	102,15,58,223,200,64
	call	.L092key_128
.byte	102,15,58,223,200,128
	call	.L092key_128
.byte	102,15,58,223,200,27
	call	.L092key_128
.byte	102,15,58,223,200,54
	call	.L092key_128
	movups	%xmm0,(%edx)
	movl	%ecx,80(%edx)           # round count for 128-bit keys
	xorl	%eax,%eax
	ret
.align	16
# Expand one AES-128 round key: fold xmm0 with itself via shufps and
# mix in the aeskeygenassist result broadcast from xmm1.
.L092key_128:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
.L091key_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	ret
.align	16
# AES-192: 12 round keys; key halves live in xmm0/xmm2.
.L08812rounds:
	movq	16(%eax),%xmm2          # remaining 8 key bytes
	movl	$11,%ecx
	movups	%xmm0,-16(%edx)
.byte	102,15,58,223,202,1
	call	.L093key_192a_cold
.byte	102,15,58,223,202,2
	call	.L094key_192b
.byte	102,15,58,223,202,4
	call	.L095key_192a
.byte	102,15,58,223,202,8
	call	.L094key_192b
.byte	102,15,58,223,202,16
	call	.L095key_192a
.byte	102,15,58,223,202,32
	call	.L094key_192b
.byte	102,15,58,223,202,64
	call	.L095key_192a
.byte	102,15,58,223,202,128
	call	.L094key_192b
	movups	%xmm0,(%edx)
	movl	%ecx,48(%edx)           # round count for 192-bit keys
	xorl	%eax,%eax
	ret
.align	16
.L095key_192a:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
.align	16
.L093key_192a_cold:
	movaps	%xmm2,%xmm5
.L096key_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	ret
.align	16
# Store the 192-bit schedule's straddling words, then continue in the
# shared warm path above.
.L094key_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%edx)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%edx)
	leal	32(%edx),%edx
	jmp	.L096key_192b_warm
.align	16
# AES-256: 14 round keys; alternates the "a" (rcon) and "b" (no rcon)
# expansion steps over the two key halves in xmm0/xmm2.
.L08714rounds:
	movups	16(%eax),%xmm2          # second 16 key bytes
	movl	$13,%ecx
	leal	16(%edx),%edx
	movups	%xmm0,-32(%edx)
	movups	%xmm2,-16(%edx)
.byte	102,15,58,223,202,1
	call	.L097key_256a_cold
.byte	102,15,58,223,200,1
	call	.L098key_256b
.byte	102,15,58,223,202,2
	call	.L099key_256a
.byte	102,15,58,223,200,2
	call	.L098key_256b
.byte	102,15,58,223,202,4
	call	.L099key_256a
.byte	102,15,58,223,200,4
	call	.L098key_256b
.byte	102,15,58,223,202,8
	call	.L099key_256a
.byte	102,15,58,223,200,8
	call	.L098key_256b
.byte	102,15,58,223,202,16
	call	.L099key_256a
.byte	102,15,58,223,200,16
	call	.L098key_256b
.byte	102,15,58,223,202,32
	call	.L099key_256a
.byte	102,15,58,223,200,32
	call	.L098key_256b
.byte	102,15,58,223,202,64
	call	.L099key_256a
	movups	%xmm0,(%edx)
	movl	%ecx,16(%edx)           # round count for 256-bit keys
	xorl	%eax,%eax
	ret
.align	16
.L099key_256a:
	movups	%xmm2,(%edx)
	leal	16(%edx),%edx
.L097key_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1        # broadcast rot-word result
	xorps	%xmm1,%xmm0
	ret
.align	16
.L098key_256b:
	movups	%xmm0,(%edx)
	leal	16(%edx),%edx
	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1        # broadcast non-rotated word
	xorps	%xmm1,%xmm2
	ret
.align	4
.L086bad_pointer:
	movl	$-1,%eax
	ret
.align	4
.L089bad_keybits:
	movl	$-2,%eax
	ret
.size	_aesni_set_encrypt_key,.-_aesni_set_encrypt_key
.globl	aesni_set_encrypt_key
.type	aesni_set_encrypt_key,@function
.align	16
# Public cdecl wrapper: int aesni_set_encrypt_key(userKey, bits, key).
# Marshals the stack arguments into the register contract of
# _aesni_set_encrypt_key (%eax = userKey, %ecx = bits, %edx = key) and
# returns its status code in %eax.
aesni_set_encrypt_key:
.L_aesni_set_encrypt_key_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%ecx
	movl	12(%esp),%edx
	call	_aesni_set_encrypt_key
	ret
.size	aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
.globl	aesni_set_decrypt_key
.type	aesni_set_decrypt_key,@function
.align	16
# Public cdecl: int aesni_set_decrypt_key(userKey, bits, key).
# Builds the encryption schedule first, then converts it in place for
# the Equivalent Inverse Cipher: swap the first and last round keys and
# apply InvMixColumns (aesimc) to every interior round key.
# .byte decode: 102,15,56,219,192 = aesimc %xmm0,%xmm0;
#               102,15,56,219,201 = aesimc %xmm1,%xmm1.
aesni_set_decrypt_key:
.L_aesni_set_decrypt_key_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%ecx
	movl	12(%esp),%edx
	call	_aesni_set_encrypt_key
	movl	12(%esp),%edx
	shll	$4,%ecx
	testl	%eax,%eax
	jnz	.L100dec_key_ret        # propagate -1/-2 from key expansion
	leal	16(%edx,%ecx,1),%eax    # %eax -> last round key, %edx -> first
	movups	(%edx),%xmm0
	movups	(%eax),%xmm1
	movups	%xmm0,(%eax)            # swap the two end keys untransformed
	movups	%xmm1,(%edx)
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
# Walk the two pointers toward each other, applying aesimc to each pair.
.L101dec_key_inverse:
	movups	(%edx),%xmm0
	movups	(%eax),%xmm1
.byte	102,15,56,219,192
.byte	102,15,56,219,201
	leal	16(%edx),%edx
	leal	-16(%eax),%eax
	movups	%xmm0,16(%eax)
	movups	%xmm1,-16(%edx)
	cmpl	%edx,%eax
	ja	.L101dec_key_inverse
	movups	(%edx),%xmm0            # middle key (odd count) done alone
.byte	102,15,56,219,192
	movups	%xmm0,(%edx)
	xorl	%eax,%eax               # success
.L100dec_key_ret:
	ret
.size	aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
2141.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
2142.byte	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
2143.byte	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
2144.byte	115,108,46,111,114,103,62,0
2145