# aesni-x86_64.S revision 299966
# $FreeBSD: stable/10/secure/lib/libcrypto/amd64/aesni-x86_64.S 299966 2016-05-16 19:30:27Z jkim $
# Do not modify. This file is auto-generated from aesni-x86_64.pl.
.text
# aesni_encrypt: encrypt a single 16-byte block.
# In:   %rdi = input block, %rsi = output block, %rdx = key schedule.
#       The 32-bit loop count is read from offset 240 of the key
#       structure (presumably the round count stored by the key setup
#       code -- confirm against the key layout used by callers).
# Out:  16 bytes stored at (%rsi).
# Clobbers: %eax, %rdx, %xmm0-%xmm2, flags.
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
	movups	(%rdi),%xmm2		# xmm2 = input block
	movl	240(%rdx),%eax		# eax = loop count
	movups	(%rdx),%xmm0		# round key 0
	movups	16(%rdx),%xmm1		# round key 1
	leaq	32(%rdx),%rdx		# rdx -> round key 2
	xorps	%xmm0,%xmm2		# whitening with round key 0
.Loop_enc1_1:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%eax			# sets ZF for the jnz below
	movups	(%rdx),%xmm1		# next round key (mov/lea keep flags)
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)		# store result
	.byte	0xf3,0xc3		# rep ret
.size	aesni_encrypt,.-aesni_encrypt

# aesni_decrypt: decrypt a single 16-byte block.
# In:   %rdi = input block, %rsi = output block, %rdx = key schedule.
#       The 32-bit loop count is read from offset 240 of the key
#       structure (presumably the round count -- confirm against the
#       key layout used by callers).
# Out:  16 bytes stored at (%rsi).
# Clobbers: %eax, %rdx, %xmm0-%xmm2, flags.
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
	movups	(%rdi),%xmm2		# xmm2 = input block
	movl	240(%rdx),%eax		# eax = loop count
	movups	(%rdx),%xmm0		# round key 0
	movups	16(%rdx),%xmm1		# round key 1
	leaq	32(%rdx),%rdx		# rdx -> round key 2
	xorps	%xmm0,%xmm2		# whitening with round key 0
.Loop_dec1_2:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%eax			# sets ZF for the jnz below
	movups	(%rdx),%xmm1		# next round key (mov/lea keep flags)
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)		# store result
	.byte	0xf3,0xc3		# rep ret
.size	aesni_decrypt, .-aesni_decrypt
# _aesni_encrypt3: file-local helper, encrypts three blocks in parallel.
# In:   %xmm2, %xmm3, %xmm4 = blocks; %rcx = key schedule;
#       %eax = count, halved on entry because the loop body applies two
#       round keys per iteration.
# Out:  %xmm2-%xmm4 transformed in place.
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
	movups	(%rcx),%xmm0		# round key 0
	shrl	$1,%eax			# iterations = count / 2
	movups	16(%rcx),%xmm1		# round key 1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening of all three blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	(%rcx),%xmm0		# round key 2

.Lenc_loop3:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	decl	%eax			# ZF consumed by jnz at loop end
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
	movups	(%rcx),%xmm0
	jnz	.Lenc_loop3

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt3,.-_aesni_encrypt3
# _aesni_decrypt3: file-local helper, decrypts three blocks in parallel.
# In:   %xmm2, %xmm3, %xmm4 = blocks; %rcx = key schedule;
#       %eax = count, halved on entry because the loop body applies two
#       round keys per iteration.
# Out:  %xmm2-%xmm4 transformed in place.
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
	movups	(%rcx),%xmm0		# round key 0
	shrl	$1,%eax			# iterations = count / 2
	movups	16(%rcx),%xmm1		# round key 1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening of all three blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	(%rcx),%xmm0		# round key 2

.Ldec_loop3:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	decl	%eax			# ZF consumed by jnz at loop end
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	movups	16(%rcx),%xmm1
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
	movups	(%rcx),%xmm0
	jnz	.Ldec_loop3

.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt3,.-_aesni_decrypt3
# _aesni_encrypt4: file-local helper, encrypts four blocks in parallel.
# In:   %xmm2-%xmm5 = blocks; %rcx = key schedule;
#       %eax = count, halved on entry because the loop body applies two
#       round keys per iteration.
# Out:  %xmm2-%xmm5 transformed in place.
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
	movups	(%rcx),%xmm0		# round key 0
	shrl	$1,%eax			# iterations = count / 2
	movups	16(%rcx),%xmm1		# round key 1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening of all four blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	(%rcx),%xmm0		# round key 2

.Lenc_loop4:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	decl	%eax			# ZF consumed by jnz at loop end
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
	movups	(%rcx),%xmm0
	jnz	.Lenc_loop4

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		# aesenclast %xmm0,%xmm5
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt4,.-_aesni_encrypt4
# _aesni_decrypt4: file-local helper, decrypts four blocks in parallel.
# In:   %xmm2-%xmm5 = blocks; %rcx = key schedule;
#       %eax = count, halved on entry because the loop body applies two
#       round keys per iteration.
# Out:  %xmm2-%xmm5 transformed in place.
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
	movups	(%rcx),%xmm0		# round key 0
	shrl	$1,%eax			# iterations = count / 2
	movups	16(%rcx),%xmm1		# round key 1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2		# whitening of all four blocks
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	(%rcx),%xmm0		# round key 2

.Ldec_loop4:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	decl	%eax			# ZF consumed by jnz at loop end
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
	movups	16(%rcx),%xmm1
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232		# aesdec %xmm0,%xmm5
	movups	(%rcx),%xmm0
	jnz	.Ldec_loop4

.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232		# aesdeclast %xmm0,%xmm5
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt4,.-_aesni_decrypt4
# _aesni_encrypt6: file-local helper, encrypts six blocks in parallel.
# In:   %xmm2-%xmm7 = blocks; %rcx = key schedule;
#       %eax = count, halved on entry (two round keys per iteration).
# Out:  %xmm2-%xmm7 transformed in place.
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
# The whitening xors are interleaved with the first aesenc round, so the
# loop is entered at .Lenc_loop6_enter with that round already applied
# and one decrement of %eax already done.
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
	movups	(%rcx),%xmm0		# round key 0
	shrl	$1,%eax			# iterations = count / 2
	movups	16(%rcx),%xmm1		# round key 1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
	pxor	%xmm0,%xmm7
	decl	%eax			# ZF consumed by jnz at loop end
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
	movups	(%rcx),%xmm0		# round key 2
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
.Lenc_loop6_enter:
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		# aesenc %xmm0,%xmm7
	movups	(%rcx),%xmm0
	jnz	.Lenc_loop6

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		# aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240		# aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248		# aesenclast %xmm0,%xmm7
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt6,.-_aesni_encrypt6
# _aesni_decrypt6: file-local helper, decrypts six blocks in parallel.
# In:   %xmm2-%xmm7 = blocks; %rcx = key schedule;
#       %eax = count, halved on entry (two round keys per iteration).
# Out:  %xmm2-%xmm7 transformed in place.
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
# The whitening xors are interleaved with the first aesdec round, so the
# loop is entered at .Ldec_loop6_enter with that round already applied
# and one decrement of %eax already done.
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
	movups	(%rcx),%xmm0		# round key 0
	shrl	$1,%eax			# iterations = count / 2
	movups	16(%rcx),%xmm1		# round key 1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm5
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
	pxor	%xmm0,%xmm7
	decl	%eax			# ZF consumed by jnz at loop end
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
	movups	(%rcx),%xmm0		# round key 2
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
.Ldec_loop6_enter:
	movups	16(%rcx),%xmm1
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232		# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240		# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248		# aesdec %xmm0,%xmm7
	movups	(%rcx),%xmm0
	jnz	.Ldec_loop6

.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232		# aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240		# aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248		# aesdeclast %xmm0,%xmm7
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt6,.-_aesni_decrypt6
# _aesni_encrypt8: file-local helper, encrypts eight blocks in parallel.
# In:   %xmm2-%xmm9 = blocks; %rcx = key schedule;
#       %eax = count, halved on entry (two round keys per iteration).
# Out:  %xmm2-%xmm9 transformed in place.
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
# The whitening xors are interleaved with the first aesenc round; the
# loop is entered at .Lenc_loop8_enter with %xmm1 already reloaded.
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
	movups	(%rcx),%xmm0		# round key 0
	shrl	$1,%eax			# iterations = count / 2
	movups	16(%rcx),%xmm1		# round key 1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
	pxor	%xmm0,%xmm7
	decl	%eax			# ZF consumed by jnz at loop end
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
	pxor	%xmm0,%xmm9
	movups	(%rcx),%xmm0		# round key 2
.byte	102,68,15,56,220,193		# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		# aesenc %xmm1,%xmm9
	movups	16(%rcx),%xmm1
	jmp	.Lenc_loop8_enter
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		# aesenc %xmm1,%xmm9
	movups	16(%rcx),%xmm1
.Lenc_loop8_enter:
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		# aesenc %xmm0,%xmm7
.byte	102,68,15,56,220,192		# aesenc %xmm0,%xmm8
.byte	102,68,15,56,220,200		# aesenc %xmm0,%xmm9
	movups	(%rcx),%xmm0
	jnz	.Lenc_loop8

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
.byte	102,68,15,56,220,193		# aesenc %xmm1,%xmm8
.byte	102,68,15,56,220,201		# aesenc %xmm1,%xmm9
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
.byte	102,15,56,221,232		# aesenclast %xmm0,%xmm5
.byte	102,15,56,221,240		# aesenclast %xmm0,%xmm6
.byte	102,15,56,221,248		# aesenclast %xmm0,%xmm7
.byte	102,68,15,56,221,192		# aesenclast %xmm0,%xmm8
.byte	102,68,15,56,221,200		# aesenclast %xmm0,%xmm9
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_encrypt8,.-_aesni_encrypt8
# _aesni_decrypt8: file-local helper, decrypts eight blocks in parallel.
# In:   %xmm2-%xmm9 = blocks; %rcx = key schedule;
#       %eax = count, halved on entry (two round keys per iteration).
# Out:  %xmm2-%xmm9 transformed in place.
# Clobbers: %eax, %rcx, %xmm0, %xmm1, flags.
# The whitening xors are interleaved with the first aesdec round; the
# loop is entered at .Ldec_loop8_enter with %xmm1 already reloaded.
.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
	movups	(%rcx),%xmm0		# round key 0
	shrl	$1,%eax			# iterations = count / 2
	movups	16(%rcx),%xmm1		# round key 1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	pxor	%xmm0,%xmm5
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
	pxor	%xmm0,%xmm7
	decl	%eax			# ZF consumed by jnz at loop end
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
	pxor	%xmm0,%xmm9
	movups	(%rcx),%xmm0		# round key 2
.byte	102,68,15,56,222,193		# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201		# aesdec %xmm1,%xmm9
	movups	16(%rcx),%xmm1
	jmp	.Ldec_loop8_enter
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
.byte	102,68,15,56,222,193		# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201		# aesdec %xmm1,%xmm9
	movups	16(%rcx),%xmm1
.Ldec_loop8_enter:
.byte	102,15,56,222,208		# aesdec %xmm0,%xmm2
.byte	102,15,56,222,216		# aesdec %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,222,224		# aesdec %xmm0,%xmm4
.byte	102,15,56,222,232		# aesdec %xmm0,%xmm5
.byte	102,15,56,222,240		# aesdec %xmm0,%xmm6
.byte	102,15,56,222,248		# aesdec %xmm0,%xmm7
.byte	102,68,15,56,222,192		# aesdec %xmm0,%xmm8
.byte	102,68,15,56,222,200		# aesdec %xmm0,%xmm9
	movups	(%rcx),%xmm0
	jnz	.Ldec_loop8

.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
.byte	102,15,56,222,217		# aesdec %xmm1,%xmm3
.byte	102,15,56,222,225		# aesdec %xmm1,%xmm4
.byte	102,15,56,222,233		# aesdec %xmm1,%xmm5
.byte	102,15,56,222,241		# aesdec %xmm1,%xmm6
.byte	102,15,56,222,249		# aesdec %xmm1,%xmm7
.byte	102,68,15,56,222,193		# aesdec %xmm1,%xmm8
.byte	102,68,15,56,222,201		# aesdec %xmm1,%xmm9
.byte	102,15,56,223,208		# aesdeclast %xmm0,%xmm2
.byte	102,15,56,223,216		# aesdeclast %xmm0,%xmm3
.byte	102,15,56,223,224		# aesdeclast %xmm0,%xmm4
.byte	102,15,56,223,232		# aesdeclast %xmm0,%xmm5
.byte	102,15,56,223,240		# aesdeclast %xmm0,%xmm6
.byte	102,15,56,223,248		# aesdeclast %xmm0,%xmm7
.byte	102,68,15,56,223,192		# aesdeclast %xmm0,%xmm8
.byte	102,68,15,56,223,200		# aesdeclast %xmm0,%xmm9
	.byte	0xf3,0xc3		# rep ret
.size	_aesni_decrypt8,.-_aesni_decrypt8
# aesni_ecb_encrypt: ECB-mode bulk encryption or decryption.
# In:   %rdi = input, %rsi = output, %rdx = length in bytes (rounded
#       down to a multiple of 16 by the andq below), %rcx = key
#       schedule, %r8d = direction (non-zero encrypts, zero decrypts).
# Out:  transformed blocks stored at (%rsi); returns immediately when
#       the rounded length is zero.
# Clobbers: %eax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm9, flags.
# Structure: 128-byte (eight block) main loop using the 8-way helper,
# then a tail that dispatches on the remaining 16..112 bytes.
# %r11 and %r10d keep the key pointer and count so they can be restored
# after each helper call (the helpers modify %rcx and %eax).
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
	andq	$-16,%rdx		# drop any partial trailing block
	jz	.Lecb_ret

	movl	240(%rcx),%eax		# eax = count stored at key+240
	movups	(%rcx),%xmm0
	movq	%rcx,%r11		# r11 = saved key pointer
	movl	%eax,%r10d		# r10d = saved count
	testl	%r8d,%r8d
	jz	.Lecb_decrypt

	cmpq	$128,%rdx
	jb	.Lecb_enc_tail

	movdqu	(%rdi),%xmm2		# preload the first eight blocks
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$128,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	movups	%xmm2,(%rsi)		# store previous results while
	movq	%r11,%rcx		# loading the next eight blocks
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$128,%rdx
	jnc	.Lecb_enc_loop8		# loop while no borrow (>= 128 left)

	movups	%xmm2,(%rsi)		# flush the last full group
	movq	%r11,%rcx		# restore key pointer for the tail
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax		# restore count for the tail
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$128,%rdx		# undo the borrow: rdx = bytes left
	jz	.Lecb_ret

.Lecb_enc_tail:
	movups	(%rdi),%xmm2
	cmpq	$32,%rdx
	jb	.Lecb_enc_one		# 16 bytes
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two		# 32 bytes
	movups	32(%rdi),%xmm4
	cmpq	$64,%rdx
	jb	.Lecb_enc_three		# 48 bytes
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four		# 64 bytes
	movups	64(%rdi),%xmm6
	cmpq	$96,%rdx
	jb	.Lecb_enc_five		# 80 bytes
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six		# 96 bytes
	movdqu	96(%rdi),%xmm8		# 112 bytes: seven blocks; %xmm9 is
	call	_aesni_encrypt8		# not loaded and its result is unused
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_3:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	xorps	%xmm4,%xmm4		# zero the unused third lane
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:
	xorps	%xmm7,%xmm7		# zero the unused sixth lane
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:
	cmpq	$128,%rdx
	jb	.Lecb_dec_tail

	movdqu	(%rdi),%xmm2		# preload the first eight blocks
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$128,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	movups	%xmm2,(%rsi)		# store previous results while
	movq	%r11,%rcx		# loading the next eight blocks
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0
	subq	$128,%rdx
	jnc	.Lecb_dec_loop8		# loop while no borrow (>= 128 left)

	movups	%xmm2,(%rsi)		# flush the last full group
	movq	%r11,%rcx		# restore key pointer for the tail
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax		# restore count for the tail
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$128,%rdx		# undo the borrow: rdx = bytes left
	jz	.Lecb_ret

.Lecb_dec_tail:
	movups	(%rdi),%xmm2
	cmpq	$32,%rdx
	jb	.Lecb_dec_one		# 16 bytes
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two		# 32 bytes
	movups	32(%rdi),%xmm4
	cmpq	$64,%rdx
	jb	.Lecb_dec_three		# 48 bytes
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four		# 64 bytes
	movups	64(%rdi),%xmm6
	cmpq	$96,%rdx
	jb	.Lecb_dec_five		# 80 bytes
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six		# 96 bytes
	movups	96(%rdi),%xmm8		# 112 bytes: seven blocks; %xmm9 is
	movups	(%rcx),%xmm0		# not loaded and its result is unused
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_4:
.byte	102,15,56,222,209		# aesdec %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209		# aesdeclast %xmm1,%xmm2
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	xorps	%xmm4,%xmm4		# zero the unused third lane
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7		# zero the unused sixth lane
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)

.Lecb_ret:
	.byte	0xf3,0xc3		# rep ret
.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
# aesni_ccm64_encrypt_blocks: CCM-style encryption, one block per
# iteration, running the counter and the MAC through the cipher together.
# In:   %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
#       %rcx = key schedule, %r8 = 16-byte counter block (read only),
#       %r9 = 16-byte MAC state (read on entry, written on exit).
# Out:  blocks stored at (%rsi); updated MAC stored at (%r9).
# Clobbers: %eax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
#       %xmm6-%xmm9, flags.
# Per block: %xmm2 = E(counter), %xmm3 = E(MAC xor input);
#       output = input xor E(counter).
# NOTE(review): the loop tests the count only after decq, so %rdx is
# assumed to be at least 1 -- confirm callers never pass 0.
# .Lincrement64 and .Lbswap_mask are defined outside this routine;
# presumably a 64-bit increment and a byte-reversal shuffle mask.
.globl	aesni_ccm64_encrypt_blocks
.type	aesni_ccm64_encrypt_blocks,@function
.align	16
aesni_ccm64_encrypt_blocks:
	movl	240(%rcx),%eax		# eax = count stored at key+240
	movdqu	(%r8),%xmm9		# xmm9 = counter block
	movdqa	.Lincrement64(%rip),%xmm6
	movdqa	.Lbswap_mask(%rip),%xmm7

	shrl	$1,%eax			# two round keys per loop iteration
	leaq	0(%rcx),%r11		# r11 = saved key pointer
	movdqu	(%r9),%xmm3		# xmm3 = MAC state
	movdqa	%xmm9,%xmm2		# xmm2 = counter block to encrypt
	movl	%eax,%r10d		# r10d = saved halved count
.byte	102,68,15,56,0,207		# pshufb %xmm7,%xmm9
	jmp	.Lccm64_enc_outer
.align	16
.Lccm64_enc_outer:
	movups	(%r11),%xmm0		# round key 0
	movl	%r10d,%eax
	movups	(%rdi),%xmm8		# xmm8 = input block

	xorps	%xmm0,%xmm2		# whiten counter
	movups	16(%r11),%xmm1
	xorps	%xmm8,%xmm0		# xmm0 = round key 0 xor input
	leaq	32(%r11),%rcx
	xorps	%xmm0,%xmm3		# MAC ^= input, whitened
	movups	(%rcx),%xmm0

.Lccm64_enc2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%eax
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	0(%rcx),%xmm0
	jnz	.Lccm64_enc2_loop
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	paddq	%xmm6,%xmm9		# advance the shuffled counter
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3

	decq	%rdx			# ZF consumed by the jnz below
	leaq	16(%rdi),%rdi
	xorps	%xmm2,%xmm8		# output = input xor E(counter)
	movdqa	%xmm9,%xmm2		# next counter, still shuffled
	movups	%xmm8,(%rsi)
	leaq	16(%rsi),%rsi
.byte	102,15,56,0,215			# pshufb %xmm7,%xmm2
	jnz	.Lccm64_enc_outer

	movups	%xmm3,(%r9)		# write back MAC state
	.byte	0xf3,0xc3		# rep ret
.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
# aesni_ccm64_decrypt_blocks: CCM-style decryption, one block per
# iteration.
# In:   %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
#       %rcx = key schedule, %r8 = 16-byte counter block (read only),
#       %r9 = 16-byte MAC state (read on entry, written on exit).
# Out:  blocks stored at (%rsi); updated MAC stored at (%r9).
# Clobbers: %eax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
#       %xmm6-%xmm9, flags.
# Flow: the first counter block is encrypted alone; each later pass
# encrypts the next counter and folds the previous output block into the
# MAC in one two-lane loop; the last output block is folded into the MAC
# by a single-lane loop at .Lccm64_dec_break.
# NOTE(review): one block is always processed before the count is
# tested, so %rdx is assumed to be at least 1 -- confirm with callers.
# .Lincrement64 and .Lbswap_mask are defined outside this routine;
# presumably a 64-bit increment and a byte-reversal shuffle mask.
.globl	aesni_ccm64_decrypt_blocks
.type	aesni_ccm64_decrypt_blocks,@function
.align	16
aesni_ccm64_decrypt_blocks:
	movl	240(%rcx),%eax		# eax = count stored at key+240
	movups	(%r8),%xmm9		# xmm9 = counter block
	movdqu	(%r9),%xmm3		# xmm3 = MAC state
	movdqa	.Lincrement64(%rip),%xmm6
	movdqa	.Lbswap_mask(%rip),%xmm7

	movaps	%xmm9,%xmm2		# xmm2 = counter block to encrypt
	movl	%eax,%r10d		# r10d = saved (unhalved) count
	movq	%rcx,%r11		# r11 = saved key pointer
.byte	102,68,15,56,0,207		# pshufb %xmm7,%xmm9
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_5:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_5
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	movups	(%rdi),%xmm8		# xmm8 = first input block
	paddq	%xmm6,%xmm9		# advance the shuffled counter
	leaq	16(%rdi),%rdi
	jmp	.Lccm64_dec_outer
.align	16
.Lccm64_dec_outer:
	xorps	%xmm2,%xmm8		# output = input xor E(counter)
	movdqa	%xmm9,%xmm2		# next counter, still shuffled
	movl	%r10d,%eax
	movups	%xmm8,(%rsi)
	leaq	16(%rsi),%rsi
.byte	102,15,56,0,215			# pshufb %xmm7,%xmm2

	subq	$1,%rdx
	jz	.Lccm64_dec_break

	movups	(%r11),%xmm0		# round key 0
	shrl	$1,%eax			# two round keys per loop iteration
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8		# xmm8 = output xor round key 0
	leaq	32(%r11),%rcx
	xorps	%xmm0,%xmm2		# whiten counter
	xorps	%xmm8,%xmm3		# MAC ^= output, whitened
	movups	(%rcx),%xmm0

.Lccm64_dec2_loop:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%eax
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	movups	0(%rcx),%xmm0
	jnz	.Lccm64_dec2_loop
	movups	(%rdi),%xmm8		# next input block
	paddq	%xmm6,%xmm9
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	leaq	16(%rdi),%rdi
.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	jmp	.Lccm64_dec_outer

.align	16
.Lccm64_dec_break:

	movups	(%r11),%xmm0		# final MAC update, single lane
	movups	16(%r11),%xmm1
	xorps	%xmm0,%xmm8
	leaq	32(%r11),%r11
	xorps	%xmm8,%xmm3
.Loop_enc1_6:
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	decl	%eax
	movups	(%r11),%xmm1
	leaq	16(%r11),%r11
	jnz	.Loop_enc1_6
.byte	102,15,56,221,217		# aesenclast %xmm1,%xmm3
	movups	%xmm3,(%r9)		# write back MAC state
	.byte	0xf3,0xc3		# rep ret
.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
# aesni_ctr32_encrypt_blocks: CTR mode with a 32-bit counter held in the
# last dword of the counter block.
# In:   %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
#       %rcx = key schedule, %r8 = 16-byte counter block (read only).
# Out:  blocks stored at (%rsi).
# Clobbers: %eax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm15,
#       flags.
# Stack: two 16-byte scratch slots below the stack pointer, at
#       -40(%rsp) and -24(%rsp), accessed with aligned moves; the return
#       address pushed by the helper calls in the tail sits above them.
# Register roles after setup:
#       %xmm14 = counter block with dword 3 cleared,
#       %xmm15 = .Lbswap_mask (defined outside this routine; presumably
#                a byte-reversal shuffle mask),
#       %xmm12 / %xmm13 = counters n..n+2 / n+3..n+5, also kept
#                unshuffled in the two stack slots.
# NOTE(review): a count of 0 is not special-cased; it falls through to
# .Lctr32_one and processes one block -- confirm callers never pass 0.
.globl	aesni_ctr32_encrypt_blocks
.type	aesni_ctr32_encrypt_blocks,@function
.align	16
aesni_ctr32_encrypt_blocks:
	cmpq	$1,%rdx
	je	.Lctr32_one_shortcut

	movdqu	(%r8),%xmm14
	movdqa	.Lbswap_mask(%rip),%xmm15
	xorl	%eax,%eax
.byte	102,69,15,58,22,242,3		# pextrd $3,%xmm14,%r10d
.byte	102,68,15,58,34,240,3		# pinsrd $3,%eax,%xmm14

	movl	240(%rcx),%eax		# eax = count stored at key+240
	bswapl	%r10d			# counter as a native integer
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
.byte	102,69,15,58,34,226,0		# pinsrd $0,%r10d,%xmm12
	leaq	3(%r10),%r11
.byte	102,69,15,58,34,235,0		# pinsrd $0,%r11d,%xmm13
	incl	%r10d
.byte	102,69,15,58,34,226,1		# pinsrd $1,%r10d,%xmm12
	incq	%r11
.byte	102,69,15,58,34,235,1		# pinsrd $1,%r11d,%xmm13
	incl	%r10d
.byte	102,69,15,58,34,226,2		# pinsrd $2,%r10d,%xmm12
	incq	%r11
.byte	102,69,15,58,34,235,2		# pinsrd $2,%r11d,%xmm13
	movdqa	%xmm12,-40(%rsp)	# save unshuffled counters
.byte	102,69,15,56,0,231		# pshufb %xmm15,%xmm12
	movdqa	%xmm13,-24(%rsp)
.byte	102,69,15,56,0,239		# pshufb %xmm15,%xmm13

	pshufd	$192,%xmm12,%xmm2	# dword 3 = src dword 3, rest = dword 0
	pshufd	$128,%xmm12,%xmm3	# dword 3 = src dword 2, rest = dword 0
	pshufd	$64,%xmm12,%xmm4	# dword 3 = src dword 1, rest = dword 0
	cmpq	$6,%rdx
	jb	.Lctr32_tail
	shrl	$1,%eax			# two round keys per loop iteration
	movq	%rcx,%r11		# r11 = saved key pointer
	movl	%eax,%r10d		# r10d = saved halved count
	subq	$6,%rdx
	jmp	.Lctr32_loop6

.align	16
.Lctr32_loop6:
	pshufd	$192,%xmm13,%xmm5
	por	%xmm14,%xmm2		# merge counter dword into the block
	movups	(%r11),%xmm0
	pshufd	$128,%xmm13,%xmm6
	por	%xmm14,%xmm3
	movups	16(%r11),%xmm1
	pshufd	$64,%xmm13,%xmm7
	por	%xmm14,%xmm4
	por	%xmm14,%xmm5
	xorps	%xmm0,%xmm2
	por	%xmm14,%xmm6
	por	%xmm14,%xmm7




	pxor	%xmm0,%xmm3
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	leaq	32(%r11),%rcx
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	movdqa	.Lincrement32(%rip),%xmm13
	pxor	%xmm0,%xmm5
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	movdqa	-40(%rsp),%xmm12
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
	pxor	%xmm0,%xmm7
	movups	(%rcx),%xmm0
	decl	%eax			# ZF consumed by jnz at loop end
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
	jmp	.Lctr32_enc_loop6_enter
.align	16
.Lctr32_enc_loop6:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	decl	%eax
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
.Lctr32_enc_loop6_enter:
	movups	16(%rcx),%xmm1
.byte	102,15,56,220,208		# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216		# aesenc %xmm0,%xmm3
	leaq	32(%rcx),%rcx
.byte	102,15,56,220,224		# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232		# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240		# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248		# aesenc %xmm0,%xmm7
	movups	(%rcx),%xmm0
	jnz	.Lctr32_enc_loop6

.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	paddd	%xmm13,%xmm12		# advance first counter triple
.byte	102,15,56,220,217		# aesenc %xmm1,%xmm3
	paddd	-24(%rsp),%xmm13	# advance second counter triple
.byte	102,15,56,220,225		# aesenc %xmm1,%xmm4
	movdqa	%xmm12,-40(%rsp)
.byte	102,15,56,220,233		# aesenc %xmm1,%xmm5
	movdqa	%xmm13,-24(%rsp)
.byte	102,15,56,220,241		# aesenc %xmm1,%xmm6
.byte	102,69,15,56,0,231		# pshufb %xmm15,%xmm12
.byte	102,15,56,220,249		# aesenc %xmm1,%xmm7
.byte	102,69,15,56,0,239		# pshufb %xmm15,%xmm13

.byte	102,15,56,221,208		# aesenclast %xmm0,%xmm2
	movups	(%rdi),%xmm8
.byte	102,15,56,221,216		# aesenclast %xmm0,%xmm3
	movups	16(%rdi),%xmm9
.byte	102,15,56,221,224		# aesenclast %xmm0,%xmm4
	movups	32(%rdi),%xmm10
.byte	102,15,56,221,232		# aesenclast %xmm0,%xmm5
	movups	48(%rdi),%xmm11
.byte	102,15,56,221,240		# aesenclast %xmm0,%xmm6
	movups	64(%rdi),%xmm1
.byte	102,15,56,221,248		# aesenclast %xmm0,%xmm7
	movups	80(%rdi),%xmm0
	leaq	96(%rdi),%rdi

	xorps	%xmm2,%xmm8		# output = input xor keystream
	pshufd	$192,%xmm12,%xmm2	# start building the next counters
	xorps	%xmm3,%xmm9
	pshufd	$128,%xmm12,%xmm3
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	pshufd	$64,%xmm12,%xmm4
	movups	%xmm9,16(%rsi)
	xorps	%xmm5,%xmm11
	movups	%xmm10,32(%rsi)
	xorps	%xmm6,%xmm1
	movups	%xmm11,48(%rsi)
	xorps	%xmm7,%xmm0
	movups	%xmm1,64(%rsi)
	movups	%xmm0,80(%rsi)
	leaq	96(%rsi),%rsi
	movl	%r10d,%eax
	subq	$6,%rdx
	jnc	.Lctr32_loop6		# loop while no borrow (>= 6 left)

	addq	$6,%rdx			# undo the borrow: rdx = blocks left
	jz	.Lctr32_done
	movq	%r11,%rcx		# restore key pointer for the tail
	leal	1(%rax,%rax,1),%eax	# eax = 2*eax + 1 (eax held the halved count)

.Lctr32_tail:
	por	%xmm14,%xmm2
	movups	(%rdi),%xmm8
	cmpq	$2,%rdx
	jb	.Lctr32_one

	por	%xmm14,%xmm3
	movups	16(%rdi),%xmm9
	je	.Lctr32_two

	pshufd	$192,%xmm13,%xmm5
	por	%xmm14,%xmm4
	movups	32(%rdi),%xmm10
	cmpq	$4,%rdx
	jb	.Lctr32_three

	pshufd	$128,%xmm13,%xmm6
	por	%xmm14,%xmm5
	movups	48(%rdi),%xmm11
	je	.Lctr32_four

	por	%xmm14,%xmm6		# five blocks left
	xorps	%xmm7,%xmm7		# zero the unused sixth lane

	call	_aesni_encrypt6

	movups	64(%rdi),%xmm1
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	movups	%xmm9,16(%rsi)
	xorps	%xmm5,%xmm11
	movups	%xmm10,32(%rsi)
	xorps	%xmm6,%xmm1
	movups	%xmm11,48(%rsi)
	movups	%xmm1,64(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_one_shortcut:
	movups	(%r8),%xmm2		# counter block used as-is
	movups	(%rdi),%xmm8
	movl	240(%rcx),%eax
.Lctr32_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_7:
.byte	102,15,56,220,209		# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_7
.byte	102,15,56,221,209		# aesenclast %xmm1,%xmm2
	xorps	%xmm2,%xmm8
	movups	%xmm8,(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_two:
	xorps	%xmm4,%xmm4		# zero the unused third lane
	call	_aesni_encrypt3
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	movups	%xmm9,16(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_three:
	call	_aesni_encrypt3
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	movups	%xmm9,16(%rsi)
	movups	%xmm10,32(%rsi)
	jmp	.Lctr32_done

.align	16
.Lctr32_four:
	call	_aesni_encrypt4
	xorps	%xmm2,%xmm8
	xorps	%xmm3,%xmm9
	movups	%xmm8,(%rsi)
	xorps	%xmm4,%xmm10
	movups	%xmm9,16(%rsi)
	xorps	%xmm5,%xmm11
	movups	%xmm10,32(%rsi)
	movups	%xmm11,48(%rsi)

.Lctr32_done:
	.byte	0xf3,0xc3		# rep ret
.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
1135.globl	aesni_xts_encrypt
1136.type	aesni_xts_encrypt,@function
1137.align	16
1138aesni_xts_encrypt:
1139	leaq	-104(%rsp),%rsp
1140	movups	(%r9),%xmm15
1141	movl	240(%r8),%eax
1142	movl	240(%rcx),%r10d
1143	movups	(%r8),%xmm0
1144	movups	16(%r8),%xmm1
1145	leaq	32(%r8),%r8
1146	xorps	%xmm0,%xmm15
1147.Loop_enc1_8:
1148.byte	102,68,15,56,220,249
1149	decl	%eax
1150	movups	(%r8),%xmm1
1151	leaq	16(%r8),%r8
1152	jnz	.Loop_enc1_8
1153.byte	102,68,15,56,221,249
1154	movq	%rcx,%r11
1155	movl	%r10d,%eax
1156	movq	%rdx,%r9
1157	andq	$-16,%rdx
1158
1159	movdqa	.Lxts_magic(%rip),%xmm8
1160	pxor	%xmm14,%xmm14
1161	pcmpgtd	%xmm15,%xmm14
1162	pshufd	$19,%xmm14,%xmm9
1163	pxor	%xmm14,%xmm14
1164	movdqa	%xmm15,%xmm10
1165	paddq	%xmm15,%xmm15
1166	pand	%xmm8,%xmm9
1167	pcmpgtd	%xmm15,%xmm14
1168	pxor	%xmm9,%xmm15
1169	pshufd	$19,%xmm14,%xmm9
1170	pxor	%xmm14,%xmm14
1171	movdqa	%xmm15,%xmm11
1172	paddq	%xmm15,%xmm15
1173	pand	%xmm8,%xmm9
1174	pcmpgtd	%xmm15,%xmm14
1175	pxor	%xmm9,%xmm15
1176	pshufd	$19,%xmm14,%xmm9
1177	pxor	%xmm14,%xmm14
1178	movdqa	%xmm15,%xmm12
1179	paddq	%xmm15,%xmm15
1180	pand	%xmm8,%xmm9
1181	pcmpgtd	%xmm15,%xmm14
1182	pxor	%xmm9,%xmm15
1183	pshufd	$19,%xmm14,%xmm9
1184	pxor	%xmm14,%xmm14
1185	movdqa	%xmm15,%xmm13
1186	paddq	%xmm15,%xmm15
1187	pand	%xmm8,%xmm9
1188	pcmpgtd	%xmm15,%xmm14
1189	pxor	%xmm9,%xmm15
1190	subq	$96,%rdx
1191	jc	.Lxts_enc_short
1192
1193	shrl	$1,%eax
1194	subl	$1,%eax
1195	movl	%eax,%r10d
1196	jmp	.Lxts_enc_grandloop
1197
1198.align	16
1199.Lxts_enc_grandloop:
1200	pshufd	$19,%xmm14,%xmm9
1201	movdqa	%xmm15,%xmm14
1202	paddq	%xmm15,%xmm15
1203	movdqu	0(%rdi),%xmm2
1204	pand	%xmm8,%xmm9
1205	movdqu	16(%rdi),%xmm3
1206	pxor	%xmm9,%xmm15
1207
1208	movdqu	32(%rdi),%xmm4
1209	pxor	%xmm10,%xmm2
1210	movdqu	48(%rdi),%xmm5
1211	pxor	%xmm11,%xmm3
1212	movdqu	64(%rdi),%xmm6
1213	pxor	%xmm12,%xmm4
1214	movdqu	80(%rdi),%xmm7
1215	leaq	96(%rdi),%rdi
1216	pxor	%xmm13,%xmm5
1217	movups	(%r11),%xmm0
1218	pxor	%xmm14,%xmm6
1219	pxor	%xmm15,%xmm7
1220
1221
1222
1223	movups	16(%r11),%xmm1
1224	pxor	%xmm0,%xmm2
1225	pxor	%xmm0,%xmm3
1226	movdqa	%xmm10,0(%rsp)
1227.byte	102,15,56,220,209
1228	leaq	32(%r11),%rcx
1229	pxor	%xmm0,%xmm4
1230	movdqa	%xmm11,16(%rsp)
1231.byte	102,15,56,220,217
1232	pxor	%xmm0,%xmm5
1233	movdqa	%xmm12,32(%rsp)
1234.byte	102,15,56,220,225
1235	pxor	%xmm0,%xmm6
1236	movdqa	%xmm13,48(%rsp)
1237.byte	102,15,56,220,233
1238	pxor	%xmm0,%xmm7
1239	movups	(%rcx),%xmm0
1240	decl	%eax
1241	movdqa	%xmm14,64(%rsp)
1242.byte	102,15,56,220,241
1243	movdqa	%xmm15,80(%rsp)
1244.byte	102,15,56,220,249
1245	pxor	%xmm14,%xmm14
1246	pcmpgtd	%xmm15,%xmm14
1247	jmp	.Lxts_enc_loop6_enter
1248
1249.align	16
1250.Lxts_enc_loop6:
1251.byte	102,15,56,220,209
1252.byte	102,15,56,220,217
1253	decl	%eax
1254.byte	102,15,56,220,225
1255.byte	102,15,56,220,233
1256.byte	102,15,56,220,241
1257.byte	102,15,56,220,249
1258.Lxts_enc_loop6_enter:
1259	movups	16(%rcx),%xmm1
1260.byte	102,15,56,220,208
1261.byte	102,15,56,220,216
1262	leaq	32(%rcx),%rcx
1263.byte	102,15,56,220,224
1264.byte	102,15,56,220,232
1265.byte	102,15,56,220,240
1266.byte	102,15,56,220,248
1267	movups	(%rcx),%xmm0
1268	jnz	.Lxts_enc_loop6
1269
1270	pshufd	$19,%xmm14,%xmm9
1271	pxor	%xmm14,%xmm14
1272	paddq	%xmm15,%xmm15
1273.byte	102,15,56,220,209
1274	pand	%xmm8,%xmm9
1275.byte	102,15,56,220,217
1276	pcmpgtd	%xmm15,%xmm14
1277.byte	102,15,56,220,225
1278	pxor	%xmm9,%xmm15
1279.byte	102,15,56,220,233
1280.byte	102,15,56,220,241
1281.byte	102,15,56,220,249
1282	movups	16(%rcx),%xmm1
1283
1284	pshufd	$19,%xmm14,%xmm9
1285	pxor	%xmm14,%xmm14
1286	movdqa	%xmm15,%xmm10
1287	paddq	%xmm15,%xmm15
1288.byte	102,15,56,220,208
1289	pand	%xmm8,%xmm9
1290.byte	102,15,56,220,216
1291	pcmpgtd	%xmm15,%xmm14
1292.byte	102,15,56,220,224
1293	pxor	%xmm9,%xmm15
1294.byte	102,15,56,220,232
1295.byte	102,15,56,220,240
1296.byte	102,15,56,220,248
1297	movups	32(%rcx),%xmm0
1298
1299	pshufd	$19,%xmm14,%xmm9
1300	pxor	%xmm14,%xmm14
1301	movdqa	%xmm15,%xmm11
1302	paddq	%xmm15,%xmm15
1303.byte	102,15,56,220,209
1304	pand	%xmm8,%xmm9
1305.byte	102,15,56,220,217
1306	pcmpgtd	%xmm15,%xmm14
1307.byte	102,15,56,220,225
1308	pxor	%xmm9,%xmm15
1309.byte	102,15,56,220,233
1310.byte	102,15,56,220,241
1311.byte	102,15,56,220,249
1312
1313	pshufd	$19,%xmm14,%xmm9
1314	pxor	%xmm14,%xmm14
1315	movdqa	%xmm15,%xmm12
1316	paddq	%xmm15,%xmm15
1317.byte	102,15,56,221,208
1318	pand	%xmm8,%xmm9
1319.byte	102,15,56,221,216
1320	pcmpgtd	%xmm15,%xmm14
1321.byte	102,15,56,221,224
1322	pxor	%xmm9,%xmm15
1323.byte	102,15,56,221,232
1324.byte	102,15,56,221,240
1325.byte	102,15,56,221,248
1326
1327	pshufd	$19,%xmm14,%xmm9
1328	pxor	%xmm14,%xmm14
1329	movdqa	%xmm15,%xmm13
1330	paddq	%xmm15,%xmm15
1331	xorps	0(%rsp),%xmm2
1332	pand	%xmm8,%xmm9
1333	xorps	16(%rsp),%xmm3
1334	pcmpgtd	%xmm15,%xmm14
1335	pxor	%xmm9,%xmm15
1336
1337	xorps	32(%rsp),%xmm4
1338	movups	%xmm2,0(%rsi)
1339	xorps	48(%rsp),%xmm5
1340	movups	%xmm3,16(%rsi)
1341	xorps	64(%rsp),%xmm6
1342	movups	%xmm4,32(%rsi)
1343	xorps	80(%rsp),%xmm7
1344	movups	%xmm5,48(%rsi)
1345	movl	%r10d,%eax
1346	movups	%xmm6,64(%rsi)
1347	movups	%xmm7,80(%rsi)
1348	leaq	96(%rsi),%rsi
1349	subq	$96,%rdx
1350	jnc	.Lxts_enc_grandloop
1351
1352	leal	3(%rax,%rax,1),%eax
1353	movq	%r11,%rcx
1354	movl	%eax,%r10d
1355
1356.Lxts_enc_short:
1357	addq	$96,%rdx
1358	jz	.Lxts_enc_done
1359
1360	cmpq	$32,%rdx
1361	jb	.Lxts_enc_one
1362	je	.Lxts_enc_two
1363
1364	cmpq	$64,%rdx
1365	jb	.Lxts_enc_three
1366	je	.Lxts_enc_four
1367
1368	pshufd	$19,%xmm14,%xmm9
1369	movdqa	%xmm15,%xmm14
1370	paddq	%xmm15,%xmm15
1371	movdqu	(%rdi),%xmm2
1372	pand	%xmm8,%xmm9
1373	movdqu	16(%rdi),%xmm3
1374	pxor	%xmm9,%xmm15
1375
1376	movdqu	32(%rdi),%xmm4
1377	pxor	%xmm10,%xmm2
1378	movdqu	48(%rdi),%xmm5
1379	pxor	%xmm11,%xmm3
1380	movdqu	64(%rdi),%xmm6
1381	leaq	80(%rdi),%rdi
1382	pxor	%xmm12,%xmm4
1383	pxor	%xmm13,%xmm5
1384	pxor	%xmm14,%xmm6
1385
1386	call	_aesni_encrypt6
1387
1388	xorps	%xmm10,%xmm2
1389	movdqa	%xmm15,%xmm10
1390	xorps	%xmm11,%xmm3
1391	xorps	%xmm12,%xmm4
1392	movdqu	%xmm2,(%rsi)
1393	xorps	%xmm13,%xmm5
1394	movdqu	%xmm3,16(%rsi)
1395	xorps	%xmm14,%xmm6
1396	movdqu	%xmm4,32(%rsi)
1397	movdqu	%xmm5,48(%rsi)
1398	movdqu	%xmm6,64(%rsi)
1399	leaq	80(%rsi),%rsi
1400	jmp	.Lxts_enc_done
1401
1402.align	16
1403.Lxts_enc_one:
1404	movups	(%rdi),%xmm2
1405	leaq	16(%rdi),%rdi
1406	xorps	%xmm10,%xmm2
1407	movups	(%rcx),%xmm0
1408	movups	16(%rcx),%xmm1
1409	leaq	32(%rcx),%rcx
1410	xorps	%xmm0,%xmm2
1411.Loop_enc1_9:
1412.byte	102,15,56,220,209
1413	decl	%eax
1414	movups	(%rcx),%xmm1
1415	leaq	16(%rcx),%rcx
1416	jnz	.Loop_enc1_9
1417.byte	102,15,56,221,209
1418	xorps	%xmm10,%xmm2
1419	movdqa	%xmm11,%xmm10
1420	movups	%xmm2,(%rsi)
1421	leaq	16(%rsi),%rsi
1422	jmp	.Lxts_enc_done
1423
1424.align	16
1425.Lxts_enc_two:
1426	movups	(%rdi),%xmm2
1427	movups	16(%rdi),%xmm3
1428	leaq	32(%rdi),%rdi
1429	xorps	%xmm10,%xmm2
1430	xorps	%xmm11,%xmm3
1431
1432	call	_aesni_encrypt3
1433
1434	xorps	%xmm10,%xmm2
1435	movdqa	%xmm12,%xmm10
1436	xorps	%xmm11,%xmm3
1437	movups	%xmm2,(%rsi)
1438	movups	%xmm3,16(%rsi)
1439	leaq	32(%rsi),%rsi
1440	jmp	.Lxts_enc_done
1441
1442.align	16
1443.Lxts_enc_three:
1444	movups	(%rdi),%xmm2
1445	movups	16(%rdi),%xmm3
1446	movups	32(%rdi),%xmm4
1447	leaq	48(%rdi),%rdi
1448	xorps	%xmm10,%xmm2
1449	xorps	%xmm11,%xmm3
1450	xorps	%xmm12,%xmm4
1451
1452	call	_aesni_encrypt3
1453
1454	xorps	%xmm10,%xmm2
1455	movdqa	%xmm13,%xmm10
1456	xorps	%xmm11,%xmm3
1457	xorps	%xmm12,%xmm4
1458	movups	%xmm2,(%rsi)
1459	movups	%xmm3,16(%rsi)
1460	movups	%xmm4,32(%rsi)
1461	leaq	48(%rsi),%rsi
1462	jmp	.Lxts_enc_done
1463
1464.align	16
1465.Lxts_enc_four:
1466	movups	(%rdi),%xmm2
1467	movups	16(%rdi),%xmm3
1468	movups	32(%rdi),%xmm4
1469	xorps	%xmm10,%xmm2
1470	movups	48(%rdi),%xmm5
1471	leaq	64(%rdi),%rdi
1472	xorps	%xmm11,%xmm3
1473	xorps	%xmm12,%xmm4
1474	xorps	%xmm13,%xmm5
1475
1476	call	_aesni_encrypt4
1477
1478	xorps	%xmm10,%xmm2
1479	movdqa	%xmm15,%xmm10
1480	xorps	%xmm11,%xmm3
1481	xorps	%xmm12,%xmm4
1482	movups	%xmm2,(%rsi)
1483	xorps	%xmm13,%xmm5
1484	movups	%xmm3,16(%rsi)
1485	movups	%xmm4,32(%rsi)
1486	movups	%xmm5,48(%rsi)
1487	leaq	64(%rsi),%rsi
1488	jmp	.Lxts_enc_done
1489
1490.align	16
1491.Lxts_enc_done:
1492	andq	$15,%r9
1493	jz	.Lxts_enc_ret
1494	movq	%r9,%rdx
1495
1496.Lxts_enc_steal:
1497	movzbl	(%rdi),%eax
1498	movzbl	-16(%rsi),%ecx
1499	leaq	1(%rdi),%rdi
1500	movb	%al,-16(%rsi)
1501	movb	%cl,0(%rsi)
1502	leaq	1(%rsi),%rsi
1503	subq	$1,%rdx
1504	jnz	.Lxts_enc_steal
1505
1506	subq	%r9,%rsi
1507	movq	%r11,%rcx
1508	movl	%r10d,%eax
1509
1510	movups	-16(%rsi),%xmm2
1511	xorps	%xmm10,%xmm2
1512	movups	(%rcx),%xmm0
1513	movups	16(%rcx),%xmm1
1514	leaq	32(%rcx),%rcx
1515	xorps	%xmm0,%xmm2
1516.Loop_enc1_10:
1517.byte	102,15,56,220,209
1518	decl	%eax
1519	movups	(%rcx),%xmm1
1520	leaq	16(%rcx),%rcx
1521	jnz	.Loop_enc1_10
1522.byte	102,15,56,221,209
1523	xorps	%xmm10,%xmm2
1524	movups	%xmm2,-16(%rsi)
1525
1526.Lxts_enc_ret:
1527	leaq	104(%rsp),%rsp
1528.Lxts_enc_epilogue:
1529	.byte	0xf3,0xc3
1530.size	aesni_xts_encrypt,.-aesni_xts_encrypt
# aesni_xts_decrypt: AES-XTS decryption built on the AES-NI instructions.
#
# Registers read on entry (System V AMD64 argument order):
#   %rdi = input pointer            %rsi = output pointer
#   %rdx = length in bytes          %rcx = key schedule used for the data
#   %r8  = key schedule used to encrypt the initial tweak
#   %r9  = pointer to the 16-byte initial tweak
# Each key schedule is expected to hold a 32-bit round counter at offset 240.
#
# Long-lived registers inside the function:
#   %r11 = data key schedule, %r10d = round counter to reload into %eax,
#   %r9  = adjusted length (low 4 bits = length of the partial tail),
#   %xmm8 = .Lxts_magic, %xmm10..%xmm15 = consecutive tweaks,
#   %xmm14 = sign mask of the current %xmm15 (input to the next doubling).
#
# AES-NI instructions are emitted as raw bytes:
#   102,15,56,222,M = aesdec        102,15,56,223,M = aesdeclast
#   102,68,15,56,220,249 = aesenc %xmm1,%xmm15
#   102,68,15,56,221,249 = aesenclast %xmm1,%xmm15
#   M = 209,217,225,233,241,249 : round key %xmm1 applied to %xmm2..%xmm7
#   M = 208,216,224,232,240,248 : round key %xmm0 applied to %xmm2..%xmm7
#   .byte 0xf3,0xc3 = rep ret
#
# Tweak update used throughout: paddq doubles both 64-bit halves of %xmm15;
# the sign mask (pcmpgtd against zero), rearranged by pshufd $19 and and-ed
# with .Lxts_magic (0x87,0,1,0), supplies the carry into the high half and
# the 0x87 reduction into the low half, which pxor then folds in.
#
# Uses 104 bytes of stack; the movdqa stores to 0..80(%rsp) need %rsp to be
# 16-byte aligned after the adjustment (true when the caller follows the ABI).
1531.globl	aesni_xts_decrypt
1532.type	aesni_xts_decrypt,@function
1533.align	16
1534aesni_xts_decrypt:
1535	leaq	-104(%rsp),%rsp
# %xmm15 = initial tweak, %eax = rounds of the %r8 schedule,
# %r10d = rounds of the %rcx schedule.
1536	movups	(%r9),%xmm15
1537	movl	240(%r8),%eax
1538	movl	240(%rcx),%r10d
# Encrypt the tweak in %xmm15 with the %r8 key schedule (one block).
# The jnz consumes the flags of decl; movups and leaq leave them intact.
1539	movups	(%r8),%xmm0
1540	movups	16(%r8),%xmm1
1541	leaq	32(%r8),%r8
1542	xorps	%xmm0,%xmm15
1543.Loop_enc1_11:
1544.byte	102,68,15,56,220,249
1545	decl	%eax
1546	movups	(%r8),%xmm1
1547	leaq	16(%r8),%r8
1548	jnz	.Loop_enc1_11
1549.byte	102,68,15,56,221,249
# If the length is not a multiple of 16, take 16 bytes off %rdx so the last
# full block is left for the ciphertext-stealing tail.
1550	xorl	%eax,%eax
1551	testq	$15,%rdx
1552	setnz	%al
1553	shlq	$4,%rax
1554	subq	%rax,%rdx
1555
# Keep the key pointer and round count for reloading; %r9 = adjusted length,
# %rdx = that length rounded down to whole blocks.
1556	movq	%rcx,%r11
1557	movl	%r10d,%eax
1558	movq	%rdx,%r9
1559	andq	$-16,%rdx
1560
# Precompute four tweaks in %xmm10..%xmm13; %xmm15 ends up as the fifth and
# %xmm14 as its sign mask.
1561	movdqa	.Lxts_magic(%rip),%xmm8
1562	pxor	%xmm14,%xmm14
1563	pcmpgtd	%xmm15,%xmm14
1564	pshufd	$19,%xmm14,%xmm9
1565	pxor	%xmm14,%xmm14
1566	movdqa	%xmm15,%xmm10
1567	paddq	%xmm15,%xmm15
1568	pand	%xmm8,%xmm9
1569	pcmpgtd	%xmm15,%xmm14
1570	pxor	%xmm9,%xmm15
1571	pshufd	$19,%xmm14,%xmm9
1572	pxor	%xmm14,%xmm14
1573	movdqa	%xmm15,%xmm11
1574	paddq	%xmm15,%xmm15
1575	pand	%xmm8,%xmm9
1576	pcmpgtd	%xmm15,%xmm14
1577	pxor	%xmm9,%xmm15
1578	pshufd	$19,%xmm14,%xmm9
1579	pxor	%xmm14,%xmm14
1580	movdqa	%xmm15,%xmm12
1581	paddq	%xmm15,%xmm15
1582	pand	%xmm8,%xmm9
1583	pcmpgtd	%xmm15,%xmm14
1584	pxor	%xmm9,%xmm15
1585	pshufd	$19,%xmm14,%xmm9
1586	pxor	%xmm14,%xmm14
1587	movdqa	%xmm15,%xmm13
1588	paddq	%xmm15,%xmm15
1589	pand	%xmm8,%xmm9
1590	pcmpgtd	%xmm15,%xmm14
1591	pxor	%xmm9,%xmm15
# Fewer than 96 bytes of whole blocks: skip the six-block loop.
1592	subq	$96,%rdx
1593	jc	.Lxts_dec_short
1594
# The grand loop handles two rounds per iteration and peels rounds at both
# ends, so its counter is rounds/2 - 1 (kept in %r10d for reloading).
1595	shrl	$1,%eax
1596	subl	$1,%eax
1597	movl	%eax,%r10d
1598	jmp	.Lxts_dec_grandloop
1599
1600.align	16
# Six blocks (96 bytes) per iteration.
1601.Lxts_dec_grandloop:
# %xmm14 = fifth tweak, %xmm15 = sixth tweak; loads are interleaved.
1602	pshufd	$19,%xmm14,%xmm9
1603	movdqa	%xmm15,%xmm14
1604	paddq	%xmm15,%xmm15
1605	movdqu	0(%rdi),%xmm2
1606	pand	%xmm8,%xmm9
1607	movdqu	16(%rdi),%xmm3
1608	pxor	%xmm9,%xmm15
1609
# Load the remaining blocks and xor each with its tweak.
1610	movdqu	32(%rdi),%xmm4
1611	pxor	%xmm10,%xmm2
1612	movdqu	48(%rdi),%xmm5
1613	pxor	%xmm11,%xmm3
1614	movdqu	64(%rdi),%xmm6
1615	pxor	%xmm12,%xmm4
1616	movdqu	80(%rdi),%xmm7
1617	leaq	96(%rdi),%rdi
1618	pxor	%xmm13,%xmm5
1619	movups	(%r11),%xmm0
1620	pxor	%xmm14,%xmm6
1621	pxor	%xmm15,%xmm7
1622
1623
1624
# Round-0 key xor and first aesdec round, interleaved with saving the six
# tweaks to 0..80(%rsp) for the post-decryption xor.
1625	movups	16(%r11),%xmm1
1626	pxor	%xmm0,%xmm2
1627	pxor	%xmm0,%xmm3
1628	movdqa	%xmm10,0(%rsp)
1629.byte	102,15,56,222,209
1630	leaq	32(%r11),%rcx
1631	pxor	%xmm0,%xmm4
1632	movdqa	%xmm11,16(%rsp)
1633.byte	102,15,56,222,217
1634	pxor	%xmm0,%xmm5
1635	movdqa	%xmm12,32(%rsp)
1636.byte	102,15,56,222,225
1637	pxor	%xmm0,%xmm6
1638	movdqa	%xmm13,48(%rsp)
1639.byte	102,15,56,222,233
1640	pxor	%xmm0,%xmm7
1641	movups	(%rcx),%xmm0
1642	decl	%eax
1643	movdqa	%xmm14,64(%rsp)
1644.byte	102,15,56,222,241
1645	movdqa	%xmm15,80(%rsp)
1646.byte	102,15,56,222,249
# %xmm14 = sign mask of the sixth tweak, ready for the next doubling.
1647	pxor	%xmm14,%xmm14
1648	pcmpgtd	%xmm15,%xmm14
1649	jmp	.Lxts_dec_loop6_enter
1650
1651.align	16
# Two AES rounds per iteration on all six blocks, alternating %xmm1/%xmm0.
# The jnz at the bottom consumes the flags of the most recent decl; the SSE
# and lea instructions in between do not modify flags.
1652.Lxts_dec_loop6:
1653.byte	102,15,56,222,209
1654.byte	102,15,56,222,217
1655	decl	%eax
1656.byte	102,15,56,222,225
1657.byte	102,15,56,222,233
1658.byte	102,15,56,222,241
1659.byte	102,15,56,222,249
1660.Lxts_dec_loop6_enter:
1661	movups	16(%rcx),%xmm1
1662.byte	102,15,56,222,208
1663.byte	102,15,56,222,216
1664	leaq	32(%rcx),%rcx
1665.byte	102,15,56,222,224
1666.byte	102,15,56,222,232
1667.byte	102,15,56,222,240
1668.byte	102,15,56,222,248
1669	movups	(%rcx),%xmm0
1670	jnz	.Lxts_dec_loop6
1671
# Remaining rounds, interleaved with computing the next batch of tweaks.
# This step advances %xmm15 to the first tweak of the next batch.
1672	pshufd	$19,%xmm14,%xmm9
1673	pxor	%xmm14,%xmm14
1674	paddq	%xmm15,%xmm15
1675.byte	102,15,56,222,209
1676	pand	%xmm8,%xmm9
1677.byte	102,15,56,222,217
1678	pcmpgtd	%xmm15,%xmm14
1679.byte	102,15,56,222,225
1680	pxor	%xmm9,%xmm15
1681.byte	102,15,56,222,233
1682.byte	102,15,56,222,241
1683.byte	102,15,56,222,249
1684	movups	16(%rcx),%xmm1
1685
# %xmm10 = next batch tweak 1.
1686	pshufd	$19,%xmm14,%xmm9
1687	pxor	%xmm14,%xmm14
1688	movdqa	%xmm15,%xmm10
1689	paddq	%xmm15,%xmm15
1690.byte	102,15,56,222,208
1691	pand	%xmm8,%xmm9
1692.byte	102,15,56,222,216
1693	pcmpgtd	%xmm15,%xmm14
1694.byte	102,15,56,222,224
1695	pxor	%xmm9,%xmm15
1696.byte	102,15,56,222,232
1697.byte	102,15,56,222,240
1698.byte	102,15,56,222,248
1699	movups	32(%rcx),%xmm0
1700
# %xmm11 = next batch tweak 2.
1701	pshufd	$19,%xmm14,%xmm9
1702	pxor	%xmm14,%xmm14
1703	movdqa	%xmm15,%xmm11
1704	paddq	%xmm15,%xmm15
1705.byte	102,15,56,222,209
1706	pand	%xmm8,%xmm9
1707.byte	102,15,56,222,217
1708	pcmpgtd	%xmm15,%xmm14
1709.byte	102,15,56,222,225
1710	pxor	%xmm9,%xmm15
1711.byte	102,15,56,222,233
1712.byte	102,15,56,222,241
1713.byte	102,15,56,222,249
1714
# %xmm12 = next batch tweak 3; final round (aesdeclast) on all six blocks.
1715	pshufd	$19,%xmm14,%xmm9
1716	pxor	%xmm14,%xmm14
1717	movdqa	%xmm15,%xmm12
1718	paddq	%xmm15,%xmm15
1719.byte	102,15,56,223,208
1720	pand	%xmm8,%xmm9
1721.byte	102,15,56,223,216
1722	pcmpgtd	%xmm15,%xmm14
1723.byte	102,15,56,223,224
1724	pxor	%xmm9,%xmm15
1725.byte	102,15,56,223,232
1726.byte	102,15,56,223,240
1727.byte	102,15,56,223,248
1728
# %xmm13 = next batch tweak 4; begin xor-ing results with the saved tweaks.
1729	pshufd	$19,%xmm14,%xmm9
1730	pxor	%xmm14,%xmm14
1731	movdqa	%xmm15,%xmm13
1732	paddq	%xmm15,%xmm15
1733	xorps	0(%rsp),%xmm2
1734	pand	%xmm8,%xmm9
1735	xorps	16(%rsp),%xmm3
1736	pcmpgtd	%xmm15,%xmm14
1737	pxor	%xmm9,%xmm15
1738
# Finish the tweak xor, store six output blocks, reload the loop counter.
1739	xorps	32(%rsp),%xmm4
1740	movups	%xmm2,0(%rsi)
1741	xorps	48(%rsp),%xmm5
1742	movups	%xmm3,16(%rsi)
1743	xorps	64(%rsp),%xmm6
1744	movups	%xmm4,32(%rsi)
1745	xorps	80(%rsp),%xmm7
1746	movups	%xmm5,48(%rsi)
1747	movl	%r10d,%eax
1748	movups	%xmm6,64(%rsi)
1749	movups	%xmm7,80(%rsi)
1750	leaq	96(%rsi),%rsi
1751	subq	$96,%rdx
1752	jnc	.Lxts_dec_grandloop
1753
# Undo the counter transform (%eax = 2*%eax + 3) so %eax/%r10d again hold
# the value the single-block loops and helpers expect; %rcx = key schedule.
1754	leal	3(%rax,%rax,1),%eax
1755	movq	%r11,%rcx
1756	movl	%eax,%r10d
1757
# 0..80 bytes of whole blocks remain; dispatch on the block count.
1758.Lxts_dec_short:
1759	addq	$96,%rdx
1760	jz	.Lxts_dec_done
1761
1762	cmpq	$32,%rdx
1763	jb	.Lxts_dec_one
1764	je	.Lxts_dec_two
1765
1766	cmpq	$64,%rdx
1767	jb	.Lxts_dec_three
1768	je	.Lxts_dec_four
1769
# Five blocks: %xmm14 becomes the fifth tweak, %xmm15 the one after it.
1770	pshufd	$19,%xmm14,%xmm9
1771	movdqa	%xmm15,%xmm14
1772	paddq	%xmm15,%xmm15
1773	movdqu	(%rdi),%xmm2
1774	pand	%xmm8,%xmm9
1775	movdqu	16(%rdi),%xmm3
1776	pxor	%xmm9,%xmm15
1777
1778	movdqu	32(%rdi),%xmm4
1779	pxor	%xmm10,%xmm2
1780	movdqu	48(%rdi),%xmm5
1781	pxor	%xmm11,%xmm3
1782	movdqu	64(%rdi),%xmm6
1783	leaq	80(%rdi),%rdi
1784	pxor	%xmm12,%xmm4
1785	pxor	%xmm13,%xmm5
1786	pxor	%xmm14,%xmm6
1787
# _aesni_decrypt6 is defined elsewhere in this file; %xmm7 is not loaded on
# this path, so whatever the helper produces for it is ignored.
1788	call	_aesni_decrypt6
1789
1790	xorps	%xmm10,%xmm2
1791	xorps	%xmm11,%xmm3
1792	xorps	%xmm12,%xmm4
1793	movdqu	%xmm2,(%rsi)
1794	xorps	%xmm13,%xmm5
1795	movdqu	%xmm3,16(%rsi)
1796	xorps	%xmm14,%xmm6
1797	movdqu	%xmm4,32(%rsi)
1798	pxor	%xmm14,%xmm14
1799	movdqu	%xmm5,48(%rsi)
1800	pcmpgtd	%xmm15,%xmm14
1801	movdqu	%xmm6,64(%rsi)
1802	leaq	80(%rsi),%rsi
1803	pshufd	$19,%xmm14,%xmm11
# No partial tail: done.
1804	andq	$15,%r9
1805	jz	.Lxts_dec_ret
1806
# Partial tail present: %xmm10 = current tweak, %xmm11 = the following one.
1807	movdqa	%xmm15,%xmm10
1808	paddq	%xmm15,%xmm15
1809	pand	%xmm8,%xmm11
1810	pxor	%xmm15,%xmm11
1811	jmp	.Lxts_dec_done2
1812
1813.align	16
# One block, decrypted inline with tweak %xmm10. Afterwards %xmm10/%xmm11
# are shifted to the next two unused tweaks for the stealing tail.
1814.Lxts_dec_one:
1815	movups	(%rdi),%xmm2
1816	leaq	16(%rdi),%rdi
1817	xorps	%xmm10,%xmm2
1818	movups	(%rcx),%xmm0
1819	movups	16(%rcx),%xmm1
1820	leaq	32(%rcx),%rcx
1821	xorps	%xmm0,%xmm2
1822.Loop_dec1_12:
1823.byte	102,15,56,222,209
1824	decl	%eax
1825	movups	(%rcx),%xmm1
1826	leaq	16(%rcx),%rcx
1827	jnz	.Loop_dec1_12
1828.byte	102,15,56,223,209
1829	xorps	%xmm10,%xmm2
1830	movdqa	%xmm11,%xmm10
1831	movups	%xmm2,(%rsi)
1832	movdqa	%xmm12,%xmm11
1833	leaq	16(%rsi),%rsi
1834	jmp	.Lxts_dec_done
1835
1836.align	16
# Two blocks via _aesni_decrypt3 (defined elsewhere in this file); %xmm4 is
# not loaded here and its result is ignored.
1837.Lxts_dec_two:
1838	movups	(%rdi),%xmm2
1839	movups	16(%rdi),%xmm3
1840	leaq	32(%rdi),%rdi
1841	xorps	%xmm10,%xmm2
1842	xorps	%xmm11,%xmm3
1843
1844	call	_aesni_decrypt3
1845
1846	xorps	%xmm10,%xmm2
1847	movdqa	%xmm12,%xmm10
1848	xorps	%xmm11,%xmm3
1849	movdqa	%xmm13,%xmm11
1850	movups	%xmm2,(%rsi)
1851	movups	%xmm3,16(%rsi)
1852	leaq	32(%rsi),%rsi
1853	jmp	.Lxts_dec_done
1854
1855.align	16
# Three blocks via _aesni_decrypt3; the next two tweaks are %xmm13, %xmm15.
1856.Lxts_dec_three:
1857	movups	(%rdi),%xmm2
1858	movups	16(%rdi),%xmm3
1859	movups	32(%rdi),%xmm4
1860	leaq	48(%rdi),%rdi
1861	xorps	%xmm10,%xmm2
1862	xorps	%xmm11,%xmm3
1863	xorps	%xmm12,%xmm4
1864
1865	call	_aesni_decrypt3
1866
1867	xorps	%xmm10,%xmm2
1868	movdqa	%xmm13,%xmm10
1869	xorps	%xmm11,%xmm3
1870	movdqa	%xmm15,%xmm11
1871	xorps	%xmm12,%xmm4
1872	movups	%xmm2,(%rsi)
1873	movups	%xmm3,16(%rsi)
1874	movups	%xmm4,32(%rsi)
1875	leaq	48(%rsi),%rsi
1876	jmp	.Lxts_dec_done
1877
1878.align	16
# Four blocks via _aesni_decrypt4 (defined elsewhere in this file). One more
# tweak is derived first so that %xmm14/%xmm15 serve the stealing tail.
1879.Lxts_dec_four:
1880	pshufd	$19,%xmm14,%xmm9
1881	movdqa	%xmm15,%xmm14
1882	paddq	%xmm15,%xmm15
1883	movups	(%rdi),%xmm2
1884	pand	%xmm8,%xmm9
1885	movups	16(%rdi),%xmm3
1886	pxor	%xmm9,%xmm15
1887
1888	movups	32(%rdi),%xmm4
1889	xorps	%xmm10,%xmm2
1890	movups	48(%rdi),%xmm5
1891	leaq	64(%rdi),%rdi
1892	xorps	%xmm11,%xmm3
1893	xorps	%xmm12,%xmm4
1894	xorps	%xmm13,%xmm5
1895
1896	call	_aesni_decrypt4
1897
1898	xorps	%xmm10,%xmm2
1899	movdqa	%xmm14,%xmm10
1900	xorps	%xmm11,%xmm3
1901	movdqa	%xmm15,%xmm11
1902	xorps	%xmm12,%xmm4
1903	movups	%xmm2,(%rsi)
1904	xorps	%xmm13,%xmm5
1905	movups	%xmm3,16(%rsi)
1906	movups	%xmm4,32(%rsi)
1907	movups	%xmm5,48(%rsi)
1908	leaq	64(%rsi),%rsi
1909	jmp	.Lxts_dec_done
1910
1911.align	16
# All whole blocks handled. %r9 & 15 = length of the partial tail.
1912.Lxts_dec_done:
1913	andq	$15,%r9
1914	jz	.Lxts_dec_ret
# Ciphertext stealing. Here %xmm10 is the tweak that follows the blocks
# already processed and %xmm11 is the tweak after that.
1915.Lxts_dec_done2:
1916	movq	%r9,%rdx
1917	movq	%r11,%rcx
1918	movl	%r10d,%eax
1919
# Decrypt the last full input block with the LATER tweak (%xmm11) and store
# it at (%rsi).
1920	movups	(%rdi),%xmm2
1921	xorps	%xmm11,%xmm2
1922	movups	(%rcx),%xmm0
1923	movups	16(%rcx),%xmm1
1924	leaq	32(%rcx),%rcx
1925	xorps	%xmm0,%xmm2
1926.Loop_dec1_13:
1927.byte	102,15,56,222,209
1928	decl	%eax
1929	movups	(%rcx),%xmm1
1930	leaq	16(%rcx),%rcx
1931	jnz	.Loop_dec1_13
1932.byte	102,15,56,223,209
1933	xorps	%xmm11,%xmm2
1934	movups	%xmm2,(%rsi)
1935
# For each of the %rdx tail bytes: the trailing input byte in[16+i] replaces
# out[i], and the displaced out[i] becomes the final partial output out[16+i].
1936.Lxts_dec_steal:
1937	movzbl	16(%rdi),%eax
1938	movzbl	(%rsi),%ecx
1939	leaq	1(%rdi),%rdi
1940	movb	%al,(%rsi)
1941	movb	%cl,16(%rsi)
1942	leaq	1(%rsi),%rsi
1943	subq	$1,%rdx
1944	jnz	.Lxts_dec_steal
1945
# Rewind %rsi to the start of the patched block and reload key and rounds.
1946	subq	%r9,%rsi
1947	movq	%r11,%rcx
1948	movl	%r10d,%eax
1949
# Decrypt the patched block in place with the EARLIER tweak (%xmm10).
1950	movups	(%rsi),%xmm2
1951	xorps	%xmm10,%xmm2
1952	movups	(%rcx),%xmm0
1953	movups	16(%rcx),%xmm1
1954	leaq	32(%rcx),%rcx
1955	xorps	%xmm0,%xmm2
1956.Loop_dec1_14:
1957.byte	102,15,56,222,209
1958	decl	%eax
1959	movups	(%rcx),%xmm1
1960	leaq	16(%rcx),%rcx
1961	jnz	.Loop_dec1_14
1962.byte	102,15,56,223,209
1963	xorps	%xmm10,%xmm2
1964	movups	%xmm2,(%rsi)
1965
# Release the 104-byte frame and return. No return value is set up here.
1966.Lxts_dec_ret:
1967	leaq	104(%rsp),%rsp
1968.Lxts_dec_epilogue:
1969	.byte	0xf3,0xc3
1970.size	aesni_xts_decrypt,.-aesni_xts_decrypt
# aesni_cbc_encrypt: AES-CBC encryption or decryption using AES-NI.
#
# Registers read on entry (System V AMD64 argument order):
#   %rdi = input pointer      %rsi = output pointer
#   %rdx = length in bytes    %rcx = key schedule (round counter at +240)
#   %r8  = pointer to the 16-byte IV; the updated chaining value is written
#          back through it
#   %r9d = direction: non-zero encrypts, zero decrypts
# Returns immediately when the length is zero.
#
# %r11 keeps the key schedule pointer and %r10d the round counter so that
# %rcx/%eax can be reloaded after each block or batch.
#
# Raw-encoded instructions:
#   102,15,56,220,M = aesenc       102,15,56,221,M = aesenclast
#   102,15,56,222,M = aesdec       102,15,56,223,M = aesdeclast
#   M = 209,217,225,233,241,249 : round key %xmm1 applied to %xmm2..%xmm7
#   102,68,15,56,222,193 / ...,201 = aesdec %xmm1,%xmm8 / aesdec %xmm1,%xmm9
#   .long 0x9066A4F3 = rep movsb followed by a 2-byte nop
#   .long 0x9066AAF3 = rep stosb followed by a 2-byte nop
#   .byte 0xf3,0xc3  = rep ret
#
# The decrypt path uses -24(%rsp)..-9(%rsp) as a 16-byte scratch slot
# without moving %rsp. The 8-byte return address pushed by the calls below
# lands at -8(%rsp) and does not overlap it; this assumes the callees use no
# further stack (TODO confirm) and that the red zone is available.
1971.globl	aesni_cbc_encrypt
1972.type	aesni_cbc_encrypt,@function
1973.align	16
1974aesni_cbc_encrypt:
1975	testq	%rdx,%rdx
1976	jz	.Lcbc_ret
1977
1978	movl	240(%rcx),%r10d
1979	movq	%rcx,%r11
1980	testl	%r9d,%r9d
1981	jz	.Lcbc_decrypt
1982
# ---- Encrypt: %xmm2 carries the chaining value (IV, then each ciphertext).
1983	movups	(%r8),%xmm2
1984	movl	%r10d,%eax
1985	cmpq	$16,%rdx
1986	jb	.Lcbc_enc_tail
1987	subq	$16,%rdx
1988	jmp	.Lcbc_enc_loop
1989.align	16
# One block per iteration: %xmm2 = AES(plaintext ^ chaining value).
1990.Lcbc_enc_loop:
1991	movups	(%rdi),%xmm3
1992	leaq	16(%rdi),%rdi
1993
# Fold the round-0 key into the plaintext, then xor into the chaining value.
1994	movups	(%rcx),%xmm0
1995	movups	16(%rcx),%xmm1
1996	xorps	%xmm0,%xmm3
1997	leaq	32(%rcx),%rcx
1998	xorps	%xmm3,%xmm2
1999.Loop_enc1_15:
2000.byte	102,15,56,220,209
2001	decl	%eax
2002	movups	(%rcx),%xmm1
2003	leaq	16(%rcx),%rcx
2004	jnz	.Loop_enc1_15
2005.byte	102,15,56,221,209
2006	movl	%r10d,%eax
2007	movq	%r11,%rcx
2008	movups	%xmm2,0(%rsi)
2009	leaq	16(%rsi),%rsi
2010	subq	$16,%rdx
2011	jnc	.Lcbc_enc_loop
# Fewer than 16 bytes left: either a partial tail or nothing.
2012	addq	$16,%rdx
2013	jnz	.Lcbc_enc_tail
# Write the last ciphertext block back as the new IV.
2014	movups	%xmm2,(%r8)
2015	jmp	.Lcbc_ret
2016
# Partial final block: copy the %rdx remaining input bytes to the output
# buffer, zero-fill it up to 16 bytes, then encrypt that block in place by
# re-entering the loop with input = output and zero bytes remaining.
# Note that a full 16-byte block is written to the output.
2017.Lcbc_enc_tail:
2018	movq	%rdx,%rcx
2019	xchgq	%rdi,%rsi
2020.long	0x9066A4F3
2021	movl	$16,%ecx
2022	subq	%rdx,%rcx
2023	xorl	%eax,%eax
2024.long	0x9066AAF3
2025	leaq	-16(%rdi),%rdi
2026	movl	%r10d,%eax
2027	movq	%rdi,%rsi
2028	movq	%r11,%rcx
2029	xorq	%rdx,%rdx
2030	jmp	.Lcbc_enc_loop
2031
2032.align	16
# ---- Decrypt: %xmm9 carries the chaining value (IV / previous ciphertext).
2033.Lcbc_decrypt:
2034	movups	(%r8),%xmm9
2035	movl	%r10d,%eax
2036	cmpq	$112,%rdx
2037	jbe	.Lcbc_dec_tail
# More than 112 bytes: eight-block loop. %r10d is halved for the
# two-rounds-per-iteration helper loop; the IV is parked in the scratch slot.
2038	shrl	$1,%r10d
2039	subq	$112,%rdx
2040	movl	%r10d,%eax
2041	movaps	%xmm9,-24(%rsp)
2042	jmp	.Lcbc_dec_loop8_enter
2043.align	16
# Loop back-edge: %xmm0 holds the last ciphertext block of the previous
# batch (next chaining value) and %xmm9 its eighth plaintext block, which
# is stored now.
2044.Lcbc_dec_loop8:
2045	movaps	%xmm0,-24(%rsp)
2046	movups	%xmm9,(%rsi)
2047	leaq	16(%rsi),%rsi
# Load eight ciphertext blocks into %xmm2..%xmm9, xor the round-0 key and
# run the first aesdec round, interleaved with the loads.
2048.Lcbc_dec_loop8_enter:
2049	movups	(%rcx),%xmm0
2050	movups	(%rdi),%xmm2
2051	movups	16(%rdi),%xmm3
2052	movups	16(%rcx),%xmm1
2053
2054	leaq	32(%rcx),%rcx
2055	movdqu	32(%rdi),%xmm4
2056	xorps	%xmm0,%xmm2
2057	movdqu	48(%rdi),%xmm5
2058	xorps	%xmm0,%xmm3
2059	movdqu	64(%rdi),%xmm6
2060.byte	102,15,56,222,209
2061	pxor	%xmm0,%xmm4
2062	movdqu	80(%rdi),%xmm7
2063.byte	102,15,56,222,217
2064	pxor	%xmm0,%xmm5
2065	movdqu	96(%rdi),%xmm8
2066.byte	102,15,56,222,225
2067	pxor	%xmm0,%xmm6
2068	movdqu	112(%rdi),%xmm9
2069.byte	102,15,56,222,233
2070	pxor	%xmm0,%xmm7
2071	decl	%eax
2072.byte	102,15,56,222,241
2073	pxor	%xmm0,%xmm8
2074.byte	102,15,56,222,249
2075	pxor	%xmm0,%xmm9
2076	movups	(%rcx),%xmm0
2077.byte	102,68,15,56,222,193
2078.byte	102,68,15,56,222,201
2079	movups	16(%rcx),%xmm1
2080
# .Ldec_loop8_enter is defined outside this view -- presumably the interior
# of _aesni_decrypt8's round loop, finishing the remaining rounds (TODO
# confirm against that routine).
2081	call	.Ldec_loop8_enter
2082
# CBC chaining: xor each result with the preceding ciphertext block (the
# first with the saved chaining value). %xmm0 ends up holding the eighth
# ciphertext block, the chaining value for the next batch.
2083	movups	(%rdi),%xmm1
2084	movups	16(%rdi),%xmm0
2085	xorps	-24(%rsp),%xmm2
2086	xorps	%xmm1,%xmm3
2087	movups	32(%rdi),%xmm1
2088	xorps	%xmm0,%xmm4
2089	movups	48(%rdi),%xmm0
2090	xorps	%xmm1,%xmm5
2091	movups	64(%rdi),%xmm1
2092	xorps	%xmm0,%xmm6
2093	movups	80(%rdi),%xmm0
2094	xorps	%xmm1,%xmm7
2095	movups	96(%rdi),%xmm1
2096	xorps	%xmm0,%xmm8
2097	movups	112(%rdi),%xmm0
2098	xorps	%xmm1,%xmm9
# Store seven blocks now; the eighth (%xmm9) is stored on the back-edge or
# handed to the tail code.
2099	movups	%xmm2,(%rsi)
2100	movups	%xmm3,16(%rsi)
2101	movups	%xmm4,32(%rsi)
2102	movups	%xmm5,48(%rsi)
2103	movl	%r10d,%eax
2104	movups	%xmm6,64(%rsi)
2105	movq	%r11,%rcx
2106	movups	%xmm7,80(%rsi)
2107	leaq	128(%rdi),%rdi
2108	movups	%xmm8,96(%rsi)
2109	leaq	112(%rsi),%rsi
2110	subq	$128,%rdx
2111	ja	.Lcbc_dec_loop8
2112
# Loop finished: %xmm2 = pending eighth plaintext block, %xmm9 = new
# chaining value. If nothing beyond that block remains, go finish.
2113	movaps	%xmm9,%xmm2
2114	movaps	%xmm0,%xmm9
2115	addq	$112,%rdx
2116	jle	.Lcbc_dec_tail_collected
# Otherwise store the pending block and restore the full round count
# (%eax = 2*%r10d + 1) for the tail helpers.
2117	movups	%xmm2,(%rsi)
2118	leal	1(%r10,%r10,1),%eax
2119	leaq	16(%rsi),%rsi
# Tail: up to 112 bytes. Blocks are loaded progressively; %xmm8/%xmm7/%xmm6
# first receive copies of ciphertext blocks 0/1/2 for later chaining and
# are overwritten with data blocks on the longer paths.
2120.Lcbc_dec_tail:
2121	movups	(%rdi),%xmm2
2122	movaps	%xmm2,%xmm8
2123	cmpq	$16,%rdx
2124	jbe	.Lcbc_dec_one
2125
2126	movups	16(%rdi),%xmm3
2127	movaps	%xmm3,%xmm7
2128	cmpq	$32,%rdx
2129	jbe	.Lcbc_dec_two
2130
2131	movups	32(%rdi),%xmm4
2132	movaps	%xmm4,%xmm6
2133	cmpq	$48,%rdx
2134	jbe	.Lcbc_dec_three
2135
2136	movups	48(%rdi),%xmm5
2137	cmpq	$64,%rdx
2138	jbe	.Lcbc_dec_four
2139
2140	movups	64(%rdi),%xmm6
2141	cmpq	$80,%rdx
2142	jbe	.Lcbc_dec_five
2143
2144	movups	80(%rdi),%xmm7
2145	cmpq	$96,%rdx
2146	jbe	.Lcbc_dec_six
2147
# Seven blocks via _aesni_decrypt8 (defined elsewhere in this file). %xmm9
# still holds the chaining value, so it is parked in the scratch slot; the
# helper's result for %xmm9 is discarded.
2148	movups	96(%rdi),%xmm8
2149	movaps	%xmm9,-24(%rsp)
2150	call	_aesni_decrypt8
2151	movups	(%rdi),%xmm1
2152	movups	16(%rdi),%xmm0
2153	xorps	-24(%rsp),%xmm2
2154	xorps	%xmm1,%xmm3
2155	movups	32(%rdi),%xmm1
2156	xorps	%xmm0,%xmm4
2157	movups	48(%rdi),%xmm0
2158	xorps	%xmm1,%xmm5
2159	movups	64(%rdi),%xmm1
2160	xorps	%xmm0,%xmm6
2161	movups	80(%rdi),%xmm0
2162	xorps	%xmm1,%xmm7
2163	movups	96(%rdi),%xmm9
2164	xorps	%xmm0,%xmm8
2165	movups	%xmm2,(%rsi)
2166	movups	%xmm3,16(%rsi)
2167	movups	%xmm4,32(%rsi)
2168	movups	%xmm5,48(%rsi)
2169	movups	%xmm6,64(%rsi)
2170	movups	%xmm7,80(%rsi)
2171	leaq	96(%rsi),%rsi
2172	movaps	%xmm8,%xmm2
2173	subq	$112,%rdx
2174	jmp	.Lcbc_dec_tail_collected
2175.align	16
# Every tail case leaves the last plaintext block in %xmm2 (not yet stored),
# the new chaining value in %xmm9, and %rdx reduced by the bytes consumed.
# One block, decrypted inline.
2176.Lcbc_dec_one:
2177	movups	(%rcx),%xmm0
2178	movups	16(%rcx),%xmm1
2179	leaq	32(%rcx),%rcx
2180	xorps	%xmm0,%xmm2
2181.Loop_dec1_16:
2182.byte	102,15,56,222,209
2183	decl	%eax
2184	movups	(%rcx),%xmm1
2185	leaq	16(%rcx),%rcx
2186	jnz	.Loop_dec1_16
2187.byte	102,15,56,223,209
2188	xorps	%xmm9,%xmm2
2189	movaps	%xmm8,%xmm9
2190	subq	$16,%rdx
2191	jmp	.Lcbc_dec_tail_collected
2192.align	16
# Two blocks via _aesni_decrypt3; the unused third lane is zeroed first.
2193.Lcbc_dec_two:
2194	xorps	%xmm4,%xmm4
2195	call	_aesni_decrypt3
2196	xorps	%xmm9,%xmm2
2197	xorps	%xmm8,%xmm3
2198	movups	%xmm2,(%rsi)
2199	movaps	%xmm7,%xmm9
2200	movaps	%xmm3,%xmm2
2201	leaq	16(%rsi),%rsi
2202	subq	$32,%rdx
2203	jmp	.Lcbc_dec_tail_collected
2204.align	16
# Three blocks via _aesni_decrypt3.
2205.Lcbc_dec_three:
2206	call	_aesni_decrypt3
2207	xorps	%xmm9,%xmm2
2208	xorps	%xmm8,%xmm3
2209	movups	%xmm2,(%rsi)
2210	xorps	%xmm7,%xmm4
2211	movups	%xmm3,16(%rsi)
2212	movaps	%xmm6,%xmm9
2213	movaps	%xmm4,%xmm2
2214	leaq	32(%rsi),%rsi
2215	subq	$48,%rdx
2216	jmp	.Lcbc_dec_tail_collected
2217.align	16
# Four blocks via _aesni_decrypt4; the new chaining value is reloaded from
# the fourth ciphertext block.
2218.Lcbc_dec_four:
2219	call	_aesni_decrypt4
2220	xorps	%xmm9,%xmm2
2221	movups	48(%rdi),%xmm9
2222	xorps	%xmm8,%xmm3
2223	movups	%xmm2,(%rsi)
2224	xorps	%xmm7,%xmm4
2225	movups	%xmm3,16(%rsi)
2226	xorps	%xmm6,%xmm5
2227	movups	%xmm4,32(%rsi)
2228	movaps	%xmm5,%xmm2
2229	leaq	48(%rsi),%rsi
2230	subq	$64,%rdx
2231	jmp	.Lcbc_dec_tail_collected
2232.align	16
# Five blocks via _aesni_decrypt6; the unused sixth lane is zeroed first.
# %xmm6/%xmm7 now hold data, so chaining values are re-read from the input.
2233.Lcbc_dec_five:
2234	xorps	%xmm7,%xmm7
2235	call	_aesni_decrypt6
2236	movups	16(%rdi),%xmm1
2237	movups	32(%rdi),%xmm0
2238	xorps	%xmm9,%xmm2
2239	xorps	%xmm8,%xmm3
2240	xorps	%xmm1,%xmm4
2241	movups	48(%rdi),%xmm1
2242	xorps	%xmm0,%xmm5
2243	movups	64(%rdi),%xmm9
2244	xorps	%xmm1,%xmm6
2245	movups	%xmm2,(%rsi)
2246	movups	%xmm3,16(%rsi)
2247	movups	%xmm4,32(%rsi)
2248	movups	%xmm5,48(%rsi)
2249	leaq	64(%rsi),%rsi
2250	movaps	%xmm6,%xmm2
2251	subq	$80,%rdx
2252	jmp	.Lcbc_dec_tail_collected
2253.align	16
# Six blocks via _aesni_decrypt6.
2254.Lcbc_dec_six:
2255	call	_aesni_decrypt6
2256	movups	16(%rdi),%xmm1
2257	movups	32(%rdi),%xmm0
2258	xorps	%xmm9,%xmm2
2259	xorps	%xmm8,%xmm3
2260	xorps	%xmm1,%xmm4
2261	movups	48(%rdi),%xmm1
2262	xorps	%xmm0,%xmm5
2263	movups	64(%rdi),%xmm0
2264	xorps	%xmm1,%xmm6
2265	movups	80(%rdi),%xmm9
2266	xorps	%xmm0,%xmm7
2267	movups	%xmm2,(%rsi)
2268	movups	%xmm3,16(%rsi)
2269	movups	%xmm4,32(%rsi)
2270	movups	%xmm5,48(%rsi)
2271	movups	%xmm6,64(%rsi)
2272	leaq	80(%rsi),%rsi
2273	movaps	%xmm7,%xmm2
2274	subq	$96,%rdx
2275	jmp	.Lcbc_dec_tail_collected
2276.align	16
# Write back the chaining value and emit the final block. %rdx & 15 is the
# byte length modulo 16; the jnz tests the flags of the andq (movups leaves
# flags untouched). A zero residue stores the whole block.
2277.Lcbc_dec_tail_collected:
2278	andq	$15,%rdx
2279	movups	%xmm9,(%r8)
2280	jnz	.Lcbc_dec_tail_partial
2281	movups	%xmm2,(%rsi)
2282	jmp	.Lcbc_dec_ret
2283.align	16
# Non-zero residue: spill the block to the scratch slot and copy part of it
# to the output with rep movsb.
# NOTE(review): the byte count is 16 - (%rdx & 15), i.e. 16 minus the
# residue rather than the residue itself -- confirm this is intended.
# The plaintext copy left in the scratch slot is not cleared afterwards.
2284.Lcbc_dec_tail_partial:
2285	movaps	%xmm2,-24(%rsp)
2286	movq	$16,%rcx
2287	movq	%rsi,%rdi
2288	subq	%rdx,%rcx
2289	leaq	-24(%rsp),%rsi
2290.long	0x9066A4F3
2291
2292.Lcbc_dec_ret:
2293.Lcbc_ret:
2294	.byte	0xf3,0xc3
2295.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt
# aesni_set_decrypt_key: build an AES decryption key schedule.
#
# Registers on entry (passed straight through to __aesni_set_encrypt_key):
#   %rdi = user key bytes, %esi = key size in bits, %rdx = schedule to fill
# Returns in %eax whatever __aesni_set_encrypt_key returned (0 on success,
# non-zero on failure, in which case the schedule is not converted).
#
# On success the encryption schedule is converted in place: the round keys
# are reversed end-for-end, and every key except the first and last is run
# through aesimc.
#
# Raw-encoded instructions:
#   .byte 0x48,0x83,0xEC,0x08 = subq $8,%rsp
#   102,15,56,219,192 = aesimc %xmm0,%xmm0
#   102,15,56,219,201 = aesimc %xmm1,%xmm1
#   .byte 0xf3,0xc3   = rep ret
2296.globl	aesni_set_decrypt_key
2297.type	aesni_set_decrypt_key,@function
2298.align	16
2299aesni_set_decrypt_key:
# Reserve 8 bytes of stack around the call; released at .Ldec_key_ret.
2300.byte	0x48,0x83,0xEC,0x08
2301	call	__aesni_set_encrypt_key
# The callee leaves its stored round counter in %esi and %rdx unchanged.
# %rsi*16 + 16 is the byte offset of the last round key.
2302	shll	$4,%esi
2303	testl	%eax,%eax
2304	jnz	.Ldec_key_ret
2305	leaq	16(%rdx,%rsi,1),%rdi
2306
# Swap the first and last round keys unchanged (no aesimc on these two),
# then move both cursors one key inward.
2307	movups	(%rdx),%xmm0
2308	movups	(%rdi),%xmm1
2309	movups	%xmm0,(%rdi)
2310	movups	%xmm1,(%rdx)
2311	leaq	16(%rdx),%rdx
2312	leaq	-16(%rdi),%rdi
2313
# Walk inward from both ends: apply aesimc to each pair and store the two
# keys swapped. The stores use offsets relative to the already advanced
# pointers. Loop while the high cursor is above the low one.
2314.Ldec_key_inverse:
2315	movups	(%rdx),%xmm0
2316	movups	(%rdi),%xmm1
2317.byte	102,15,56,219,192
2318.byte	102,15,56,219,201
2319	leaq	16(%rdx),%rdx
2320	leaq	-16(%rdi),%rdi
2321	movups	%xmm0,16(%rdi)
2322	movups	%xmm1,-16(%rdx)
2323	cmpq	%rdx,%rdi
2324	ja	.Ldec_key_inverse
2325
# The cursors have met on the middle round key: aesimc it in place.
2326	movups	(%rdx),%xmm0
2327.byte	102,15,56,219,192
2328	movups	%xmm0,(%rdi)
2329.Ldec_key_ret:
2330	addq	$8,%rsp
2331	.byte	0xf3,0xc3
2332.LSEH_end_set_decrypt_key:
2333.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key
# aesni_set_encrypt_key / __aesni_set_encrypt_key: expand a user key into an
# AES encryption key schedule with aeskeygenassist. Both labels name the
# same entry point; only aesni_set_encrypt_key is exported.
#
# Registers on entry:
#   %rdi = user key bytes, %esi = key size in bits (128, 192 or 256),
#   %rdx = schedule to fill
# Returns in %rax: 0 on success, -1 if %rdi or %rdx is null, -2 for any
# other key size.
# On success the round keys are written from (%rdx) upward, the 32-bit value
# 9, 11 or 13 is stored at 240(%rdx), and the same value is left in %esi.
# %rdx itself is never modified. %rax is used as the store cursor.
#
# Raw-encoded instructions:
#   .byte 0x48,0x83,0xEC,0x08 = subq $8,%rsp
#   102,15,58,223,200,N = aeskeygenassist $N,%xmm0,%xmm1
#   102,15,58,223,202,N = aeskeygenassist $N,%xmm2,%xmm1
#   .byte 0xf3,0xc3     = rep ret
2334.globl	aesni_set_encrypt_key
2335.type	aesni_set_encrypt_key,@function
2336.align	16
2337aesni_set_encrypt_key:
2338__aesni_set_encrypt_key:
2339.byte	0x48,0x83,0xEC,0x08
# Null-pointer checks; %rax = -1 is the result if either fails.
2340	movq	$-1,%rax
2341	testq	%rdi,%rdi
2342	jz	.Lenc_key_ret
2343	testq	%rdx,%rdx
2344	jz	.Lenc_key_ret
2345
# %xmm0 = first 16 key bytes, %xmm4 = 0 (scratch for the expansion helpers),
# %rax = address of the second round-key slot.
2346	movups	(%rdi),%xmm0
2347	xorps	%xmm4,%xmm4
2348	leaq	16(%rdx),%rax
2349	cmpl	$256,%esi
2350	je	.L14rounds
2351	cmpl	$192,%esi
2352	je	.L12rounds
2353	cmpl	$128,%esi
2354	jne	.Lbad_keybits
2355
# 128-bit key: ten expansion steps with the immediates 1,2,4,...,128,27,54.
# The first call enters at the _cold label, which skips the store because
# round key 0 has just been written. After the calls %rax = %rdx + 160, so
# 80(%rax) is offset 240 of the schedule.
2356.L10rounds:
2357	movl	$9,%esi
2358	movups	%xmm0,(%rdx)
2359.byte	102,15,58,223,200,1
2360	call	.Lkey_expansion_128_cold
2361.byte	102,15,58,223,200,2
2362	call	.Lkey_expansion_128
2363.byte	102,15,58,223,200,4
2364	call	.Lkey_expansion_128
2365.byte	102,15,58,223,200,8
2366	call	.Lkey_expansion_128
2367.byte	102,15,58,223,200,16
2368	call	.Lkey_expansion_128
2369.byte	102,15,58,223,200,32
2370	call	.Lkey_expansion_128
2371.byte	102,15,58,223,200,64
2372	call	.Lkey_expansion_128
2373.byte	102,15,58,223,200,128
2374	call	.Lkey_expansion_128
2375.byte	102,15,58,223,200,27
2376	call	.Lkey_expansion_128
2377.byte	102,15,58,223,200,54
2378	call	.Lkey_expansion_128
2379	movups	%xmm0,(%rax)
2380	movl	%esi,80(%rax)
2381	xorl	%eax,%eax
2382	jmp	.Lenc_key_ret
2383
2384.align	16
# 192-bit key: %xmm2 = key bytes 16..23 (movq clears the upper half).
# Eight steps alternating the 192a/192b helpers. After the calls
# %rax = %rdx + 192, so 48(%rax) is offset 240 of the schedule.
2385.L12rounds:
2386	movq	16(%rdi),%xmm2
2387	movl	$11,%esi
2388	movups	%xmm0,(%rdx)
2389.byte	102,15,58,223,202,1
2390	call	.Lkey_expansion_192a_cold
2391.byte	102,15,58,223,202,2
2392	call	.Lkey_expansion_192b
2393.byte	102,15,58,223,202,4
2394	call	.Lkey_expansion_192a
2395.byte	102,15,58,223,202,8
2396	call	.Lkey_expansion_192b
2397.byte	102,15,58,223,202,16
2398	call	.Lkey_expansion_192a
2399.byte	102,15,58,223,202,32
2400	call	.Lkey_expansion_192b
2401.byte	102,15,58,223,202,64
2402	call	.Lkey_expansion_192a
2403.byte	102,15,58,223,202,128
2404	call	.Lkey_expansion_192b
2405	movups	%xmm0,(%rax)
2406	movl	%esi,48(%rax)
2407	xorq	%rax,%rax
2408	jmp	.Lenc_key_ret
2409
2410.align	16
# 256-bit key: %xmm2 = key bytes 16..31; both halves are stored as round
# keys 0 and 1, then thirteen steps alternate the 256a/256b helpers.
# After the calls %rax = %rdx + 224, so 16(%rax) is offset 240.
2411.L14rounds:
2412	movups	16(%rdi),%xmm2
2413	movl	$13,%esi
2414	leaq	16(%rax),%rax
2415	movups	%xmm0,(%rdx)
2416	movups	%xmm2,16(%rdx)
2417.byte	102,15,58,223,202,1
2418	call	.Lkey_expansion_256a_cold
2419.byte	102,15,58,223,200,1
2420	call	.Lkey_expansion_256b
2421.byte	102,15,58,223,202,2
2422	call	.Lkey_expansion_256a
2423.byte	102,15,58,223,200,2
2424	call	.Lkey_expansion_256b
2425.byte	102,15,58,223,202,4
2426	call	.Lkey_expansion_256a
2427.byte	102,15,58,223,200,4
2428	call	.Lkey_expansion_256b
2429.byte	102,15,58,223,202,8
2430	call	.Lkey_expansion_256a
2431.byte	102,15,58,223,200,8
2432	call	.Lkey_expansion_256b
2433.byte	102,15,58,223,202,16
2434	call	.Lkey_expansion_256a
2435.byte	102,15,58,223,200,16
2436	call	.Lkey_expansion_256b
2437.byte	102,15,58,223,202,32
2438	call	.Lkey_expansion_256a
2439.byte	102,15,58,223,200,32
2440	call	.Lkey_expansion_256b
2441.byte	102,15,58,223,202,64
2442	call	.Lkey_expansion_256a
2443	movups	%xmm0,(%rax)
2444	movl	%esi,16(%rax)
2445	xorq	%rax,%rax
2446	jmp	.Lenc_key_ret
2447
2448.align	16
# Unsupported key size.
2449.Lbad_keybits:
2450	movq	$-2,%rax
# Common exit: release the 8 bytes reserved on entry and return %rax.
2451.Lenc_key_ret:
2452	addq	$8,%rsp
2453	.byte	0xf3,0xc3
2454.LSEH_end_set_encrypt_key:
2455
# ---- Local expansion helpers, reached only through the calls above. ----
# Shared conventions: %xmm1 = aeskeygenassist result for this step,
# %xmm4 = scratch (zeroed by the caller before the first step),
# %rax = store cursor. Entering at a *_cold label skips the store.
#
# 128-bit step: store the current key, then fold shifted copies of %xmm0
# into itself (two shufps/xorps pairs via %xmm4) and xor in dword 3 of
# %xmm1 broadcast to all four lanes.
2456.align	16
2457.Lkey_expansion_128:
2458	movups	%xmm0,(%rax)
2459	leaq	16(%rax),%rax
2460.Lkey_expansion_128_cold:
2461	shufps	$16,%xmm0,%xmm4
2462	xorps	%xmm4,%xmm0
2463	shufps	$140,%xmm0,%xmm4
2464	xorps	%xmm4,%xmm0
2465	shufps	$255,%xmm1,%xmm1
2466	xorps	%xmm1,%xmm0
2467	.byte	0xf3,0xc3
2468
# 192-bit step, variant a: store %xmm0 as one round key, remember the old
# %xmm2 in %xmm5 (consumed by the next 192b call), then update %xmm0 and
# %xmm2 using dword 1 of %xmm1 broadcast (pshufd $85).
2469.align	16
2470.Lkey_expansion_192a:
2471	movups	%xmm0,(%rax)
2472	leaq	16(%rax),%rax
2473.Lkey_expansion_192a_cold:
2474	movaps	%xmm2,%xmm5
2475.Lkey_expansion_192b_warm:
2476	shufps	$16,%xmm0,%xmm4
2477	movdqa	%xmm2,%xmm3
2478	xorps	%xmm4,%xmm0
2479	shufps	$140,%xmm0,%xmm4
2480	pslldq	$4,%xmm3
2481	xorps	%xmm4,%xmm0
2482	pshufd	$85,%xmm1,%xmm1
2483	pxor	%xmm3,%xmm2
2484	pxor	%xmm1,%xmm0
2485	pshufd	$255,%xmm0,%xmm3
2486	pxor	%xmm3,%xmm2
2487	.byte	0xf3,0xc3
2488
# 192-bit step, variant b: pack two round keys from the 64-bit pieces
# (low half of %xmm5 + low half of %xmm0, then high half of %xmm0 + low
# half of %xmm2), store both, advance the cursor by 32 and run the same
# update as variant a.
2489.align	16
2490.Lkey_expansion_192b:
2491	movaps	%xmm0,%xmm3
2492	shufps	$68,%xmm0,%xmm5
2493	movups	%xmm5,(%rax)
2494	shufps	$78,%xmm2,%xmm3
2495	movups	%xmm3,16(%rax)
2496	leaq	32(%rax),%rax
2497	jmp	.Lkey_expansion_192b_warm
2498
# 256-bit step, variant a: store %xmm2, then update %xmm0 exactly as in the
# 128-bit step (dword 3 of %xmm1 broadcast).
2499.align	16
2500.Lkey_expansion_256a:
2501	movups	%xmm2,(%rax)
2502	leaq	16(%rax),%rax
2503.Lkey_expansion_256a_cold:
2504	shufps	$16,%xmm0,%xmm4
2505	xorps	%xmm4,%xmm0
2506	shufps	$140,%xmm0,%xmm4
2507	xorps	%xmm4,%xmm0
2508	shufps	$255,%xmm1,%xmm1
2509	xorps	%xmm1,%xmm0
2510	.byte	0xf3,0xc3
2511
# 256-bit step, variant b: store %xmm0, then update %xmm2 the same way but
# with dword 2 of %xmm1 broadcast (shufps $170).
2512.align	16
2513.Lkey_expansion_256b:
2514	movups	%xmm0,(%rax)
2515	leaq	16(%rax),%rax
2516
2517	shufps	$16,%xmm2,%xmm4
2518	xorps	%xmm4,%xmm2
2519	shufps	$140,%xmm2,%xmm4
2520	xorps	%xmm4,%xmm2
2521	shufps	$170,%xmm1,%xmm1
2522	xorps	%xmm1,%xmm2
2523	.byte	0xf3,0xc3
2524.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
2525.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Read-only constants, 64-byte aligned (emitted in the current section).
2526.align	64
# Byte indices 15..0: a 16-byte reversal pattern (presumably a pshufb mask;
# its users are outside this view).
2527.Lbswap_mask:
2528.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
# Four 32-bit lanes 6,6,6,0 (users are outside this view).
2529.Lincrement32:
2530.long	6,6,6,0
# 128-bit little-endian value 1 (users are outside this view).
2531.Lincrement64:
2532.long	1,0,0,0
# XTS tweak-doubling mask: lane 0 = 0x87 (reduction constant folded into the
# low half), lane 2 = 1 (carry from the low into the high 64-bit half).
2533.Lxts_magic:
2534.long	0x87,0,1,0
2535
# NUL-terminated ASCII ident string: "AES for Intel AES-NI, CRYPTOGAMS by"
# followed by the author's e-mail address in angle brackets.
2536.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2537.align	64
2538