1	# $FreeBSD$
2.text
# aesni_encrypt: run one 16-byte block through the AES-NI encryption rounds.
# Register use as seen in the code below:
#   %rdi  address the 16-byte input block is loaded from
#   %rsi  address the 16-byte result is stored to
#   %rdx  base of the round-key array; the loop count is the 32-bit value
#         at offset 240 from this base
# Scratch: %eax, %rdx (advanced past the keys), %xmm0-%xmm2, flags.
3.globl	aesni_encrypt
4.type	aesni_encrypt,@function
5.align	16
6aesni_encrypt:
7	movups	(%rdi),%xmm2	# xmm2 = input block (unaligned load)
8	movl	240(%rdx),%eax	# eax = number of aesenc steps before the final one
9	movups	(%rdx),%xmm0	# xmm0 = round key at offset 0
10	movups	16(%rdx),%xmm1	# xmm1 = round key at offset 16
11	leaq	32(%rdx),%rdx	# rdx -> next round key to fetch
12	xorps	%xmm0,%xmm2	# initial whitening: block ^= first round key
13.Loop_enc1_1:
14.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
15	decl	%eax	# ZF consumed by the jnz below; movups/leaq leave flags alone
16	movups	(%rdx),%xmm1	# fetch the following round key
17	leaq	16(%rdx),%rdx
18	jnz	.Loop_enc1_1
19.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
20	movups	%xmm2,(%rsi)	# store the result block
21	.byte	0xf3,0xc3	# rep ret
22.size	aesni_encrypt,.-aesni_encrypt
23
# aesni_decrypt: run one 16-byte block through the AES-NI decryption rounds.
# Same shape as the single-block encrypt routine, using aesdec/aesdeclast.
#   %rdi  address the 16-byte input block is loaded from
#   %rsi  address the 16-byte result is stored to
#   %rdx  base of the round-key array; loop count is read from 240(%rdx)
# Scratch: %eax, %rdx (advanced past the keys), %xmm0-%xmm2, flags.
# NOTE(review): presumably the key array must be a decryption schedule
# prepared elsewhere; nothing in this routine converts the keys.
24.globl	aesni_decrypt
25.type	aesni_decrypt,@function
26.align	16
27aesni_decrypt:
28	movups	(%rdi),%xmm2	# xmm2 = input block (unaligned load)
29	movl	240(%rdx),%eax	# eax = number of aesdec steps before the final one
30	movups	(%rdx),%xmm0	# xmm0 = round key at offset 0
31	movups	16(%rdx),%xmm1	# xmm1 = round key at offset 16
32	leaq	32(%rdx),%rdx	# rdx -> next round key to fetch
33	xorps	%xmm0,%xmm2	# initial whitening: block ^= first round key
34.Loop_dec1_2:
35.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
36	decl	%eax	# ZF consumed by the jnz below; movups/leaq leave flags alone
37	movups	(%rdx),%xmm1	# fetch the following round key
38	leaq	16(%rdx),%rdx
39	jnz	.Loop_dec1_2
40.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
41	movups	%xmm2,(%rsi)	# store the result block
42	.byte	0xf3,0xc3	# rep ret
43.size	aesni_decrypt, .-aesni_decrypt
# _aesni_encrypt3: file-local helper (no .globl) that encrypts three blocks
# side by side so the AES instructions of different lanes can overlap.
#   In:      %xmm2-%xmm4 = blocks, %rcx = round-key base, %eax = step count
#   Out:     %xmm2-%xmm4 = results
#   Scratch: %eax, %rcx, %xmm0, %xmm1, flags
# The count is halved on entry because each loop pass applies two round
# keys (one held in %xmm1, one in %xmm0).
44.type	_aesni_encrypt3,@function
45.align	16
46_aesni_encrypt3:
47	movups	(%rcx),%xmm0	# xmm0 = round key at offset 0
48	shrl	$1,%eax	# two round keys are consumed per loop pass
49	movups	16(%rcx),%xmm1	# xmm1 = round key at offset 16
50	leaq	32(%rcx),%rcx
51	xorps	%xmm0,%xmm2	# whitening for all three lanes
52	xorps	%xmm0,%xmm3
53	xorps	%xmm0,%xmm4
54	movups	(%rcx),%xmm0	# xmm0 = next round key
55
56.Lenc_loop3:
57.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
58.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
59	decl	%eax	# ZF stays live until the jnz at the loop bottom
60.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
61	movups	16(%rcx),%xmm1
62.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
63.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
64	leaq	32(%rcx),%rcx
65.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
66	movups	(%rcx),%xmm0
67	jnz	.Lenc_loop3
68
69.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
70.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
71.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
72.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
73.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
74.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
75	.byte	0xf3,0xc3	# rep ret
76.size	_aesni_encrypt3,.-_aesni_encrypt3
# _aesni_decrypt3: file-local helper (no .globl) that decrypts three blocks
# side by side using aesdec/aesdeclast.
#   In:      %xmm2-%xmm4 = blocks, %rcx = round-key base, %eax = step count
#   Out:     %xmm2-%xmm4 = results
#   Scratch: %eax, %rcx, %xmm0, %xmm1, flags
# The count is halved on entry because each loop pass applies two round
# keys (one held in %xmm1, one in %xmm0).
77.type	_aesni_decrypt3,@function
78.align	16
79_aesni_decrypt3:
80	movups	(%rcx),%xmm0	# xmm0 = round key at offset 0
81	shrl	$1,%eax	# two round keys are consumed per loop pass
82	movups	16(%rcx),%xmm1	# xmm1 = round key at offset 16
83	leaq	32(%rcx),%rcx
84	xorps	%xmm0,%xmm2	# whitening for all three lanes
85	xorps	%xmm0,%xmm3
86	xorps	%xmm0,%xmm4
87	movups	(%rcx),%xmm0	# xmm0 = next round key
88
89.Ldec_loop3:
90.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
91.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
92	decl	%eax	# ZF stays live until the jnz at the loop bottom
93.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
94	movups	16(%rcx),%xmm1
95.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
96.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
97	leaq	32(%rcx),%rcx
98.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
99	movups	(%rcx),%xmm0
100	jnz	.Ldec_loop3
101
102.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
103.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
104.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
105.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
106.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
107.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
108	.byte	0xf3,0xc3	# rep ret
109.size	_aesni_decrypt3,.-_aesni_decrypt3
# _aesni_encrypt4: file-local helper (no .globl) that encrypts four blocks
# side by side.
#   In:      %xmm2-%xmm5 = blocks, %rcx = round-key base, %eax = step count
#   Out:     %xmm2-%xmm5 = results
#   Scratch: %eax, %rcx, %xmm0, %xmm1, flags
# The count is halved on entry because each loop pass applies two round
# keys (one held in %xmm1, one in %xmm0).
110.type	_aesni_encrypt4,@function
111.align	16
112_aesni_encrypt4:
113	movups	(%rcx),%xmm0	# xmm0 = round key at offset 0
114	shrl	$1,%eax	# two round keys are consumed per loop pass
115	movups	16(%rcx),%xmm1	# xmm1 = round key at offset 16
116	leaq	32(%rcx),%rcx
117	xorps	%xmm0,%xmm2	# whitening for all four lanes
118	xorps	%xmm0,%xmm3
119	xorps	%xmm0,%xmm4
120	xorps	%xmm0,%xmm5
121	movups	(%rcx),%xmm0	# xmm0 = next round key
122
123.Lenc_loop4:
124.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
125.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
126	decl	%eax	# ZF stays live until the jnz at the loop bottom
127.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
128.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
129	movups	16(%rcx),%xmm1
130.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
131.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
132	leaq	32(%rcx),%rcx
133.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
134.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
135	movups	(%rcx),%xmm0
136	jnz	.Lenc_loop4
137
138.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
139.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
140.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
141.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
142.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
143.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
144.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
145.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
146	.byte	0xf3,0xc3	# rep ret
147.size	_aesni_encrypt4,.-_aesni_encrypt4
# _aesni_decrypt4: file-local helper (no .globl) that decrypts four blocks
# side by side using aesdec/aesdeclast.
#   In:      %xmm2-%xmm5 = blocks, %rcx = round-key base, %eax = step count
#   Out:     %xmm2-%xmm5 = results
#   Scratch: %eax, %rcx, %xmm0, %xmm1, flags
# The count is halved on entry because each loop pass applies two round
# keys (one held in %xmm1, one in %xmm0).
148.type	_aesni_decrypt4,@function
149.align	16
150_aesni_decrypt4:
151	movups	(%rcx),%xmm0	# xmm0 = round key at offset 0
152	shrl	$1,%eax	# two round keys are consumed per loop pass
153	movups	16(%rcx),%xmm1	# xmm1 = round key at offset 16
154	leaq	32(%rcx),%rcx
155	xorps	%xmm0,%xmm2	# whitening for all four lanes
156	xorps	%xmm0,%xmm3
157	xorps	%xmm0,%xmm4
158	xorps	%xmm0,%xmm5
159	movups	(%rcx),%xmm0	# xmm0 = next round key
160
161.Ldec_loop4:
162.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
163.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
164	decl	%eax	# ZF stays live until the jnz at the loop bottom
165.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
166.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
167	movups	16(%rcx),%xmm1
168.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
169.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
170	leaq	32(%rcx),%rcx
171.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
172.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
173	movups	(%rcx),%xmm0
174	jnz	.Ldec_loop4
175
176.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
177.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
178.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
179.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
180.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
181.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
182.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
183.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
184	.byte	0xf3,0xc3	# rep ret
185.size	_aesni_decrypt4,.-_aesni_decrypt4
# _aesni_encrypt6: file-local helper (no .globl) that encrypts six blocks
# side by side.
#   In:      %xmm2-%xmm7 = blocks, %rcx = round-key base, %eax = step count
#   Out:     %xmm2-%xmm7 = results
#   Scratch: %eax, %rcx, %xmm0, %xmm1, flags
# The whitening XORs are interleaved with the first aesenc of each lane,
# and the loop is entered in the middle (at .Lenc_loop6_enter) because the
# first %xmm1 round has already been issued before the jump.
186.type	_aesni_encrypt6,@function
187.align	16
188_aesni_encrypt6:
189	movups	(%rcx),%xmm0	# xmm0 = round key at offset 0
190	shrl	$1,%eax	# two round keys are consumed per loop pass
191	movups	16(%rcx),%xmm1	# xmm1 = round key at offset 16
192	leaq	32(%rcx),%rcx
193	xorps	%xmm0,%xmm2	# whitening, lane by lane, overlapped with aesenc
194	pxor	%xmm0,%xmm3
195.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
196	pxor	%xmm0,%xmm4
197.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
198	pxor	%xmm0,%xmm5
199.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
200	pxor	%xmm0,%xmm6
201.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
202	pxor	%xmm0,%xmm7
203	decl	%eax	# accounts for the pass already started; ZF feeds the first jnz
204.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
205	movups	(%rcx),%xmm0	# whitening done, xmm0 can take the next round key
206.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
207	jmp	.Lenc_loop6_enter
208.align	16
209.Lenc_loop6:
210.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
211.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
212	decl	%eax	# ZF stays live until the jnz at the loop bottom
213.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
214.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
215.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
216.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
217.Lenc_loop6_enter:
218	movups	16(%rcx),%xmm1
219.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
220.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
221	leaq	32(%rcx),%rcx
222.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
223.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
224.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
225.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
226	movups	(%rcx),%xmm0
227	jnz	.Lenc_loop6
228
229.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
230.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
231.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
232.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
233.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
234.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
235.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
236.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
237.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
238.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
239.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
240.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
241	.byte	0xf3,0xc3	# rep ret
242.size	_aesni_encrypt6,.-_aesni_encrypt6
# _aesni_decrypt6: file-local helper (no .globl) that decrypts six blocks
# side by side using aesdec/aesdeclast.
#   In:      %xmm2-%xmm7 = blocks, %rcx = round-key base, %eax = step count
#   Out:     %xmm2-%xmm7 = results
#   Scratch: %eax, %rcx, %xmm0, %xmm1, flags
# The whitening XORs are interleaved with the first aesdec of each lane,
# and the loop is entered in the middle (at .Ldec_loop6_enter) because the
# first %xmm1 round has already been issued before the jump.
243.type	_aesni_decrypt6,@function
244.align	16
245_aesni_decrypt6:
246	movups	(%rcx),%xmm0	# xmm0 = round key at offset 0
247	shrl	$1,%eax	# two round keys are consumed per loop pass
248	movups	16(%rcx),%xmm1	# xmm1 = round key at offset 16
249	leaq	32(%rcx),%rcx
250	xorps	%xmm0,%xmm2	# whitening, lane by lane, overlapped with aesdec
251	pxor	%xmm0,%xmm3
252.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
253	pxor	%xmm0,%xmm4
254.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
255	pxor	%xmm0,%xmm5
256.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
257	pxor	%xmm0,%xmm6
258.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
259	pxor	%xmm0,%xmm7
260	decl	%eax	# accounts for the pass already started; ZF feeds the first jnz
261.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
262	movups	(%rcx),%xmm0	# whitening done, xmm0 can take the next round key
263.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
264	jmp	.Ldec_loop6_enter
265.align	16
266.Ldec_loop6:
267.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
268.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
269	decl	%eax	# ZF stays live until the jnz at the loop bottom
270.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
271.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
272.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
273.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
274.Ldec_loop6_enter:
275	movups	16(%rcx),%xmm1
276.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
277.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
278	leaq	32(%rcx),%rcx
279.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
280.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
281.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
282.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
283	movups	(%rcx),%xmm0
284	jnz	.Ldec_loop6
285
286.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
287.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
288.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
289.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
290.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
291.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
292.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
293.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
294.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
295.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
296.byte	102,15,56,223,240	# aesdeclast %xmm0,%xmm6
297.byte	102,15,56,223,248	# aesdeclast %xmm0,%xmm7
298	.byte	0xf3,0xc3	# rep ret
299.size	_aesni_decrypt6,.-_aesni_decrypt6
# _aesni_encrypt8: file-local helper (no .globl) that encrypts eight blocks
# side by side.
#   In:      %xmm2-%xmm9 = blocks, %rcx = round-key base, %eax = step count
#   Out:     %xmm2-%xmm9 = results
#   Scratch: %eax, %rcx, %xmm0, %xmm1, flags
# Lanes %xmm8/%xmm9 need a REX prefix (the extra 68 byte in the .byte
# sequences).  The loop is entered at .Lenc_loop8_enter because the first
# %xmm1 round is issued while the whitening XORs are still in flight.
300.type	_aesni_encrypt8,@function
301.align	16
302_aesni_encrypt8:
303	movups	(%rcx),%xmm0	# xmm0 = round key at offset 0
304	shrl	$1,%eax	# two round keys are consumed per loop pass
305	movups	16(%rcx),%xmm1	# xmm1 = round key at offset 16
306	leaq	32(%rcx),%rcx
307	xorps	%xmm0,%xmm2	# whitening, lane by lane, overlapped with aesenc
308	xorps	%xmm0,%xmm3
309.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
310	pxor	%xmm0,%xmm4
311.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
312	pxor	%xmm0,%xmm5
313.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
314	pxor	%xmm0,%xmm6
315.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
316	pxor	%xmm0,%xmm7
317	decl	%eax	# accounts for the pass already started; ZF feeds the first jnz
318.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
319	pxor	%xmm0,%xmm8
320.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
321	pxor	%xmm0,%xmm9
322	movups	(%rcx),%xmm0	# whitening done, xmm0 can take the next round key
323.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
324.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
325	movups	16(%rcx),%xmm1
326	jmp	.Lenc_loop8_enter
327.align	16
328.Lenc_loop8:
329.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
330.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
331	decl	%eax	# ZF stays live until the jnz at the loop bottom
332.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
333.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
334.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
335.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
336.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
337.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
338	movups	16(%rcx),%xmm1
339.Lenc_loop8_enter:
340.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
341.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
342	leaq	32(%rcx),%rcx
343.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
344.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
345.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
346.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
347.byte	102,68,15,56,220,192	# aesenc %xmm0,%xmm8
348.byte	102,68,15,56,220,200	# aesenc %xmm0,%xmm9
349	movups	(%rcx),%xmm0
350	jnz	.Lenc_loop8
351
352.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
353.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
354.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
355.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
356.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
357.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
358.byte	102,68,15,56,220,193	# aesenc %xmm1,%xmm8
359.byte	102,68,15,56,220,201	# aesenc %xmm1,%xmm9
360.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
361.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
362.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
363.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
364.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
365.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
366.byte	102,68,15,56,221,192	# aesenclast %xmm0,%xmm8
367.byte	102,68,15,56,221,200	# aesenclast %xmm0,%xmm9
368	.byte	0xf3,0xc3	# rep ret
369.size	_aesni_encrypt8,.-_aesni_encrypt8
# _aesni_decrypt8: file-local helper (no .globl) that decrypts eight blocks
# side by side using aesdec/aesdeclast.
#   In:      %xmm2-%xmm9 = blocks, %rcx = round-key base, %eax = step count
#   Out:     %xmm2-%xmm9 = results
#   Scratch: %eax, %rcx, %xmm0, %xmm1, flags
# Lanes %xmm8/%xmm9 need a REX prefix (the extra 68 byte in the .byte
# sequences).  The loop is entered at .Ldec_loop8_enter because the first
# %xmm1 round is issued while the whitening XORs are still in flight.
370.type	_aesni_decrypt8,@function
371.align	16
372_aesni_decrypt8:
373	movups	(%rcx),%xmm0	# xmm0 = round key at offset 0
374	shrl	$1,%eax	# two round keys are consumed per loop pass
375	movups	16(%rcx),%xmm1	# xmm1 = round key at offset 16
376	leaq	32(%rcx),%rcx
377	xorps	%xmm0,%xmm2	# whitening, lane by lane, overlapped with aesdec
378	xorps	%xmm0,%xmm3
379.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
380	pxor	%xmm0,%xmm4
381.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
382	pxor	%xmm0,%xmm5
383.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
384	pxor	%xmm0,%xmm6
385.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
386	pxor	%xmm0,%xmm7
387	decl	%eax	# accounts for the pass already started; ZF feeds the first jnz
388.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
389	pxor	%xmm0,%xmm8
390.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
391	pxor	%xmm0,%xmm9
392	movups	(%rcx),%xmm0	# whitening done, xmm0 can take the next round key
393.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
394.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
395	movups	16(%rcx),%xmm1
396	jmp	.Ldec_loop8_enter
397.align	16
398.Ldec_loop8:
399.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
400.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
401	decl	%eax	# ZF stays live until the jnz at the loop bottom
402.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
403.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
404.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
405.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
406.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
407.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
408	movups	16(%rcx),%xmm1
409.Ldec_loop8_enter:
410.byte	102,15,56,222,208	# aesdec %xmm0,%xmm2
411.byte	102,15,56,222,216	# aesdec %xmm0,%xmm3
412	leaq	32(%rcx),%rcx
413.byte	102,15,56,222,224	# aesdec %xmm0,%xmm4
414.byte	102,15,56,222,232	# aesdec %xmm0,%xmm5
415.byte	102,15,56,222,240	# aesdec %xmm0,%xmm6
416.byte	102,15,56,222,248	# aesdec %xmm0,%xmm7
417.byte	102,68,15,56,222,192	# aesdec %xmm0,%xmm8
418.byte	102,68,15,56,222,200	# aesdec %xmm0,%xmm9
419	movups	(%rcx),%xmm0
420	jnz	.Ldec_loop8
421
422.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
423.byte	102,15,56,222,217	# aesdec %xmm1,%xmm3
424.byte	102,15,56,222,225	# aesdec %xmm1,%xmm4
425.byte	102,15,56,222,233	# aesdec %xmm1,%xmm5
426.byte	102,15,56,222,241	# aesdec %xmm1,%xmm6
427.byte	102,15,56,222,249	# aesdec %xmm1,%xmm7
428.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
429.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
430.byte	102,15,56,223,208	# aesdeclast %xmm0,%xmm2
431.byte	102,15,56,223,216	# aesdeclast %xmm0,%xmm3
432.byte	102,15,56,223,224	# aesdeclast %xmm0,%xmm4
433.byte	102,15,56,223,232	# aesdeclast %xmm0,%xmm5
434.byte	102,15,56,223,240	# aesdeclast %xmm0,%xmm6
435.byte	102,15,56,223,248	# aesdeclast %xmm0,%xmm7
436.byte	102,68,15,56,223,192	# aesdeclast %xmm0,%xmm8
437.byte	102,68,15,56,223,200	# aesdeclast %xmm0,%xmm9
438	.byte	0xf3,0xc3	# rep ret
439.size	_aesni_decrypt8,.-_aesni_decrypt8
# aesni_ecb_encrypt: process a buffer block by block with no chaining, in
# either direction.
# Register use as seen in the code below:
#   %rdi  input pointer
#   %rsi  output pointer
#   %rdx  length in bytes; rounded down to a multiple of 16, and nothing is
#         done when the rounded length is zero
#   %rcx  round-key base (loop count read from 240(%rcx))
#   %r8d  direction: nonzero takes the encrypt path, zero the decrypt path
# Internal: %r11 keeps the key base and %r10d the loop count so they can be
# restored after each helper call (the helpers advance %rcx and consume %eax).
# Bulk data goes eight blocks at a time through the 8-lane helper; the
# remaining one to seven blocks are dispatched to the 1/3/4/6/8-lane paths.
# Scratch: %rax, %rcx, %rdx, %rdi, %rsi, %r10, %r11, %xmm0-%xmm9, flags.
440.globl	aesni_ecb_encrypt
441.type	aesni_ecb_encrypt,@function
442.align	16
443aesni_ecb_encrypt:
444	andq	$-16,%rdx	# drop any partial trailing block
445	jz	.Lecb_ret
446
447	movl	240(%rcx),%eax
448	movups	(%rcx),%xmm0
449	movq	%rcx,%r11	# r11 = saved key base
450	movl	%eax,%r10d	# r10d = saved loop count
451	testl	%r8d,%r8d
452	jz	.Lecb_decrypt
453
454	cmpq	$128,%rdx
455	jb	.Lecb_enc_tail	# fewer than eight blocks in total
456
457	movdqu	(%rdi),%xmm2	# preload the first batch of eight blocks
458	movdqu	16(%rdi),%xmm3
459	movdqu	32(%rdi),%xmm4
460	movdqu	48(%rdi),%xmm5
461	movdqu	64(%rdi),%xmm6
462	movdqu	80(%rdi),%xmm7
463	movdqu	96(%rdi),%xmm8
464	movdqu	112(%rdi),%xmm9
465	leaq	128(%rdi),%rdi
466	subq	$128,%rdx	# rdx = bytes left beyond the batch held in registers
467	jmp	.Lecb_enc_loop8_enter
468.align	16
# Loop top: store the previous batch of results while loading the next
# batch, and restore %rcx/%eax for the helper.
469.Lecb_enc_loop8:
470	movups	%xmm2,(%rsi)
471	movq	%r11,%rcx
472	movdqu	(%rdi),%xmm2
473	movl	%r10d,%eax
474	movups	%xmm3,16(%rsi)
475	movdqu	16(%rdi),%xmm3
476	movups	%xmm4,32(%rsi)
477	movdqu	32(%rdi),%xmm4
478	movups	%xmm5,48(%rsi)
479	movdqu	48(%rdi),%xmm5
480	movups	%xmm6,64(%rsi)
481	movdqu	64(%rdi),%xmm6
482	movups	%xmm7,80(%rsi)
483	movdqu	80(%rdi),%xmm7
484	movups	%xmm8,96(%rsi)
485	movdqu	96(%rdi),%xmm8
486	movups	%xmm9,112(%rsi)
487	leaq	128(%rsi),%rsi
488	movdqu	112(%rdi),%xmm9
489	leaq	128(%rdi),%rdi
490.Lecb_enc_loop8_enter:
491
492	call	_aesni_encrypt8
493
494	subq	$128,%rdx	# carry clear means another full batch is available
495	jnc	.Lecb_enc_loop8
496
497	movups	%xmm2,(%rsi)	# flush the last full batch
498	movq	%r11,%rcx
499	movups	%xmm3,16(%rsi)
500	movl	%r10d,%eax
501	movups	%xmm4,32(%rsi)
502	movups	%xmm5,48(%rsi)
503	movups	%xmm6,64(%rsi)
504	movups	%xmm7,80(%rsi)
505	movups	%xmm8,96(%rsi)
506	movups	%xmm9,112(%rsi)
507	leaq	128(%rsi),%rsi
508	addq	$128,%rdx	# undo the last subtraction: rdx = leftover bytes
509	jz	.Lecb_ret
510
# Tail dispatch for one to seven leftover blocks.  Each cmpq result is used
# twice (jb, then je) because the movups in between leaves flags untouched.
511.Lecb_enc_tail:
512	movups	(%rdi),%xmm2
513	cmpq	$32,%rdx
514	jb	.Lecb_enc_one
515	movups	16(%rdi),%xmm3
516	je	.Lecb_enc_two
517	movups	32(%rdi),%xmm4
518	cmpq	$64,%rdx
519	jb	.Lecb_enc_three
520	movups	48(%rdi),%xmm5
521	je	.Lecb_enc_four
522	movups	64(%rdi),%xmm6
523	cmpq	$96,%rdx
524	jb	.Lecb_enc_five
525	movups	80(%rdi),%xmm7
526	je	.Lecb_enc_six
527	movdqu	96(%rdi),%xmm8	# seven blocks: the eighth lane is not loaded or stored
528	call	_aesni_encrypt8
529	movups	%xmm2,(%rsi)
530	movups	%xmm3,16(%rsi)
531	movups	%xmm4,32(%rsi)
532	movups	%xmm5,48(%rsi)
533	movups	%xmm6,64(%rsi)
534	movups	%xmm7,80(%rsi)
535	movups	%xmm8,96(%rsi)
536	jmp	.Lecb_ret
537.align	16
# Single block: inline round loop, same shape as the one-block routine.
538.Lecb_enc_one:
539	movups	(%rcx),%xmm0
540	movups	16(%rcx),%xmm1
541	leaq	32(%rcx),%rcx
542	xorps	%xmm0,%xmm2
543.Loop_enc1_3:
544.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
545	decl	%eax
546	movups	(%rcx),%xmm1
547	leaq	16(%rcx),%rcx
548	jnz	.Loop_enc1_3
549.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
550	movups	%xmm2,(%rsi)
551	jmp	.Lecb_ret
552.align	16
553.Lecb_enc_two:
554	xorps	%xmm4,%xmm4	# zero the unused third lane; its result is discarded
555	call	_aesni_encrypt3
556	movups	%xmm2,(%rsi)
557	movups	%xmm3,16(%rsi)
558	jmp	.Lecb_ret
559.align	16
560.Lecb_enc_three:
561	call	_aesni_encrypt3
562	movups	%xmm2,(%rsi)
563	movups	%xmm3,16(%rsi)
564	movups	%xmm4,32(%rsi)
565	jmp	.Lecb_ret
566.align	16
567.Lecb_enc_four:
568	call	_aesni_encrypt4
569	movups	%xmm2,(%rsi)
570	movups	%xmm3,16(%rsi)
571	movups	%xmm4,32(%rsi)
572	movups	%xmm5,48(%rsi)
573	jmp	.Lecb_ret
574.align	16
575.Lecb_enc_five:
576	xorps	%xmm7,%xmm7	# zero the unused sixth lane; its result is discarded
577	call	_aesni_encrypt6
578	movups	%xmm2,(%rsi)
579	movups	%xmm3,16(%rsi)
580	movups	%xmm4,32(%rsi)
581	movups	%xmm5,48(%rsi)
582	movups	%xmm6,64(%rsi)
583	jmp	.Lecb_ret
584.align	16
585.Lecb_enc_six:
586	call	_aesni_encrypt6
587	movups	%xmm2,(%rsi)
588	movups	%xmm3,16(%rsi)
589	movups	%xmm4,32(%rsi)
590	movups	%xmm5,48(%rsi)
591	movups	%xmm6,64(%rsi)
592	movups	%xmm7,80(%rsi)
593	jmp	.Lecb_ret
594
595.align	16
# Decrypt direction: same structure as above, built on the decrypt helpers.
596.Lecb_decrypt:
597	cmpq	$128,%rdx
598	jb	.Lecb_dec_tail	# fewer than eight blocks in total
599
600	movdqu	(%rdi),%xmm2	# preload the first batch of eight blocks
601	movdqu	16(%rdi),%xmm3
602	movdqu	32(%rdi),%xmm4
603	movdqu	48(%rdi),%xmm5
604	movdqu	64(%rdi),%xmm6
605	movdqu	80(%rdi),%xmm7
606	movdqu	96(%rdi),%xmm8
607	movdqu	112(%rdi),%xmm9
608	leaq	128(%rdi),%rdi
609	subq	$128,%rdx	# rdx = bytes left beyond the batch held in registers
610	jmp	.Lecb_dec_loop8_enter
611.align	16
612.Lecb_dec_loop8:
613	movups	%xmm2,(%rsi)
614	movq	%r11,%rcx
615	movdqu	(%rdi),%xmm2
616	movl	%r10d,%eax
617	movups	%xmm3,16(%rsi)
618	movdqu	16(%rdi),%xmm3
619	movups	%xmm4,32(%rsi)
620	movdqu	32(%rdi),%xmm4
621	movups	%xmm5,48(%rsi)
622	movdqu	48(%rdi),%xmm5
623	movups	%xmm6,64(%rsi)
624	movdqu	64(%rdi),%xmm6
625	movups	%xmm7,80(%rsi)
626	movdqu	80(%rdi),%xmm7
627	movups	%xmm8,96(%rsi)
628	movdqu	96(%rdi),%xmm8
629	movups	%xmm9,112(%rsi)
630	leaq	128(%rsi),%rsi
631	movdqu	112(%rdi),%xmm9
632	leaq	128(%rdi),%rdi
633.Lecb_dec_loop8_enter:
634
635	call	_aesni_decrypt8
636
637	movups	(%r11),%xmm0	# xmm0 = first round key again (the helpers also load it on entry)
638	subq	$128,%rdx	# carry clear means another full batch is available
639	jnc	.Lecb_dec_loop8
640
641	movups	%xmm2,(%rsi)	# flush the last full batch
642	movq	%r11,%rcx
643	movups	%xmm3,16(%rsi)
644	movl	%r10d,%eax
645	movups	%xmm4,32(%rsi)
646	movups	%xmm5,48(%rsi)
647	movups	%xmm6,64(%rsi)
648	movups	%xmm7,80(%rsi)
649	movups	%xmm8,96(%rsi)
650	movups	%xmm9,112(%rsi)
651	leaq	128(%rsi),%rsi
652	addq	$128,%rdx	# undo the last subtraction: rdx = leftover bytes
653	jz	.Lecb_ret
654
# Tail dispatch for one to seven leftover blocks (flags from each cmpq are
# reused across the intervening movups).
655.Lecb_dec_tail:
656	movups	(%rdi),%xmm2
657	cmpq	$32,%rdx
658	jb	.Lecb_dec_one
659	movups	16(%rdi),%xmm3
660	je	.Lecb_dec_two
661	movups	32(%rdi),%xmm4
662	cmpq	$64,%rdx
663	jb	.Lecb_dec_three
664	movups	48(%rdi),%xmm5
665	je	.Lecb_dec_four
666	movups	64(%rdi),%xmm6
667	cmpq	$96,%rdx
668	jb	.Lecb_dec_five
669	movups	80(%rdi),%xmm7
670	je	.Lecb_dec_six
671	movups	96(%rdi),%xmm8	# seven blocks: the eighth lane is not loaded or stored
672	movups	(%rcx),%xmm0
673	call	_aesni_decrypt8
674	movups	%xmm2,(%rsi)
675	movups	%xmm3,16(%rsi)
676	movups	%xmm4,32(%rsi)
677	movups	%xmm5,48(%rsi)
678	movups	%xmm6,64(%rsi)
679	movups	%xmm7,80(%rsi)
680	movups	%xmm8,96(%rsi)
681	jmp	.Lecb_ret
682.align	16
# Single block: inline round loop, same shape as the one-block routine.
683.Lecb_dec_one:
684	movups	(%rcx),%xmm0
685	movups	16(%rcx),%xmm1
686	leaq	32(%rcx),%rcx
687	xorps	%xmm0,%xmm2
688.Loop_dec1_4:
689.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2
690	decl	%eax
691	movups	(%rcx),%xmm1
692	leaq	16(%rcx),%rcx
693	jnz	.Loop_dec1_4
694.byte	102,15,56,223,209	# aesdeclast %xmm1,%xmm2
695	movups	%xmm2,(%rsi)
696	jmp	.Lecb_ret
697.align	16
698.Lecb_dec_two:
699	xorps	%xmm4,%xmm4	# zero the unused third lane; its result is discarded
700	call	_aesni_decrypt3
701	movups	%xmm2,(%rsi)
702	movups	%xmm3,16(%rsi)
703	jmp	.Lecb_ret
704.align	16
705.Lecb_dec_three:
706	call	_aesni_decrypt3
707	movups	%xmm2,(%rsi)
708	movups	%xmm3,16(%rsi)
709	movups	%xmm4,32(%rsi)
710	jmp	.Lecb_ret
711.align	16
712.Lecb_dec_four:
713	call	_aesni_decrypt4
714	movups	%xmm2,(%rsi)
715	movups	%xmm3,16(%rsi)
716	movups	%xmm4,32(%rsi)
717	movups	%xmm5,48(%rsi)
718	jmp	.Lecb_ret
719.align	16
720.Lecb_dec_five:
721	xorps	%xmm7,%xmm7	# zero the unused sixth lane; its result is discarded
722	call	_aesni_decrypt6
723	movups	%xmm2,(%rsi)
724	movups	%xmm3,16(%rsi)
725	movups	%xmm4,32(%rsi)
726	movups	%xmm5,48(%rsi)
727	movups	%xmm6,64(%rsi)
728	jmp	.Lecb_ret
729.align	16
730.Lecb_dec_six:
731	call	_aesni_decrypt6
732	movups	%xmm2,(%rsi)
733	movups	%xmm3,16(%rsi)
734	movups	%xmm4,32(%rsi)
735	movups	%xmm5,48(%rsi)
736	movups	%xmm6,64(%rsi)
737	movups	%xmm7,80(%rsi)
738
739.Lecb_ret:
740	.byte	0xf3,0xc3	# rep ret
741.size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
# aesni_ccm64_encrypt_blocks: for each 16-byte input block, encrypt a counter
# block and a running 16-byte accumulator in two parallel lanes, XOR the
# encrypted counter into the input to form the output, and step the counter.
# Register use as seen in the code below:
#   %rdi  input pointer (advanced 16 bytes per block)
#   %rsi  output pointer (advanced 16 bytes per block)
#   %rdx  number of blocks (decremented once per block)
#   %rcx  round-key base (loop count read from 240(%rcx))
#   %r8   address of the 16-byte counter block (read only here)
#   %r9   address of the 16-byte accumulator (read at entry, written at exit)
# Internal: %xmm2 = counter lane, %xmm3 = accumulator lane, %xmm9 = counter
# kept in shuffled form for the paddq, %xmm6/%xmm7 = the .Lincrement64 and
# .Lbswap_mask constants, which are defined outside this block.
# NOTE(review): the loop is do-while shaped, so a block count of zero is not
# handled here - confirm callers never pass zero.
# NOTE(review): the name suggests the accumulator is the CCM CBC-MAC state
# and the counter has a 64-bit field - confirm against the C caller.
742.globl	aesni_ccm64_encrypt_blocks
743.type	aesni_ccm64_encrypt_blocks,@function
744.align	16
745aesni_ccm64_encrypt_blocks:
746	movl	240(%rcx),%eax
747	movdqu	(%r8),%xmm9	# xmm9 = counter block as stored in memory
748	movdqa	.Lincrement64(%rip),%xmm6
749	movdqa	.Lbswap_mask(%rip),%xmm7
750
751	shrl	$1,%eax	# two round keys are consumed per inner-loop pass
752	leaq	0(%rcx),%r11	# r11 = saved key base
753	movdqu	(%r9),%xmm3	# xmm3 = accumulator
754	movdqa	%xmm9,%xmm2	# xmm2 = counter in memory byte order (first lane input)
755	movl	%eax,%r10d	# r10d = saved (halved) loop count
756.byte	102,68,15,56,0,207	# pshufb %xmm7,%xmm9
757	jmp	.Lccm64_enc_outer
758.align	16
759.Lccm64_enc_outer:
760	movups	(%r11),%xmm0	# xmm0 = round key at offset 0
761	movl	%r10d,%eax
762	movups	(%rdi),%xmm8	# xmm8 = input block
763
764	xorps	%xmm0,%xmm2	# counter lane whitening
765	movups	16(%r11),%xmm1
766	xorps	%xmm8,%xmm0	# xmm0 = first round key ^ input
767	leaq	32(%r11),%rcx
768	xorps	%xmm0,%xmm3	# accumulator ^= input, plus whitening, in one XOR
769	movups	(%rcx),%xmm0
770
771.Lccm64_enc2_loop:
772.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
773	decl	%eax	# ZF stays live until the jnz at the loop bottom
774.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
775	movups	16(%rcx),%xmm1
776.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
777	leaq	32(%rcx),%rcx
778.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
779	movups	0(%rcx),%xmm0
780	jnz	.Lccm64_enc2_loop
781.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
782.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
783	paddq	%xmm6,%xmm9	# step the shuffled counter
784.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
785.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
786
787	decq	%rdx	# ZF consumed by the jnz below; nothing in between writes flags
788	leaq	16(%rdi),%rdi
789	xorps	%xmm2,%xmm8	# output = input ^ encrypted counter
790	movdqa	%xmm9,%xmm2	# next counter lane input ...
791	movups	%xmm8,(%rsi)
792	leaq	16(%rsi),%rsi
793.byte	102,15,56,0,215	# pshufb %xmm7,%xmm2  (... shuffled back)
794	jnz	.Lccm64_enc_outer
795
796	movups	%xmm3,(%r9)	# write the accumulator back
797	.byte	0xf3,0xc3	# rep ret
798.size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
# aesni_ccm64_decrypt_blocks: counterpart of the ccm64 encrypt routine.  The
# accumulator must absorb the recovered output block, so the first counter
# block is encrypted alone up front; afterwards each outer pass encrypts
# the next counter together with (accumulator ^ block just recovered).
# Register use as seen in the code below:
#   %rdi  input pointer (advanced 16 bytes per block)
#   %rsi  output pointer (advanced 16 bytes per block)
#   %rdx  number of blocks (decremented once per block)
#   %rcx  round-key base (loop count read from 240(%rcx))
#   %r8   address of the 16-byte counter block (read only here)
#   %r9   address of the 16-byte accumulator (read at entry, written at exit)
# Internal: %r11 = saved key base, %r10d = saved un-halved loop count,
# %xmm2 = counter lane, %xmm3 = accumulator lane, %xmm9 = counter kept in
# shuffled form, %xmm6/%xmm7 = the .Lincrement64 and .Lbswap_mask constants,
# which are defined outside this block.  All encryption here uses
# aesenc/aesenclast; no aesdec appears in this routine.
# NOTE(review): a block count of zero is not handled (one block is always
# read and written before the count is tested) - confirm callers.
799.globl	aesni_ccm64_decrypt_blocks
800.type	aesni_ccm64_decrypt_blocks,@function
801.align	16
802aesni_ccm64_decrypt_blocks:
803	movl	240(%rcx),%eax
804	movups	(%r8),%xmm9	# xmm9 = counter block as stored in memory
805	movdqu	(%r9),%xmm3	# xmm3 = accumulator
806	movdqa	.Lincrement64(%rip),%xmm6
807	movdqa	.Lbswap_mask(%rip),%xmm7
808
809	movaps	%xmm9,%xmm2	# xmm2 = counter in memory byte order
810	movl	%eax,%r10d	# r10d = saved loop count (not halved)
811	movq	%rcx,%r11	# r11 = saved key base
812.byte	102,68,15,56,0,207	# pshufb %xmm7,%xmm9
# Encrypt the first counter block on its own.
813	movups	(%rcx),%xmm0
814	movups	16(%rcx),%xmm1
815	leaq	32(%rcx),%rcx
816	xorps	%xmm0,%xmm2
817.Loop_enc1_5:
818.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
819	decl	%eax
820	movups	(%rcx),%xmm1
821	leaq	16(%rcx),%rcx
822	jnz	.Loop_enc1_5
823.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
824	movups	(%rdi),%xmm8	# xmm8 = first input block
825	paddq	%xmm6,%xmm9	# step the shuffled counter
826	leaq	16(%rdi),%rdi
827	jmp	.Lccm64_dec_outer
828.align	16
829.Lccm64_dec_outer:
830	xorps	%xmm2,%xmm8	# output = input ^ encrypted counter
831	movdqa	%xmm9,%xmm2	# next counter lane input ...
832	movl	%r10d,%eax	# restore the un-halved loop count
833	movups	%xmm8,(%rsi)
834	leaq	16(%rsi),%rsi
835.byte	102,15,56,0,215	# pshufb %xmm7,%xmm2  (... shuffled back)
836
837	subq	$1,%rdx
838	jz	.Lccm64_dec_break	# last block: only the accumulator is left to update
839
840	movups	(%r11),%xmm0
841	shrl	$1,%eax	# two round keys are consumed per inner-loop pass
842	movups	16(%r11),%xmm1
843	xorps	%xmm0,%xmm8	# xmm8 = output block ^ first round key
844	leaq	32(%r11),%rcx
845	xorps	%xmm0,%xmm2	# counter lane whitening
846	xorps	%xmm8,%xmm3	# accumulator ^= output block, plus whitening
847	movups	(%rcx),%xmm0
848
849.Lccm64_dec2_loop:
850.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
851	decl	%eax	# ZF stays live until the jnz at the loop bottom
852.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
853	movups	16(%rcx),%xmm1
854.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
855	leaq	32(%rcx),%rcx
856.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
857	movups	0(%rcx),%xmm0
858	jnz	.Lccm64_dec2_loop
859	movups	(%rdi),%xmm8	# fetch the next input block during the final rounds
860	paddq	%xmm6,%xmm9	# step the shuffled counter
861.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
862.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
863	leaq	16(%rdi),%rdi
864.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
865.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
866	jmp	.Lccm64_dec_outer
867
868.align	16
# Final accumulator update: single-lane encryption of %xmm3, walking the
# round keys through %r11 with the un-halved count restored in the outer loop.
869.Lccm64_dec_break:
870
871	movups	(%r11),%xmm0
872	movups	16(%r11),%xmm1
873	xorps	%xmm0,%xmm8	# xmm8 = last output block ^ first round key
874	leaq	32(%r11),%r11
875	xorps	%xmm8,%xmm3	# accumulator ^= last output block, plus whitening
876.Loop_enc1_6:
877.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
878	decl	%eax
879	movups	(%r11),%xmm1
880	leaq	16(%r11),%r11
881	jnz	.Loop_enc1_6
882.byte	102,15,56,221,217	# aesenclast %xmm1,%xmm3
883	movups	%xmm3,(%r9)	# write the accumulator back
884	.byte	0xf3,0xc3	# rep ret
885.size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
# aesni_ctr32_encrypt_blocks: counter-mode keystream XOR in which only the
# last 32-bit word of the 16-byte counter block is stepped.
# Register use as seen in the code below:
#   %rdi  input pointer
#   %rsi  output pointer
#   %rdx  number of 16-byte blocks
#   %rcx  round-key base (loop count read from 240(%rcx))
#   %r8   address of the 16-byte counter block (read only here)
# Internal layout on the multi-block path:
#   %xmm14        counter block with its last dword cleared
#   %xmm12/%xmm13 two triples of consecutive counter values (dwords 0..2,
#                 dword 3 zero); unshuffled copies are kept at -40(%rsp)
#                 and -24(%rsp)
#   %xmm15        .Lbswap_mask; .Lincrement32 is loaded inside the loop.
#                 Both constants are defined outside this block.
#   %xmm2-%xmm7   six counter blocks / keystream lanes
#   %xmm8-%xmm11, %xmm1, %xmm0  input blocks being XORed
# The two stack slots sit below %rsp and %rsp is never adjusted; the 8-byte
# return address pushed by the helper calls lands at -8(%rsp) and does not
# overlap them.  movdqa needs those slots 16-byte aligned.
# NOTE(review): that alignment, and the safety of memory below %rsp, depend
# on the calling convention and environment - confirm for kernel use.
886.globl	aesni_ctr32_encrypt_blocks
887.type	aesni_ctr32_encrypt_blocks,@function
888.align	16
889aesni_ctr32_encrypt_blocks:
890	cmpq	$1,%rdx
891	je	.Lctr32_one_shortcut	# exactly one block: skip the counter setup
892
893	movdqu	(%r8),%xmm14
894	movdqa	.Lbswap_mask(%rip),%xmm15
895	xorl	%eax,%eax
896.byte	102,69,15,58,22,242,3	# pextrd $3,%xmm14,%r10d  (pull out the last dword)
897.byte	102,68,15,58,34,240,3	# pinsrd $3,%eax,%xmm14   (and clear it in xmm14)
898
899	movl	240(%rcx),%eax
900	bswapl	%r10d	# byte-reverse the counter word so it can be stepped with inc
901	pxor	%xmm12,%xmm12
902	pxor	%xmm13,%xmm13
903.byte	102,69,15,58,34,226,0	# pinsrd $0,%r10d,%xmm12  (counter + 0)
904	leaq	3(%r10),%r11
905.byte	102,69,15,58,34,235,0	# pinsrd $0,%r11d,%xmm13  (counter + 3)
906	incl	%r10d
907.byte	102,69,15,58,34,226,1	# pinsrd $1,%r10d,%xmm12  (counter + 1)
908	incq	%r11
909.byte	102,69,15,58,34,235,1	# pinsrd $1,%r11d,%xmm13  (counter + 4)
910	incl	%r10d
911.byte	102,69,15,58,34,226,2	# pinsrd $2,%r10d,%xmm12  (counter + 2)
912	incq	%r11
913.byte	102,69,15,58,34,235,2	# pinsrd $2,%r11d,%xmm13  (counter + 5)
914	movdqa	%xmm12,-40(%rsp)	# keep unshuffled copies for the per-pass paddd
915.byte	102,69,15,56,0,231	# pshufb %xmm15,%xmm12
916	movdqa	%xmm13,-24(%rsp)
917.byte	102,69,15,56,0,239	# pshufb %xmm15,%xmm13
918
# Each pshufd copies one source dword into dword 3 and source dword 0 into
# the other three positions; the por with %xmm14 later fills in the rest of
# the counter block.
# NOTE(review): this relies on the shuffle mask reversing all 16 bytes, so
# that source dword 0 is zero afterwards - confirm at the mask definition.
919	pshufd	$192,%xmm12,%xmm2
920	pshufd	$128,%xmm12,%xmm3
921	pshufd	$64,%xmm12,%xmm4
922	cmpq	$6,%rdx
923	jb	.Lctr32_tail
924	shrl	$1,%eax	# two round keys are consumed per inner-loop pass
925	movq	%rcx,%r11	# r11 = saved key base (r11 is free again)
926	movl	%eax,%r10d	# r10d = saved halved loop count
927	subq	$6,%rdx	# rdx = blocks left beyond the six about to be processed
928	jmp	.Lctr32_loop6
929
930.align	16
# Main loop: six blocks per pass, round loop inlined.
931.Lctr32_loop6:
932	pshufd	$192,%xmm13,%xmm5
933	por	%xmm14,%xmm2
934	movups	(%r11),%xmm0
935	pshufd	$128,%xmm13,%xmm6
936	por	%xmm14,%xmm3
937	movups	16(%r11),%xmm1
938	pshufd	$64,%xmm13,%xmm7
939	por	%xmm14,%xmm4
940	por	%xmm14,%xmm5
941	xorps	%xmm0,%xmm2
942	por	%xmm14,%xmm6
943	por	%xmm14,%xmm7
944
945
946
947
948	pxor	%xmm0,%xmm3
949.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
950	leaq	32(%r11),%rcx
951	pxor	%xmm0,%xmm4
952.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
953	movdqa	.Lincrement32(%rip),%xmm13	# xmm13 reused as the increment vector
954	pxor	%xmm0,%xmm5
955.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
956	movdqa	-40(%rsp),%xmm12	# reload the unshuffled first triple
957	pxor	%xmm0,%xmm6
958.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
959	pxor	%xmm0,%xmm7
960	movups	(%rcx),%xmm0
961	decl	%eax	# ZF feeds the first jnz after the mid-loop entry
962.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
963.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
964	jmp	.Lctr32_enc_loop6_enter
965.align	16
966.Lctr32_enc_loop6:
967.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
968.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
969	decl	%eax	# ZF stays live until the jnz at the loop bottom
970.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
971.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
972.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
973.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
974.Lctr32_enc_loop6_enter:
975	movups	16(%rcx),%xmm1
976.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
977.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
978	leaq	32(%rcx),%rcx
979.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
980.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
981.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
982.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
983	movups	(%rcx),%xmm0
984	jnz	.Lctr32_enc_loop6
985
# Last two rounds, interleaved with stepping both counter triples and with
# loading the six input blocks.
986.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
987	paddd	%xmm13,%xmm12	# first triple += increment
988.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
989	paddd	-24(%rsp),%xmm13	# xmm13 = increment + saved second triple
990.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
991	movdqa	%xmm12,-40(%rsp)
992.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
993	movdqa	%xmm13,-24(%rsp)
994.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
995.byte	102,69,15,56,0,231	# pshufb %xmm15,%xmm12
996.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
997.byte	102,69,15,56,0,239	# pshufb %xmm15,%xmm13
998
999.byte	102,15,56,221,208	# aesenclast %xmm0,%xmm2
1000	movups	(%rdi),%xmm8
1001.byte	102,15,56,221,216	# aesenclast %xmm0,%xmm3
1002	movups	16(%rdi),%xmm9
1003.byte	102,15,56,221,224	# aesenclast %xmm0,%xmm4
1004	movups	32(%rdi),%xmm10
1005.byte	102,15,56,221,232	# aesenclast %xmm0,%xmm5
1006	movups	48(%rdi),%xmm11
1007.byte	102,15,56,221,240	# aesenclast %xmm0,%xmm6
1008	movups	64(%rdi),%xmm1	# round-key registers are free now; reuse for input
1009.byte	102,15,56,221,248	# aesenclast %xmm0,%xmm7
1010	movups	80(%rdi),%xmm0
1011	leaq	96(%rdi),%rdi
1012
1013	xorps	%xmm2,%xmm8	# output = input ^ keystream; next counters are rebuilt alongside
1014	pshufd	$192,%xmm12,%xmm2
1015	xorps	%xmm3,%xmm9
1016	pshufd	$128,%xmm12,%xmm3
1017	movups	%xmm8,(%rsi)
1018	xorps	%xmm4,%xmm10
1019	pshufd	$64,%xmm12,%xmm4
1020	movups	%xmm9,16(%rsi)
1021	xorps	%xmm5,%xmm11
1022	movups	%xmm10,32(%rsi)
1023	xorps	%xmm6,%xmm1
1024	movups	%xmm11,48(%rsi)
1025	xorps	%xmm7,%xmm0
1026	movups	%xmm1,64(%rsi)
1027	movups	%xmm0,80(%rsi)
1028	leaq	96(%rsi),%rsi
1029	movl	%r10d,%eax	# restore the halved loop count
1030	subq	$6,%rdx	# carry clear means six more blocks are available
1031	jnc	.Lctr32_loop6
1032
1033	addq	$6,%rdx	# undo the last subtraction: rdx = leftover blocks
1034	jz	.Lctr32_done
1035	movq	%r11,%rcx
1036	leal	1(%rax,%rax,1),%eax	# eax = 2*eax+1: un-halve, since the tail helpers halve it again
# NOTE(review): 2*eax+1 only reproduces the original count when that count
# was odd - confirm the values stored at offset 240.
1037
# Tail: one to five blocks.  Flags from each cmpq are reused by the je that
# follows the por/movups pair, since neither of those writes flags.
1038.Lctr32_tail:
1039	por	%xmm14,%xmm2
1040	movups	(%rdi),%xmm8
1041	cmpq	$2,%rdx
1042	jb	.Lctr32_one
1043
1044	por	%xmm14,%xmm3
1045	movups	16(%rdi),%xmm9
1046	je	.Lctr32_two
1047
1048	pshufd	$192,%xmm13,%xmm5
1049	por	%xmm14,%xmm4
1050	movups	32(%rdi),%xmm10
1051	cmpq	$4,%rdx
1052	jb	.Lctr32_three
1053
1054	pshufd	$128,%xmm13,%xmm6
1055	por	%xmm14,%xmm5
1056	movups	48(%rdi),%xmm11
1057	je	.Lctr32_four
1058
1059	por	%xmm14,%xmm6
1060	xorps	%xmm7,%xmm7	# five blocks: zero the unused sixth lane
1061
1062	call	_aesni_encrypt6
1063
1064	movups	64(%rdi),%xmm1
1065	xorps	%xmm2,%xmm8
1066	xorps	%xmm3,%xmm9
1067	movups	%xmm8,(%rsi)
1068	xorps	%xmm4,%xmm10
1069	movups	%xmm9,16(%rsi)
1070	xorps	%xmm5,%xmm11
1071	movups	%xmm10,32(%rsi)
1072	xorps	%xmm6,%xmm1
1073	movups	%xmm11,48(%rsi)
1074	movups	%xmm1,64(%rsi)
1075	jmp	.Lctr32_done
1076
1077.align	16
# One-block shortcut: the counter block is used exactly as stored at (%r8).
1078.Lctr32_one_shortcut:
1079	movups	(%r8),%xmm2
1080	movups	(%rdi),%xmm8
1081	movl	240(%rcx),%eax
1082.Lctr32_one:
1083	movups	(%rcx),%xmm0
1084	movups	16(%rcx),%xmm1
1085	leaq	32(%rcx),%rcx
1086	xorps	%xmm0,%xmm2
1087.Loop_enc1_7:
1088.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
1089	decl	%eax
1090	movups	(%rcx),%xmm1
1091	leaq	16(%rcx),%rcx
1092	jnz	.Loop_enc1_7
1093.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
1094	xorps	%xmm2,%xmm8
1095	movups	%xmm8,(%rsi)
1096	jmp	.Lctr32_done
1097
1098.align	16
1099.Lctr32_two:
1100	xorps	%xmm4,%xmm4	# zero the unused third lane
1101	call	_aesni_encrypt3
1102	xorps	%xmm2,%xmm8
1103	xorps	%xmm3,%xmm9
1104	movups	%xmm8,(%rsi)
1105	movups	%xmm9,16(%rsi)
1106	jmp	.Lctr32_done
1107
1108.align	16
1109.Lctr32_three:
1110	call	_aesni_encrypt3
1111	xorps	%xmm2,%xmm8
1112	xorps	%xmm3,%xmm9
1113	movups	%xmm8,(%rsi)
1114	xorps	%xmm4,%xmm10
1115	movups	%xmm9,16(%rsi)
1116	movups	%xmm10,32(%rsi)
1117	jmp	.Lctr32_done
1118
1119.align	16
1120.Lctr32_four:
1121	call	_aesni_encrypt4
1122	xorps	%xmm2,%xmm8
1123	xorps	%xmm3,%xmm9
1124	movups	%xmm8,(%rsi)
1125	xorps	%xmm4,%xmm10
1126	movups	%xmm9,16(%rsi)
1127	xorps	%xmm5,%xmm11
1128	movups	%xmm10,32(%rsi)
1129	movups	%xmm11,48(%rsi)
1130
1131.Lctr32_done:
1132	.byte	0xf3,0xc3	# rep ret
1133.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
1134.globl	aesni_xts_encrypt
1135.type	aesni_xts_encrypt,@function
1136.align	16
1137aesni_xts_encrypt:
1138	leaq	-104(%rsp),%rsp
1139	movups	(%r9),%xmm15
1140	movl	240(%r8),%eax
1141	movl	240(%rcx),%r10d
1142	movups	(%r8),%xmm0
1143	movups	16(%r8),%xmm1
1144	leaq	32(%r8),%r8
1145	xorps	%xmm0,%xmm15
1146.Loop_enc1_8:
1147.byte	102,68,15,56,220,249
1148	decl	%eax
1149	movups	(%r8),%xmm1
1150	leaq	16(%r8),%r8
1151	jnz	.Loop_enc1_8
1152.byte	102,68,15,56,221,249
1153	movq	%rcx,%r11
1154	movl	%r10d,%eax
1155	movq	%rdx,%r9
1156	andq	$-16,%rdx
1157
1158	movdqa	.Lxts_magic(%rip),%xmm8
1159	pxor	%xmm14,%xmm14
1160	pcmpgtd	%xmm15,%xmm14
1161	pshufd	$19,%xmm14,%xmm9
1162	pxor	%xmm14,%xmm14
1163	movdqa	%xmm15,%xmm10
1164	paddq	%xmm15,%xmm15
1165	pand	%xmm8,%xmm9
1166	pcmpgtd	%xmm15,%xmm14
1167	pxor	%xmm9,%xmm15
1168	pshufd	$19,%xmm14,%xmm9
1169	pxor	%xmm14,%xmm14
1170	movdqa	%xmm15,%xmm11
1171	paddq	%xmm15,%xmm15
1172	pand	%xmm8,%xmm9
1173	pcmpgtd	%xmm15,%xmm14
1174	pxor	%xmm9,%xmm15
1175	pshufd	$19,%xmm14,%xmm9
1176	pxor	%xmm14,%xmm14
1177	movdqa	%xmm15,%xmm12
1178	paddq	%xmm15,%xmm15
1179	pand	%xmm8,%xmm9
1180	pcmpgtd	%xmm15,%xmm14
1181	pxor	%xmm9,%xmm15
1182	pshufd	$19,%xmm14,%xmm9
1183	pxor	%xmm14,%xmm14
1184	movdqa	%xmm15,%xmm13
1185	paddq	%xmm15,%xmm15
1186	pand	%xmm8,%xmm9
1187	pcmpgtd	%xmm15,%xmm14
1188	pxor	%xmm9,%xmm15
1189	subq	$96,%rdx
1190	jc	.Lxts_enc_short
1191
1192	shrl	$1,%eax
1193	subl	$1,%eax
1194	movl	%eax,%r10d
1195	jmp	.Lxts_enc_grandloop
1196
1197.align	16
1198.Lxts_enc_grandloop:
1199	pshufd	$19,%xmm14,%xmm9
1200	movdqa	%xmm15,%xmm14
1201	paddq	%xmm15,%xmm15
1202	movdqu	0(%rdi),%xmm2
1203	pand	%xmm8,%xmm9
1204	movdqu	16(%rdi),%xmm3
1205	pxor	%xmm9,%xmm15
1206
1207	movdqu	32(%rdi),%xmm4
1208	pxor	%xmm10,%xmm2
1209	movdqu	48(%rdi),%xmm5
1210	pxor	%xmm11,%xmm3
1211	movdqu	64(%rdi),%xmm6
1212	pxor	%xmm12,%xmm4
1213	movdqu	80(%rdi),%xmm7
1214	leaq	96(%rdi),%rdi
1215	pxor	%xmm13,%xmm5
1216	movups	(%r11),%xmm0
1217	pxor	%xmm14,%xmm6
1218	pxor	%xmm15,%xmm7
1219
1220
1221
1222	movups	16(%r11),%xmm1
1223	pxor	%xmm0,%xmm2
1224	pxor	%xmm0,%xmm3
1225	movdqa	%xmm10,0(%rsp)
1226.byte	102,15,56,220,209
1227	leaq	32(%r11),%rcx
1228	pxor	%xmm0,%xmm4
1229	movdqa	%xmm11,16(%rsp)
1230.byte	102,15,56,220,217
1231	pxor	%xmm0,%xmm5
1232	movdqa	%xmm12,32(%rsp)
1233.byte	102,15,56,220,225
1234	pxor	%xmm0,%xmm6
1235	movdqa	%xmm13,48(%rsp)
1236.byte	102,15,56,220,233
1237	pxor	%xmm0,%xmm7
1238	movups	(%rcx),%xmm0
1239	decl	%eax
1240	movdqa	%xmm14,64(%rsp)
1241.byte	102,15,56,220,241
1242	movdqa	%xmm15,80(%rsp)
1243.byte	102,15,56,220,249
1244	pxor	%xmm14,%xmm14
1245	pcmpgtd	%xmm15,%xmm14
1246	jmp	.Lxts_enc_loop6_enter
1247
1248.align	16
1249.Lxts_enc_loop6:
1250.byte	102,15,56,220,209
1251.byte	102,15,56,220,217
1252	decl	%eax
1253.byte	102,15,56,220,225
1254.byte	102,15,56,220,233
1255.byte	102,15,56,220,241
1256.byte	102,15,56,220,249
1257.Lxts_enc_loop6_enter:
1258	movups	16(%rcx),%xmm1
1259.byte	102,15,56,220,208
1260.byte	102,15,56,220,216
1261	leaq	32(%rcx),%rcx
1262.byte	102,15,56,220,224
1263.byte	102,15,56,220,232
1264.byte	102,15,56,220,240
1265.byte	102,15,56,220,248
1266	movups	(%rcx),%xmm0
1267	jnz	.Lxts_enc_loop6
1268
1269	pshufd	$19,%xmm14,%xmm9
1270	pxor	%xmm14,%xmm14
1271	paddq	%xmm15,%xmm15
1272.byte	102,15,56,220,209
1273	pand	%xmm8,%xmm9
1274.byte	102,15,56,220,217
1275	pcmpgtd	%xmm15,%xmm14
1276.byte	102,15,56,220,225
1277	pxor	%xmm9,%xmm15
1278.byte	102,15,56,220,233
1279.byte	102,15,56,220,241
1280.byte	102,15,56,220,249
1281	movups	16(%rcx),%xmm1
1282
1283	pshufd	$19,%xmm14,%xmm9
1284	pxor	%xmm14,%xmm14
1285	movdqa	%xmm15,%xmm10
1286	paddq	%xmm15,%xmm15
1287.byte	102,15,56,220,208
1288	pand	%xmm8,%xmm9
1289.byte	102,15,56,220,216
1290	pcmpgtd	%xmm15,%xmm14
1291.byte	102,15,56,220,224
1292	pxor	%xmm9,%xmm15
1293.byte	102,15,56,220,232
1294.byte	102,15,56,220,240
1295.byte	102,15,56,220,248
1296	movups	32(%rcx),%xmm0
1297
1298	pshufd	$19,%xmm14,%xmm9
1299	pxor	%xmm14,%xmm14
1300	movdqa	%xmm15,%xmm11
1301	paddq	%xmm15,%xmm15
1302.byte	102,15,56,220,209
1303	pand	%xmm8,%xmm9
1304.byte	102,15,56,220,217
1305	pcmpgtd	%xmm15,%xmm14
1306.byte	102,15,56,220,225
1307	pxor	%xmm9,%xmm15
1308.byte	102,15,56,220,233
1309.byte	102,15,56,220,241
1310.byte	102,15,56,220,249
1311
1312	pshufd	$19,%xmm14,%xmm9
1313	pxor	%xmm14,%xmm14
1314	movdqa	%xmm15,%xmm12
1315	paddq	%xmm15,%xmm15
1316.byte	102,15,56,221,208
1317	pand	%xmm8,%xmm9
1318.byte	102,15,56,221,216
1319	pcmpgtd	%xmm15,%xmm14
1320.byte	102,15,56,221,224
1321	pxor	%xmm9,%xmm15
1322.byte	102,15,56,221,232
1323.byte	102,15,56,221,240
1324.byte	102,15,56,221,248
1325
1326	pshufd	$19,%xmm14,%xmm9
1327	pxor	%xmm14,%xmm14
1328	movdqa	%xmm15,%xmm13
1329	paddq	%xmm15,%xmm15
1330	xorps	0(%rsp),%xmm2
1331	pand	%xmm8,%xmm9
1332	xorps	16(%rsp),%xmm3
1333	pcmpgtd	%xmm15,%xmm14
1334	pxor	%xmm9,%xmm15
1335
1336	xorps	32(%rsp),%xmm4
1337	movups	%xmm2,0(%rsi)
1338	xorps	48(%rsp),%xmm5
1339	movups	%xmm3,16(%rsi)
1340	xorps	64(%rsp),%xmm6
1341	movups	%xmm4,32(%rsi)
1342	xorps	80(%rsp),%xmm7
1343	movups	%xmm5,48(%rsi)
1344	movl	%r10d,%eax
1345	movups	%xmm6,64(%rsi)
1346	movups	%xmm7,80(%rsi)
1347	leaq	96(%rsi),%rsi
1348	subq	$96,%rdx
1349	jnc	.Lxts_enc_grandloop
1350
1351	leal	3(%rax,%rax,1),%eax
1352	movq	%r11,%rcx
1353	movl	%eax,%r10d
1354
1355.Lxts_enc_short:
1356	addq	$96,%rdx
1357	jz	.Lxts_enc_done
1358
1359	cmpq	$32,%rdx
1360	jb	.Lxts_enc_one
1361	je	.Lxts_enc_two
1362
1363	cmpq	$64,%rdx
1364	jb	.Lxts_enc_three
1365	je	.Lxts_enc_four
1366
1367	pshufd	$19,%xmm14,%xmm9
1368	movdqa	%xmm15,%xmm14
1369	paddq	%xmm15,%xmm15
1370	movdqu	(%rdi),%xmm2
1371	pand	%xmm8,%xmm9
1372	movdqu	16(%rdi),%xmm3
1373	pxor	%xmm9,%xmm15
1374
1375	movdqu	32(%rdi),%xmm4
1376	pxor	%xmm10,%xmm2
1377	movdqu	48(%rdi),%xmm5
1378	pxor	%xmm11,%xmm3
1379	movdqu	64(%rdi),%xmm6
1380	leaq	80(%rdi),%rdi
1381	pxor	%xmm12,%xmm4
1382	pxor	%xmm13,%xmm5
1383	pxor	%xmm14,%xmm6
1384
1385	call	_aesni_encrypt6
1386
1387	xorps	%xmm10,%xmm2
1388	movdqa	%xmm15,%xmm10
1389	xorps	%xmm11,%xmm3
1390	xorps	%xmm12,%xmm4
1391	movdqu	%xmm2,(%rsi)
1392	xorps	%xmm13,%xmm5
1393	movdqu	%xmm3,16(%rsi)
1394	xorps	%xmm14,%xmm6
1395	movdqu	%xmm4,32(%rsi)
1396	movdqu	%xmm5,48(%rsi)
1397	movdqu	%xmm6,64(%rsi)
1398	leaq	80(%rsi),%rsi
1399	jmp	.Lxts_enc_done
1400
1401.align	16
1402.Lxts_enc_one:
1403	movups	(%rdi),%xmm2
1404	leaq	16(%rdi),%rdi
1405	xorps	%xmm10,%xmm2
1406	movups	(%rcx),%xmm0
1407	movups	16(%rcx),%xmm1
1408	leaq	32(%rcx),%rcx
1409	xorps	%xmm0,%xmm2
1410.Loop_enc1_9:
1411.byte	102,15,56,220,209
1412	decl	%eax
1413	movups	(%rcx),%xmm1
1414	leaq	16(%rcx),%rcx
1415	jnz	.Loop_enc1_9
1416.byte	102,15,56,221,209
1417	xorps	%xmm10,%xmm2
1418	movdqa	%xmm11,%xmm10
1419	movups	%xmm2,(%rsi)
1420	leaq	16(%rsi),%rsi
1421	jmp	.Lxts_enc_done
1422
1423.align	16
1424.Lxts_enc_two:
1425	movups	(%rdi),%xmm2
1426	movups	16(%rdi),%xmm3
1427	leaq	32(%rdi),%rdi
1428	xorps	%xmm10,%xmm2
1429	xorps	%xmm11,%xmm3
1430
1431	call	_aesni_encrypt3
1432
1433	xorps	%xmm10,%xmm2
1434	movdqa	%xmm12,%xmm10
1435	xorps	%xmm11,%xmm3
1436	movups	%xmm2,(%rsi)
1437	movups	%xmm3,16(%rsi)
1438	leaq	32(%rsi),%rsi
1439	jmp	.Lxts_enc_done
1440
1441.align	16
1442.Lxts_enc_three:
1443	movups	(%rdi),%xmm2
1444	movups	16(%rdi),%xmm3
1445	movups	32(%rdi),%xmm4
1446	leaq	48(%rdi),%rdi
1447	xorps	%xmm10,%xmm2
1448	xorps	%xmm11,%xmm3
1449	xorps	%xmm12,%xmm4
1450
1451	call	_aesni_encrypt3
1452
1453	xorps	%xmm10,%xmm2
1454	movdqa	%xmm13,%xmm10
1455	xorps	%xmm11,%xmm3
1456	xorps	%xmm12,%xmm4
1457	movups	%xmm2,(%rsi)
1458	movups	%xmm3,16(%rsi)
1459	movups	%xmm4,32(%rsi)
1460	leaq	48(%rsi),%rsi
1461	jmp	.Lxts_enc_done
1462
1463.align	16
1464.Lxts_enc_four:
1465	movups	(%rdi),%xmm2
1466	movups	16(%rdi),%xmm3
1467	movups	32(%rdi),%xmm4
1468	xorps	%xmm10,%xmm2
1469	movups	48(%rdi),%xmm5
1470	leaq	64(%rdi),%rdi
1471	xorps	%xmm11,%xmm3
1472	xorps	%xmm12,%xmm4
1473	xorps	%xmm13,%xmm5
1474
1475	call	_aesni_encrypt4
1476
1477	xorps	%xmm10,%xmm2
1478	movdqa	%xmm15,%xmm10
1479	xorps	%xmm11,%xmm3
1480	xorps	%xmm12,%xmm4
1481	movups	%xmm2,(%rsi)
1482	xorps	%xmm13,%xmm5
1483	movups	%xmm3,16(%rsi)
1484	movups	%xmm4,32(%rsi)
1485	movups	%xmm5,48(%rsi)
1486	leaq	64(%rsi),%rsi
1487	jmp	.Lxts_enc_done
1488
1489.align	16
1490.Lxts_enc_done:
1491	andq	$15,%r9
1492	jz	.Lxts_enc_ret
1493	movq	%r9,%rdx
1494
1495.Lxts_enc_steal:
1496	movzbl	(%rdi),%eax
1497	movzbl	-16(%rsi),%ecx
1498	leaq	1(%rdi),%rdi
1499	movb	%al,-16(%rsi)
1500	movb	%cl,0(%rsi)
1501	leaq	1(%rsi),%rsi
1502	subq	$1,%rdx
1503	jnz	.Lxts_enc_steal
1504
1505	subq	%r9,%rsi
1506	movq	%r11,%rcx
1507	movl	%r10d,%eax
1508
1509	movups	-16(%rsi),%xmm2
1510	xorps	%xmm10,%xmm2
1511	movups	(%rcx),%xmm0
1512	movups	16(%rcx),%xmm1
1513	leaq	32(%rcx),%rcx
1514	xorps	%xmm0,%xmm2
1515.Loop_enc1_10:
1516.byte	102,15,56,220,209
1517	decl	%eax
1518	movups	(%rcx),%xmm1
1519	leaq	16(%rcx),%rcx
1520	jnz	.Loop_enc1_10
1521.byte	102,15,56,221,209
1522	xorps	%xmm10,%xmm2
1523	movups	%xmm2,-16(%rsi)
1524
1525.Lxts_enc_ret:
1526	leaq	104(%rsp),%rsp
1527.Lxts_enc_epilogue:
1528	.byte	0xf3,0xc3
1529.size	aesni_xts_encrypt,.-aesni_xts_encrypt
# aesni_xts_decrypt -- AES-XTS decryption with AES-NI (GAS syntax, x86-64).
#
# Registers as consumed by the code below:
#   %rdi  input pointer (advanced as blocks are read)
#   %rsi  output pointer (advanced as blocks are written)
#   %rdx  length in bytes
#   %rcx  key schedule applied to the data blocks with aesdec
#   %r8   key schedule used once, with aesenc, on the initial tweak
#   %r9   pointer to the 16-byte initial tweak input
# Each key schedule holds its round-loop count at byte offset 240.
#
# Working registers: %xmm10-%xmm13 and %xmm15 carry consecutive tweaks,
# %xmm14 carries the per-dword sign mask of %xmm15 (or a fifth tweak),
# %xmm8 is .Lxts_magic, %xmm9 is scratch, %r11 keeps the key pointer,
# %r10d keeps the loop count and %r9 later keeps the adjusted length.
# 104 bytes of stack are reserved; 0..95(%rsp) hold six tweaks per pass.
#
# AES-NI instructions are emitted as raw bytes:
#   102,15,56,222,N     aesdec          102,15,56,223,N     aesdeclast
#   102,68,15,56,220,249 / 221,249      aesenc / aesenclast %xmm1,%xmm15
#   N = 209,217,...,249 applies %xmm1 to %xmm2..%xmm7
#   N = 208,216,...,248 applies %xmm0 to %xmm2..%xmm7
#   0xf3,0xc3 is the two-byte return (rep ret)
#
# Tweak step used throughout: pcmpgtd against zero gives an all-ones
# dword wherever %xmm15 has its top bit set, pshufd $19 moves those
# masks to where the carries belong, pand keeps the .Lxts_magic bits
# (0x87 in dword 0, 1 in dword 2), paddq doubles both 64-bit halves and
# pxor folds the carries in.
1530.globl	aesni_xts_decrypt
1531.type	aesni_xts_decrypt,@function
1532.align	16
1533aesni_xts_decrypt:
	# Reserve the 104-byte frame used for the tweak spill area.
1534	leaq	-104(%rsp),%rsp
	# Encrypt the initial tweak in %xmm15 with the %r8 key schedule.
	# %eax counts rounds for this key, %r10d caches the count of the data key.
1535	movups	(%r9),%xmm15
1536	movl	240(%r8),%eax
1537	movl	240(%rcx),%r10d
1538	movups	(%r8),%xmm0
1539	movups	16(%r8),%xmm1
1540	leaq	32(%r8),%r8
1541	xorps	%xmm0,%xmm15
1542.Loop_enc1_11:
1543.byte	102,68,15,56,220,249
1544	decl	%eax
1545	movups	(%r8),%xmm1
1546	leaq	16(%r8),%r8
1547	jnz	.Loop_enc1_11
1548.byte	102,68,15,56,221,249
	# When the length is not a multiple of 16, take 16 off it so that one
	# whole block is held back for the stealing tail at .Lxts_dec_done.
1549	xorl	%eax,%eax
1550	testq	$15,%rdx
1551	setnz	%al
1552	shlq	$4,%rax
1553	subq	%rax,%rdx
1554
	# %r11 = key pointer, %eax = loop count, %r9 = adjusted length (its
	# low four bits are the tail size), %rdx = length rounded down to 16.
1555	movq	%rcx,%r11
1556	movl	%r10d,%eax
1557	movq	%rdx,%r9
1558	andq	$-16,%rdx
1559
	# Produce the first four tweaks in %xmm10..%xmm13; %xmm15 is left
	# holding the fifth and %xmm14 its sign mask.
1560	movdqa	.Lxts_magic(%rip),%xmm8
1561	pxor	%xmm14,%xmm14
1562	pcmpgtd	%xmm15,%xmm14
1563	pshufd	$19,%xmm14,%xmm9
1564	pxor	%xmm14,%xmm14
1565	movdqa	%xmm15,%xmm10
1566	paddq	%xmm15,%xmm15
1567	pand	%xmm8,%xmm9
1568	pcmpgtd	%xmm15,%xmm14
1569	pxor	%xmm9,%xmm15
1570	pshufd	$19,%xmm14,%xmm9
1571	pxor	%xmm14,%xmm14
1572	movdqa	%xmm15,%xmm11
1573	paddq	%xmm15,%xmm15
1574	pand	%xmm8,%xmm9
1575	pcmpgtd	%xmm15,%xmm14
1576	pxor	%xmm9,%xmm15
1577	pshufd	$19,%xmm14,%xmm9
1578	pxor	%xmm14,%xmm14
1579	movdqa	%xmm15,%xmm12
1580	paddq	%xmm15,%xmm15
1581	pand	%xmm8,%xmm9
1582	pcmpgtd	%xmm15,%xmm14
1583	pxor	%xmm9,%xmm15
1584	pshufd	$19,%xmm14,%xmm9
1585	pxor	%xmm14,%xmm14
1586	movdqa	%xmm15,%xmm13
1587	paddq	%xmm15,%xmm15
1588	pand	%xmm8,%xmm9
1589	pcmpgtd	%xmm15,%xmm14
1590	pxor	%xmm9,%xmm15
	# Fewer than 96 bytes of whole blocks: skip the six-block loop.
1591	subq	$96,%rdx
1592	jc	.Lxts_dec_short
1593
	# The six-block loop runs two rounds per pass, so it counts with
	# (count >> 1) - 1, cached in %r10d.
1594	shrl	$1,%eax
1595	subl	$1,%eax
1596	movl	%eax,%r10d
1597	jmp	.Lxts_dec_grandloop
1598
1599.align	16
	# Main loop: 96 bytes (six blocks) per pass.
1600.Lxts_dec_grandloop:
	# Finish the fifth tweak into %xmm14 and the sixth into %xmm15 while
	# loading the six input blocks and xoring each with its tweak.
1601	pshufd	$19,%xmm14,%xmm9
1602	movdqa	%xmm15,%xmm14
1603	paddq	%xmm15,%xmm15
1604	movdqu	0(%rdi),%xmm2
1605	pand	%xmm8,%xmm9
1606	movdqu	16(%rdi),%xmm3
1607	pxor	%xmm9,%xmm15
1608
1609	movdqu	32(%rdi),%xmm4
1610	pxor	%xmm10,%xmm2
1611	movdqu	48(%rdi),%xmm5
1612	pxor	%xmm11,%xmm3
1613	movdqu	64(%rdi),%xmm6
1614	pxor	%xmm12,%xmm4
1615	movdqu	80(%rdi),%xmm7
1616	leaq	96(%rdi),%rdi
1617	pxor	%xmm13,%xmm5
1618	movups	(%r11),%xmm0
1619	pxor	%xmm14,%xmm6
1620	pxor	%xmm15,%xmm7
1621
1622
1623
	# Round-key-0 xor and the first aesdec round, interleaved with
	# spilling the six tweaks to 0..80(%rsp) for the final xor.
1624	movups	16(%r11),%xmm1
1625	pxor	%xmm0,%xmm2
1626	pxor	%xmm0,%xmm3
1627	movdqa	%xmm10,0(%rsp)
1628.byte	102,15,56,222,209
1629	leaq	32(%r11),%rcx
1630	pxor	%xmm0,%xmm4
1631	movdqa	%xmm11,16(%rsp)
1632.byte	102,15,56,222,217
1633	pxor	%xmm0,%xmm5
1634	movdqa	%xmm12,32(%rsp)
1635.byte	102,15,56,222,225
1636	pxor	%xmm0,%xmm6
1637	movdqa	%xmm13,48(%rsp)
1638.byte	102,15,56,222,233
1639	pxor	%xmm0,%xmm7
1640	movups	(%rcx),%xmm0
	# The flags set by this decl are the ones tested by the jnz at the
	# bottom of .Lxts_dec_loop6; the SSE, AES and lea instructions in
	# between leave the flags untouched.
1641	decl	%eax
1642	movdqa	%xmm14,64(%rsp)
1643.byte	102,15,56,222,241
1644	movdqa	%xmm15,80(%rsp)
1645.byte	102,15,56,222,249
	# Sign mask of the sixth tweak, consumed after the round loop.
1646	pxor	%xmm14,%xmm14
1647	pcmpgtd	%xmm15,%xmm14
1648	jmp	.Lxts_dec_loop6_enter
1649
1650.align	16
	# Two rounds per pass: one with %xmm1, one with %xmm0, on all six lanes.
1651.Lxts_dec_loop6:
1652.byte	102,15,56,222,209
1653.byte	102,15,56,222,217
1654	decl	%eax
1655.byte	102,15,56,222,225
1656.byte	102,15,56,222,233
1657.byte	102,15,56,222,241
1658.byte	102,15,56,222,249
1659.Lxts_dec_loop6_enter:
1660	movups	16(%rcx),%xmm1
1661.byte	102,15,56,222,208
1662.byte	102,15,56,222,216
1663	leaq	32(%rcx),%rcx
1664.byte	102,15,56,222,224
1665.byte	102,15,56,222,232
1666.byte	102,15,56,222,240
1667.byte	102,15,56,222,248
1668	movups	(%rcx),%xmm0
1669	jnz	.Lxts_dec_loop6
1670
	# Remaining rounds, interleaved with tweak generation for the next
	# pass. This first step only advances %xmm15 past the sixth tweak
	# (already spilled at 80(%rsp)); no copy is kept.
1671	pshufd	$19,%xmm14,%xmm9
1672	pxor	%xmm14,%xmm14
1673	paddq	%xmm15,%xmm15
1674.byte	102,15,56,222,209
1675	pand	%xmm8,%xmm9
1676.byte	102,15,56,222,217
1677	pcmpgtd	%xmm15,%xmm14
1678.byte	102,15,56,222,225
1679	pxor	%xmm9,%xmm15
1680.byte	102,15,56,222,233
1681.byte	102,15,56,222,241
1682.byte	102,15,56,222,249
1683	movups	16(%rcx),%xmm1
1684
	# Next first tweak -> %xmm10.
1685	pshufd	$19,%xmm14,%xmm9
1686	pxor	%xmm14,%xmm14
1687	movdqa	%xmm15,%xmm10
1688	paddq	%xmm15,%xmm15
1689.byte	102,15,56,222,208
1690	pand	%xmm8,%xmm9
1691.byte	102,15,56,222,216
1692	pcmpgtd	%xmm15,%xmm14
1693.byte	102,15,56,222,224
1694	pxor	%xmm9,%xmm15
1695.byte	102,15,56,222,232
1696.byte	102,15,56,222,240
1697.byte	102,15,56,222,248
1698	movups	32(%rcx),%xmm0
1699
	# Next second tweak -> %xmm11.
1700	pshufd	$19,%xmm14,%xmm9
1701	pxor	%xmm14,%xmm14
1702	movdqa	%xmm15,%xmm11
1703	paddq	%xmm15,%xmm15
1704.byte	102,15,56,222,209
1705	pand	%xmm8,%xmm9
1706.byte	102,15,56,222,217
1707	pcmpgtd	%xmm15,%xmm14
1708.byte	102,15,56,222,225
1709	pxor	%xmm9,%xmm15
1710.byte	102,15,56,222,233
1711.byte	102,15,56,222,241
1712.byte	102,15,56,222,249
1713
	# Next third tweak -> %xmm12, alongside the aesdeclast round.
1714	pshufd	$19,%xmm14,%xmm9
1715	pxor	%xmm14,%xmm14
1716	movdqa	%xmm15,%xmm12
1717	paddq	%xmm15,%xmm15
1718.byte	102,15,56,223,208
1719	pand	%xmm8,%xmm9
1720.byte	102,15,56,223,216
1721	pcmpgtd	%xmm15,%xmm14
1722.byte	102,15,56,223,224
1723	pxor	%xmm9,%xmm15
1724.byte	102,15,56,223,232
1725.byte	102,15,56,223,240
1726.byte	102,15,56,223,248
1727
	# Next fourth tweak -> %xmm13, while the spilled tweaks of this pass
	# are xored onto the results and the six blocks are stored.
1728	pshufd	$19,%xmm14,%xmm9
1729	pxor	%xmm14,%xmm14
1730	movdqa	%xmm15,%xmm13
1731	paddq	%xmm15,%xmm15
1732	xorps	0(%rsp),%xmm2
1733	pand	%xmm8,%xmm9
1734	xorps	16(%rsp),%xmm3
1735	pcmpgtd	%xmm15,%xmm14
1736	pxor	%xmm9,%xmm15
1737
1738	xorps	32(%rsp),%xmm4
1739	movups	%xmm2,0(%rsi)
1740	xorps	48(%rsp),%xmm5
1741	movups	%xmm3,16(%rsi)
1742	xorps	64(%rsp),%xmm6
1743	movups	%xmm4,32(%rsi)
1744	xorps	80(%rsp),%xmm7
1745	movups	%xmm5,48(%rsi)
1746	movl	%r10d,%eax
1747	movups	%xmm6,64(%rsi)
1748	movups	%xmm7,80(%rsi)
1749	leaq	96(%rsi),%rsi
1750	subq	$96,%rdx
1751	jnc	.Lxts_dec_grandloop
1752
	# Undo the halving: 2*%eax+3 is the full loop count again. Restore
	# the key pointer and re-cache the count for the tail code.
1753	leal	3(%rax,%rax,1),%eax
1754	movq	%r11,%rcx
1755	movl	%eax,%r10d
1756
	# Tail: %rdx+96 is the number of whole-block bytes left (0..80).
	# Dispatch on 0, 16, 32, 48 or 64; 80 falls through to the five-block
	# code.
1757.Lxts_dec_short:
1758	addq	$96,%rdx
1759	jz	.Lxts_dec_done
1760
1761	cmpq	$32,%rdx
1762	jb	.Lxts_dec_one
1763	je	.Lxts_dec_two
1764
1765	cmpq	$64,%rdx
1766	jb	.Lxts_dec_three
1767	je	.Lxts_dec_four
1768
	# Five blocks: fifth tweak into %xmm14, following tweak into %xmm15.
	# %xmm7 is not loaded on this path and its result is not stored.
1769	pshufd	$19,%xmm14,%xmm9
1770	movdqa	%xmm15,%xmm14
1771	paddq	%xmm15,%xmm15
1772	movdqu	(%rdi),%xmm2
1773	pand	%xmm8,%xmm9
1774	movdqu	16(%rdi),%xmm3
1775	pxor	%xmm9,%xmm15
1776
1777	movdqu	32(%rdi),%xmm4
1778	pxor	%xmm10,%xmm2
1779	movdqu	48(%rdi),%xmm5
1780	pxor	%xmm11,%xmm3
1781	movdqu	64(%rdi),%xmm6
1782	leaq	80(%rdi),%rdi
1783	pxor	%xmm12,%xmm4
1784	pxor	%xmm13,%xmm5
1785	pxor	%xmm14,%xmm6
1786
	# _aesni_decrypt6 is defined elsewhere in this file.
1787	call	_aesni_decrypt6
1788
1789	xorps	%xmm10,%xmm2
1790	xorps	%xmm11,%xmm3
1791	xorps	%xmm12,%xmm4
1792	movdqu	%xmm2,(%rsi)
1793	xorps	%xmm13,%xmm5
1794	movdqu	%xmm3,16(%rsi)
1795	xorps	%xmm14,%xmm6
1796	movdqu	%xmm4,32(%rsi)
1797	pxor	%xmm14,%xmm14
1798	movdqu	%xmm5,48(%rsi)
1799	pcmpgtd	%xmm15,%xmm14
1800	movdqu	%xmm6,64(%rsi)
1801	leaq	80(%rsi),%rsi
	# %xmm11 = shuffled sign mask of %xmm15, used below only when a
	# partial block follows.
1802	pshufd	$19,%xmm14,%xmm11
1803	andq	$15,%r9
1804	jz	.Lxts_dec_ret
1805
	# Partial tail present: %xmm10 = next tweak, %xmm11 = the one after.
1806	movdqa	%xmm15,%xmm10
1807	paddq	%xmm15,%xmm15
1808	pand	%xmm8,%xmm11
1809	pxor	%xmm15,%xmm11
1810	jmp	.Lxts_dec_done2
1811
1812.align	16
	# One block, decrypted inline with the full loop count in %eax.
	# Each N-block case leaves the next two unused tweaks in %xmm10 and
	# %xmm11 for the stealing code.
1813.Lxts_dec_one:
1814	movups	(%rdi),%xmm2
1815	leaq	16(%rdi),%rdi
1816	xorps	%xmm10,%xmm2
1817	movups	(%rcx),%xmm0
1818	movups	16(%rcx),%xmm1
1819	leaq	32(%rcx),%rcx
1820	xorps	%xmm0,%xmm2
1821.Loop_dec1_12:
1822.byte	102,15,56,222,209
1823	decl	%eax
1824	movups	(%rcx),%xmm1
1825	leaq	16(%rcx),%rcx
1826	jnz	.Loop_dec1_12
1827.byte	102,15,56,223,209
1828	xorps	%xmm10,%xmm2
1829	movdqa	%xmm11,%xmm10
1830	movups	%xmm2,(%rsi)
1831	movdqa	%xmm12,%xmm11
1832	leaq	16(%rsi),%rsi
1833	jmp	.Lxts_dec_done
1834
1835.align	16
	# Two blocks through the three-lane helper; %xmm4 is not loaded on
	# this path and its result is not stored.
1836.Lxts_dec_two:
1837	movups	(%rdi),%xmm2
1838	movups	16(%rdi),%xmm3
1839	leaq	32(%rdi),%rdi
1840	xorps	%xmm10,%xmm2
1841	xorps	%xmm11,%xmm3
1842
1843	call	_aesni_decrypt3
1844
1845	xorps	%xmm10,%xmm2
1846	movdqa	%xmm12,%xmm10
1847	xorps	%xmm11,%xmm3
1848	movdqa	%xmm13,%xmm11
1849	movups	%xmm2,(%rsi)
1850	movups	%xmm3,16(%rsi)
1851	leaq	32(%rsi),%rsi
1852	jmp	.Lxts_dec_done
1853
1854.align	16
	# Three blocks.
1855.Lxts_dec_three:
1856	movups	(%rdi),%xmm2
1857	movups	16(%rdi),%xmm3
1858	movups	32(%rdi),%xmm4
1859	leaq	48(%rdi),%rdi
1860	xorps	%xmm10,%xmm2
1861	xorps	%xmm11,%xmm3
1862	xorps	%xmm12,%xmm4
1863
1864	call	_aesni_decrypt3
1865
1866	xorps	%xmm10,%xmm2
1867	movdqa	%xmm13,%xmm10
1868	xorps	%xmm11,%xmm3
1869	movdqa	%xmm15,%xmm11
1870	xorps	%xmm12,%xmm4
1871	movups	%xmm2,(%rsi)
1872	movups	%xmm3,16(%rsi)
1873	movups	%xmm4,32(%rsi)
1874	leaq	48(%rsi),%rsi
1875	jmp	.Lxts_dec_done
1876
1877.align	16
	# Four blocks. One more tweak step is taken first so that both a
	# fifth (%xmm14) and a sixth (%xmm15) tweak exist for the tail.
	# _aesni_decrypt4 is defined elsewhere in this file.
1878.Lxts_dec_four:
1879	pshufd	$19,%xmm14,%xmm9
1880	movdqa	%xmm15,%xmm14
1881	paddq	%xmm15,%xmm15
1882	movups	(%rdi),%xmm2
1883	pand	%xmm8,%xmm9
1884	movups	16(%rdi),%xmm3
1885	pxor	%xmm9,%xmm15
1886
1887	movups	32(%rdi),%xmm4
1888	xorps	%xmm10,%xmm2
1889	movups	48(%rdi),%xmm5
1890	leaq	64(%rdi),%rdi
1891	xorps	%xmm11,%xmm3
1892	xorps	%xmm12,%xmm4
1893	xorps	%xmm13,%xmm5
1894
1895	call	_aesni_decrypt4
1896
1897	xorps	%xmm10,%xmm2
1898	movdqa	%xmm14,%xmm10
1899	xorps	%xmm11,%xmm3
1900	movdqa	%xmm15,%xmm11
1901	xorps	%xmm12,%xmm4
1902	movups	%xmm2,(%rsi)
1903	xorps	%xmm13,%xmm5
1904	movups	%xmm3,16(%rsi)
1905	movups	%xmm4,32(%rsi)
1906	movups	%xmm5,48(%rsi)
1907	leaq	64(%rsi),%rsi
1908	jmp	.Lxts_dec_done
1909
1910.align	16
	# All whole blocks written. %r9 & 15 is the size of the partial
	# tail; zero means nothing is left to do.
1911.Lxts_dec_done:
1912	andq	$15,%r9
1913	jz	.Lxts_dec_ret
	# Stealing, step 1: decrypt the held-back whole block at (%rdi)
	# with the LATER tweak (%xmm11) and store it at (%rsi).
1914.Lxts_dec_done2:
1915	movq	%r9,%rdx
1916	movq	%r11,%rcx
1917	movl	%r10d,%eax
1918
1919	movups	(%rdi),%xmm2
1920	xorps	%xmm11,%xmm2
1921	movups	(%rcx),%xmm0
1922	movups	16(%rcx),%xmm1
1923	leaq	32(%rcx),%rcx
1924	xorps	%xmm0,%xmm2
1925.Loop_dec1_13:
1926.byte	102,15,56,222,209
1927	decl	%eax
1928	movups	(%rcx),%xmm1
1929	leaq	16(%rcx),%rcx
1930	jnz	.Loop_dec1_13
1931.byte	102,15,56,223,209
1932	xorps	%xmm11,%xmm2
1933	movups	%xmm2,(%rsi)
1934
	# Step 2, once per tail byte: the input byte at offset 16 replaces
	# the output byte at offset 0, and the byte it displaced is written
	# to output offset 16.
1935.Lxts_dec_steal:
1936	movzbl	16(%rdi),%eax
1937	movzbl	(%rsi),%ecx
1938	leaq	1(%rdi),%rdi
1939	movb	%al,(%rsi)
1940	movb	%cl,16(%rsi)
1941	leaq	1(%rsi),%rsi
1942	subq	$1,%rdx
1943	jnz	.Lxts_dec_steal
1944
	# Step 3: rewind %rsi to the start of the patched block and decrypt
	# it in place with the EARLIER tweak (%xmm10).
1945	subq	%r9,%rsi
1946	movq	%r11,%rcx
1947	movl	%r10d,%eax
1948
1949	movups	(%rsi),%xmm2
1950	xorps	%xmm10,%xmm2
1951	movups	(%rcx),%xmm0
1952	movups	16(%rcx),%xmm1
1953	leaq	32(%rcx),%rcx
1954	xorps	%xmm0,%xmm2
1955.Loop_dec1_14:
1956.byte	102,15,56,222,209
1957	decl	%eax
1958	movups	(%rcx),%xmm1
1959	leaq	16(%rcx),%rcx
1960	jnz	.Loop_dec1_14
1961.byte	102,15,56,223,209
1962	xorps	%xmm10,%xmm2
1963	movups	%xmm2,(%rsi)
1964
	# Release the 104-byte frame and return.
1965.Lxts_dec_ret:
1966	leaq	104(%rsp),%rsp
1967.Lxts_dec_epilogue:
1968	.byte	0xf3,0xc3
1969.size	aesni_xts_decrypt,.-aesni_xts_decrypt
# aesni_cbc_encrypt -- AES-CBC encryption or decryption with AES-NI.
#
# Registers as consumed by the code below:
#   %rdi  input pointer          %rsi  output pointer
#   %rdx  length in bytes (zero returns immediately)
#   %rcx  key schedule, round-loop count at byte offset 240
#   %r8   pointer to the 16-byte chaining value; read at entry and
#         rewritten with the next chaining value before returning
#   %r9d  non-zero selects encryption, zero selects decryption
#
# Scratch: %r10d and %r11 keep the loop count and key pointer. The
# decrypt path parks one block in the 16 bytes at -24(%rsp), below the
# stack pointer; the return address pushed by each call lands at
# -8(%rsp) and does not overlap that slot.
# NOTE(review): movaps on -24(%rsp) needs %rsp to be 8 mod 16 at entry,
# as the SysV AMD64 convention provides -- confirm for other callers.
#
# Raw-byte instructions used here:
#   102,15,56,220,209 / 221,209   aesenc / aesenclast %xmm1,%xmm2
#   102,15,56,222,N / 223,N       aesdec / aesdeclast (N = 209..249
#                                 applies %xmm1 to %xmm2..%xmm7)
#   102,68,15,56,222,193 / 201    aesdec %xmm1,%xmm8 / %xmm1,%xmm9
#   .long 0x9066A4F3              rep movsb followed by a two-byte nop
#   .long 0x9066AAF3              rep stosb followed by a two-byte nop
#   0xf3,0xc3                     rep ret
1970.globl	aesni_cbc_encrypt
1971.type	aesni_cbc_encrypt,@function
1972.align	16
1973aesni_cbc_encrypt:
1974	testq	%rdx,%rdx
1975	jz	.Lcbc_ret
1976
1977	movl	240(%rcx),%r10d
1978	movq	%rcx,%r11
1979	testl	%r9d,%r9d
1980	jz	.Lcbc_decrypt
1981
	# ---- Encryption: one block at a time, %xmm2 is the chaining value.
1982	movups	(%r8),%xmm2
1983	movl	%r10d,%eax
1984	cmpq	$16,%rdx
1985	jb	.Lcbc_enc_tail
1986	subq	$16,%rdx
1987	jmp	.Lcbc_enc_loop
1988.align	16
1989.Lcbc_enc_loop:
1990	movups	(%rdi),%xmm3
1991	leaq	16(%rdi),%rdi
1992
	# %xmm2 = chaining value ^ input block ^ round key 0, then the rounds.
1993	movups	(%rcx),%xmm0
1994	movups	16(%rcx),%xmm1
1995	xorps	%xmm0,%xmm3
1996	leaq	32(%rcx),%rcx
1997	xorps	%xmm3,%xmm2
1998.Loop_enc1_15:
1999.byte	102,15,56,220,209
2000	decl	%eax
2001	movups	(%rcx),%xmm1
2002	leaq	16(%rcx),%rcx
2003	jnz	.Loop_enc1_15
2004.byte	102,15,56,221,209
2005	movl	%r10d,%eax
2006	movq	%r11,%rcx
2007	movups	%xmm2,0(%rsi)
2008	leaq	16(%rsi),%rsi
2009	subq	$16,%rdx
2010	jnc	.Lcbc_enc_loop
	# Borrow: fewer than 16 bytes remained. Restore the remainder; a
	# non-zero value is a partial block, otherwise publish the last
	# output block as the new chaining value.
2011	addq	$16,%rdx
2012	jnz	.Lcbc_enc_tail
2013	movups	%xmm2,(%r8)
2014	jmp	.Lcbc_ret
2015
	# Partial final block: copy the %rdx leftover input bytes to the
	# output buffer, zero-fill it to 16 bytes, then run the loop once
	# more with input and output both pointing at that padded block.
2016.Lcbc_enc_tail:
2017	movq	%rdx,%rcx
2018	xchgq	%rdi,%rsi
2019.long	0x9066A4F3
2020	movl	$16,%ecx
2021	subq	%rdx,%rcx
2022	xorl	%eax,%eax
2023.long	0x9066AAF3
2024	leaq	-16(%rdi),%rdi
2025	movl	%r10d,%eax
2026	movq	%rdi,%rsi
2027	movq	%r11,%rcx
	# With %rdx zero the loop performs exactly one more pass.
2028	xorq	%rdx,%rdx
2029	jmp	.Lcbc_enc_loop
2030
2031.align	16
	# ---- Decryption: %xmm9 is the chaining value. Lengths of 112 bytes
	# or less go straight to the tail code.
2032.Lcbc_decrypt:
2033	movups	(%r8),%xmm9
2034	movl	%r10d,%eax
2035	cmpq	$112,%rdx
2036	jbe	.Lcbc_dec_tail
	# The eight-block path counts with the halved loop count.
2037	shrl	$1,%r10d
2038	subq	$112,%rdx
2039	movl	%r10d,%eax
2040	movaps	%xmm9,-24(%rsp)
2041	jmp	.Lcbc_dec_loop8_enter
2042.align	16
	# Loop re-entry: park the new chaining value (%xmm0) in the scratch
	# slot and flush the eighth result of the previous pass.
2043.Lcbc_dec_loop8:
2044	movaps	%xmm0,-24(%rsp)
2045	movups	%xmm9,(%rsi)
2046	leaq	16(%rsi),%rsi
	# Load eight input blocks into %xmm2..%xmm9, xor round key 0 and
	# run the first round inline.
2047.Lcbc_dec_loop8_enter:
2048	movups	(%rcx),%xmm0
2049	movups	(%rdi),%xmm2
2050	movups	16(%rdi),%xmm3
2051	movups	16(%rcx),%xmm1
2052
2053	leaq	32(%rcx),%rcx
2054	movdqu	32(%rdi),%xmm4
2055	xorps	%xmm0,%xmm2
2056	movdqu	48(%rdi),%xmm5
2057	xorps	%xmm0,%xmm3
2058	movdqu	64(%rdi),%xmm6
2059.byte	102,15,56,222,209
2060	pxor	%xmm0,%xmm4
2061	movdqu	80(%rdi),%xmm7
2062.byte	102,15,56,222,217
2063	pxor	%xmm0,%xmm5
2064	movdqu	96(%rdi),%xmm8
2065.byte	102,15,56,222,225
2066	pxor	%xmm0,%xmm6
2067	movdqu	112(%rdi),%xmm9
2068.byte	102,15,56,222,233
2069	pxor	%xmm0,%xmm7
2070	decl	%eax
2071.byte	102,15,56,222,241
2072	pxor	%xmm0,%xmm8
2073.byte	102,15,56,222,249
2074	pxor	%xmm0,%xmm9
2075	movups	(%rcx),%xmm0
2076.byte	102,68,15,56,222,193
2077.byte	102,68,15,56,222,201
2078	movups	16(%rcx),%xmm1
2079
	# .Ldec_loop8_enter is defined outside this block.
	# NOTE(review): presumably the round loop inside _aesni_decrypt8,
	# finishing the remaining rounds on all eight lanes -- confirm.
2080	call	.Ldec_loop8_enter
2081
	# CBC chaining: result i is xored with input block i-1, reloaded
	# from memory; the first result uses the parked chaining value.
	# %xmm0 ends up holding input block 7, the next chaining value.
2082	movups	(%rdi),%xmm1
2083	movups	16(%rdi),%xmm0
2084	xorps	-24(%rsp),%xmm2
2085	xorps	%xmm1,%xmm3
2086	movups	32(%rdi),%xmm1
2087	xorps	%xmm0,%xmm4
2088	movups	48(%rdi),%xmm0
2089	xorps	%xmm1,%xmm5
2090	movups	64(%rdi),%xmm1
2091	xorps	%xmm0,%xmm6
2092	movups	80(%rdi),%xmm0
2093	xorps	%xmm1,%xmm7
2094	movups	96(%rdi),%xmm1
2095	xorps	%xmm0,%xmm8
2096	movups	112(%rdi),%xmm0
2097	xorps	%xmm1,%xmm9
	# Store seven results now; the eighth (%xmm9) stays pending.
2098	movups	%xmm2,(%rsi)
2099	movups	%xmm3,16(%rsi)
2100	movups	%xmm4,32(%rsi)
2101	movups	%xmm5,48(%rsi)
2102	movl	%r10d,%eax
2103	movups	%xmm6,64(%rsi)
2104	movq	%r11,%rcx
2105	movups	%xmm7,80(%rsi)
2106	leaq	128(%rdi),%rdi
2107	movups	%xmm8,96(%rsi)
2108	leaq	112(%rsi),%rsi
2109	subq	$128,%rdx
2110	ja	.Lcbc_dec_loop8
2111
	# Loop finished: %xmm2 = pending result, %xmm9 = chaining value.
	# Adding 112 back gives the bytes still to process; zero or
	# negative means only the pending block remains to be written.
2112	movaps	%xmm9,%xmm2
2113	movaps	%xmm0,%xmm9
2114	addq	$112,%rdx
2115	jle	.Lcbc_dec_tail_collected
2116	movups	%xmm2,(%rsi)
	# 2*%r10+1 rebuilds the full loop count from the halved one.
2117	leal	1(%r10,%r10,1),%eax
2118	leaq	16(%rsi),%rsi
	# Tail: 1 to 7 blocks. Input blocks are loaded one by one until the
	# length is covered. %xmm8, %xmm7 and %xmm6 first receive copies of
	# input blocks 0, 1 and 2 for chaining; longer tails overwrite
	# %xmm6..%xmm8 with input blocks 4..6.
2119.Lcbc_dec_tail:
2120	movups	(%rdi),%xmm2
2121	movaps	%xmm2,%xmm8
2122	cmpq	$16,%rdx
2123	jbe	.Lcbc_dec_one
2124
2125	movups	16(%rdi),%xmm3
2126	movaps	%xmm3,%xmm7
2127	cmpq	$32,%rdx
2128	jbe	.Lcbc_dec_two
2129
2130	movups	32(%rdi),%xmm4
2131	movaps	%xmm4,%xmm6
2132	cmpq	$48,%rdx
2133	jbe	.Lcbc_dec_three
2134
2135	movups	48(%rdi),%xmm5
2136	cmpq	$64,%rdx
2137	jbe	.Lcbc_dec_four
2138
2139	movups	64(%rdi),%xmm6
2140	cmpq	$80,%rdx
2141	jbe	.Lcbc_dec_five
2142
2143	movups	80(%rdi),%xmm7
2144	cmpq	$96,%rdx
2145	jbe	.Lcbc_dec_six
2146
	# Seven blocks through the eight-lane helper (defined elsewhere in
	# this file). The chaining value is parked in the scratch slot and
	# the input is reloaded afterwards for the chaining xors.
2147	movups	96(%rdi),%xmm8
2148	movaps	%xmm9,-24(%rsp)
2149	call	_aesni_decrypt8
2150	movups	(%rdi),%xmm1
2151	movups	16(%rdi),%xmm0
2152	xorps	-24(%rsp),%xmm2
2153	xorps	%xmm1,%xmm3
2154	movups	32(%rdi),%xmm1
2155	xorps	%xmm0,%xmm4
2156	movups	48(%rdi),%xmm0
2157	xorps	%xmm1,%xmm5
2158	movups	64(%rdi),%xmm1
2159	xorps	%xmm0,%xmm6
2160	movups	80(%rdi),%xmm0
2161	xorps	%xmm1,%xmm7
2162	movups	96(%rdi),%xmm9
2163	xorps	%xmm0,%xmm8
2164	movups	%xmm2,(%rsi)
2165	movups	%xmm3,16(%rsi)
2166	movups	%xmm4,32(%rsi)
2167	movups	%xmm5,48(%rsi)
2168	movups	%xmm6,64(%rsi)
2169	movups	%xmm7,80(%rsi)
2170	leaq	96(%rsi),%rsi
2171	movaps	%xmm8,%xmm2
2172	subq	$112,%rdx
2173	jmp	.Lcbc_dec_tail_collected
2174.align	16
	# Every N-block case below ends the same way: all results but the
	# last are stored, the last is left in %xmm2, %xmm9 becomes the
	# last input block, and N*16 is subtracted from %rdx.
2175.Lcbc_dec_one:
2176	movups	(%rcx),%xmm0
2177	movups	16(%rcx),%xmm1
2178	leaq	32(%rcx),%rcx
2179	xorps	%xmm0,%xmm2
2180.Loop_dec1_16:
2181.byte	102,15,56,222,209
2182	decl	%eax
2183	movups	(%rcx),%xmm1
2184	leaq	16(%rcx),%rcx
2185	jnz	.Loop_dec1_16
2186.byte	102,15,56,223,209
2187	xorps	%xmm9,%xmm2
2188	movaps	%xmm8,%xmm9
2189	subq	$16,%rdx
2190	jmp	.Lcbc_dec_tail_collected
2191.align	16
2192.Lcbc_dec_two:
	# Third lane is unused: zero it before the three-lane helper.
2193	xorps	%xmm4,%xmm4
2194	call	_aesni_decrypt3
2195	xorps	%xmm9,%xmm2
2196	xorps	%xmm8,%xmm3
2197	movups	%xmm2,(%rsi)
2198	movaps	%xmm7,%xmm9
2199	movaps	%xmm3,%xmm2
2200	leaq	16(%rsi),%rsi
2201	subq	$32,%rdx
2202	jmp	.Lcbc_dec_tail_collected
2203.align	16
2204.Lcbc_dec_three:
2205	call	_aesni_decrypt3
2206	xorps	%xmm9,%xmm2
2207	xorps	%xmm8,%xmm3
2208	movups	%xmm2,(%rsi)
2209	xorps	%xmm7,%xmm4
2210	movups	%xmm3,16(%rsi)
2211	movaps	%xmm6,%xmm9
2212	movaps	%xmm4,%xmm2
2213	leaq	32(%rsi),%rsi
2214	subq	$48,%rdx
2215	jmp	.Lcbc_dec_tail_collected
2216.align	16
2217.Lcbc_dec_four:
	# _aesni_decrypt4 is defined elsewhere in this file.
2218	call	_aesni_decrypt4
2219	xorps	%xmm9,%xmm2
2220	movups	48(%rdi),%xmm9
2221	xorps	%xmm8,%xmm3
2222	movups	%xmm2,(%rsi)
2223	xorps	%xmm7,%xmm4
2224	movups	%xmm3,16(%rsi)
2225	xorps	%xmm6,%xmm5
2226	movups	%xmm4,32(%rsi)
2227	movaps	%xmm5,%xmm2
2228	leaq	48(%rsi),%rsi
2229	subq	$64,%rdx
2230	jmp	.Lcbc_dec_tail_collected
2231.align	16
2232.Lcbc_dec_five:
	# Sixth lane is unused: zero it before the six-lane helper. Input
	# blocks 1..3 are reloaded from memory for chaining because
	# %xmm6 and %xmm7 no longer hold their copies.
2233	xorps	%xmm7,%xmm7
2234	call	_aesni_decrypt6
2235	movups	16(%rdi),%xmm1
2236	movups	32(%rdi),%xmm0
2237	xorps	%xmm9,%xmm2
2238	xorps	%xmm8,%xmm3
2239	xorps	%xmm1,%xmm4
2240	movups	48(%rdi),%xmm1
2241	xorps	%xmm0,%xmm5
2242	movups	64(%rdi),%xmm9
2243	xorps	%xmm1,%xmm6
2244	movups	%xmm2,(%rsi)
2245	movups	%xmm3,16(%rsi)
2246	movups	%xmm4,32(%rsi)
2247	movups	%xmm5,48(%rsi)
2248	leaq	64(%rsi),%rsi
2249	movaps	%xmm6,%xmm2
2250	subq	$80,%rdx
2251	jmp	.Lcbc_dec_tail_collected
2252.align	16
2253.Lcbc_dec_six:
2254	call	_aesni_decrypt6
2255	movups	16(%rdi),%xmm1
2256	movups	32(%rdi),%xmm0
2257	xorps	%xmm9,%xmm2
2258	xorps	%xmm8,%xmm3
2259	xorps	%xmm1,%xmm4
2260	movups	48(%rdi),%xmm1
2261	xorps	%xmm0,%xmm5
2262	movups	64(%rdi),%xmm0
2263	xorps	%xmm1,%xmm6
2264	movups	80(%rdi),%xmm9
2265	xorps	%xmm0,%xmm7
2266	movups	%xmm2,(%rsi)
2267	movups	%xmm3,16(%rsi)
2268	movups	%xmm4,32(%rsi)
2269	movups	%xmm5,48(%rsi)
2270	movups	%xmm6,64(%rsi)
2271	leaq	80(%rsi),%rsi
2272	movaps	%xmm7,%xmm2
2273	subq	$96,%rdx
2274	jmp	.Lcbc_dec_tail_collected
2275.align	16
	# Common exit: write the chaining value back through %r8, then emit
	# the pending block in %xmm2. The jnz tests the result of the andq;
	# the movups in between leaves the flags untouched.
2276.Lcbc_dec_tail_collected:
2277	andq	$15,%rdx
2278	movups	%xmm9,(%r8)
2279	jnz	.Lcbc_dec_tail_partial
2280	movups	%xmm2,(%rsi)
2281	jmp	.Lcbc_dec_ret
2282.align	16
	# Partial last block: park %xmm2 in the scratch slot and copy only
	# 16 - %rdx bytes of it to the output with rep movsb.
2283.Lcbc_dec_tail_partial:
2284	movaps	%xmm2,-24(%rsp)
2285	movq	$16,%rcx
2286	movq	%rsi,%rdi
2287	subq	%rdx,%rcx
2288	leaq	-24(%rsp),%rsi
2289.long	0x9066A4F3
2290
2291.Lcbc_dec_ret:
2292.Lcbc_ret:
2293	.byte	0xf3,0xc3
2294.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt
# aesni_set_decrypt_key -- build a decryption key schedule.
#
# Takes the same registers as __aesni_set_encrypt_key, which it calls
# first: %rdi raw key bytes, %esi key size in bits, %rdx schedule.
# A non-zero %eax from that call is returned unchanged. On success the
# schedule is converted in place: the round keys are reversed in order
# and every key except the first and last is passed through aesimc.
# Returns with %eax = 0 in that case.
#
# This code depends on two things the callee leaves behind: %rdx still
# pointing at the schedule and %esi holding the value it stored at
# byte offset 240 (9, 11 or 13).
#
# Raw-byte instructions:
#   0x48,0x83,0xEC,0x08   sub $8,%rsp
#   102,15,56,219,192     aesimc %xmm0,%xmm0
#   102,15,56,219,201     aesimc %xmm1,%xmm1
#   0xf3,0xc3             rep ret
2295.globl	aesni_set_decrypt_key
2296.type	aesni_set_decrypt_key,@function
2297.align	16
2298aesni_set_decrypt_key:
2299.byte	0x48,0x83,0xEC,0x08
2300	call	__aesni_set_encrypt_key
	# %esi * 16 = byte distance used to locate the last round key. The
	# shift is done before the status test; on failure it is unused.
2301	shll	$4,%esi
2302	testl	%eax,%eax
2303	jnz	.Ldec_key_ret
	# %rdx -> first round key, %rdi -> last round key.
2304	leaq	16(%rdx,%rsi,1),%rdi
2305
	# Swap the outermost pair as is (no aesimc on these two).
2306	movups	(%rdx),%xmm0
2307	movups	(%rdi),%xmm1
2308	movups	%xmm0,(%rdi)
2309	movups	%xmm1,(%rdx)
2310	leaq	16(%rdx),%rdx
2311	leaq	-16(%rdi),%rdi
2312
	# Walk inward from both ends: transform each pair with aesimc and
	# store the two keys swapped. The pointers are stepped before the
	# stores, hence the 16(%rdi) and -16(%rdx) addresses.
2313.Ldec_key_inverse:
2314	movups	(%rdx),%xmm0
2315	movups	(%rdi),%xmm1
2316.byte	102,15,56,219,192
2317.byte	102,15,56,219,201
2318	leaq	16(%rdx),%rdx
2319	leaq	-16(%rdi),%rdi
2320	movups	%xmm0,16(%rdi)
2321	movups	%xmm1,-16(%rdx)
2322	cmpq	%rdx,%rdi
2323	ja	.Ldec_key_inverse
2324
	# The pointers have met on the middle key: transform it in place.
2325	movups	(%rdx),%xmm0
2326.byte	102,15,56,219,192
2327	movups	%xmm0,(%rdi)
2328.Ldec_key_ret:
2329	addq	$8,%rsp
2330	.byte	0xf3,0xc3
2331.LSEH_end_set_decrypt_key:
2332.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key
# aesni_set_encrypt_key / __aesni_set_encrypt_key -- expand an AES key.
# Both labels name the same entry point; aesni_set_decrypt_key in this
# file calls the second one.
#
# Registers as consumed by the code below:
#   %rdi  pointer to the raw key bytes
#   %esi  key size in bits: 128, 192 or 256
#   %rdx  pointer to the key schedule being written (not modified)
# Result in %rax: 0 on success, -1 when %rdi or %rdx is null, -2 for
# any other key size. On success %esi holds 9, 11 or 13 and the same
# value is stored at byte offset 240 of the schedule.
#
# %rax doubles as the write cursor into the schedule until the final
# status is loaded. %xmm4 is zeroed once and its dword 0 stays zero
# through every helper, which the shufps pairs below rely on.
#
# Raw-byte instructions:
#   0x48,0x83,0xEC,0x08      sub $8,%rsp
#   102,15,58,223,200,RC     aeskeygenassist $RC,%xmm0,%xmm1
#   102,15,58,223,202,RC     aeskeygenassist $RC,%xmm2,%xmm1
#   0xf3,0xc3                rep ret
2333.globl	aesni_set_encrypt_key
2334.type	aesni_set_encrypt_key,@function
2335.align	16
2336aesni_set_encrypt_key:
2337__aesni_set_encrypt_key:
2338.byte	0x48,0x83,0xEC,0x08
	# Preload the null-pointer status; it is returned when either
	# pointer check fails.
2339	movq	$-1,%rax
2340	testq	%rdi,%rdi
2341	jz	.Lenc_key_ret
2342	testq	%rdx,%rdx
2343	jz	.Lenc_key_ret
2344
	# %xmm0 = first 16 key bytes, %xmm4 = 0, %rax -> second schedule slot.
2345	movups	(%rdi),%xmm0
2346	xorps	%xmm4,%xmm4
2347	leaq	16(%rdx),%rax
2348	cmpl	$256,%esi
2349	je	.L14rounds
2350	cmpl	$192,%esi
2351	je	.L12rounds
2352	cmpl	$128,%esi
2353	jne	.Lbad_keybits
2354
	# 128-bit key: ten expansion steps with constants 1,2,4,...,128,27,54.
	# The first call uses the _cold entry, which skips the store because
	# slot 0 was written directly. After the nine storing calls %rax is
	# at schedule+160, so 80(%rax) is byte offset 240.
2355.L10rounds:
2356	movl	$9,%esi
2357	movups	%xmm0,(%rdx)
2358.byte	102,15,58,223,200,1
2359	call	.Lkey_expansion_128_cold
2360.byte	102,15,58,223,200,2
2361	call	.Lkey_expansion_128
2362.byte	102,15,58,223,200,4
2363	call	.Lkey_expansion_128
2364.byte	102,15,58,223,200,8
2365	call	.Lkey_expansion_128
2366.byte	102,15,58,223,200,16
2367	call	.Lkey_expansion_128
2368.byte	102,15,58,223,200,32
2369	call	.Lkey_expansion_128
2370.byte	102,15,58,223,200,64
2371	call	.Lkey_expansion_128
2372.byte	102,15,58,223,200,128
2373	call	.Lkey_expansion_128
2374.byte	102,15,58,223,200,27
2375	call	.Lkey_expansion_128
2376.byte	102,15,58,223,200,54
2377	call	.Lkey_expansion_128
2378	movups	%xmm0,(%rax)
2379	movl	%esi,80(%rax)
2380	xorl	%eax,%eax
2381	jmp	.Lenc_key_ret
2382
2383.align	16
	# 192-bit key: %xmm2 gets the remaining 8 key bytes (movq loads 64
	# bits). The a and b helpers alternate; the b helper writes 32 bytes
	# per call. %rax ends at schedule+192, so 48(%rax) is offset 240.
2384.L12rounds:
2385	movq	16(%rdi),%xmm2
2386	movl	$11,%esi
2387	movups	%xmm0,(%rdx)
2388.byte	102,15,58,223,202,1
2389	call	.Lkey_expansion_192a_cold
2390.byte	102,15,58,223,202,2
2391	call	.Lkey_expansion_192b
2392.byte	102,15,58,223,202,4
2393	call	.Lkey_expansion_192a
2394.byte	102,15,58,223,202,8
2395	call	.Lkey_expansion_192b
2396.byte	102,15,58,223,202,16
2397	call	.Lkey_expansion_192a
2398.byte	102,15,58,223,202,32
2399	call	.Lkey_expansion_192b
2400.byte	102,15,58,223,202,64
2401	call	.Lkey_expansion_192a
2402.byte	102,15,58,223,202,128
2403	call	.Lkey_expansion_192b
2404	movups	%xmm0,(%rax)
2405	movl	%esi,48(%rax)
2406	xorq	%rax,%rax
2407	jmp	.Lenc_key_ret
2408
2409.align	16
	# 256-bit key: %xmm2 gets the second 16 key bytes and both halves
	# are written to slots 0 and 1 directly. The a helper derives the
	# next even key from %xmm2, the b helper the next odd key from
	# %xmm0. %rax ends at schedule+224, so 16(%rax) is offset 240.
2410.L14rounds:
2411	movups	16(%rdi),%xmm2
2412	movl	$13,%esi
2413	leaq	16(%rax),%rax
2414	movups	%xmm0,(%rdx)
2415	movups	%xmm2,16(%rdx)
2416.byte	102,15,58,223,202,1
2417	call	.Lkey_expansion_256a_cold
2418.byte	102,15,58,223,200,1
2419	call	.Lkey_expansion_256b
2420.byte	102,15,58,223,202,2
2421	call	.Lkey_expansion_256a
2422.byte	102,15,58,223,200,2
2423	call	.Lkey_expansion_256b
2424.byte	102,15,58,223,202,4
2425	call	.Lkey_expansion_256a
2426.byte	102,15,58,223,200,4
2427	call	.Lkey_expansion_256b
2428.byte	102,15,58,223,202,8
2429	call	.Lkey_expansion_256a
2430.byte	102,15,58,223,200,8
2431	call	.Lkey_expansion_256b
2432.byte	102,15,58,223,202,16
2433	call	.Lkey_expansion_256a
2434.byte	102,15,58,223,200,16
2435	call	.Lkey_expansion_256b
2436.byte	102,15,58,223,202,32
2437	call	.Lkey_expansion_256a
2438.byte	102,15,58,223,200,32
2439	call	.Lkey_expansion_256b
2440.byte	102,15,58,223,202,64
2441	call	.Lkey_expansion_256a
2442	movups	%xmm0,(%rax)
2443	movl	%esi,16(%rax)
2444	xorq	%rax,%rax
2445	jmp	.Lenc_key_ret
2446
2447.align	16
	# Unsupported key size.
2448.Lbad_keybits:
2449	movq	$-2,%rax
2450.Lenc_key_ret:
2451	addq	$8,%rsp
2452	.byte	0xf3,0xc3
2453.LSEH_end_set_encrypt_key:
2454
	# ---- Expansion helpers, reached with call and left with rep ret.
	# In:  %xmm0 (and %xmm2 for 192/256) = current key words,
	#      %xmm1 = aeskeygenassist output, %rax = schedule cursor.
	# Out: updated %xmm0 or %xmm2, cursor advanced past any stores.
2455.align	16
	# Store the current key, then derive the next 128-bit round key.
	# The two shufps/xorps pairs turn words (w0,w1,w2,w3) of %xmm0 into
	# (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3); dword 3 of %xmm1 is then
	# broadcast and xored into all four.
2456.Lkey_expansion_128:
2457	movups	%xmm0,(%rax)
2458	leaq	16(%rax),%rax
2459.Lkey_expansion_128_cold:
2460	shufps	$16,%xmm0,%xmm4
2461	xorps	%xmm4,%xmm0
2462	shufps	$140,%xmm0,%xmm4
2463	xorps	%xmm4,%xmm0
2464	shufps	$255,%xmm1,%xmm1
2465	xorps	%xmm1,%xmm0
2466	.byte	0xf3,0xc3
2467
2468.align	16
	# 192-bit step A: store %xmm0, keep a copy of %xmm2 in %xmm5 for the
	# following B step, then run the shared update. The update applies
	# the same running-xor to %xmm0, xors in dword 1 of %xmm1, and
	# refreshes %xmm2 from its own shifted copy and dword 3 of the new
	# %xmm0.
2469.Lkey_expansion_192a:
2470	movups	%xmm0,(%rax)
2471	leaq	16(%rax),%rax
2472.Lkey_expansion_192a_cold:
2473	movaps	%xmm2,%xmm5
2474.Lkey_expansion_192b_warm:
2475	shufps	$16,%xmm0,%xmm4
2476	movdqa	%xmm2,%xmm3
2477	xorps	%xmm4,%xmm0
2478	shufps	$140,%xmm0,%xmm4
2479	pslldq	$4,%xmm3
2480	xorps	%xmm4,%xmm0
2481	pshufd	$85,%xmm1,%xmm1
2482	pxor	%xmm3,%xmm2
2483	pxor	%xmm1,%xmm0
2484	pshufd	$255,%xmm0,%xmm3
2485	pxor	%xmm3,%xmm2
2486	.byte	0xf3,0xc3
2487
2488.align	16
	# 192-bit step B: repack the 24-byte-wide state into two 16-byte
	# round keys. Slot 0 = low half of %xmm5 followed by low half of
	# %xmm0; slot 1 = high half of %xmm0 followed by low half of %xmm2.
	# Then continue with the shared update.
2489.Lkey_expansion_192b:
2490	movaps	%xmm0,%xmm3
2491	shufps	$68,%xmm0,%xmm5
2492	movups	%xmm5,(%rax)
2493	shufps	$78,%xmm2,%xmm3
2494	movups	%xmm3,16(%rax)
2495	leaq	32(%rax),%rax
2496	jmp	.Lkey_expansion_192b_warm
2497
2498.align	16
	# 256-bit step A: store %xmm2, then update %xmm0 exactly as in the
	# 128-bit helper (broadcast of dword 3 of %xmm1).
2499.Lkey_expansion_256a:
2500	movups	%xmm2,(%rax)
2501	leaq	16(%rax),%rax
2502.Lkey_expansion_256a_cold:
2503	shufps	$16,%xmm0,%xmm4
2504	xorps	%xmm4,%xmm0
2505	shufps	$140,%xmm0,%xmm4
2506	xorps	%xmm4,%xmm0
2507	shufps	$255,%xmm1,%xmm1
2508	xorps	%xmm1,%xmm0
2509	.byte	0xf3,0xc3
2510
2511.align	16
	# 256-bit step B: store %xmm0, then update %xmm2 the same way but
	# with dword 2 of %xmm1 broadcast (shufps $170).
2512.Lkey_expansion_256b:
2513	movups	%xmm0,(%rax)
2514	leaq	16(%rax),%rax
2515
2516	shufps	$16,%xmm2,%xmm4
2517	xorps	%xmm4,%xmm2
2518	shufps	$140,%xmm2,%xmm4
2519	xorps	%xmm4,%xmm2
2520	shufps	$170,%xmm1,%xmm1
2521	xorps	%xmm1,%xmm2
2522	.byte	0xf3,0xc3
2523.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
2524.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant tables, 64-byte aligned, each entry 16 bytes wide.
2525.align	64
	# Byte indices 15 down to 0.
	# NOTE(review): presumably a pshufb control that reverses the byte
	# order of a 128-bit value; its users are outside this chunk.
2526.Lbswap_mask:
2527.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
	# Dwords 6,6,6,0; users are outside this chunk.
2528.Lincrement32:
2529.long	6,6,6,0
	# Dwords 1,0,0,0, that is the 128-bit value 1; users are outside
	# this chunk.
2530.Lincrement64:
2531.long	1,0,0,0
	# Carry constants for the XTS tweak update: 0x87 in dword 0 and 1 in
	# dword 2. Loaded into %xmm8 by aesni_xts_decrypt and masked with
	# pand against the shuffled sign bits of the tweak.
2532.Lxts_magic:
2533.long	0x87,0,1,0
2534
	# NUL-terminated ASCII identification string:
	# AES for Intel AES-NI, CRYPTOGAMS by (author address at openssl.org)
2535.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2536.align	64
2537