1#ifdef HAVE_AMD64_ASM
2
/*
 * stream_salsa20_xmm6 -- write Salsa20 keystream into a buffer.
 *
 * Registers as consumed below (presumably System V AMD64 argument order):
 *   rdi = output buffer
 *   rsi = number of bytes to produce
 *   rdx = pointer to the 8 nonce bytes that ._start reads
 *   rcx = pointer to the 32 key bytes that ._start reads
 * Result: rax = 0 (set at ._done).
 *
 * The routine zero-fills the output, makes the input pointer alias the
 * output, sets the 64-bit block counter to 0 and jumps to ._start inside
 * stream_salsa20_xmm6_xor_ic. XOR-ing keystream into zeros leaves the raw
 * keystream in the buffer.
 *
 * The symbol is exported under both its plain and its underscore-prefixed
 * name.
 */
3.text
4.p2align 5
5
6#ifdef ASM_HIDE_SYMBOL
7ASM_HIDE_SYMBOL stream_salsa20_xmm6
8ASM_HIDE_SYMBOL _stream_salsa20_xmm6
9#endif
10.globl  stream_salsa20_xmm6
11.globl _stream_salsa20_xmm6
12#ifdef __ELF__
13.type  stream_salsa20_xmm6, @function
14.type _stream_salsa20_xmm6, @function
15#endif
16stream_salsa20_xmm6:
17_stream_salsa20_xmm6:
/*
 * Frame setup: r11 = 512 + (rsp & 31). Subtracting it leaves rsp 32-byte
 * aligned with at least 512 bytes of locals. This layout must match the
 * prologue of stream_salsa20_xmm6_xor_ic, because ._done is shared.
 */
18mov %rsp,%r11
19and $31,%r11
20add $512,%r11
21sub %r11,%rsp
/* 416(%rsp) keeps the frame size so ._done can undo the adjustment. */
22movq %r11,416(%rsp)
/*
 * Save r12-r15, rbx and rbp. None of them is written by the code visible in
 * this file section; they are restored at ._done regardless.
 */
23movq %r12,424(%rsp)
24movq %r13,432(%rsp)
25movq %r14,440(%rsp)
26movq %r15,448(%rsp)
27movq %rbx,456(%rsp)
28movq %rbp,464(%rsp)
/*
 * Move the arguments into the registers ._start expects:
 *   r9 = byte count, rdi = output, rsi = input (here: the output itself),
 *   rdx = nonce pointer, r10 = key pointer.
 * The two register-to-itself moves have no effect.
 */
29mov  %rsi,%r9
30mov  %rdi,%rdi
31mov  %rdi,%rsi
32mov  %rdx,%rdx
33mov  %rcx,%r10
/* Nothing to do for a zero-length request (unsigned "<= 0" means "== 0"). */
34cmp  $0,%r9
35jbe ._done
/*
 * Zero-fill the output: rep stosb stores al (0) rcx times at rdi, advancing
 * rdi. Assumes the direction flag is clear, as on function entry under the
 * usual ABIs. The following sub rewinds rdi to the start of the buffer.
 */
36mov  $0,%rax
37mov  %r9,%rcx
38rep stosb
39sub  %r9,%rdi
/* 472(%rsp) is the 64-bit block counter; this entry point starts it at 0. */
40movq $0,472(%rsp)
41jmp ._start
42
/*
 * stream_salsa20_xmm6_xor_ic -- XOR a buffer with Salsa20 keystream, starting
 * at a caller-chosen block counter.
 *
 * Registers as consumed below (presumably System V AMD64 argument order):
 *   rdi = output buffer
 *   rsi = input buffer
 *   rdx = number of bytes
 *   rcx = pointer to the 8 nonce bytes
 *   r8  = initial 64-bit block counter
 *   r9  = pointer to the 32 key bytes
 * Result: rax = 0 (set at ._done).
 *
 * Stack frame slots used by the shared body (offsets from the aligned rsp):
 *     0.. 63  scratch block for a final partial (< 64 byte) chunk
 *    64..127  one-block state, four rows of four 32-bit words
 *   128..351  state words broadcast across four lanes (4-block path)
 *   352..383  per-lane block counter, low words then high words
 *   384, 400  spill slots for the 4-block round loop
 *   416       frame size, 424..464 saved registers
 *   472       64-bit block counter, 480 bytes remaining
 *
 * The symbol is exported under both its plain and its underscore-prefixed
 * name.
 */
43.text
44.p2align 5
45
46#ifdef ASM_HIDE_SYMBOL
47ASM_HIDE_SYMBOL stream_salsa20_xmm6_xor_ic
48ASM_HIDE_SYMBOL _stream_salsa20_xmm6_xor_ic
49#endif
50.globl  stream_salsa20_xmm6_xor_ic
51.globl _stream_salsa20_xmm6_xor_ic
52#ifdef __ELF__
53.type  stream_salsa20_xmm6_xor_ic, @function
54.type _stream_salsa20_xmm6_xor_ic, @function
55#endif
56stream_salsa20_xmm6_xor_ic:
57_stream_salsa20_xmm6_xor_ic:
58
/*
 * Frame setup: r11 = 512 + (rsp & 31). Subtracting it leaves rsp 32-byte
 * aligned, which the movdqa accesses to the frame rely on.
 */
59mov %rsp,%r11
60and $31,%r11
61add $512,%r11
62sub %r11,%rsp
/* Remember the frame size and save r12-r15, rbx, rbp for ._done. */
63movq %r11,416(%rsp)
64movq %r12,424(%rsp)
65movq %r13,432(%rsp)
66movq %r14,440(%rsp)
67movq %r15,448(%rsp)
68movq %rbx,456(%rsp)
69movq %rbp,464(%rsp)
/*
 * Move the arguments into the registers ._start expects. Order matters:
 * r9 (key) must be copied to r10 before r9 is reused for the byte count,
 * and rdx (count) must be copied to r9 before rdx receives the nonce pointer.
 * The two register-to-itself moves have no effect.
 */
70mov  %rdi,%rdi
71mov  %rsi,%rsi
72mov  %r9,%r10
/* Initial block counter goes to its frame slot. */
73movq %r8,472(%rsp)
74mov  %rdx,%r9
75mov  %rcx,%rdx
/* Nothing to do for a zero-length request. */
76cmp  $0,%r9
77jbe ._done
78
/*
 * ._start -- common entry for both exported functions.
 *
 * On entry: rdi = output, rsi = input, r9 = byte count (> 0),
 *           rdx = nonce pointer, r10 = key pointer, 472(%rsp) = block counter.
 *
 * Builds the 16-word Salsa20 state in a permuted ("diagonal") order, one
 * 16-byte row per slot. With x0..x15 denoting the words in the order the
 * output code later writes them (word i at byte offset 4*i of a block):
 *    64(%rsp) = x12 x1  x6  x11   (key+20, key+0,  nonce+0, key+16)
 *    80(%rsp) = x8  x13 x2  x7    (ctr low, key+24, key+4,  nonce+4)
 *    96(%rsp) = x4  x9  x14 x3    (key+12, ctr high, key+28, key+8)
 *   112(%rsp) = x0  x5  x10 x15   (the four constants)
 */
79._start:
/* Row at 64(%rsp). */
80movl   20(%r10),%ecx
81movl   0(%r10),%r8d
82movl   0(%rdx),%eax
83movl   16(%r10),%r11d
84movl %ecx,64(%rsp)
85movl %r8d,4+64(%rsp)
86movl %eax,8+64(%rsp)
87movl %r11d,12+64(%rsp)
/*
 * Row at 80(%rsp). The load into edx consumes the last nonce bytes and
 * overwrites the nonce pointer; rdx is free from here on.
 */
88movl   24(%r10),%r8d
89movl   4(%r10),%eax
90movl   4(%rdx),%edx
91movq 472(%rsp),%rcx
92movl %ecx,80(%rsp)
93movl %r8d,4+80(%rsp)
94movl %eax,8+80(%rsp)
95movl %edx,12+80(%rsp)
/* Row at 96(%rsp); the shift leaves the high half of the counter in ecx. */
96movl   12(%r10),%edx
97shr  $32,%rcx
98movl   28(%r10),%r8d
99movl   8(%r10),%eax
100movl %edx,96(%rsp)
101movl %ecx,4+96(%rsp)
102movl %r8d,8+96(%rsp)
103movl %eax,12+96(%rsp)
/*
 * Row at 112(%rsp): the constants. Read as little-endian bytes they spell
 * the ASCII string "expand 32-byte k".
 */
104mov  $1634760805,%rdx
105mov  $857760878,%rcx
106mov  $2036477234,%r8
107mov  $1797285236,%rax
108movl %edx,112(%rsp)
109movl %ecx,4+112(%rsp)
110movl %r8d,8+112(%rsp)
111movl %eax,12+112(%rsp)
/* Fewer than 256 bytes: go straight to the one-block-at-a-time path. */
112cmp  $256,%r9
113jb ._bytesbetween1and255
/*
 * 4-block path setup: broadcast each state word into all four lanes of its
 * own 16-byte slot (pshufd 0x00/0x55/0xaa/0xff replicates lane 0/1/2/3).
 * The two counter words are not broadcast here; ._bytesatleast256 fills
 * their slots (352 and 368) with per-lane values on every pass.
 */
/* From the constants row: 128 = x5, 144 = x10, 160 = x15, 176 = x0. */
114movdqa 112(%rsp),%xmm0
115pshufd $0x55,%xmm0,%xmm1
116pshufd $0xaa,%xmm0,%xmm2
117pshufd $0xff,%xmm0,%xmm3
118pshufd $0x00,%xmm0,%xmm0
119movdqa %xmm1,128(%rsp)
120movdqa %xmm2,144(%rsp)
121movdqa %xmm3,160(%rsp)
122movdqa %xmm0,176(%rsp)
/* From the row at 64: 192 = x6, 208 = x11, 224 = x12, 240 = x1. */
123movdqa 64(%rsp),%xmm0
124pshufd $0xaa,%xmm0,%xmm1
125pshufd $0xff,%xmm0,%xmm2
126pshufd $0x00,%xmm0,%xmm3
127pshufd $0x55,%xmm0,%xmm0
128movdqa %xmm1,192(%rsp)
129movdqa %xmm2,208(%rsp)
130movdqa %xmm3,224(%rsp)
131movdqa %xmm0,240(%rsp)
/* From the row at 80 (lane 0 is the counter, skipped): 256 = x7, 272 = x13, 288 = x2. */
132movdqa 80(%rsp),%xmm0
133pshufd $0xff,%xmm0,%xmm1
134pshufd $0x55,%xmm0,%xmm2
135pshufd $0xaa,%xmm0,%xmm0
136movdqa %xmm1,256(%rsp)
137movdqa %xmm2,272(%rsp)
138movdqa %xmm0,288(%rsp)
/* From the row at 96 (lane 1 is the counter, skipped): 304 = x4, 320 = x14, 336 = x3. */
139movdqa 96(%rsp),%xmm0
140pshufd $0x00,%xmm0,%xmm1
141pshufd $0xaa,%xmm0,%xmm2
142pshufd $0xff,%xmm0,%xmm0
143movdqa %xmm1,304(%rsp)
144movdqa %xmm2,320(%rsp)
145movdqa %xmm0,336(%rsp)
146
/*
 * ._bytesatleast256 -- top of the 4-blocks-per-pass loop (r9 >= 256 here).
 *
 * Fills the per-lane counter slots with counter, counter+1, counter+2 and
 * counter+3 (low 32 bits at 352(%rsp), high 32 bits at 368(%rsp)), advances
 * the stored counter by 4, and loads all 16 state vectors into xmm0..xmm15.
 */
147.p2align 4
148._bytesatleast256:
149movq 472(%rsp),%rdx
/* Lane 0: counter. */
150mov  %rdx,%rcx
151shr  $32,%rcx
152movl %edx,352(%rsp)
153movl %ecx,368(%rsp)
/* Lane 1: counter + 1. */
154add  $1,%rdx
155mov  %rdx,%rcx
156shr  $32,%rcx
157movl %edx,4+352(%rsp)
158movl %ecx,4+368(%rsp)
/* Lane 2: counter + 2. */
159add  $1,%rdx
160mov  %rdx,%rcx
161shr  $32,%rcx
162movl %edx,8+352(%rsp)
163movl %ecx,8+368(%rsp)
/* Lane 3: counter + 3. */
164add  $1,%rdx
165mov  %rdx,%rcx
166shr  $32,%rcx
167movl %edx,12+352(%rsp)
168movl %ecx,12+368(%rsp)
/*
 * counter + 4 becomes the new counter. It is written both to 472(%rsp) and
 * into the counter words of the one-block state (80 and 4+96), so the
 * single-block path can continue from here for any remaining bytes.
 */
169add  $1,%rdx
170mov  %rdx,%rcx
171shr  $32,%rcx
172movl %edx,80(%rsp)
173movl %ecx,4+96(%rsp)
174movq %rdx,472(%rsp)
/* r9 is reused as scratch by the output code; park the byte count. */
175movq %r9,480(%rsp)
/* rdx = number of rounds still to run. */
176mov  $20,%rdx
/*
 * Register assignment at loop entry (x-index as defined by the slots):
 *   xmm0 = x5   xmm1 = x10  xmm2 = x15  xmm3 = x14
 *   xmm4 = x3   xmm5 = x6   xmm6 = x11  xmm7 = x1
 *   xmm8 = x7   xmm9 = x13  xmm10 = x2  xmm11 = x9 (counter high)
 *   xmm12 = x0  xmm13 = x12 xmm14 = x4  xmm15 = x8 (counter low)
 */
177movdqa 128(%rsp),%xmm0
178movdqa 144(%rsp),%xmm1
179movdqa 160(%rsp),%xmm2
180movdqa 320(%rsp),%xmm3
181movdqa 336(%rsp),%xmm4
182movdqa 192(%rsp),%xmm5
183movdqa 208(%rsp),%xmm6
184movdqa 240(%rsp),%xmm7
185movdqa 256(%rsp),%xmm8
186movdqa 272(%rsp),%xmm9
187movdqa 288(%rsp),%xmm10
188movdqa 368(%rsp),%xmm11
189movdqa 176(%rsp),%xmm12
190movdqa 224(%rsp),%xmm13
191movdqa 304(%rsp),%xmm14
192movdqa 352(%rsp),%xmm15
193
/*
 * ._mainloop1 -- round loop of the 4-block path.
 *
 * Each xmm register holds one state word for four independent blocks (one
 * block per 32-bit lane). Every seven-instruction group computes
 *     dst ^= rotl32(a + b, k)
 * as: copy a, paddd b, copy the sum, pslld k, pxor into dst, psrld (32 - k),
 * pxor into dst. The rotation amounts used are 7, 9, 13 and 18.
 *
 * All 16 xmm registers are needed for state, so two vectors at a time are
 * parked in the spill slots 384(%rsp) and 400(%rsp) to free temporaries; the
 * parked pair changes as the pass proceeds.
 *
 * One pass performs two rounds (rdx is decremented by 2), so rdx = 20 on
 * entry gives ten passes. The instruction order is a hand/tool schedule that
 * interleaves independent groups; do not reorder.
 */
194.p2align 4
195._mainloop1:
/* Park xmm1 and xmm2 so they can serve as temporaries. */
196movdqa %xmm1,384(%rsp)
197movdqa %xmm2,400(%rsp)
/* xmm14 ^= rotl32(xmm13 + xmm12, 7) */
198movdqa %xmm13,%xmm1
199paddd %xmm12,%xmm1
200movdqa %xmm1,%xmm2
201pslld $7,%xmm1
202pxor  %xmm1,%xmm14
203psrld $25,%xmm2
204pxor  %xmm2,%xmm14
/* xmm11 ^= rotl32(xmm7 + xmm0, 7) */
205movdqa %xmm7,%xmm1
206paddd %xmm0,%xmm1
207movdqa %xmm1,%xmm2
208pslld $7,%xmm1
209pxor  %xmm1,%xmm11
210psrld $25,%xmm2
211pxor  %xmm2,%xmm11
/* xmm15 ^= rotl32(xmm12 + xmm14, 9) */
212movdqa %xmm12,%xmm1
213paddd %xmm14,%xmm1
214movdqa %xmm1,%xmm2
215pslld $9,%xmm1
216pxor  %xmm1,%xmm15
217psrld $23,%xmm2
218pxor  %xmm2,%xmm15
/* xmm9 ^= rotl32(xmm0 + xmm11, 9) */
219movdqa %xmm0,%xmm1
220paddd %xmm11,%xmm1
221movdqa %xmm1,%xmm2
222pslld $9,%xmm1
223pxor  %xmm1,%xmm9
224psrld $23,%xmm2
225pxor  %xmm2,%xmm9
/* xmm13 ^= rotl32(xmm14 + xmm15, 13) */
226movdqa %xmm14,%xmm1
227paddd %xmm15,%xmm1
228movdqa %xmm1,%xmm2
229pslld $13,%xmm1
230pxor  %xmm1,%xmm13
231psrld $19,%xmm2
232pxor  %xmm2,%xmm13
/* xmm7 ^= rotl32(xmm11 + xmm9, 13) */
233movdqa %xmm11,%xmm1
234paddd %xmm9,%xmm1
235movdqa %xmm1,%xmm2
236pslld $13,%xmm1
237pxor  %xmm1,%xmm7
238psrld $19,%xmm2
239pxor  %xmm2,%xmm7
/* xmm12 ^= rotl32(xmm15 + xmm13, 18) */
240movdqa %xmm15,%xmm1
241paddd %xmm13,%xmm1
242movdqa %xmm1,%xmm2
243pslld $18,%xmm1
244pxor  %xmm1,%xmm12
245psrld $14,%xmm2
246pxor  %xmm2,%xmm12
/* Swap: bring back the vector parked at 384 and park xmm12 there instead. */
247movdqa 384(%rsp),%xmm1
248movdqa %xmm12,384(%rsp)
/* xmm0 ^= rotl32(xmm9 + xmm7, 18); xmm12 is now a temporary. */
249movdqa %xmm9,%xmm2
250paddd %xmm7,%xmm2
251movdqa %xmm2,%xmm12
252pslld $18,%xmm2
253pxor  %xmm2,%xmm0
254psrld $14,%xmm12
255pxor  %xmm12,%xmm0
/* xmm3 ^= rotl32(xmm5 + xmm1, 7) */
256movdqa %xmm5,%xmm2
257paddd %xmm1,%xmm2
258movdqa %xmm2,%xmm12
259pslld $7,%xmm2
260pxor  %xmm2,%xmm3
261psrld $25,%xmm12
262pxor  %xmm12,%xmm3
/* Swap: bring back the vector parked at 400 and park xmm0 there instead. */
263movdqa 400(%rsp),%xmm2
264movdqa %xmm0,400(%rsp)
/* xmm4 ^= rotl32(xmm6 + xmm2, 7); xmm0 is now a temporary. */
265movdqa %xmm6,%xmm0
266paddd %xmm2,%xmm0
267movdqa %xmm0,%xmm12
268pslld $7,%xmm0
269pxor  %xmm0,%xmm4
270psrld $25,%xmm12
271pxor  %xmm12,%xmm4
/* xmm10 ^= rotl32(xmm1 + xmm3, 9) */
272movdqa %xmm1,%xmm0
273paddd %xmm3,%xmm0
274movdqa %xmm0,%xmm12
275pslld $9,%xmm0
276pxor  %xmm0,%xmm10
277psrld $23,%xmm12
278pxor  %xmm12,%xmm10
/* xmm8 ^= rotl32(xmm2 + xmm4, 9) */
279movdqa %xmm2,%xmm0
280paddd %xmm4,%xmm0
281movdqa %xmm0,%xmm12
282pslld $9,%xmm0
283pxor  %xmm0,%xmm8
284psrld $23,%xmm12
285pxor  %xmm12,%xmm8
/* xmm5 ^= rotl32(xmm3 + xmm10, 13) */
286movdqa %xmm3,%xmm0
287paddd %xmm10,%xmm0
288movdqa %xmm0,%xmm12
289pslld $13,%xmm0
290pxor  %xmm0,%xmm5
291psrld $19,%xmm12
292pxor  %xmm12,%xmm5
/* xmm6 ^= rotl32(xmm4 + xmm8, 13) */
293movdqa %xmm4,%xmm0
294paddd %xmm8,%xmm0
295movdqa %xmm0,%xmm12
296pslld $13,%xmm0
297pxor  %xmm0,%xmm6
298psrld $19,%xmm12
299pxor  %xmm12,%xmm6
/* xmm1 ^= rotl32(xmm10 + xmm5, 18) */
300movdqa %xmm10,%xmm0
301paddd %xmm5,%xmm0
302movdqa %xmm0,%xmm12
303pslld $18,%xmm0
304pxor  %xmm0,%xmm1
305psrld $14,%xmm12
306pxor  %xmm12,%xmm1
/*
 * Swap through 384 again: xmm0 receives the vector parked there, xmm1 is
 * parked. The groups from here on start the second round of this pass while
 * the last group of the first round (into xmm2) is still outstanding.
 */
307movdqa 384(%rsp),%xmm0
308movdqa %xmm1,384(%rsp)
/* xmm7 ^= rotl32(xmm4 + xmm0, 7) */
309movdqa %xmm4,%xmm1
310paddd %xmm0,%xmm1
311movdqa %xmm1,%xmm12
312pslld $7,%xmm1
313pxor  %xmm1,%xmm7
314psrld $25,%xmm12
315pxor  %xmm12,%xmm7
/* xmm2 ^= rotl32(xmm8 + xmm6, 18) */
316movdqa %xmm8,%xmm1
317paddd %xmm6,%xmm1
318movdqa %xmm1,%xmm12
319pslld $18,%xmm1
320pxor  %xmm1,%xmm2
321psrld $14,%xmm12
322pxor  %xmm12,%xmm2
/* Swap through 400: xmm12 receives the parked vector, xmm2 is parked. */
323movdqa 400(%rsp),%xmm12
324movdqa %xmm2,400(%rsp)
/* xmm5 ^= rotl32(xmm14 + xmm12, 7) */
325movdqa %xmm14,%xmm1
326paddd %xmm12,%xmm1
327movdqa %xmm1,%xmm2
328pslld $7,%xmm1
329pxor  %xmm1,%xmm5
330psrld $25,%xmm2
331pxor  %xmm2,%xmm5
/* xmm10 ^= rotl32(xmm0 + xmm7, 9) */
332movdqa %xmm0,%xmm1
333paddd %xmm7,%xmm1
334movdqa %xmm1,%xmm2
335pslld $9,%xmm1
336pxor  %xmm1,%xmm10
337psrld $23,%xmm2
338pxor  %xmm2,%xmm10
/* xmm8 ^= rotl32(xmm12 + xmm5, 9) */
339movdqa %xmm12,%xmm1
340paddd %xmm5,%xmm1
341movdqa %xmm1,%xmm2
342pslld $9,%xmm1
343pxor  %xmm1,%xmm8
344psrld $23,%xmm2
345pxor  %xmm2,%xmm8
/* xmm4 ^= rotl32(xmm7 + xmm10, 13) */
346movdqa %xmm7,%xmm1
347paddd %xmm10,%xmm1
348movdqa %xmm1,%xmm2
349pslld $13,%xmm1
350pxor  %xmm1,%xmm4
351psrld $19,%xmm2
352pxor  %xmm2,%xmm4
/* xmm14 ^= rotl32(xmm5 + xmm8, 13) */
353movdqa %xmm5,%xmm1
354paddd %xmm8,%xmm1
355movdqa %xmm1,%xmm2
356pslld $13,%xmm1
357pxor  %xmm1,%xmm14
358psrld $19,%xmm2
359pxor  %xmm2,%xmm14
/* xmm0 ^= rotl32(xmm10 + xmm4, 18) */
360movdqa %xmm10,%xmm1
361paddd %xmm4,%xmm1
362movdqa %xmm1,%xmm2
363pslld $18,%xmm1
364pxor  %xmm1,%xmm0
365psrld $14,%xmm2
366pxor  %xmm2,%xmm0
/* Swap through 384: xmm1 receives the parked vector, xmm0 is parked. */
367movdqa 384(%rsp),%xmm1
368movdqa %xmm0,384(%rsp)
/* xmm12 ^= rotl32(xmm8 + xmm14, 18) */
369movdqa %xmm8,%xmm0
370paddd %xmm14,%xmm0
371movdqa %xmm0,%xmm2
372pslld $18,%xmm0
373pxor  %xmm0,%xmm12
374psrld $14,%xmm2
375pxor  %xmm2,%xmm12
/* xmm6 ^= rotl32(xmm11 + xmm1, 7) */
376movdqa %xmm11,%xmm0
377paddd %xmm1,%xmm0
378movdqa %xmm0,%xmm2
379pslld $7,%xmm0
380pxor  %xmm0,%xmm6
381psrld $25,%xmm2
382pxor  %xmm2,%xmm6
/* Swap through 400: xmm2 receives the parked vector, xmm12 is parked. */
383movdqa 400(%rsp),%xmm2
384movdqa %xmm12,400(%rsp)
/* xmm13 ^= rotl32(xmm3 + xmm2, 7) */
385movdqa %xmm3,%xmm0
386paddd %xmm2,%xmm0
387movdqa %xmm0,%xmm12
388pslld $7,%xmm0
389pxor  %xmm0,%xmm13
390psrld $25,%xmm12
391pxor  %xmm12,%xmm13
/* xmm15 ^= rotl32(xmm1 + xmm6, 9) */
392movdqa %xmm1,%xmm0
393paddd %xmm6,%xmm0
394movdqa %xmm0,%xmm12
395pslld $9,%xmm0
396pxor  %xmm0,%xmm15
397psrld $23,%xmm12
398pxor  %xmm12,%xmm15
/* xmm9 ^= rotl32(xmm2 + xmm13, 9) */
399movdqa %xmm2,%xmm0
400paddd %xmm13,%xmm0
401movdqa %xmm0,%xmm12
402pslld $9,%xmm0
403pxor  %xmm0,%xmm9
404psrld $23,%xmm12
405pxor  %xmm12,%xmm9
/* xmm11 ^= rotl32(xmm6 + xmm15, 13) */
406movdqa %xmm6,%xmm0
407paddd %xmm15,%xmm0
408movdqa %xmm0,%xmm12
409pslld $13,%xmm0
410pxor  %xmm0,%xmm11
411psrld $19,%xmm12
412pxor  %xmm12,%xmm11
/* xmm3 ^= rotl32(xmm13 + xmm9, 13) */
413movdqa %xmm13,%xmm0
414paddd %xmm9,%xmm0
415movdqa %xmm0,%xmm12
416pslld $13,%xmm0
417pxor  %xmm0,%xmm3
418psrld $19,%xmm12
419pxor  %xmm12,%xmm3
/* xmm1 ^= rotl32(xmm15 + xmm11, 18) */
420movdqa %xmm15,%xmm0
421paddd %xmm11,%xmm0
422movdqa %xmm0,%xmm12
423pslld $18,%xmm0
424pxor  %xmm0,%xmm1
425psrld $14,%xmm12
426pxor  %xmm12,%xmm1
/* xmm2 ^= rotl32(xmm9 + xmm3, 18) */
427movdqa %xmm9,%xmm0
428paddd %xmm3,%xmm0
429movdqa %xmm0,%xmm12
430pslld $18,%xmm0
431pxor  %xmm0,%xmm2
432psrld $14,%xmm12
433pxor  %xmm12,%xmm2
/*
 * Un-park the two spilled vectors into xmm12 and xmm0, restoring the
 * register assignment the loop started with.
 */
434movdqa 384(%rsp),%xmm12
435movdqa 400(%rsp),%xmm0
/* Two rounds done; repeat while the remaining round count is above zero. */
436sub  $2,%rdx
437ja ._mainloop1
438
/*
 * Output stage of the 4-block path (falls through from ._mainloop1).
 *
 * For four state words at a time:
 *   1. paddd adds back the pre-round value of each word from its slot.
 *   2. Lane 0 of each vector is moved to a general register, XOR-ed with
 *      the matching input word at rsi and stored at rdi. Only the low 32
 *      bits of each general register are used (xorl/movl).
 *   3. pshufd $0x39 rotates the lanes down by one so the next block's word
 *      sits in lane 0; lane j belongs to the block at byte offset 64*j.
 * The last lane needs no rotation afterwards, hence three pshufd rounds and
 * four store rounds per group.
 *
 * rdx, rcx, r8 and r9 are scratch here; the byte count lives in 480(%rsp).
 */
/* Words x0..x3 -> byte offsets 0..15 of each block. */
439paddd 176(%rsp),%xmm12
440paddd 240(%rsp),%xmm7
441paddd 288(%rsp),%xmm10
442paddd 336(%rsp),%xmm4
/* Block 0. */
443movd   %xmm12,%rdx
444movd   %xmm7,%rcx
445movd   %xmm10,%r8
446movd   %xmm4,%r9
447pshufd $0x39,%xmm12,%xmm12
448pshufd $0x39,%xmm7,%xmm7
449pshufd $0x39,%xmm10,%xmm10
450pshufd $0x39,%xmm4,%xmm4
451xorl 0(%rsi),%edx
452xorl 4(%rsi),%ecx
453xorl 8(%rsi),%r8d
454xorl 12(%rsi),%r9d
455movl   %edx,0(%rdi)
456movl   %ecx,4(%rdi)
457movl   %r8d,8(%rdi)
458movl   %r9d,12(%rdi)
/* Block 1 (+64). */
459movd   %xmm12,%rdx
460movd   %xmm7,%rcx
461movd   %xmm10,%r8
462movd   %xmm4,%r9
463pshufd $0x39,%xmm12,%xmm12
464pshufd $0x39,%xmm7,%xmm7
465pshufd $0x39,%xmm10,%xmm10
466pshufd $0x39,%xmm4,%xmm4
467xorl 64(%rsi),%edx
468xorl 68(%rsi),%ecx
469xorl 72(%rsi),%r8d
470xorl 76(%rsi),%r9d
471movl   %edx,64(%rdi)
472movl   %ecx,68(%rdi)
473movl   %r8d,72(%rdi)
474movl   %r9d,76(%rdi)
/* Block 2 (+128). */
475movd   %xmm12,%rdx
476movd   %xmm7,%rcx
477movd   %xmm10,%r8
478movd   %xmm4,%r9
479pshufd $0x39,%xmm12,%xmm12
480pshufd $0x39,%xmm7,%xmm7
481pshufd $0x39,%xmm10,%xmm10
482pshufd $0x39,%xmm4,%xmm4
483xorl 128(%rsi),%edx
484xorl 132(%rsi),%ecx
485xorl 136(%rsi),%r8d
486xorl 140(%rsi),%r9d
487movl   %edx,128(%rdi)
488movl   %ecx,132(%rdi)
489movl   %r8d,136(%rdi)
490movl   %r9d,140(%rdi)
/* Block 3 (+192). */
491movd   %xmm12,%rdx
492movd   %xmm7,%rcx
493movd   %xmm10,%r8
494movd   %xmm4,%r9
495xorl 192(%rsi),%edx
496xorl 196(%rsi),%ecx
497xorl 200(%rsi),%r8d
498xorl 204(%rsi),%r9d
499movl   %edx,192(%rdi)
500movl   %ecx,196(%rdi)
501movl   %r8d,200(%rdi)
502movl   %r9d,204(%rdi)
/* Words x4..x7 -> byte offsets 16..31 of each block. */
503paddd 304(%rsp),%xmm14
504paddd 128(%rsp),%xmm0
505paddd 192(%rsp),%xmm5
506paddd 256(%rsp),%xmm8
/* Block 0. */
507movd   %xmm14,%rdx
508movd   %xmm0,%rcx
509movd   %xmm5,%r8
510movd   %xmm8,%r9
511pshufd $0x39,%xmm14,%xmm14
512pshufd $0x39,%xmm0,%xmm0
513pshufd $0x39,%xmm5,%xmm5
514pshufd $0x39,%xmm8,%xmm8
515xorl 16(%rsi),%edx
516xorl 20(%rsi),%ecx
517xorl 24(%rsi),%r8d
518xorl 28(%rsi),%r9d
519movl   %edx,16(%rdi)
520movl   %ecx,20(%rdi)
521movl   %r8d,24(%rdi)
522movl   %r9d,28(%rdi)
/* Block 1 (+64). */
523movd   %xmm14,%rdx
524movd   %xmm0,%rcx
525movd   %xmm5,%r8
526movd   %xmm8,%r9
527pshufd $0x39,%xmm14,%xmm14
528pshufd $0x39,%xmm0,%xmm0
529pshufd $0x39,%xmm5,%xmm5
530pshufd $0x39,%xmm8,%xmm8
531xorl 80(%rsi),%edx
532xorl 84(%rsi),%ecx
533xorl 88(%rsi),%r8d
534xorl 92(%rsi),%r9d
535movl   %edx,80(%rdi)
536movl   %ecx,84(%rdi)
537movl   %r8d,88(%rdi)
538movl   %r9d,92(%rdi)
/* Block 2 (+128). */
539movd   %xmm14,%rdx
540movd   %xmm0,%rcx
541movd   %xmm5,%r8
542movd   %xmm8,%r9
543pshufd $0x39,%xmm14,%xmm14
544pshufd $0x39,%xmm0,%xmm0
545pshufd $0x39,%xmm5,%xmm5
546pshufd $0x39,%xmm8,%xmm8
547xorl 144(%rsi),%edx
548xorl 148(%rsi),%ecx
549xorl 152(%rsi),%r8d
550xorl 156(%rsi),%r9d
551movl   %edx,144(%rdi)
552movl   %ecx,148(%rdi)
553movl   %r8d,152(%rdi)
554movl   %r9d,156(%rdi)
/* Block 3 (+192). */
555movd   %xmm14,%rdx
556movd   %xmm0,%rcx
557movd   %xmm5,%r8
558movd   %xmm8,%r9
559xorl 208(%rsi),%edx
560xorl 212(%rsi),%ecx
561xorl 216(%rsi),%r8d
562xorl 220(%rsi),%r9d
563movl   %edx,208(%rdi)
564movl   %ecx,212(%rdi)
565movl   %r8d,216(%rdi)
566movl   %r9d,220(%rdi)
/*
 * Words x8..x11 -> byte offsets 32..47 of each block. The slots at 352 and
 * 368 hold the per-lane counter values, so each block gets its own counter
 * added back.
 */
567paddd 352(%rsp),%xmm15
568paddd 368(%rsp),%xmm11
569paddd 144(%rsp),%xmm1
570paddd 208(%rsp),%xmm6
/* Block 0. */
571movd   %xmm15,%rdx
572movd   %xmm11,%rcx
573movd   %xmm1,%r8
574movd   %xmm6,%r9
575pshufd $0x39,%xmm15,%xmm15
576pshufd $0x39,%xmm11,%xmm11
577pshufd $0x39,%xmm1,%xmm1
578pshufd $0x39,%xmm6,%xmm6
579xorl 32(%rsi),%edx
580xorl 36(%rsi),%ecx
581xorl 40(%rsi),%r8d
582xorl 44(%rsi),%r9d
583movl   %edx,32(%rdi)
584movl   %ecx,36(%rdi)
585movl   %r8d,40(%rdi)
586movl   %r9d,44(%rdi)
/* Block 1 (+64). */
587movd   %xmm15,%rdx
588movd   %xmm11,%rcx
589movd   %xmm1,%r8
590movd   %xmm6,%r9
591pshufd $0x39,%xmm15,%xmm15
592pshufd $0x39,%xmm11,%xmm11
593pshufd $0x39,%xmm1,%xmm1
594pshufd $0x39,%xmm6,%xmm6
595xorl 96(%rsi),%edx
596xorl 100(%rsi),%ecx
597xorl 104(%rsi),%r8d
598xorl 108(%rsi),%r9d
599movl   %edx,96(%rdi)
600movl   %ecx,100(%rdi)
601movl   %r8d,104(%rdi)
602movl   %r9d,108(%rdi)
/* Block 2 (+128). */
603movd   %xmm15,%rdx
604movd   %xmm11,%rcx
605movd   %xmm1,%r8
606movd   %xmm6,%r9
607pshufd $0x39,%xmm15,%xmm15
608pshufd $0x39,%xmm11,%xmm11
609pshufd $0x39,%xmm1,%xmm1
610pshufd $0x39,%xmm6,%xmm6
611xorl 160(%rsi),%edx
612xorl 164(%rsi),%ecx
613xorl 168(%rsi),%r8d
614xorl 172(%rsi),%r9d
615movl   %edx,160(%rdi)
616movl   %ecx,164(%rdi)
617movl   %r8d,168(%rdi)
618movl   %r9d,172(%rdi)
/* Block 3 (+192). */
619movd   %xmm15,%rdx
620movd   %xmm11,%rcx
621movd   %xmm1,%r8
622movd   %xmm6,%r9
623xorl 224(%rsi),%edx
624xorl 228(%rsi),%ecx
625xorl 232(%rsi),%r8d
626xorl 236(%rsi),%r9d
627movl   %edx,224(%rdi)
628movl   %ecx,228(%rdi)
629movl   %r8d,232(%rdi)
630movl   %r9d,236(%rdi)
/* Words x12..x15 -> byte offsets 48..63 of each block. */
631paddd 224(%rsp),%xmm13
632paddd 272(%rsp),%xmm9
633paddd 320(%rsp),%xmm3
634paddd 160(%rsp),%xmm2
/* Block 0. */
635movd   %xmm13,%rdx
636movd   %xmm9,%rcx
637movd   %xmm3,%r8
638movd   %xmm2,%r9
639pshufd $0x39,%xmm13,%xmm13
640pshufd $0x39,%xmm9,%xmm9
641pshufd $0x39,%xmm3,%xmm3
642pshufd $0x39,%xmm2,%xmm2
643xorl 48(%rsi),%edx
644xorl 52(%rsi),%ecx
645xorl 56(%rsi),%r8d
646xorl 60(%rsi),%r9d
647movl   %edx,48(%rdi)
648movl   %ecx,52(%rdi)
649movl   %r8d,56(%rdi)
650movl   %r9d,60(%rdi)
/* Block 1 (+64). */
651movd   %xmm13,%rdx
652movd   %xmm9,%rcx
653movd   %xmm3,%r8
654movd   %xmm2,%r9
655pshufd $0x39,%xmm13,%xmm13
656pshufd $0x39,%xmm9,%xmm9
657pshufd $0x39,%xmm3,%xmm3
658pshufd $0x39,%xmm2,%xmm2
659xorl 112(%rsi),%edx
660xorl 116(%rsi),%ecx
661xorl 120(%rsi),%r8d
662xorl 124(%rsi),%r9d
663movl   %edx,112(%rdi)
664movl   %ecx,116(%rdi)
665movl   %r8d,120(%rdi)
666movl   %r9d,124(%rdi)
/*
 * Block 2 (+128). Unlike the three groups above, this one still rotates the
 * lanes once more before the block-3 stores; lane 0 then holds block 3.
 */
667movd   %xmm13,%rdx
668movd   %xmm9,%rcx
669movd   %xmm3,%r8
670movd   %xmm2,%r9
671pshufd $0x39,%xmm13,%xmm13
672pshufd $0x39,%xmm9,%xmm9
673pshufd $0x39,%xmm3,%xmm3
674pshufd $0x39,%xmm2,%xmm2
675xorl 176(%rsi),%edx
676xorl 180(%rsi),%ecx
677xorl 184(%rsi),%r8d
678xorl 188(%rsi),%r9d
679movl   %edx,176(%rdi)
680movl   %ecx,180(%rdi)
681movl   %r8d,184(%rdi)
682movl   %r9d,188(%rdi)
/* Block 3 (+192). */
683movd   %xmm13,%rdx
684movd   %xmm9,%rcx
685movd   %xmm3,%r8
686movd   %xmm2,%r9
687xorl 240(%rsi),%edx
688xorl 244(%rsi),%ecx
689xorl 248(%rsi),%r8d
690xorl 252(%rsi),%r9d
691movl   %edx,240(%rdi)
692movl   %ecx,244(%rdi)
693movl   %r8d,248(%rdi)
694movl   %r9d,252(%rdi)
/*
 * 256 bytes done: reload the byte count, advance both pointers, and run
 * another 4-block pass while at least 256 bytes remain.
 */
695movq 480(%rsp),%r9
696sub  $256,%r9
697add  $256,%rsi
698add  $256,%rdi
699cmp  $256,%r9
700jae ._bytesatleast256
701
/* Exactly consumed: finish. Otherwise fall through with 1..255 bytes left. */
702cmp  $0,%r9
703jbe ._done
704
/*
 * ._bytesbetween1and255 -- one-block-at-a-time path.
 *
 * On entry: r9 = bytes remaining (> 0), rsi = input, rdi = output, and the
 * one-block state at 64..127(%rsp) already carries the current counter.
 *
 * A final chunk shorter than 64 bytes is first copied into the scratch area
 * at 0(%rsp), and both pointers are redirected there, so the block code
 * below can always read and write a full 64 bytes. The caller's real
 * destination is kept in rdx for the copy-back after the block is produced.
 */
705._bytesbetween1and255:
706cmp  $64,%r9
707jae ._nocopy
708
/* rdx = real destination; copy r9 input bytes to the scratch block. */
709mov  %rdi,%rdx
710leaq 0(%rsp),%rdi
711mov  %r9,%rcx
712rep movsb
/* Process the block in place inside the scratch area. */
713leaq 0(%rsp),%rdi
714leaq 0(%rsp),%rsi
715
/*
 * ._nocopy -- load the four state rows for one block.
 *   xmm0 = row at 112, xmm1 = row at 64, xmm2 = row at 80, xmm3 = row at 96;
 *   xmm4 starts as a copy of xmm1 (first addend of the round loop);
 *   rcx = number of rounds still to run.
 */
716._nocopy:
/* r9 is reused as scratch by the output code; park the byte count. */
717movq %r9,480(%rsp)
718movdqa 112(%rsp),%xmm0
719movdqa 64(%rsp),%xmm1
720movdqa 80(%rsp),%xmm2
721movdqa 96(%rsp),%xmm3
722movdqa %xmm1,%xmm4
723mov  $20,%rcx
724
/*
 * ._mainloop2 -- round loop of the single-block path.
 *
 * xmm0..xmm3 hold the four permuted state rows; xmm4, xmm5 and xmm6 are
 * temporaries. Each step computes
 *     row ^= rotl32(a + b, k)
 * for all four lanes at once with k = 7, 9, 13, 18 (pslld k, psrld 32 - k,
 * two pxor). Between steps, pshufd $0x93 / $0x4e / $0x39 rotate the lanes of
 * a row by one / two / three positions so the next step lines up.
 *
 * One pass performs four rounds (rcx is decremented by 4), so rcx = 20 on
 * entry gives five passes. On entry to each pass xmm4 must already equal
 * xmm1. The instruction order is scheduled; do not reorder.
 */
725.p2align 4
726._mainloop2:
/* Round 1 of this pass: xmm3 ^= rotl32(xmm1 + xmm0, 7) ... */
727paddd %xmm0,%xmm4
728movdqa %xmm0,%xmm5
729movdqa %xmm4,%xmm6
730pslld $7,%xmm4
731psrld $25,%xmm6
732pxor  %xmm4,%xmm3
733pxor  %xmm6,%xmm3
/* ... xmm2 ^= rotl32(xmm0 + xmm3, 9) ... */
734paddd %xmm3,%xmm5
735movdqa %xmm3,%xmm4
736movdqa %xmm5,%xmm6
737pslld $9,%xmm5
738psrld $23,%xmm6
739pxor  %xmm5,%xmm2
740pshufd $0x93,%xmm3,%xmm3
741pxor  %xmm6,%xmm2
/* ... xmm1 ^= rotl32(xmm3 + xmm2, 13), using the unrotated copy of xmm3 ... */
742paddd %xmm2,%xmm4
743movdqa %xmm2,%xmm5
744movdqa %xmm4,%xmm6
745pslld $13,%xmm4
746psrld $19,%xmm6
747pxor  %xmm4,%xmm1
748pshufd $0x4e,%xmm2,%xmm2
749pxor  %xmm6,%xmm1
/* ... xmm0 ^= rotl32(xmm2 + xmm1, 18), using the unrotated copy of xmm2. */
750paddd %xmm1,%xmm5
751movdqa %xmm3,%xmm4
752movdqa %xmm5,%xmm6
753pslld $18,%xmm5
754psrld $14,%xmm6
755pxor  %xmm5,%xmm0
756pshufd $0x39,%xmm1,%xmm1
757pxor  %xmm6,%xmm0
/* Round 2: same four steps with the roles of xmm1 and xmm3 exchanged. */
758paddd %xmm0,%xmm4
759movdqa %xmm0,%xmm5
760movdqa %xmm4,%xmm6
761pslld $7,%xmm4
762psrld $25,%xmm6
763pxor  %xmm4,%xmm1
764pxor  %xmm6,%xmm1
765paddd %xmm1,%xmm5
766movdqa %xmm1,%xmm4
767movdqa %xmm5,%xmm6
768pslld $9,%xmm5
769psrld $23,%xmm6
770pxor  %xmm5,%xmm2
771pshufd $0x93,%xmm1,%xmm1
772pxor  %xmm6,%xmm2
773paddd %xmm2,%xmm4
774movdqa %xmm2,%xmm5
775movdqa %xmm4,%xmm6
776pslld $13,%xmm4
777psrld $19,%xmm6
778pxor  %xmm4,%xmm3
779pshufd $0x4e,%xmm2,%xmm2
780pxor  %xmm6,%xmm3
781paddd %xmm3,%xmm5
782movdqa %xmm1,%xmm4
783movdqa %xmm5,%xmm6
784pslld $18,%xmm5
785psrld $14,%xmm6
786pxor  %xmm5,%xmm0
787pshufd $0x39,%xmm3,%xmm3
788pxor  %xmm6,%xmm0
/* Round 3: identical to round 1. */
789paddd %xmm0,%xmm4
790movdqa %xmm0,%xmm5
791movdqa %xmm4,%xmm6
792pslld $7,%xmm4
793psrld $25,%xmm6
794pxor  %xmm4,%xmm3
795pxor  %xmm6,%xmm3
796paddd %xmm3,%xmm5
797movdqa %xmm3,%xmm4
798movdqa %xmm5,%xmm6
799pslld $9,%xmm5
800psrld $23,%xmm6
801pxor  %xmm5,%xmm2
802pshufd $0x93,%xmm3,%xmm3
803pxor  %xmm6,%xmm2
804paddd %xmm2,%xmm4
805movdqa %xmm2,%xmm5
806movdqa %xmm4,%xmm6
807pslld $13,%xmm4
808psrld $19,%xmm6
809pxor  %xmm4,%xmm1
810pshufd $0x4e,%xmm2,%xmm2
811pxor  %xmm6,%xmm1
812paddd %xmm1,%xmm5
813movdqa %xmm3,%xmm4
814movdqa %xmm5,%xmm6
815pslld $18,%xmm5
816psrld $14,%xmm6
817pxor  %xmm5,%xmm0
818pshufd $0x39,%xmm1,%xmm1
819pxor  %xmm6,%xmm0
/* Round 4: identical to round 2, with the loop bookkeeping folded in. */
820paddd %xmm0,%xmm4
821movdqa %xmm0,%xmm5
822movdqa %xmm4,%xmm6
823pslld $7,%xmm4
824psrld $25,%xmm6
825pxor  %xmm4,%xmm1
826pxor  %xmm6,%xmm1
827paddd %xmm1,%xmm5
828movdqa %xmm1,%xmm4
829movdqa %xmm5,%xmm6
830pslld $9,%xmm5
831psrld $23,%xmm6
832pxor  %xmm5,%xmm2
833pshufd $0x93,%xmm1,%xmm1
834pxor  %xmm6,%xmm2
835paddd %xmm2,%xmm4
836movdqa %xmm2,%xmm5
837movdqa %xmm4,%xmm6
838pslld $13,%xmm4
839psrld $19,%xmm6
840pxor  %xmm4,%xmm3
841pshufd $0x4e,%xmm2,%xmm2
842pxor  %xmm6,%xmm3
/*
 * Decrement the round count here; the flags it sets are consumed by the ja
 * at the bottom. Every instruction in between is an SSE operation, none of
 * which writes the integer flags.
 */
843sub  $4,%rcx
844paddd %xmm3,%xmm5
/* xmm4 = xmm1 re-establishes the loop-entry condition for the next pass. */
845movdqa %xmm1,%xmm4
846movdqa %xmm5,%xmm6
847pslld $18,%xmm5
/* Zeroes xmm7; nothing on the visible single-block path reads xmm7 afterwards. */
848pxor   %xmm7,%xmm7
849psrld $14,%xmm6
850pxor  %xmm5,%xmm0
851pshufd $0x39,%xmm3,%xmm3
852pxor  %xmm6,%xmm0
/* Repeat while the remaining round count is above zero. */
853ja ._mainloop2
854
/*
 * Output stage of the single-block path (falls through from ._mainloop2).
 *
 * Adds the pre-round rows back, then walks the four lanes of xmm0..xmm3:
 * lane 0 of each row goes to a general register, is XOR-ed with the input
 * word at rsi and stored at rdi; pshufd $0x39 then rotates the next lane
 * into position. The byte offsets undo the permuted row layout, so the
 * 64-byte block ends up in natural word order. Only the low 32 bits of each
 * general register are used (xorl/movl).
 *
 * Afterwards the block counter is incremented and the remaining byte count
 * decides between looping, finishing, and copying a partial block back.
 */
855paddd 112(%rsp),%xmm0
856paddd 64(%rsp),%xmm1
857paddd 80(%rsp),%xmm2
858paddd 96(%rsp),%xmm3
/* Lane 0 of each row: words at byte offsets 0, 48, 32, 16. */
859movd   %xmm0,%rcx
860movd   %xmm1,%r8
861movd   %xmm2,%r9
862movd   %xmm3,%rax
863pshufd $0x39,%xmm0,%xmm0
864pshufd $0x39,%xmm1,%xmm1
865pshufd $0x39,%xmm2,%xmm2
866pshufd $0x39,%xmm3,%xmm3
867xorl 0(%rsi),%ecx
868xorl 48(%rsi),%r8d
869xorl 32(%rsi),%r9d
870xorl 16(%rsi),%eax
871movl   %ecx,0(%rdi)
872movl   %r8d,48(%rdi)
873movl   %r9d,32(%rdi)
874movl   %eax,16(%rdi)
/* Lane 1: words at byte offsets 20, 4, 52, 36. */
875movd   %xmm0,%rcx
876movd   %xmm1,%r8
877movd   %xmm2,%r9
878movd   %xmm3,%rax
879pshufd $0x39,%xmm0,%xmm0
880pshufd $0x39,%xmm1,%xmm1
881pshufd $0x39,%xmm2,%xmm2
882pshufd $0x39,%xmm3,%xmm3
883xorl 20(%rsi),%ecx
884xorl 4(%rsi),%r8d
885xorl 52(%rsi),%r9d
886xorl 36(%rsi),%eax
887movl   %ecx,20(%rdi)
888movl   %r8d,4(%rdi)
889movl   %r9d,52(%rdi)
890movl   %eax,36(%rdi)
/* Lane 2: words at byte offsets 40, 24, 8, 56. */
891movd   %xmm0,%rcx
892movd   %xmm1,%r8
893movd   %xmm2,%r9
894movd   %xmm3,%rax
895pshufd $0x39,%xmm0,%xmm0
896pshufd $0x39,%xmm1,%xmm1
897pshufd $0x39,%xmm2,%xmm2
898pshufd $0x39,%xmm3,%xmm3
899xorl 40(%rsi),%ecx
900xorl 24(%rsi),%r8d
901xorl 8(%rsi),%r9d
902xorl 56(%rsi),%eax
903movl   %ecx,40(%rdi)
904movl   %r8d,24(%rdi)
905movl   %r9d,8(%rdi)
906movl   %eax,56(%rdi)
/* Lane 3: words at byte offsets 60, 44, 28, 12. */
907movd   %xmm0,%rcx
908movd   %xmm1,%r8
909movd   %xmm2,%r9
910movd   %xmm3,%rax
911xorl 60(%rsi),%ecx
912xorl 44(%rsi),%r8d
913xorl 28(%rsi),%r9d
914xorl 12(%rsi),%eax
915movl   %ecx,60(%rdi)
916movl   %r8d,44(%rdi)
917movl   %r9d,28(%rdi)
918movl   %eax,12(%rdi)
/*
 * Reload the byte count, then bump the 64-bit block counter and mirror its
 * halves into the one-block state (low word at 80, high word at 4+96).
 */
919movq 480(%rsp),%r9
920movq 472(%rsp),%rcx
921add  $1,%rcx
922mov  %rcx,%r8
923shr  $32,%r8
924movl %ecx,80(%rsp)
925movl %r8d,4+96(%rsp)
926movq %rcx,472(%rsp)
/*
 * One compare feeds both branches: more than 64 bytes were pending -> go
 * process the next block; exactly 64 -> finished; fewer -> fall through.
 */
927cmp  $64,%r9
928ja ._bytesatleast65
929jae ._bytesatleast64
930
/*
 * Partial block: the result sits in the scratch area (rdi points there).
 * Copy only the r9 requested bytes to the real destination that
 * ._bytesbetween1and255 saved in rdx.
 */
931mov  %rdi,%rsi
932mov  %rdx,%rdi
933mov  %r9,%rcx
934rep movsb
935
/*
 * ._bytesatleast64 / ._done -- common epilogue for both exported functions.
 *
 * Restores the registers saved by the prologue, releases the frame by adding
 * back the size stored at 416(%rsp), and returns 0 in rax.
 */
936._bytesatleast64:
937._done:
938movq 416(%rsp),%r11
939movq 424(%rsp),%r12
940movq 432(%rsp),%r13
941movq 440(%rsp),%r14
942movq 448(%rsp),%r15
943movq 456(%rsp),%rbx
944movq 464(%rsp),%rbp
/* r11 = 512 + original (rsp & 31), so this restores the entry rsp exactly. */
945add %r11,%rsp
/* Return value 0. */
946xor %rax,%rax
/* NOTE(review): copies rsi into rdx before returning; no visible consumer -- confirm whether callers rely on it. */
947mov %rsi,%rdx
948ret
949
/*
 * ._bytesatleast65 -- more than one block was pending: account for the 64
 * bytes just written, advance both pointers, and process the next block.
 */
950._bytesatleast65:
951sub  $64,%r9
952add  $64,%rdi
953add  $64,%rsi
954jmp ._bytesbetween1and255
955
956#endif
957
/*
 * On Linux/ELF builds, emit an empty .note.GNU-stack section with no flags.
 * GNU toolchains conventionally read this as "this object does not need an
 * executable stack".
 */
958#if defined(__linux__) && defined(__ELF__)
959.section .note.GNU-stack,"",%progbits
960#endif
961