#!/usr/bin/env perl

# Command line: [flavour] [output]
# A first argument that contains a dot is taken to be the output file name,
# in which case no flavour is in effect.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# $win64 is 1 when the flavour names nasm, masm or mingw64, or when the
# output file name ends in ".asm"; otherwise it is 0.
$win64 = ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/) ? 1 : 0;

# Find x86_64-xlate.pl either next to this script or in a perlasm/
# sub-directory of the script's directory.
# $dir is the directory part of $0 including its trailing separator; it is
# explicitly empty when $0 has no directory part, so that a stale $1 from an
# earlier match can never leak into the path.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator,
# which is run with the same perl binary ($^X) that runs this script.
# The translator path and the output name are quoted so that paths containing
# blanks survive the shell.  Absent arguments are left out entirely rather
# than passed as empty strings, which keeps the one-argument and
# no-output-file calling forms working.
my $xlate_cmd = "\"$^X\" \"$xlate\"";
$xlate_cmd .= " $flavour"      if (defined($flavour) && $flavour ne "");
$xlate_cmd .= " \"$output\""   if (defined($output)  && $output  ne "");
open OUT,"| $xlate_cmd" or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers that carry the first four integer arguments; the assembly
# templates below refer to them as $arg1..$arg4.
if ($win64) {
	($arg1,$arg2,$arg3,$arg4) = ("%rcx","%rdx","%r8", "%r9");	# Win64 order
} else {
	($arg1,$arg2,$arg3,$arg4) = ("%rdi","%rsi","%rdx","%rcx");	# Unix order
}

# Start-up glue: places a call to OPENSSL_cpuid_setup in the .init section,
# declares the hidden common symbol OPENSSL_ia32cap_P (size 8) and then
# switches to the .text section for the routines that follow.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,8,4

.text

___

# OPENSSL_atomic_add: adds the value in $arg2 to the 32-bit word at ($arg1).
# The sum is formed in %r8 and stored with a lock-prefixed cmpxchg; the loop
# retries until the exchange succeeds.  The new value is returned in %rax,
# sign-extended from 32 bits (cltq).
print<<___;
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

___

# OPENSSL_rdtsc: executes rdtsc and returns %rdx:%rax combined into a single
# 64-bit value in %rax.
print<<___;
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

___

# OPENSSL_ia32_cpuid: returns a 64-bit capability word in %rax whose low
# 32 bits derive from %edx and whose high 32 bits derive from %ecx of CPUID
# leaf 1, after the adjustments below.  %rbx is preserved via %r8.
#  - Leaf 0 supplies the maximum standard leaf (%r11d) and the vendor string,
#    which is compared against "GenuineIntel" (%r9d == 0 means Intel) and
#    "AuthenticAMD".
#  - AMD path: the XOP bit (1<<11) is taken from %ecx of leaf 0x80000001; if
#    leaf 0x80000008 exists its core count is compared with the logical
#    processor count of leaf 1, and the hyper-threading bit (1<<28 of %edx)
#    is cleared when the logical count does not exceed the core count.
#  - Other vendors: leaf 4 (when available) yields the number of cores
#    sharing L1D; %edx bits 30 and 20 are forced to 0, then bit 30 is set on
#    Intel CPUs and bit 20 on Intel family 15 (to engage RC4_CHAR); the
#    hyper-threading bit is cleared when the cache is not shared.
#  - Finally the AVX, FMA and XOP bits are cleared unless OSXSAVE is set and
#    XCR0 reports both XMM and YMM state support.
print<<___;
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@abi-omnipotent
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

___

# OPENSSL_cleanse: writes zero to $arg2 bytes starting at address $arg1.
# Lengths below 15 are cleared one byte at a time (.Little).  Longer buffers
# are cleared byte-wise until the pointer is 8-byte aligned (.Lot), then
# eight bytes per iteration (.Laligned), and any remaining tail goes back
# through the byte loop.
print<<___;
.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___

# OPENSSL_wipe_cpu, emitted when not targeting Win64: zeroes %xmm0-%xmm15
# and the integer registers %rcx, %rdx, %rsi, %rdi and %r8-%r11, then
# returns the address 8(%rsp) in %rax.
# NOTE(review): the register list presumably corresponds to the registers
# the Unix ABI leaves to the callee to clobber -- confirm against the ABI.
print<<___ if (!$win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# OPENSSL_wipe_cpu, emitted when targeting Win64: zeroes %xmm0-%xmm5 and the
# integer registers %rcx, %rdx and %r8-%r11, then returns the address
# 8(%rsp) in %rax.
# NOTE(review): the shorter register list presumably reflects the registers
# Win64 treats as volatile -- confirm against the ABI.
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___

# OPENSSL_ia32_rdrand: executes rdrand into %rax up to eight times, with
# %ecx as the retry counter, and leaves the loop as soon as the carry flag
# reports success.  The result is returned in %rax; if %rax is zero, the
# current value of the retry counter %rcx is returned instead.
# NOTE(review): this presumably makes a zero return mean "all attempts
# failed" -- that relies on rdrand leaving zero in the destination on
# failure; confirm against the instruction reference.
print<<___;
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand
	loop	.Loop_rdrand
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
___

# STDOUT is the pipe to the translator: closing it flushes the buffered
# assembly and waits for the child.  close returns false on a write error or
# when the translator exits with non-zero status (then $! is 0 and $? holds
# the status), so a failed translation must not pass silently.
close STDOUT or die "error closing STDOUT: $! (child status $?)";	# flush