1#!/usr/bin/env perl
2
# Script setup: find the perlasm helper library, load it, and initialise it.
#
# Capture everything up to and including the last "/" or "\" of $0 into $dir.
# If $0 has no directory part the match fails, $1 is unset, and "${dir}perlasm"
# below degrades to plain "perlasm".
3$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Let require find x86asm.pl either in the perlasm directory next to this
# script or in a perlasm directory relative to the current directory.
4push(@INC, "${dir}perlasm", "perlasm");
5require "x86asm.pl";
6
# Initialise the assembler helpers with the first command-line argument and
# the module name "x86cpuid".
# NOTE(review): how $ARGV[0] is interpreted is decided by asm_init in
# x86asm.pl, which is not visible here -- presumably the output flavour; confirm.
7&asm_init($ARGV[0],"x86cpuid");
8
# Set $sse2 when any command-line argument contains -DOPENSSL_IA32_SSE2.
# NOTE(review): $sse2 is not read anywhere in the visible part of this file;
# check whether x86asm.pl or a later section still depends on it.
9for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
10
# Emit OPENSSL_ia32_cpuid through the x86asm.pl helpers.
#
# Flow of the generated code, as written below:
#   1. Toggle bit 21 of the flags register (the x86 EFLAGS ID bit).  If the
#      change does not stick, jump to "nocpuid" with eax = edx = 0.
#   2. CPUID leaf 0: keep the highest standard leaf in edi and compare the
#      vendor string with "GenuineIntel" (ebp == 0 on match) and with
#      "AuthenticAMD" (esi == 0 on match).
#   3. AMD only: take the XOP bit from leaf 0x80000001 and the core count from
#      leaf 0x80000008, then decide whether the HT bit of leaf 1 is kept.
#   4. "intel" label (also the path for other vendors and for AMD parts
#      without the extended leaves): read leaf 4 and leaf 1, tag Intel / P4
#      parts in reserved bits of edx, and decide whether the HT bit is kept.
#   5. "generic": merge the XOP flag into the leaf 1 ecx word, then use
#      OSXSAVE + xgetbv to mask features whose register state is not enabled.
#   6. "done": eax = filtered leaf 1 edx word, edx = filtered leaf 1 ecx word.
#
# Operands written as "\$NAME" reach the helpers as the literal text $NAME.
# NOTE(review): the IA32CAP_* symbols are not defined in this file; presumably
# they are resolved when the generated assembly is built -- confirm.
# NOTE(review): this block clobbers ebx, esi, edi and ebp without saving them
# itself; presumably function_begin / function_end (x86asm.pl) do -- confirm.
11&function_begin("OPENSSL_ia32_cpuid");
	# Probe for CPUID: flip bit 21 in a copy of the flags, write it back and
	# read the flags again.  ecx ends up as (old flags XOR new flags), so its
	# bit 21 is set only if the change stuck.
12	&xor	("edx","edx");		# edx = 0: upper result word if CPUID is absent
13	&pushf	();
14	&pop	("eax");
15	&mov	("ecx","eax");		# keep the original flags for the comparison
16	&xor	("eax",1<<21);
17	&push	("eax");
18	&popf	();
19	&pushf	();
20	&pop	("eax");
21	&xor	("ecx","eax");
22	&xor	("eax","eax");		# eax = 0: CPUID leaf 0, and the result on the nocpuid path
23	&bt	("ecx",21);
24	&jnc	(&label("nocpuid"));
25	&cpuid	();
26	&mov	("edi","eax");		# max value for standard query level
27
	# Vendor check, part 1.  Each setne yields 1 on mismatch; ebp is the OR of
	# the three results, so ebp == 0 only for "GenuineIntel".
28	&xor	("eax","eax");
29	&cmp	("ebx",0x756e6547);	# "Genu"
30	&setne	(&LB("eax"));
31	&mov	("ebp","eax");
32	&cmp	("edx",0x49656e69);	# "ineI"
33	&setne	(&LB("eax"));
34	&or	("ebp","eax");
35	&cmp	("ecx",0x6c65746e);	# "ntel"
36	&setne	(&LB("eax"));
37	&or	("ebp","eax");		# 0 indicates Intel CPU
38	&jz	(&label("intel"));	# tests the result of the or just above
39
	# Vendor check, part 2: same scheme in esi for "AuthenticAMD".  Any vendor
	# that is neither Intel nor AMD also continues at "intel", with ebp != 0.
40	&cmp	("ebx",0x68747541);	# "Auth"
41	&setne	(&LB("eax"));
42	&mov	("esi","eax");
43	&cmp	("edx",0x69746E65);	# "enti"
44	&setne	(&LB("eax"));
45	&or	("esi","eax");
46	&cmp	("ecx",0x444D4163);	# "cAMD"
47	&setne	(&LB("eax"));
48	&or	("esi","eax");		# 0 indicates AMD CPU
49	&jnz	(&label("intel"));
50
51	# AMD specific: query the extended leaves, falling back to the "intel"
	# path whenever a needed leaf is not reported as available.
52	&mov	("eax",0x80000000);
53	&cpuid	();
54	&cmp	("eax",0x80000001);
55	&jb	(&label("intel"));	# leaf 0x80000001 not available
56	&mov	("esi","eax");		# keep the value returned for leaf 0x80000000
57	&mov	("eax",0x80000001);
58	&cpuid	();
59	&and	("ecx","\$IA32CAP_MASK1_AMD_XOP");	# isolate AMD XOP bit
60	&or	("ecx",1);		# make sure ecx is not zero
61	&mov	("ebp","ecx");		# ebp stays non-zero, so the cmp ebp,0 test below still reads "not Intel"
62
63	&cmp	("esi",0x80000008);
64	&jb	(&label("intel"));	# leaf 0x80000008 not available
65
66	&mov	("eax",0x80000008);
67	&cpuid	();
68	&movz	("esi",&LB("ecx"));	# number of cores - 1
69	&inc	("esi");		# number of cores
70
	# Leaf 1: keep the HT bit only if bits 23:16 of ebx exceed the core count.
	# NOTE(review): bits 23:16 of ebx are taken to be the logical processor
	# count as defined for CPUID leaf 1 -- confirm against the CPUID reference.
71	&mov	("eax",1);
72	&xor	("ecx","ecx");
73	&cpuid	();
74	&bt	("edx","\$IA32CAP_BIT0_HT");
75	&jnc	(&label("generic"));	# HT bit already clear
76	&shr	("ebx",16);
77	&and	("ebx",0xff);
78	&cmp	("ebx","esi");
79	&ja	(&label("generic"));	# value above core count: leave HT bit set
80	&xor	("edx","\$IA32CAP_MASK0_HT");	# clear hyper-threading bit (known set after the bt, so xor clears it)
81	&jmp	(&label("generic"));
82
83&set_label("intel");
	# mov does not alter the flags, so jb still acts on the cmp: when the
	# highest standard leaf is below 4, skip leaf 4 and leave edi = -1.
84	&cmp	("edi",4);
85	&mov	("edi",-1);
86	&jb	(&label("nocacheinfo"));
87
88	&mov	("eax",4);
89	&mov	("ecx",0);		# query L1D
90	&cpuid	();
91	&mov	("edi","eax");
92	&shr	("edi",14);
93	&and	("edi",0xfff);		# number of cores -1 per L1D
94
95&set_label("nocacheinfo");
96	&mov	("eax",1);
97	&xor	("ecx","ecx");
98	&cpuid	();
99	# force reserved bits to 0.
100	&and	("edx","\$~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)");
101	&cmp	("ebp",0);
102	&jne	(&label("notintel"));	# ebp != 0: vendor string was not "GenuineIntel"
103	# set reserved bit#30 on Intel CPUs
104	&or	("edx","\$IA32CAP_MASK0_INTEL");
	# NOTE(review): HB is defined in x86asm.pl; presumably it names the second
	# byte of eax (ah), whose low nibble is compared with 15 here -- confirm.
105	&and	(&HB("eax"),15);	# family ID
106	&cmp	(&HB("eax"),15);	# P4?
107	&jne	(&label("notintel"));
108	# set reserved bit#20 to engage RC4_CHAR
109	&or	("edx","\$IA32CAP_MASK0_INTELP4");
110&set_label("notintel");
	# HT decision: the bit is first cleared, set again only when edi != 0,
	# and finally cleared once more unless the low byte of ebx >> 16 is above 1.
111	&bt	("edx","\$IA32CAP_BIT0_HT");	# test hyper-threading bit
112	&jnc	(&label("generic"));
113	&xor	("edx","\$IA32CAP_MASK0_HT");	# bit is known set here, so xor clears it
114	&cmp	("edi",0);
115	&je	(&label("generic"));	# edi == 0: leave the HT bit cleared
116
117	&or	("edx","\$IA32CAP_MASK0_HT");
118	&shr	("ebx",16);
119	&cmp	(&LB("ebx"),1);		# see if cache is shared
120	&ja	(&label("generic"));
121	&xor	("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit if not
122
123&set_label("generic");
	# On entry: edx and ecx hold the (adjusted) leaf 1 words; ebp carries the
	# AMD XOP flag in the XOP bit position, plus other bits that are dropped here.
124	&and	("ebp","\$IA32CAP_MASK1_AMD_XOP");	# isolate AMD XOP flag
125	# force reserved bits to 0.
126	&and	("ecx","\$~IA32CAP_MASK1_AMD_XOP");
127	&mov	("esi","edx");		# esi = lower result word from here on
128	&or	("ebp","ecx");		# merge AMD XOP flag
129
	# If OSXSAVE is set, run xgetbv with ecx = 0 and look at bits 1 and 2 of
	# the value returned in eax:
	#   both set (6)  -> "done", nothing is masked;
	#   only bit 1 (2) -> "clear_avx";
	#   anything else  -> "clear_xmm", which then falls through to "clear_avx".
	# Without OSXSAVE the code goes straight to "clear_avx".
	# NOTE(review): bits 1 and 2 are presumably the SSE and AVX state-enable
	# bits of XCR0 -- confirm against the XGETBV reference.
130	&bt	("ecx","\$IA32CAP_BIT1_OSXSAVE");	# check OSXSAVE bit
131	&jnc	(&label("clear_avx"));
132	&xor	("ecx","ecx");
133	&data_byte(0x0f,0x01,0xd0);	# xgetbv
134	&and	("eax",6);
135	&cmp	("eax",6);
136	&je	(&label("done"));
137	&cmp	("eax",2);
138	&je	(&label("clear_avx"));
139&set_label("clear_xmm");
140	# clear AESNI and PCLMULQDQ bits.
141	&and	("ebp","\$~(IA32CAP_MASK1_AESNI | IA32CAP_MASK1_PCLMUL)");
142	# clear FXSR.
143	&and	("esi","\$~IA32CAP_MASK0_FXSR");
144&set_label("clear_avx");
145	# clear AVX, FMA3 and AMD XOP bits.
146	&and	("ebp","\$~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)");
147&set_label("done");
	# Result: eax = filtered leaf 1 edx word, edx = filtered leaf 1 ecx word.
	# NOTE(review): presumably consumed by the caller as a 64-bit edx:eax
	# value -- confirm against the C prototype.
148	&mov	("eax","esi");
149	&mov	("edx","ebp");
	# "nocpuid" is reached directly from the probe above with eax = edx = 0.
150&set_label("nocpuid");
151&function_end("OPENSSL_ia32_cpuid");
152
# Hand control back to x86asm.pl once every routine has been described.
# NOTE(review): asm_finish is defined in x86asm.pl, which is not visible here;
# presumably it writes out the accumulated assembly -- confirm.
153&asm_finish();
154