1/* Subroutines for the gcc driver. 2 Copyright (C) 2006-2015 Free Software Foundation, Inc. 3 4This file is part of GCC. 5 6GCC is free software; you can redistribute it and/or modify 7it under the terms of the GNU General Public License as published by 8the Free Software Foundation; either version 3, or (at your option) 9any later version. 10 11GCC is distributed in the hope that it will be useful, 12but WITHOUT ANY WARRANTY; without even the implied warranty of 13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14GNU General Public License for more details. 15 16You should have received a copy of the GNU General Public License 17along with GCC; see the file COPYING3. If not see 18<http://www.gnu.org/licenses/>. */ 19 20#include "config.h" 21#include "system.h" 22#include "coretypes.h" 23#include "tm.h" 24 25const char *host_detect_local_cpu (int argc, const char **argv); 26 27#if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__)) 28#include "cpuid.h" 29 30struct cache_desc 31{ 32 unsigned sizekb; 33 unsigned assoc; 34 unsigned line; 35}; 36 37/* Returns command line parameters that describe size and 38 cache line size of the processor caches. */ 39 40static char * 41describe_cache (struct cache_desc level1, struct cache_desc level2) 42{ 43 char size[100], line[100], size2[100]; 44 45 /* At the moment, gcc does not use the information 46 about the associativity of the cache. */ 47 48 snprintf (size, sizeof (size), 49 "--param l1-cache-size=%u ", level1.sizekb); 50 snprintf (line, sizeof (line), 51 "--param l1-cache-line-size=%u ", level1.line); 52 53 snprintf (size2, sizeof (size2), 54 "--param l2-cache-size=%u ", level2.sizekb); 55 56 return concat (size, line, size2, NULL); 57} 58 59/* Detect L2 cache parameters using CPUID extended function 0x80000006. 
*/ 60 61static void 62detect_l2_cache (struct cache_desc *level2) 63{ 64 unsigned eax, ebx, ecx, edx; 65 unsigned assoc; 66 67 __cpuid (0x80000006, eax, ebx, ecx, edx); 68 69 level2->sizekb = (ecx >> 16) & 0xffff; 70 level2->line = ecx & 0xff; 71 72 assoc = (ecx >> 12) & 0xf; 73 if (assoc == 6) 74 assoc = 8; 75 else if (assoc == 8) 76 assoc = 16; 77 else if (assoc >= 0xa && assoc <= 0xc) 78 assoc = 32 + (assoc - 0xa) * 16; 79 else if (assoc >= 0xd && assoc <= 0xe) 80 assoc = 96 + (assoc - 0xd) * 32; 81 82 level2->assoc = assoc; 83} 84 85/* Returns the description of caches for an AMD processor. */ 86 87static const char * 88detect_caches_amd (unsigned max_ext_level) 89{ 90 unsigned eax, ebx, ecx, edx; 91 92 struct cache_desc level1, level2 = {0, 0, 0}; 93 94 if (max_ext_level < 0x80000005) 95 return ""; 96 97 __cpuid (0x80000005, eax, ebx, ecx, edx); 98 99 level1.sizekb = (ecx >> 24) & 0xff; 100 level1.assoc = (ecx >> 16) & 0xff; 101 level1.line = ecx & 0xff; 102 103 if (max_ext_level >= 0x80000006) 104 detect_l2_cache (&level2); 105 106 return describe_cache (level1, level2); 107} 108 109/* Decodes the size, the associativity and the cache line size of 110 L1/L2 caches of an Intel processor. Values are based on 111 "Intel Processor Identification and the CPUID Instruction" 112 [Application Note 485], revision -032, December 2007. 
*/ 113 114static void 115decode_caches_intel (unsigned reg, bool xeon_mp, 116 struct cache_desc *level1, struct cache_desc *level2) 117{ 118 int i; 119 120 for (i = 24; i >= 0; i -= 8) 121 switch ((reg >> i) & 0xff) 122 { 123 case 0x0a: 124 level1->sizekb = 8; level1->assoc = 2; level1->line = 32; 125 break; 126 case 0x0c: 127 level1->sizekb = 16; level1->assoc = 4; level1->line = 32; 128 break; 129 case 0x0d: 130 level1->sizekb = 16; level1->assoc = 4; level1->line = 64; 131 break; 132 case 0x0e: 133 level1->sizekb = 24; level1->assoc = 6; level1->line = 64; 134 break; 135 case 0x21: 136 level2->sizekb = 256; level2->assoc = 8; level2->line = 64; 137 break; 138 case 0x24: 139 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64; 140 break; 141 case 0x2c: 142 level1->sizekb = 32; level1->assoc = 8; level1->line = 64; 143 break; 144 case 0x39: 145 level2->sizekb = 128; level2->assoc = 4; level2->line = 64; 146 break; 147 case 0x3a: 148 level2->sizekb = 192; level2->assoc = 6; level2->line = 64; 149 break; 150 case 0x3b: 151 level2->sizekb = 128; level2->assoc = 2; level2->line = 64; 152 break; 153 case 0x3c: 154 level2->sizekb = 256; level2->assoc = 4; level2->line = 64; 155 break; 156 case 0x3d: 157 level2->sizekb = 384; level2->assoc = 6; level2->line = 64; 158 break; 159 case 0x3e: 160 level2->sizekb = 512; level2->assoc = 4; level2->line = 64; 161 break; 162 case 0x41: 163 level2->sizekb = 128; level2->assoc = 4; level2->line = 32; 164 break; 165 case 0x42: 166 level2->sizekb = 256; level2->assoc = 4; level2->line = 32; 167 break; 168 case 0x43: 169 level2->sizekb = 512; level2->assoc = 4; level2->line = 32; 170 break; 171 case 0x44: 172 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32; 173 break; 174 case 0x45: 175 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32; 176 break; 177 case 0x48: 178 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64; 179 break; 180 case 0x49: 181 if (xeon_mp) 182 break; 183 level2->sizekb = 
4096; level2->assoc = 16; level2->line = 64; 184 break; 185 case 0x4e: 186 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64; 187 break; 188 case 0x60: 189 level1->sizekb = 16; level1->assoc = 8; level1->line = 64; 190 break; 191 case 0x66: 192 level1->sizekb = 8; level1->assoc = 4; level1->line = 64; 193 break; 194 case 0x67: 195 level1->sizekb = 16; level1->assoc = 4; level1->line = 64; 196 break; 197 case 0x68: 198 level1->sizekb = 32; level1->assoc = 4; level1->line = 64; 199 break; 200 case 0x78: 201 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64; 202 break; 203 case 0x79: 204 level2->sizekb = 128; level2->assoc = 8; level2->line = 64; 205 break; 206 case 0x7a: 207 level2->sizekb = 256; level2->assoc = 8; level2->line = 64; 208 break; 209 case 0x7b: 210 level2->sizekb = 512; level2->assoc = 8; level2->line = 64; 211 break; 212 case 0x7c: 213 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; 214 break; 215 case 0x7d: 216 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64; 217 break; 218 case 0x7f: 219 level2->sizekb = 512; level2->assoc = 2; level2->line = 64; 220 break; 221 case 0x80: 222 level2->sizekb = 512; level2->assoc = 8; level2->line = 64; 223 break; 224 case 0x82: 225 level2->sizekb = 256; level2->assoc = 8; level2->line = 32; 226 break; 227 case 0x83: 228 level2->sizekb = 512; level2->assoc = 8; level2->line = 32; 229 break; 230 case 0x84: 231 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32; 232 break; 233 case 0x85: 234 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32; 235 break; 236 case 0x86: 237 level2->sizekb = 512; level2->assoc = 4; level2->line = 64; 238 break; 239 case 0x87: 240 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64; 241 242 default: 243 break; 244 } 245} 246 247/* Detect cache parameters using CPUID function 2. 
*/ 248 249static void 250detect_caches_cpuid2 (bool xeon_mp, 251 struct cache_desc *level1, struct cache_desc *level2) 252{ 253 unsigned regs[4]; 254 int nreps, i; 255 256 __cpuid (2, regs[0], regs[1], regs[2], regs[3]); 257 258 nreps = regs[0] & 0x0f; 259 regs[0] &= ~0x0f; 260 261 while (--nreps >= 0) 262 { 263 for (i = 0; i < 4; i++) 264 if (regs[i] && !((regs[i] >> 31) & 1)) 265 decode_caches_intel (regs[i], xeon_mp, level1, level2); 266 267 if (nreps) 268 __cpuid (2, regs[0], regs[1], regs[2], regs[3]); 269 } 270} 271 272/* Detect cache parameters using CPUID function 4. This 273 method doesn't require hardcoded tables. */ 274 275enum cache_type 276{ 277 CACHE_END = 0, 278 CACHE_DATA = 1, 279 CACHE_INST = 2, 280 CACHE_UNIFIED = 3 281}; 282 283static void 284detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2, 285 struct cache_desc *level3) 286{ 287 struct cache_desc *cache; 288 289 unsigned eax, ebx, ecx, edx; 290 int count; 291 292 for (count = 0;; count++) 293 { 294 __cpuid_count(4, count, eax, ebx, ecx, edx); 295 switch (eax & 0x1f) 296 { 297 case CACHE_END: 298 return; 299 case CACHE_DATA: 300 case CACHE_UNIFIED: 301 { 302 switch ((eax >> 5) & 0x07) 303 { 304 case 1: 305 cache = level1; 306 break; 307 case 2: 308 cache = level2; 309 break; 310 case 3: 311 cache = level3; 312 break; 313 default: 314 cache = NULL; 315 } 316 317 if (cache) 318 { 319 unsigned sets = ecx + 1; 320 unsigned part = ((ebx >> 12) & 0x03ff) + 1; 321 322 cache->assoc = ((ebx >> 22) & 0x03ff) + 1; 323 cache->line = (ebx & 0x0fff) + 1; 324 325 cache->sizekb = (cache->assoc * part 326 * cache->line * sets) / 1024; 327 } 328 } 329 default: 330 break; 331 } 332 } 333} 334 335/* Returns the description of caches for an Intel processor. 
*/

/* XEON_MP is forwarded to the CPUID 2 decoder.  MAX_LEVEL and
   MAX_EXT_LEVEL are the highest supported basic and extended CPUID
   functions.  The empty string is returned, and *L2SIZEKB is left
   untouched, when no L1 data could be determined; otherwise *L2SIZEKB
   receives the size that is reported as L2.  */

static const char *
detect_caches_intel (bool xeon_mp, unsigned max_level,
                     unsigned max_ext_level, unsigned *l2sizekb)
{
  struct cache_desc l1 = {0, 0, 0};
  struct cache_desc l2 = {0, 0, 0};
  struct cache_desc l3 = {0, 0, 0};

  if (max_level < 2)
    return "";

  /* Prefer CPUID 4 when it is available; otherwise use the descriptor
     bytes of CPUID 2.  */
  if (max_level >= 4)
    detect_caches_cpuid4 (&l1, &l2, &l3);
  else
    detect_caches_cpuid2 (xeon_mp, &l1, &l2);

  if (l1.sizekb == 0)
    return "";

  /* Let the L3 replace the L2.  This assumes inclusive caches
     and single threaded program for now.  */
  if (l3.sizekb != 0)
    l2 = l3;

  /* Intel CPUs are equipped with AMD style L2 cache info.  Try this
     method if other methods fail to provide L2 cache parameters.  */
  if (l2.sizekb == 0 && max_ext_level >= 0x80000006)
    detect_l2_cache (&l2);

  *l2sizekb = l2.sizekb;

  return describe_cache (l1, l2);
}

/* This will be called by the spec parser in gcc.c when it sees
   a %:local_cpu_detect(args) construct.  Currently it will be called
   with either "arch" or "tune" as argument depending on if -march=native
   or -mtune=native is to be substituted.

   It returns a string containing new command line parameters to be
   put at the place of the above two options, depending on what CPU
   this is executed.  E.g. "-march=k8" on an AMD64 machine
   for -march=native.

   ARGC and ARGV are set depending on the actual arguments given
   in the spec.
*/ 380 381const char *host_detect_local_cpu (int argc, const char **argv) 382{ 383 enum processor_type processor = PROCESSOR_I386; 384 const char *cpu = "i386"; 385 386 const char *cache = ""; 387 const char *options = ""; 388 389 unsigned int eax, ebx, ecx, edx; 390 391 unsigned int max_level, ext_level; 392 393 unsigned int vendor; 394 unsigned int model, family; 395 396 unsigned int has_sse3, has_ssse3, has_cmpxchg16b; 397 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2; 398 399 /* Extended features */ 400 unsigned int has_lahf_lm = 0, has_sse4a = 0; 401 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; 402 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; 403 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0; 404 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; 405 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0; 406 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0; 407 unsigned int has_hle = 0, has_rtm = 0; 408 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0; 409 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0; 410 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0; 411 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0; 412 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0; 413 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0; 414 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0; 415 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0; 416 unsigned int has_pcommit = 0, has_mwaitx = 0; 417 418 bool arch; 419 420 unsigned int l2sizekb = 0; 421 422 if (argc < 1) 423 return NULL; 424 425 arch = !strcmp (argv[0], "arch"); 426 427 if (!arch && strcmp (argv[0], "tune")) 428 return NULL; 429 430 max_level = __get_cpuid_max (0, &vendor); 431 if (max_level < 1) 432 goto done; 433 434 __cpuid (1, eax, ebx, ecx, edx); 435 436 model = (eax >> 4) & 0x0f; 437 family = (eax 
>> 8) & 0x0f; 438 if (vendor == signature_INTEL_ebx 439 || vendor == signature_AMD_ebx) 440 { 441 unsigned int extended_model, extended_family; 442 443 extended_model = (eax >> 12) & 0xf0; 444 extended_family = (eax >> 20) & 0xff; 445 if (family == 0x0f) 446 { 447 family += extended_family; 448 model += extended_model; 449 } 450 else if (family == 0x06) 451 model += extended_model; 452 } 453 454 has_sse3 = ecx & bit_SSE3; 455 has_ssse3 = ecx & bit_SSSE3; 456 has_sse4_1 = ecx & bit_SSE4_1; 457 has_sse4_2 = ecx & bit_SSE4_2; 458 has_avx = ecx & bit_AVX; 459 has_osxsave = ecx & bit_OSXSAVE; 460 has_cmpxchg16b = ecx & bit_CMPXCHG16B; 461 has_movbe = ecx & bit_MOVBE; 462 has_popcnt = ecx & bit_POPCNT; 463 has_aes = ecx & bit_AES; 464 has_pclmul = ecx & bit_PCLMUL; 465 has_fma = ecx & bit_FMA; 466 has_f16c = ecx & bit_F16C; 467 has_rdrnd = ecx & bit_RDRND; 468 has_xsave = ecx & bit_XSAVE; 469 470 has_cmpxchg8b = edx & bit_CMPXCHG8B; 471 has_cmov = edx & bit_CMOV; 472 has_mmx = edx & bit_MMX; 473 has_fxsr = edx & bit_FXSAVE; 474 has_sse = edx & bit_SSE; 475 has_sse2 = edx & bit_SSE2; 476 477 if (max_level >= 7) 478 { 479 __cpuid_count (7, 0, eax, ebx, ecx, edx); 480 481 has_bmi = ebx & bit_BMI; 482 has_hle = ebx & bit_HLE; 483 has_rtm = ebx & bit_RTM; 484 has_avx2 = ebx & bit_AVX2; 485 has_bmi2 = ebx & bit_BMI2; 486 has_fsgsbase = ebx & bit_FSGSBASE; 487 has_rdseed = ebx & bit_RDSEED; 488 has_adx = ebx & bit_ADX; 489 has_avx512f = ebx & bit_AVX512F; 490 has_avx512er = ebx & bit_AVX512ER; 491 has_avx512pf = ebx & bit_AVX512PF; 492 has_avx512cd = ebx & bit_AVX512CD; 493 has_sha = ebx & bit_SHA; 494 has_pcommit = ebx & bit_PCOMMIT; 495 has_clflushopt = ebx & bit_CLFLUSHOPT; 496 has_clwb = ebx & bit_CLWB; 497 has_avx512dq = ebx & bit_AVX512DQ; 498 has_avx512bw = ebx & bit_AVX512BW; 499 has_avx512vl = ebx & bit_AVX512VL; 500 has_avx512vl = ebx & bit_AVX512IFMA; 501 502 has_prefetchwt1 = ecx & bit_PREFETCHWT1; 503 has_avx512vl = ecx & bit_AVX512VBMI; 504 } 505 506 if (max_level 
>= 13) 507 { 508 __cpuid_count (13, 1, eax, ebx, ecx, edx); 509 510 has_xsaveopt = eax & bit_XSAVEOPT; 511 has_xsavec = eax & bit_XSAVEC; 512 has_xsaves = eax & bit_XSAVES; 513 } 514 515 /* Check cpuid level of extended features. */ 516 __cpuid (0x80000000, ext_level, ebx, ecx, edx); 517 518 if (ext_level > 0x80000000) 519 { 520 __cpuid (0x80000001, eax, ebx, ecx, edx); 521 522 has_lahf_lm = ecx & bit_LAHF_LM; 523 has_sse4a = ecx & bit_SSE4a; 524 has_abm = ecx & bit_ABM; 525 has_lwp = ecx & bit_LWP; 526 has_fma4 = ecx & bit_FMA4; 527 has_xop = ecx & bit_XOP; 528 has_tbm = ecx & bit_TBM; 529 has_lzcnt = ecx & bit_LZCNT; 530 has_prfchw = ecx & bit_PRFCHW; 531 532 has_longmode = edx & bit_LM; 533 has_3dnowp = edx & bit_3DNOWP; 534 has_3dnow = edx & bit_3DNOW; 535 has_mwaitx = ecx & bit_MWAITX; 536 } 537 538 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ 539#define XCR_XFEATURE_ENABLED_MASK 0x0 540#define XSTATE_FP 0x1 541#define XSTATE_SSE 0x2 542#define XSTATE_YMM 0x4 543#define XSTATE_OPMASK 0x20 544#define XSTATE_ZMM 0x40 545#define XSTATE_HI_ZMM 0x80 546 if (has_osxsave) 547 asm (".byte 0x0f; .byte 0x01; .byte 0xd0" 548 : "=a" (eax), "=d" (edx) 549 : "c" (XCR_XFEATURE_ENABLED_MASK)); 550 551 /* Check if SSE and YMM states are supported. 
*/ 552 if (!has_osxsave 553 || (eax & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) 554 { 555 has_avx = 0; 556 has_avx2 = 0; 557 has_fma = 0; 558 has_fma4 = 0; 559 has_f16c = 0; 560 has_xop = 0; 561 has_xsave = 0; 562 has_xsaveopt = 0; 563 has_xsaves = 0; 564 has_xsavec = 0; 565 } 566 567 if (!has_osxsave 568 || (eax & 569 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)) 570 != (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)) 571 { 572 has_avx512f = 0; 573 has_avx512er = 0; 574 has_avx512pf = 0; 575 has_avx512cd = 0; 576 has_avx512dq = 0; 577 has_avx512bw = 0; 578 has_avx512vl = 0; 579 } 580 581 if (!arch) 582 { 583 if (vendor == signature_AMD_ebx 584 || vendor == signature_CENTAUR_ebx 585 || vendor == signature_CYRIX_ebx 586 || vendor == signature_NSC_ebx) 587 cache = detect_caches_amd (ext_level); 588 else if (vendor == signature_INTEL_ebx) 589 { 590 bool xeon_mp = (family == 15 && model == 6); 591 cache = detect_caches_intel (xeon_mp, max_level, 592 ext_level, &l2sizekb); 593 } 594 } 595 596 if (vendor == signature_AMD_ebx) 597 { 598 unsigned int name; 599 600 /* Detect geode processor by its processor signature. 
*/ 601 if (ext_level > 0x80000001) 602 __cpuid (0x80000002, name, ebx, ecx, edx); 603 else 604 name = 0; 605 606 if (name == signature_NSC_ebx) 607 processor = PROCESSOR_GEODE; 608 else if (has_movbe && family == 22) 609 processor = PROCESSOR_BTVER2; 610 else if (has_avx2) 611 processor = PROCESSOR_BDVER4; 612 else if (has_xsaveopt) 613 processor = PROCESSOR_BDVER3; 614 else if (has_bmi) 615 processor = PROCESSOR_BDVER2; 616 else if (has_xop) 617 processor = PROCESSOR_BDVER1; 618 else if (has_sse4a && has_ssse3) 619 processor = PROCESSOR_BTVER1; 620 else if (has_sse4a) 621 processor = PROCESSOR_AMDFAM10; 622 else if (has_sse2 || has_longmode) 623 processor = PROCESSOR_K8; 624 else if (has_3dnowp && family == 6) 625 processor = PROCESSOR_ATHLON; 626 else if (has_mmx) 627 processor = PROCESSOR_K6; 628 else 629 processor = PROCESSOR_PENTIUM; 630 } 631 else if (vendor == signature_CENTAUR_ebx) 632 { 633 if (arch) 634 { 635 switch (family) 636 { 637 case 6: 638 if (model > 9) 639 /* Use the default detection procedure. */ 640 processor = PROCESSOR_GENERIC; 641 else if (model == 9) 642 cpu = "c3-2"; 643 else if (model >= 6) 644 cpu = "c3"; 645 else 646 processor = PROCESSOR_GENERIC; 647 break; 648 case 5: 649 if (has_3dnow) 650 cpu = "winchip2"; 651 else if (has_mmx) 652 cpu = "winchip2-c6"; 653 else 654 processor = PROCESSOR_GENERIC; 655 break; 656 default: 657 /* We have no idea. */ 658 processor = PROCESSOR_GENERIC; 659 } 660 } 661 } 662 else 663 { 664 switch (family) 665 { 666 case 4: 667 processor = PROCESSOR_I486; 668 break; 669 case 5: 670 processor = PROCESSOR_PENTIUM; 671 break; 672 case 6: 673 processor = PROCESSOR_PENTIUMPRO; 674 break; 675 case 15: 676 processor = PROCESSOR_PENTIUM4; 677 break; 678 default: 679 /* We have no idea. */ 680 processor = PROCESSOR_GENERIC; 681 } 682 } 683 684 switch (processor) 685 { 686 case PROCESSOR_I386: 687 /* Default. 
*/ 688 break; 689 case PROCESSOR_I486: 690 cpu = "i486"; 691 break; 692 case PROCESSOR_PENTIUM: 693 if (arch && has_mmx) 694 cpu = "pentium-mmx"; 695 else 696 cpu = "pentium"; 697 break; 698 case PROCESSOR_PENTIUMPRO: 699 switch (model) 700 { 701 case 0x1c: 702 case 0x26: 703 /* Bonnell. */ 704 cpu = "bonnell"; 705 break; 706 case 0x37: 707 case 0x4a: 708 case 0x4d: 709 case 0x5a: 710 case 0x5d: 711 /* Silvermont. */ 712 cpu = "silvermont"; 713 break; 714 case 0x0f: 715 /* Merom. */ 716 case 0x17: 717 case 0x1d: 718 /* Penryn. */ 719 cpu = "core2"; 720 break; 721 case 0x1a: 722 case 0x1e: 723 case 0x1f: 724 case 0x2e: 725 /* Nehalem. */ 726 cpu = "nehalem"; 727 break; 728 case 0x25: 729 case 0x2c: 730 case 0x2f: 731 /* Westmere. */ 732 cpu = "westmere"; 733 break; 734 case 0x2a: 735 case 0x2d: 736 /* Sandy Bridge. */ 737 cpu = "sandybridge"; 738 break; 739 case 0x3a: 740 case 0x3e: 741 /* Ivy Bridge. */ 742 cpu = "ivybridge"; 743 break; 744 case 0x3c: 745 case 0x3f: 746 case 0x45: 747 case 0x46: 748 /* Haswell. */ 749 cpu = "haswell"; 750 break; 751 case 0x3d: 752 case 0x4f: 753 case 0x56: 754 /* Broadwell. */ 755 cpu = "broadwell"; 756 break; 757 case 0x57: 758 /* Knights Landing. */ 759 cpu = "knl"; 760 break; 761 default: 762 if (arch) 763 { 764 /* This is unknown family 0x6 CPU. */ 765 /* Assume Knights Landing. */ 766 if (has_avx512f) 767 cpu = "knl"; 768 /* Assume Broadwell. */ 769 else if (has_adx) 770 cpu = "broadwell"; 771 else if (has_avx2) 772 /* Assume Haswell. */ 773 cpu = "haswell"; 774 else if (has_avx) 775 /* Assume Sandy Bridge. */ 776 cpu = "sandybridge"; 777 else if (has_sse4_2) 778 { 779 if (has_movbe) 780 /* Assume Silvermont. */ 781 cpu = "silvermont"; 782 else 783 /* Assume Nehalem. */ 784 cpu = "nehalem"; 785 } 786 else if (has_ssse3) 787 { 788 if (has_movbe) 789 /* Assume Bonnell. */ 790 cpu = "bonnell"; 791 else 792 /* Assume Core 2. */ 793 cpu = "core2"; 794 } 795 else if (has_longmode) 796 /* Perhaps some emulator? 
Assume x86-64, otherwise gcc 797 -march=native would be unusable for 64-bit compilations, 798 as all the CPUs below are 32-bit only. */ 799 cpu = "x86-64"; 800 else if (has_sse3) 801 /* It is Core Duo. */ 802 cpu = "pentium-m"; 803 else if (has_sse2) 804 /* It is Pentium M. */ 805 cpu = "pentium-m"; 806 else if (has_sse) 807 /* It is Pentium III. */ 808 cpu = "pentium3"; 809 else if (has_mmx) 810 /* It is Pentium II. */ 811 cpu = "pentium2"; 812 else 813 /* Default to Pentium Pro. */ 814 cpu = "pentiumpro"; 815 } 816 else 817 /* For -mtune, we default to -mtune=generic. */ 818 cpu = "generic"; 819 break; 820 } 821 break; 822 case PROCESSOR_PENTIUM4: 823 if (has_sse3) 824 { 825 if (has_longmode) 826 cpu = "nocona"; 827 else 828 cpu = "prescott"; 829 } 830 else 831 cpu = "pentium4"; 832 break; 833 case PROCESSOR_GEODE: 834 cpu = "geode"; 835 break; 836 case PROCESSOR_K6: 837 if (arch && has_3dnow) 838 cpu = "k6-3"; 839 else 840 cpu = "k6"; 841 break; 842 case PROCESSOR_ATHLON: 843 if (arch && has_sse) 844 cpu = "athlon-4"; 845 else 846 cpu = "athlon"; 847 break; 848 case PROCESSOR_K8: 849 if (arch && has_sse3) 850 cpu = "k8-sse3"; 851 else 852 cpu = "k8"; 853 break; 854 case PROCESSOR_AMDFAM10: 855 cpu = "amdfam10"; 856 break; 857 case PROCESSOR_BDVER1: 858 cpu = "bdver1"; 859 break; 860 case PROCESSOR_BDVER2: 861 cpu = "bdver2"; 862 break; 863 case PROCESSOR_BDVER3: 864 cpu = "bdver3"; 865 break; 866 case PROCESSOR_BDVER4: 867 cpu = "bdver4"; 868 break; 869 case PROCESSOR_BTVER1: 870 cpu = "btver1"; 871 break; 872 case PROCESSOR_BTVER2: 873 cpu = "btver2"; 874 break; 875 876 default: 877 /* Use something reasonable. 
*/ 878 if (arch) 879 { 880 if (has_ssse3) 881 cpu = "core2"; 882 else if (has_sse3) 883 { 884 if (has_longmode) 885 cpu = "nocona"; 886 else 887 cpu = "prescott"; 888 } 889 else if (has_sse2) 890 cpu = "pentium4"; 891 else if (has_cmov) 892 cpu = "pentiumpro"; 893 else if (has_mmx) 894 cpu = "pentium-mmx"; 895 else if (has_cmpxchg8b) 896 cpu = "pentium"; 897 } 898 else 899 cpu = "generic"; 900 } 901 902 if (arch) 903 { 904 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx"; 905 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow"; 906 const char *sse = has_sse ? " -msse" : " -mno-sse"; 907 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2"; 908 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3"; 909 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3"; 910 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a"; 911 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16"; 912 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf"; 913 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe"; 914 const char *aes = has_aes ? " -maes" : " -mno-aes"; 915 const char *sha = has_sha ? " -msha" : " -mno-sha"; 916 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul"; 917 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt"; 918 const char *abm = has_abm ? " -mabm" : " -mno-abm"; 919 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp"; 920 const char *fma = has_fma ? " -mfma" : " -mno-fma"; 921 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4"; 922 const char *xop = has_xop ? " -mxop" : " -mno-xop"; 923 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi"; 924 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2"; 925 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm"; 926 const char *avx = has_avx ? " -mavx" : " -mno-avx"; 927 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2"; 928 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2"; 929 const char *sse4_1 = has_sse4_1 ? 
" -msse4.1" : " -mno-sse4.1"; 930 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt"; 931 const char *hle = has_hle ? " -mhle" : " -mno-hle"; 932 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm"; 933 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd"; 934 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c"; 935 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase"; 936 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed"; 937 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw"; 938 const char *adx = has_adx ? " -madx" : " -mno-adx"; 939 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr"; 940 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave"; 941 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt"; 942 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f"; 943 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er"; 944 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd"; 945 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf"; 946 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1"; 947 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt"; 948 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec"; 949 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves"; 950 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq"; 951 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw"; 952 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl"; 953 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma"; 954 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi"; 955 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb"; 956 const char *pcommit = has_pcommit ? " -mpcommit" : " -mno-pcommit"; 957 const char *mwaitx = has_mwaitx ? 
" -mmwaitx" : " -mno-mwaitx"; 958 959 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, 960 sse4a, cx16, sahf, movbe, aes, sha, pclmul, 961 popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2, 962 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm, 963 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx, 964 fxsr, xsave, xsaveopt, avx512f, avx512er, 965 avx512cd, avx512pf, prefetchwt1, clflushopt, 966 xsavec, xsaves, avx512dq, avx512bw, avx512vl, 967 avx512ifma, avx512vbmi, clwb, pcommit, mwaitx, NULL); 968 } 969 970done: 971 return concat (cache, "-m", argv[0], "=", cpu, options, NULL); 972} 973#else 974 975/* If we are compiling with GCC where %EBX register is fixed, then the 976 driver will just ignore -march and -mtune "native" target and will leave 977 to the newly built compiler to generate code for its default target. */ 978 979const char *host_detect_local_cpu (int, const char **) 980{ 981 return NULL; 982} 983#endif /* __GNUC__ */ 984