1/* $NetBSD: acpi_cpu_md.c,v 1.71 2012/02/11 22:09:47 jruoho Exp $ */ 2 3/*- 4 * Copyright (c) 2010, 2011 Jukka Ruohonen <jruohonen@iki.fi> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29#include <sys/cdefs.h> 30__KERNEL_RCSID(0, "$NetBSD: acpi_cpu_md.c,v 1.71 2012/02/11 22:09:47 jruoho Exp $"); 31 32#include <sys/param.h> 33#include <sys/bus.h> 34#include <sys/cpufreq.h> 35#include <sys/device.h> 36#include <sys/kcore.h> 37#include <sys/sysctl.h> 38#include <sys/xcall.h> 39 40#include <x86/cpu.h> 41#include <x86/cpufunc.h> 42#include <x86/cputypes.h> 43#include <x86/cpuvar.h> 44#include <x86/cpu_msr.h> 45#include <x86/machdep.h> 46 47#include <dev/acpi/acpica.h> 48#include <dev/acpi/acpi_cpu.h> 49 50#include <dev/pci/pcivar.h> 51#include <dev/pci/pcidevs.h> 52 53#include <machine/acpi_machdep.h> 54 55/* 56 * Intel IA32_MISC_ENABLE. 57 */ 58#define MSR_MISC_ENABLE_EST __BIT(16) 59#define MSR_MISC_ENABLE_TURBO __BIT(38) 60 61/* 62 * AMD C1E. 63 */ 64#define MSR_CMPHALT 0xc0010055 65 66#define MSR_CMPHALT_SMI __BIT(27) 67#define MSR_CMPHALT_C1E __BIT(28) 68#define MSR_CMPHALT_BMSTS __BIT(29) 69 70/* 71 * AMD families 10h, 11h, 12h, 14h, and 15h. 72 */ 73#define MSR_10H_LIMIT 0xc0010061 74#define MSR_10H_CONTROL 0xc0010062 75#define MSR_10H_STATUS 0xc0010063 76#define MSR_10H_CONFIG 0xc0010064 77 78/* 79 * AMD family 0Fh. 80 */ 81#define MSR_0FH_CONTROL 0xc0010041 82#define MSR_0FH_STATUS 0xc0010042 83 84#define MSR_0FH_STATUS_CFID __BITS( 0, 5) 85#define MSR_0FH_STATUS_CVID __BITS(32, 36) 86#define MSR_0FH_STATUS_PENDING __BITS(31, 31) 87 88#define MSR_0FH_CONTROL_FID __BITS( 0, 5) 89#define MSR_0FH_CONTROL_VID __BITS( 8, 12) 90#define MSR_0FH_CONTROL_CHG __BITS(16, 16) 91#define MSR_0FH_CONTROL_CNT __BITS(32, 51) 92 93#define ACPI_0FH_STATUS_FID __BITS( 0, 5) 94#define ACPI_0FH_STATUS_VID __BITS( 6, 10) 95 96#define ACPI_0FH_CONTROL_FID __BITS( 0, 5) 97#define ACPI_0FH_CONTROL_VID __BITS( 6, 10) 98#define ACPI_0FH_CONTROL_VST __BITS(11, 17) 99#define ACPI_0FH_CONTROL_MVS __BITS(18, 19) 100#define ACPI_0FH_CONTROL_PLL __BITS(20, 26) 101#define ACPI_0FH_CONTROL_RVO __BITS(28, 29) 102#define ACPI_0FH_CONTROL_IRT __BITS(30, 31) 103 104#define FID_TO_VCO_FID(fidd) (((fid) < 8) ? (8 + ((fid) << 1)) : (fid)) 105 106static char native_idle_text[16]; 107void (*native_idle)(void) = NULL; 108 109static int acpicpu_md_quirk_piix4(const struct pci_attach_args *); 110static void acpicpu_md_pstate_hwf_reset(void *, void *); 111static int acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *, 112 uint32_t *); 113static int acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *); 114static int acpicpu_md_pstate_fidvid_read(uint32_t *, uint32_t *); 115static void acpicpu_md_pstate_fidvid_write(uint32_t, uint32_t, 116 uint32_t, uint32_t); 117static int acpicpu_md_pstate_sysctl_init(void); 118static int acpicpu_md_pstate_sysctl_get(SYSCTLFN_PROTO); 119static int acpicpu_md_pstate_sysctl_set(SYSCTLFN_PROTO); 120static int acpicpu_md_pstate_sysctl_all(SYSCTLFN_PROTO); 121 122extern struct acpicpu_softc **acpicpu_sc; 123static struct sysctllog *acpicpu_log = NULL; 124 125struct cpu_info * 126acpicpu_md_match(device_t parent, cfdata_t match, void *aux) 127{ 128 struct cpufeature_attach_args *cfaa = aux; 129 130 if (strcmp(cfaa->name, "frequency") != 0) 131 return NULL; 132 133 return cfaa->ci; 134} 135 136struct cpu_info * 137acpicpu_md_attach(device_t parent, device_t self, void *aux) 138{ 139 struct cpufeature_attach_args *cfaa = aux; 140 141 return cfaa->ci; 142} 143 144uint32_t 145acpicpu_md_flags(void) 146{ 147 struct cpu_info *ci = curcpu(); 148 struct pci_attach_args pa; 149 uint32_t family, val = 0; 150 uint32_t regs[4]; 151 uint64_t msr; 152 153 if (acpi_md_ncpus() == 1) 154 val |= ACPICPU_FLAG_C_BM; 155 156 if ((ci->ci_feat_val[1] & CPUID2_MONITOR) != 0) 157 val |= ACPICPU_FLAG_C_FFH; 158 159 /* 160 * By default, assume that the local APIC timer 161 * as well as TSC are stalled during C3 sleep. 162 */ 163 val |= ACPICPU_FLAG_C_APIC | ACPICPU_FLAG_C_TSC; 164 165 switch (cpu_vendor) { 166 167 case CPUVENDOR_IDT: 168 169 if ((ci->ci_feat_val[1] & CPUID2_EST) != 0) 170 val |= ACPICPU_FLAG_P_FFH; 171 172 if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0) 173 val |= ACPICPU_FLAG_T_FFH; 174 175 break; 176 177 case CPUVENDOR_INTEL: 178 179 /* 180 * Bus master control and arbitration should be 181 * available on all supported Intel CPUs (to be 182 * sure, this is double-checked later from the 183 * firmware data). These flags imply that it is 184 * not necessary to flush caches before C3 state. 185 */ 186 val |= ACPICPU_FLAG_C_BM | ACPICPU_FLAG_C_ARB; 187 188 /* 189 * Check if we can use "native", MSR-based, 190 * access. If not, we have to resort to I/O. 191 */ 192 if ((ci->ci_feat_val[1] & CPUID2_EST) != 0) 193 val |= ACPICPU_FLAG_P_FFH; 194 195 if ((ci->ci_feat_val[0] & CPUID_ACPI) != 0) 196 val |= ACPICPU_FLAG_T_FFH; 197 198 /* 199 * Check whether MSR_APERF, MSR_MPERF, and Turbo 200 * Boost are available. Also see if we might have 201 * an invariant local APIC timer ("ARAT"). 202 */ 203 if (cpuid_level >= 0x06) { 204 205 x86_cpuid(0x00000006, regs); 206 207 if ((regs[2] & CPUID_DSPM_HWF) != 0) 208 val |= ACPICPU_FLAG_P_HWF; 209 210 if ((regs[0] & CPUID_DSPM_IDA) != 0) 211 val |= ACPICPU_FLAG_P_TURBO; 212 213 if ((regs[0] & CPUID_DSPM_ARAT) != 0) 214 val &= ~ACPICPU_FLAG_C_APIC; 215 } 216 217 /* 218 * Detect whether TSC is invariant. If it is not, 219 * we keep the flag to note that TSC will not run 220 * at constant rate. Depending on the CPU, this may 221 * affect P- and T-state changes, but especially 222 * relevant are C-states; with variant TSC, states 223 * larger than C1 may completely stop the counter. 224 */ 225 x86_cpuid(0x80000000, regs); 226 227 if (regs[0] >= 0x80000007) { 228 229 x86_cpuid(0x80000007, regs); 230 231 if ((regs[3] & __BIT(8)) != 0) 232 val &= ~ACPICPU_FLAG_C_TSC; 233 } 234 235 break; 236 237 case CPUVENDOR_AMD: 238 239 x86_cpuid(0x80000000, regs); 240 241 if (regs[0] < 0x80000007) 242 break; 243 244 x86_cpuid(0x80000007, regs); 245 246 family = CPUID2FAMILY(ci->ci_signature); 247 248 if (family == 0xf) 249 family += CPUID2EXTFAMILY(ci->ci_signature); 250 251 switch (family) { 252 253 case 0x0f: 254 255 /* 256 * Disable C1E if present. 257 */ 258 if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT) 259 val |= ACPICPU_FLAG_C_C1E; 260 261 /* 262 * Evaluate support for the "FID/VID 263 * algorithm" also used by powernow(4). 264 */ 265 if ((regs[3] & CPUID_APM_FID) == 0) 266 break; 267 268 if ((regs[3] & CPUID_APM_VID) == 0) 269 break; 270 271 val |= ACPICPU_FLAG_P_FFH | ACPICPU_FLAG_P_FIDVID; 272 break; 273 274 case 0x10: 275 case 0x11: 276 277 /* 278 * Disable C1E if present. 279 */ 280 if (rdmsr_safe(MSR_CMPHALT, &msr) != EFAULT) 281 val |= ACPICPU_FLAG_C_C1E; 282 283 /* FALLTHROUGH */ 284 285 case 0x12: 286 case 0x14: /* AMD Fusion */ 287 case 0x15: /* AMD Bulldozer */ 288 289 /* 290 * Like with Intel, detect invariant TSC, 291 * MSR-based P-states, and AMD's "turbo" 292 * (Core Performance Boost), respectively. 293 */ 294 if ((regs[3] & CPUID_APM_TSC) != 0) 295 val &= ~ACPICPU_FLAG_C_TSC; 296 297 if ((regs[3] & CPUID_APM_HWP) != 0) 298 val |= ACPICPU_FLAG_P_FFH; 299 300 if ((regs[3] & CPUID_APM_CPB) != 0) 301 val |= ACPICPU_FLAG_P_TURBO; 302 303 /* 304 * Also check for APERF and MPERF, 305 * first available in the family 10h. 306 */ 307 if (cpuid_level >= 0x06) { 308 309 x86_cpuid(0x00000006, regs); 310 311 if ((regs[2] & CPUID_DSPM_HWF) != 0) 312 val |= ACPICPU_FLAG_P_HWF; 313 } 314 315 break; 316 } 317 318 break; 319 } 320 321 /* 322 * There are several erratums for PIIX4. 323 */ 324 if (pci_find_device(&pa, acpicpu_md_quirk_piix4) != 0) 325 val |= ACPICPU_FLAG_PIIX4; 326 327 return val; 328} 329 330static int 331acpicpu_md_quirk_piix4(const struct pci_attach_args *pa) 332{ 333 334 /* 335 * XXX: The pci_find_device(9) function only 336 * deals with attached devices. Change this 337 * to use something like pci_device_foreach(). 338 */ 339 if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_INTEL) 340 return 0; 341 342 if (PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82371AB_ISA || 343 PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_INTEL_82440MX_PMC) 344 return 1; 345 346 return 0; 347} 348 349void 350acpicpu_md_quirk_c1e(void) 351{ 352 const uint64_t c1e = MSR_CMPHALT_SMI | MSR_CMPHALT_C1E; 353 uint64_t val; 354 355 val = rdmsr(MSR_CMPHALT); 356 357 if ((val & c1e) != 0) 358 wrmsr(MSR_CMPHALT, val & ~c1e); 359} 360 361int 362acpicpu_md_cstate_start(struct acpicpu_softc *sc) 363{ 364 const size_t size = sizeof(native_idle_text); 365 struct acpicpu_cstate *cs; 366 bool ipi = false; 367 int i; 368 369 /* 370 * Save the cpu_idle(9) loop used by default. 371 */ 372 x86_cpu_idle_get(&native_idle, native_idle_text, size); 373 374 for (i = 0; i < ACPI_C_STATE_COUNT; i++) { 375 376 cs = &sc->sc_cstate[i]; 377 378 if (cs->cs_method == ACPICPU_C_STATE_HALT) { 379 ipi = true; 380 break; 381 } 382 } 383 384 x86_cpu_idle_set(acpicpu_cstate_idle, "acpi", ipi); 385 386 return 0; 387} 388 389int 390acpicpu_md_cstate_stop(void) 391{ 392 static char text[16]; 393 void (*func)(void); 394 uint64_t xc; 395 bool ipi; 396 397 x86_cpu_idle_get(&func, text, sizeof(text)); 398 399 if (func == native_idle) 400 return EALREADY; 401 402 ipi = (native_idle != x86_cpu_idle_halt) ? false : true; 403 x86_cpu_idle_set(native_idle, native_idle_text, ipi); 404 405 /* 406 * Run a cross-call to ensure that all CPUs are 407 * out from the ACPI idle-loop before detachment. 408 */ 409 xc = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL); 410 xc_wait(xc); 411 412 return 0; 413} 414 415/* 416 * Called with interrupts enabled. 417 */ 418void 419acpicpu_md_cstate_enter(int method, int state) 420{ 421 struct cpu_info *ci = curcpu(); 422 423 KASSERT(ci->ci_ilevel == IPL_NONE); 424 425 switch (method) { 426 427 case ACPICPU_C_STATE_FFH: 428 429 x86_monitor(&ci->ci_want_resched, 0, 0); 430 431 if (__predict_false(ci->ci_want_resched != 0)) 432 return; 433 434 x86_mwait((state - 1) << 4, 0); 435 break; 436 437 case ACPICPU_C_STATE_HALT: 438 439 x86_disable_intr(); 440 441 if (__predict_false(ci->ci_want_resched != 0)) { 442 x86_enable_intr(); 443 return; 444 } 445 446 x86_stihlt(); 447 break; 448 } 449} 450 451int 452acpicpu_md_pstate_start(struct acpicpu_softc *sc) 453{ 454 uint64_t xc, val; 455 456 switch (cpu_vendor) { 457 458 case CPUVENDOR_IDT: 459 case CPUVENDOR_INTEL: 460 461 /* 462 * Make sure EST is enabled. 463 */ 464 if ((sc->sc_flags & ACPICPU_FLAG_P_FFH) != 0) { 465 466 val = rdmsr(MSR_MISC_ENABLE); 467 468 if ((val & MSR_MISC_ENABLE_EST) == 0) { 469 470 val |= MSR_MISC_ENABLE_EST; 471 wrmsr(MSR_MISC_ENABLE, val); 472 val = rdmsr(MSR_MISC_ENABLE); 473 474 if ((val & MSR_MISC_ENABLE_EST) == 0) 475 return ENOTTY; 476 } 477 } 478 } 479 480 /* 481 * Reset the APERF and MPERF counters. 482 */ 483 if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) { 484 xc = xc_broadcast(0, acpicpu_md_pstate_hwf_reset, NULL, NULL); 485 xc_wait(xc); 486 } 487 488 return acpicpu_md_pstate_sysctl_init(); 489} 490 491int 492acpicpu_md_pstate_stop(void) 493{ 494 495 if (acpicpu_log == NULL) 496 return EALREADY; 497 498 sysctl_teardown(&acpicpu_log); 499 acpicpu_log = NULL; 500 501 return 0; 502} 503 504int 505acpicpu_md_pstate_init(struct acpicpu_softc *sc) 506{ 507 struct cpu_info *ci = sc->sc_ci; 508 struct acpicpu_pstate *ps, msr; 509 uint32_t family, i = 0; 510 511 (void)memset(&msr, 0, sizeof(struct acpicpu_pstate)); 512 513 switch (cpu_vendor) { 514 515 case CPUVENDOR_IDT: 516 case CPUVENDOR_INTEL: 517 518 /* 519 * If the so-called Turbo Boost is present, 520 * the P0-state is always the "turbo state". 521 * It is shown as the P1 frequency + 1 MHz. 522 * 523 * For discussion, see: 524 * 525 * Intel Corporation: Intel Turbo Boost Technology 526 * in Intel Core(tm) Microarchitectures (Nehalem) 527 * Based Processors. White Paper, November 2008. 528 */ 529 if (sc->sc_pstate_count >= 2 && 530 (sc->sc_flags & ACPICPU_FLAG_P_TURBO) != 0) { 531 532 ps = &sc->sc_pstate[0]; 533 534 if (ps->ps_freq == sc->sc_pstate[1].ps_freq + 1) 535 ps->ps_flags |= ACPICPU_FLAG_P_TURBO; 536 } 537 538 msr.ps_control_addr = MSR_PERF_CTL; 539 msr.ps_control_mask = __BITS(0, 15); 540 541 msr.ps_status_addr = MSR_PERF_STATUS; 542 msr.ps_status_mask = __BITS(0, 15); 543 break; 544 545 case CPUVENDOR_AMD: 546 547 if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0) 548 msr.ps_flags |= ACPICPU_FLAG_P_FIDVID; 549 550 family = CPUID2FAMILY(ci->ci_signature); 551 552 if (family == 0xf) 553 family += CPUID2EXTFAMILY(ci->ci_signature); 554 555 switch (family) { 556 557 case 0x0f: 558 msr.ps_control_addr = MSR_0FH_CONTROL; 559 msr.ps_status_addr = MSR_0FH_STATUS; 560 break; 561 562 case 0x10: 563 case 0x11: 564 case 0x12: 565 case 0x14: 566 case 0x15: 567 msr.ps_control_addr = MSR_10H_CONTROL; 568 msr.ps_control_mask = __BITS(0, 2); 569 570 msr.ps_status_addr = MSR_10H_STATUS; 571 msr.ps_status_mask = __BITS(0, 2); 572 break; 573 574 default: 575 /* 576 * If we have an unknown AMD CPU, rely on XPSS. 577 */ 578 if ((sc->sc_flags & ACPICPU_FLAG_P_XPSS) == 0) 579 return EOPNOTSUPP; 580 } 581 582 break; 583 584 default: 585 return ENODEV; 586 } 587 588 /* 589 * Fill the P-state structures with MSR addresses that are 590 * known to be correct. If we do not know the addresses, 591 * leave the values intact. If a vendor uses XPSS, we do 592 * not necessarily need to do anything to support new CPUs. 593 */ 594 while (i < sc->sc_pstate_count) { 595 596 ps = &sc->sc_pstate[i]; 597 598 if (msr.ps_flags != 0) 599 ps->ps_flags |= msr.ps_flags; 600 601 if (msr.ps_status_addr != 0) 602 ps->ps_status_addr = msr.ps_status_addr; 603 604 if (msr.ps_status_mask != 0) 605 ps->ps_status_mask = msr.ps_status_mask; 606 607 if (msr.ps_control_addr != 0) 608 ps->ps_control_addr = msr.ps_control_addr; 609 610 if (msr.ps_control_mask != 0) 611 ps->ps_control_mask = msr.ps_control_mask; 612 613 i++; 614 } 615 616 return 0; 617} 618 619/* 620 * Read the IA32_APERF and IA32_MPERF counters. The first 621 * increments at the rate of the fixed maximum frequency 622 * configured during the boot, whereas APERF counts at the 623 * rate of the actual frequency. Note that the MSRs must be 624 * read without delay, and that only the ratio between 625 * IA32_APERF and IA32_MPERF is architecturally defined. 626 * 627 * The function thus returns the percentage of the actual 628 * frequency in terms of the maximum frequency of the calling 629 * CPU since the last call. A value zero implies an error. 630 * 631 * For further details, refer to: 632 * 633 * Intel Corporation: Intel 64 and IA-32 Architectures 634 * Software Developer's Manual. Section 13.2, Volume 3A: 635 * System Programming Guide, Part 1. July, 2008. 636 * 637 * Advanced Micro Devices: BIOS and Kernel Developer's 638 * Guide (BKDG) for AMD Family 10h Processors. Section 639 * 2.4.5, Revision 3.48, April 2010. 640 */ 641uint8_t 642acpicpu_md_pstate_hwf(struct cpu_info *ci) 643{ 644 struct acpicpu_softc *sc; 645 uint64_t aperf, mperf; 646 uint8_t rv = 0; 647 648 sc = acpicpu_sc[ci->ci_acpiid]; 649 650 if (__predict_false(sc == NULL)) 651 return 0; 652 653 if (__predict_false((sc->sc_flags & ACPICPU_FLAG_P_HWF) == 0)) 654 return 0; 655 656 aperf = sc->sc_pstate_aperf; 657 mperf = sc->sc_pstate_mperf; 658 659 x86_disable_intr(); 660 661 sc->sc_pstate_aperf = rdmsr(MSR_APERF); 662 sc->sc_pstate_mperf = rdmsr(MSR_MPERF); 663 664 x86_enable_intr(); 665 666 aperf = sc->sc_pstate_aperf - aperf; 667 mperf = sc->sc_pstate_mperf - mperf; 668 669 if (__predict_true(mperf != 0)) 670 rv = (aperf * 100) / mperf; 671 672 return rv; 673} 674 675static void 676acpicpu_md_pstate_hwf_reset(void *arg1, void *arg2) 677{ 678 struct cpu_info *ci = curcpu(); 679 struct acpicpu_softc *sc; 680 681 sc = acpicpu_sc[ci->ci_acpiid]; 682 683 if (__predict_false(sc == NULL)) 684 return; 685 686 x86_disable_intr(); 687 688 wrmsr(MSR_APERF, 0); 689 wrmsr(MSR_MPERF, 0); 690 691 x86_enable_intr(); 692 693 sc->sc_pstate_aperf = 0; 694 sc->sc_pstate_mperf = 0; 695} 696 697int 698acpicpu_md_pstate_get(struct acpicpu_softc *sc, uint32_t *freq) 699{ 700 struct acpicpu_pstate *ps = NULL; 701 uint64_t val; 702 uint32_t i; 703 704 if ((sc->sc_flags & ACPICPU_FLAG_P_FIDVID) != 0) 705 return acpicpu_md_pstate_fidvid_get(sc, freq); 706 707 /* 708 * Pick any P-state for the status address. 709 */ 710 for (i = 0; i < sc->sc_pstate_count; i++) { 711 712 ps = &sc->sc_pstate[i]; 713 714 if (__predict_true(ps->ps_freq != 0)) 715 break; 716 } 717 718 if (__predict_false(ps == NULL)) 719 return ENODEV; 720 721 if (__predict_false(ps->ps_status_addr == 0)) 722 return EINVAL; 723 724 val = rdmsr(ps->ps_status_addr); 725 726 if (__predict_true(ps->ps_status_mask != 0)) 727 val = val & ps->ps_status_mask; 728 729 /* 730 * Search for the value from known P-states. 731 */ 732 for (i = 0; i < sc->sc_pstate_count; i++) { 733 734 ps = &sc->sc_pstate[i]; 735 736 if (__predict_false(ps->ps_freq == 0)) 737 continue; 738 739 if (val == ps->ps_status) { 740 *freq = ps->ps_freq; 741 return 0; 742 } 743 } 744 745 /* 746 * If the value was not found, try APERF/MPERF. 747 * The state is P0 if the return value is 100 %. 748 */ 749 if ((sc->sc_flags & ACPICPU_FLAG_P_HWF) != 0) { 750 751 KASSERT(sc->sc_pstate_count > 0); 752 KASSERT(sc->sc_pstate[0].ps_freq != 0); 753 754 if (acpicpu_md_pstate_hwf(sc->sc_ci) == 100) { 755 *freq = sc->sc_pstate[0].ps_freq; 756 return 0; 757 } 758 } 759 760 return EIO; 761} 762 763int 764acpicpu_md_pstate_set(struct acpicpu_pstate *ps) 765{ 766 uint64_t val = 0; 767 768 if (__predict_false(ps->ps_control_addr == 0)) 769 return EINVAL; 770 771 if ((ps->ps_flags & ACPICPU_FLAG_P_FIDVID) != 0) 772 return acpicpu_md_pstate_fidvid_set(ps); 773 774 /* 775 * If the mask is set, do a read-modify-write. 776 */ 777 if (__predict_true(ps->ps_control_mask != 0)) { 778 val = rdmsr(ps->ps_control_addr); 779 val &= ~ps->ps_control_mask; 780 } 781 782 val |= ps->ps_control; 783 784 wrmsr(ps->ps_control_addr, val); 785 DELAY(ps->ps_latency); 786 787 return 0; 788} 789 790static int 791acpicpu_md_pstate_fidvid_get(struct acpicpu_softc *sc, uint32_t *freq) 792{ 793 struct acpicpu_pstate *ps; 794 uint32_t fid, i, vid; 795 uint32_t cfid, cvid; 796 int rv; 797 798 /* 799 * AMD family 0Fh needs special treatment. 800 * While it wants to use ACPI, it does not 801 * comply with the ACPI specifications. 802 */ 803 rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid); 804 805 if (rv != 0) 806 return rv; 807 808 for (i = 0; i < sc->sc_pstate_count; i++) { 809 810 ps = &sc->sc_pstate[i]; 811 812 if (__predict_false(ps->ps_freq == 0)) 813 continue; 814 815 fid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_FID); 816 vid = __SHIFTOUT(ps->ps_status, ACPI_0FH_STATUS_VID); 817 818 if (cfid == fid && cvid == vid) { 819 *freq = ps->ps_freq; 820 return 0; 821 } 822 } 823 824 return EIO; 825} 826 827static int 828acpicpu_md_pstate_fidvid_set(struct acpicpu_pstate *ps) 829{ 830 const uint64_t ctrl = ps->ps_control; 831 uint32_t cfid, cvid, fid, i, irt; 832 uint32_t pll, vco_cfid, vco_fid; 833 uint32_t val, vid, vst; 834 int rv; 835 836 rv = acpicpu_md_pstate_fidvid_read(&cfid, &cvid); 837 838 if (rv != 0) 839 return rv; 840 841 fid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_FID); 842 vid = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VID); 843 irt = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_IRT); 844 vst = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_VST); 845 pll = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_PLL); 846 847 vst = vst * 20; 848 pll = pll * 1000 / 5; 849 irt = 10 * __BIT(irt); 850 851 /* 852 * Phase 1. 853 */ 854 while (cvid > vid) { 855 856 val = 1 << __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_MVS); 857 val = (val > cvid) ? 0 : cvid - val; 858 859 acpicpu_md_pstate_fidvid_write(cfid, val, 1, vst); 860 rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid); 861 862 if (rv != 0) 863 return rv; 864 } 865 866 i = __SHIFTOUT(ctrl, ACPI_0FH_CONTROL_RVO); 867 868 for (; i > 0 && cvid > 0; --i) { 869 870 acpicpu_md_pstate_fidvid_write(cfid, cvid - 1, 1, vst); 871 rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid); 872 873 if (rv != 0) 874 return rv; 875 } 876 877 /* 878 * Phase 2. 879 */ 880 if (cfid != fid) { 881 882 vco_fid = FID_TO_VCO_FID(fid); 883 vco_cfid = FID_TO_VCO_FID(cfid); 884 885 while (abs(vco_fid - vco_cfid) > 2) { 886 887 if (fid <= cfid) 888 val = cfid - 2; 889 else { 890 val = (cfid > 6) ? cfid + 2 : 891 FID_TO_VCO_FID(cfid) + 2; 892 } 893 894 acpicpu_md_pstate_fidvid_write(val, cvid, pll, irt); 895 rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL); 896 897 if (rv != 0) 898 return rv; 899 900 vco_cfid = FID_TO_VCO_FID(cfid); 901 } 902 903 acpicpu_md_pstate_fidvid_write(fid, cvid, pll, irt); 904 rv = acpicpu_md_pstate_fidvid_read(&cfid, NULL); 905 906 if (rv != 0) 907 return rv; 908 } 909 910 /* 911 * Phase 3. 912 */ 913 if (cvid != vid) { 914 915 acpicpu_md_pstate_fidvid_write(cfid, vid, 1, vst); 916 rv = acpicpu_md_pstate_fidvid_read(NULL, &cvid); 917 918 if (rv != 0) 919 return rv; 920 } 921 922 return 0; 923} 924 925static int 926acpicpu_md_pstate_fidvid_read(uint32_t *cfid, uint32_t *cvid) 927{ 928 int i = ACPICPU_P_STATE_RETRY * 100; 929 uint64_t val; 930 931 do { 932 val = rdmsr(MSR_0FH_STATUS); 933 934 } while (__SHIFTOUT(val, MSR_0FH_STATUS_PENDING) != 0 && --i >= 0); 935 936 if (i == 0) 937 return EAGAIN; 938 939 if (cfid != NULL) 940 *cfid = __SHIFTOUT(val, MSR_0FH_STATUS_CFID); 941 942 if (cvid != NULL) 943 *cvid = __SHIFTOUT(val, MSR_0FH_STATUS_CVID); 944 945 return 0; 946} 947 948static void 949acpicpu_md_pstate_fidvid_write(uint32_t fid, 950 uint32_t vid, uint32_t cnt, uint32_t tmo) 951{ 952 uint64_t val = 0; 953 954 val |= __SHIFTIN(fid, MSR_0FH_CONTROL_FID); 955 val |= __SHIFTIN(vid, MSR_0FH_CONTROL_VID); 956 val |= __SHIFTIN(cnt, MSR_0FH_CONTROL_CNT); 957 val |= __SHIFTIN(0x1, MSR_0FH_CONTROL_CHG); 958 959 wrmsr(MSR_0FH_CONTROL, val); 960 DELAY(tmo); 961} 962 963int 964acpicpu_md_tstate_get(struct acpicpu_softc *sc, uint32_t *percent) 965{ 966 struct acpicpu_tstate *ts; 967 uint64_t val; 968 uint32_t i; 969 970 val = rdmsr(MSR_THERM_CONTROL); 971 972 for (i = 0; i < sc->sc_tstate_count; i++) { 973 974 ts = &sc->sc_tstate[i]; 975 976 if (ts->ts_percent == 0) 977 continue; 978 979 if (val == ts->ts_status) { 980 *percent = ts->ts_percent; 981 return 0; 982 } 983 } 984 985 return EIO; 986} 987 988int 989acpicpu_md_tstate_set(struct acpicpu_tstate *ts) 990{ 991 uint64_t val; 992 uint8_t i; 993 994 val = ts->ts_control; 995 val = val & __BITS(1, 4); 996 997 wrmsr(MSR_THERM_CONTROL, val); 998 999 if (ts->ts_status == 0) { 1000 DELAY(ts->ts_latency); 1001 return 0; 1002 } 1003 1004 for (i = val = 0; i < ACPICPU_T_STATE_RETRY; i++) { 1005 1006 val = rdmsr(MSR_THERM_CONTROL); 1007 1008 if (val == ts->ts_status) 1009 return 0; 1010 1011 DELAY(ts->ts_latency); 1012 } 1013 1014 return EAGAIN; 1015} 1016 1017/* 1018 * A kludge for backwards compatibility. 1019 */ 1020static int 1021acpicpu_md_pstate_sysctl_init(void) 1022{ 1023 const struct sysctlnode *fnode, *mnode, *rnode; 1024 const char *str; 1025 int rv; 1026 1027 switch (cpu_vendor) { 1028 1029 case CPUVENDOR_IDT: 1030 case CPUVENDOR_INTEL: 1031 str = "est"; 1032 break; 1033 1034 case CPUVENDOR_AMD: 1035 str = "powernow"; 1036 break; 1037 1038 default: 1039 return ENODEV; 1040 } 1041 1042 1043 rv = sysctl_createv(&acpicpu_log, 0, NULL, &rnode, 1044 CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL, 1045 NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL); 1046 1047 if (rv != 0) 1048 goto fail; 1049 1050 rv = sysctl_createv(&acpicpu_log, 0, &rnode, &mnode, 1051 0, CTLTYPE_NODE, str, NULL, 1052 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL); 1053 1054 if (rv != 0) 1055 goto fail; 1056 1057 rv = sysctl_createv(&acpicpu_log, 0, &mnode, &fnode, 1058 0, CTLTYPE_NODE, "frequency", NULL, 1059 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL); 1060 1061 if (rv != 0) 1062 goto fail; 1063 1064 rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode, 1065 CTLFLAG_READWRITE, CTLTYPE_INT, "target", NULL, 1066 acpicpu_md_pstate_sysctl_set, 0, NULL, 0, CTL_CREATE, CTL_EOL); 1067 1068 if (rv != 0) 1069 goto fail; 1070 1071 rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode, 1072 CTLFLAG_READONLY, CTLTYPE_INT, "current", NULL, 1073 acpicpu_md_pstate_sysctl_get, 0, NULL, 0, CTL_CREATE, CTL_EOL); 1074 1075 if (rv != 0) 1076 goto fail; 1077 1078 rv = sysctl_createv(&acpicpu_log, 0, &fnode, &rnode, 1079 CTLFLAG_READONLY, CTLTYPE_STRING, "available", NULL, 1080 acpicpu_md_pstate_sysctl_all, 0, NULL, 0, CTL_CREATE, CTL_EOL); 1081 1082 if (rv != 0) 1083 goto fail; 1084 1085 return 0; 1086 1087fail: 1088 if (acpicpu_log != NULL) { 1089 sysctl_teardown(&acpicpu_log); 1090 acpicpu_log = NULL; 1091 } 1092 1093 return rv; 1094} 1095 1096static int 1097acpicpu_md_pstate_sysctl_get(SYSCTLFN_ARGS) 1098{ 1099 struct sysctlnode node; 1100 uint32_t freq; 1101 int err; 1102 1103 freq = cpufreq_get(curcpu()); 1104 1105 if (freq == 0) 1106 return ENXIO; 1107 1108 node = *rnode; 1109 node.sysctl_data = &freq; 1110 1111 err = sysctl_lookup(SYSCTLFN_CALL(&node)); 1112 1113 if (err != 0 || newp == NULL) 1114 return err; 1115 1116 return 0; 1117} 1118 1119static int 1120acpicpu_md_pstate_sysctl_set(SYSCTLFN_ARGS) 1121{ 1122 struct sysctlnode node; 1123 uint32_t freq; 1124 int err; 1125 1126 freq = cpufreq_get(curcpu()); 1127 1128 if (freq == 0) 1129 return ENXIO; 1130 1131 node = *rnode; 1132 node.sysctl_data = &freq; 1133 1134 err = sysctl_lookup(SYSCTLFN_CALL(&node)); 1135 1136 if (err != 0 || newp == NULL) 1137 return err; 1138 1139 cpufreq_set_all(freq); 1140 1141 return 0; 1142} 1143 1144static int 1145acpicpu_md_pstate_sysctl_all(SYSCTLFN_ARGS) 1146{ 1147 struct cpu_info *ci = curcpu(); 1148 struct acpicpu_softc *sc; 1149 struct sysctlnode node; 1150 char buf[1024]; 1151 size_t len; 1152 uint32_t i; 1153 int err; 1154 1155 sc = acpicpu_sc[ci->ci_acpiid]; 1156 1157 if (sc == NULL) 1158 return ENXIO; 1159 1160 (void)memset(&buf, 0, sizeof(buf)); 1161 1162 mutex_enter(&sc->sc_mtx); 1163 1164 for (len = 0, i = sc->sc_pstate_max; i < sc->sc_pstate_count; i++) { 1165 1166 if (sc->sc_pstate[i].ps_freq == 0) 1167 continue; 1168 1169 len += snprintf(buf + len, sizeof(buf) - len, "%u%s", 1170 sc->sc_pstate[i].ps_freq, 1171 i < (sc->sc_pstate_count - 1) ? " " : ""); 1172 } 1173 1174 mutex_exit(&sc->sc_mtx); 1175 1176 node = *rnode; 1177 node.sysctl_data = buf; 1178 1179 err = sysctl_lookup(SYSCTLFN_CALL(&node)); 1180 1181 if (err != 0 || newp == NULL) 1182 return err; 1183 1184 return 0; 1185} 1186 1187