1171854Sdes/*- 2178950Srpaulo * Copyright (c) 2007, 2008 Rui Paulo <rpaulo@FreeBSD.org> 3171854Sdes * All rights reserved. 4171854Sdes * 5171854Sdes * Redistribution and use in source and binary forms, with or without 6171854Sdes * modification, are permitted provided that the following conditions 7171854Sdes * are met: 8171854Sdes * 1. Redistributions of source code must retain the above copyright 9171854Sdes * notice, this list of conditions and the following disclaimer. 10171854Sdes * 2. Redistributions in binary form must reproduce the above copyright 11171854Sdes * notice, this list of conditions and the following disclaimer in the 12171854Sdes * documentation and/or other materials provided with the distribution. 13171854Sdes * 14171854Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15171854Sdes * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16171854Sdes * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17171854Sdes * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 18171854Sdes * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19171854Sdes * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20171854Sdes * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21171854Sdes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22171854Sdes * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 23171854Sdes * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24171854Sdes * POSSIBILITY OF SUCH DAMAGE. 25171854Sdes */ 26171854Sdes 27171854Sdes/* 28171854Sdes * Device driver for Intel's On Die thermal sensor via MSR. 29171854Sdes * First introduced in Intel's Core line of processors. 30171854Sdes */ 31171854Sdes 32171854Sdes#include <sys/cdefs.h> 33171854Sdes__FBSDID("$FreeBSD$"); 34171854Sdes 35171854Sdes#include <sys/param.h> 36171854Sdes#include <sys/bus.h> 37171854Sdes#include <sys/systm.h> 38171854Sdes#include <sys/types.h> 39171854Sdes#include <sys/module.h> 40171854Sdes#include <sys/conf.h> 41171854Sdes#include <sys/kernel.h> 42172674Snetchild#include <sys/sysctl.h> 43171854Sdes#include <sys/proc.h> /* for curthread */ 44171854Sdes#include <sys/sched.h> 45171854Sdes 46171854Sdes#include <machine/specialreg.h> 47171854Sdes#include <machine/cpufunc.h> 48185341Sjkim#include <machine/cputypes.h> 49171854Sdes#include <machine/md_var.h> 50171854Sdes 51225339Sdelphij#define TZ_ZEROC 2732 52196889Snork 53225339Sdelphij#define THERM_STATUS_LOG 0x02 54225339Sdelphij#define THERM_STATUS 0x01 55225339Sdelphij#define THERM_STATUS_TEMP_SHIFT 16 56225339Sdelphij#define THERM_STATUS_TEMP_MASK 0x7f 57225339Sdelphij#define THERM_STATUS_RES_SHIFT 27 58225339Sdelphij#define THERM_STATUS_RES_MASK 0x0f 59225339Sdelphij#define THERM_STATUS_VALID_SHIFT 31 60225339Sdelphij#define THERM_STATUS_VALID_MASK 0x01 61225339Sdelphij 62171854Sdesstruct coretemp_softc { 63172674Snetchild device_t sc_dev; 64172674Snetchild int sc_tjmax; 65225339Sdelphij unsigned int sc_throttle_log; 66171854Sdes}; 67171854Sdes 68171854Sdes/* 69171854Sdes * Device methods. 70171854Sdes */ 71171854Sdesstatic void coretemp_identify(driver_t *driver, device_t parent); 72171854Sdesstatic int coretemp_probe(device_t dev); 73171854Sdesstatic int coretemp_attach(device_t dev); 74171854Sdesstatic int coretemp_detach(device_t dev); 75171854Sdes 76225339Sdelphijstatic uint64_t coretemp_get_thermal_msr(int cpu); 77225339Sdelphijstatic void coretemp_clear_thermal_msr(int cpu); 78225339Sdelphijstatic int coretemp_get_val_sysctl(SYSCTL_HANDLER_ARGS); 79225339Sdelphijstatic int coretemp_throttle_log_sysctl(SYSCTL_HANDLER_ARGS); 80171854Sdes 81171854Sdesstatic device_method_t coretemp_methods[] = { 82171854Sdes /* Device interface */ 83171854Sdes DEVMETHOD(device_identify, coretemp_identify), 84171854Sdes DEVMETHOD(device_probe, coretemp_probe), 85171854Sdes DEVMETHOD(device_attach, coretemp_attach), 86171854Sdes DEVMETHOD(device_detach, coretemp_detach), 87171854Sdes 88246128Ssbz DEVMETHOD_END 89171854Sdes}; 90171854Sdes 91171854Sdesstatic driver_t coretemp_driver = { 92171854Sdes "coretemp", 93171854Sdes coretemp_methods, 94171854Sdes sizeof(struct coretemp_softc), 95171854Sdes}; 96171854Sdes 97225339Sdelphijenum therm_info { 98225339Sdelphij CORETEMP_TEMP, 99225339Sdelphij CORETEMP_DELTA, 100225339Sdelphij CORETEMP_RESOLUTION, 101225339Sdelphij CORETEMP_TJMAX, 102225339Sdelphij}; 103225339Sdelphij 104171854Sdesstatic devclass_t coretemp_devclass; 105225339SdelphijDRIVER_MODULE(coretemp, cpu, coretemp_driver, coretemp_devclass, NULL, 106225339Sdelphij NULL); 107171854Sdes 108171854Sdesstatic void 109171854Sdescoretemp_identify(driver_t *driver, device_t parent) 110171854Sdes{ 111171854Sdes device_t child; 112171854Sdes u_int regs[4]; 113171854Sdes 114171854Sdes /* Make sure we're not being doubly invoked. */ 115171854Sdes if (device_find_child(parent, "coretemp", -1) != NULL) 116171854Sdes return; 117171933Sdes 118178950Srpaulo /* Check that CPUID 0x06 is supported and the vendor is Intel.*/ 119185341Sjkim if (cpu_high < 6 || cpu_vendor_id != CPU_VENDOR_INTEL) 120171854Sdes return; 121171854Sdes /* 122171854Sdes * CPUID 0x06 returns 1 if the processor has on-die thermal 123171854Sdes * sensors. EBX[0:3] contains the number of sensors. 124171854Sdes */ 125171854Sdes do_cpuid(0x06, regs); 126171854Sdes if ((regs[0] & 0x1) != 1) 127171854Sdes return; 128171854Sdes 129171854Sdes /* 130171854Sdes * We add a child for each CPU since settings must be performed 131171854Sdes * on each CPU in the SMP case. 132171854Sdes */ 133171854Sdes child = device_add_child(parent, "coretemp", -1); 134171854Sdes if (child == NULL) 135171854Sdes device_printf(parent, "add coretemp child failed\n"); 136171854Sdes} 137171854Sdes 138171854Sdesstatic int 139171854Sdescoretemp_probe(device_t dev) 140171854Sdes{ 141241885Seadler if (resource_disabled("coretemp", 0)) 142241885Seadler return (ENXIO); 143171854Sdes 144171854Sdes device_set_desc(dev, "CPU On-Die Thermal Sensors"); 145171854Sdes 146171854Sdes return (BUS_PROBE_GENERIC); 147171854Sdes} 148171854Sdes 149171854Sdesstatic int 150171854Sdescoretemp_attach(device_t dev) 151171854Sdes{ 152171854Sdes struct coretemp_softc *sc = device_get_softc(dev); 153171854Sdes device_t pdev; 154171854Sdes uint64_t msr; 155210624Sdelphij int cpu_model, cpu_stepping; 156210624Sdelphij int ret, tjtarget; 157225339Sdelphij struct sysctl_oid *oid; 158225339Sdelphij struct sysctl_ctx_list *ctx; 159171854Sdes 160171933Sdes sc->sc_dev = dev; 161171854Sdes pdev = device_get_parent(dev); 162210624Sdelphij cpu_model = CPUID_TO_MODEL(cpu_id); 163210624Sdelphij cpu_stepping = cpu_id & CPUID_STEPPING; 164171854Sdes 165176279Srpaulo /* 166176279Srpaulo * Some CPUs, namely the PIII, don't have thermal sensors, but 167176279Srpaulo * report them when the CPUID check is performed in 168176279Srpaulo * coretemp_identify(). This leads to a later GPF when the sensor 169176279Srpaulo * is queried via a MSR, so we stop here. 170176279Srpaulo */ 171176279Srpaulo if (cpu_model < 0xe) 172176279Srpaulo return (ENXIO); 173225339Sdelphij 174175214Srpaulo#if 0 /* 175175214Srpaulo * XXXrpaulo: I have this CPU model and when it returns from C3 176175214Srpaulo * coretemp continues to function properly. 177175214Srpaulo */ 178175214Srpaulo 179171854Sdes /* 180171854Sdes * Check for errata AE18. 181171854Sdes * "Processor Digital Thermal Sensor (DTS) Readout stops 182171854Sdes * updating upon returning from C3/C4 state." 183171854Sdes * 184171854Sdes * Adapted from the Linux coretemp driver. 185171933Sdes */ 186210624Sdelphij if (cpu_model == 0xe && cpu_stepping < 0xc) { 187171854Sdes msr = rdmsr(MSR_BIOS_SIGN); 188171854Sdes msr = msr >> 32; 189171854Sdes if (msr < 0x39) { 190171933Sdes device_printf(dev, "not supported (Intel errata " 191171933Sdes "AE18), try updating your BIOS\n"); 192171854Sdes return (ENXIO); 193171854Sdes } 194171854Sdes } 195175214Srpaulo#endif 196210624Sdelphij 197171854Sdes /* 198210624Sdelphij * Use 100C as the initial value. 199210624Sdelphij */ 200210624Sdelphij sc->sc_tjmax = 100; 201210624Sdelphij 202210624Sdelphij if ((cpu_model == 0xf && cpu_stepping >= 2) || cpu_model == 0xe) { 203210624Sdelphij /* 204210624Sdelphij * On some Core 2 CPUs, there's an undocumented MSR that 205210624Sdelphij * can tell us if Tj(max) is 100 or 85. 206210624Sdelphij * 207210624Sdelphij * The if-clause for CPUs having the MSR_IA32_EXT_CONFIG was adapted 208210624Sdelphij * from the Linux coretemp driver. 209210624Sdelphij */ 210171854Sdes msr = rdmsr(MSR_IA32_EXT_CONFIG); 211171933Sdes if (msr & (1 << 30)) 212171854Sdes sc->sc_tjmax = 85; 213210833Sdelphij } else if (cpu_model == 0x17) { 214210833Sdelphij switch (cpu_stepping) { 215210833Sdelphij case 0x6: /* Mobile Core 2 Duo */ 216221335Sdelphij sc->sc_tjmax = 105; 217210833Sdelphij break; 218210833Sdelphij default: /* Unknown stepping */ 219210833Sdelphij break; 220210833Sdelphij } 221221509Sdelphij } else if (cpu_model == 0x1c) { 222221509Sdelphij switch (cpu_stepping) { 223221509Sdelphij case 0xa: /* 45nm Atom D400, N400 and D500 series */ 224221509Sdelphij sc->sc_tjmax = 100; 225221509Sdelphij break; 226221509Sdelphij default: 227221509Sdelphij sc->sc_tjmax = 90; 228221509Sdelphij break; 229221509Sdelphij } 230210833Sdelphij } else { 231210833Sdelphij /* 232210833Sdelphij * Attempt to get Tj(max) from MSR IA32_TEMPERATURE_TARGET. 233210833Sdelphij * 234210833Sdelphij * This method is described in Intel white paper "CPU 235210833Sdelphij * Monitoring With DTS/PECI". (#322683) 236210833Sdelphij */ 237210833Sdelphij ret = rdmsr_safe(MSR_IA32_TEMPERATURE_TARGET, &msr); 238210833Sdelphij if (ret == 0) { 239210833Sdelphij tjtarget = (msr >> 16) & 0xff; 240225339Sdelphij 241210833Sdelphij /* 242210833Sdelphij * On earlier generation of processors, the value 243210833Sdelphij * obtained from IA32_TEMPERATURE_TARGET register is 244210833Sdelphij * an offset that needs to be summed with a model 245210833Sdelphij * specific base. It is however not clear what 246210833Sdelphij * these numbers are, with the publicly available 247210833Sdelphij * documents from Intel. 248210833Sdelphij * 249225009Sattilio * For now, we consider [70, 110]C range, as 250210833Sdelphij * described in #322683, as "reasonable" and accept 251210833Sdelphij * these values whenever the MSR is available for 252210833Sdelphij * read, regardless the CPU model. 253210833Sdelphij */ 254225009Sattilio if (tjtarget >= 70 && tjtarget <= 110) 255210833Sdelphij sc->sc_tjmax = tjtarget; 256210833Sdelphij else 257210833Sdelphij device_printf(dev, "Tj(target) value %d " 258210833Sdelphij "does not seem right.\n", tjtarget); 259210833Sdelphij } else 260210833Sdelphij device_printf(dev, "Can not get Tj(target) " 261210833Sdelphij "from your CPU, using 100C.\n"); 262171933Sdes } 263171933Sdes 264210624Sdelphij if (bootverbose) 265210624Sdelphij device_printf(dev, "Setting TjMax=%d\n", sc->sc_tjmax); 266210624Sdelphij 267225339Sdelphij ctx = device_get_sysctl_ctx(dev); 268225339Sdelphij 269225339Sdelphij oid = SYSCTL_ADD_NODE(ctx, 270225339Sdelphij SYSCTL_CHILDREN(device_get_sysctl_tree(pdev)), OID_AUTO, 271225339Sdelphij "coretemp", CTLFLAG_RD, NULL, "Per-CPU thermal information"); 272225339Sdelphij 273171854Sdes /* 274225339Sdelphij * Add the MIBs to dev.cpu.N and dev.cpu.N.coretemp. 275171854Sdes */ 276225339Sdelphij SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(pdev)), 277246951Smarkj OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 278246951Smarkj dev, CORETEMP_TEMP, coretemp_get_val_sysctl, "IK", 279196889Snork "Current temperature"); 280225339Sdelphij SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "delta", 281246951Smarkj CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_DELTA, 282225339Sdelphij coretemp_get_val_sysctl, "I", 283225339Sdelphij "Delta between TCC activation and current temperature"); 284225339Sdelphij SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "resolution", 285246951Smarkj CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_RESOLUTION, 286225339Sdelphij coretemp_get_val_sysctl, "I", 287225339Sdelphij "Resolution of CPU thermal sensor"); 288225339Sdelphij SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "tjmax", 289246951Smarkj CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_TJMAX, 290225339Sdelphij coretemp_get_val_sysctl, "IK", 291225339Sdelphij "TCC activation temperature"); 292225339Sdelphij SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, 293246951Smarkj "throttle_log", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, dev, 0, 294225339Sdelphij coretemp_throttle_log_sysctl, "I", 295225339Sdelphij "Set to 1 if the thermal sensor has tripped"); 296171854Sdes 297171854Sdes return (0); 298171854Sdes} 299171854Sdes 300171854Sdesstatic int 301171854Sdescoretemp_detach(device_t dev) 302171854Sdes{ 303171854Sdes return (0); 304171854Sdes} 305171854Sdes 306225339Sdelphijstatic uint64_t 307225339Sdelphijcoretemp_get_thermal_msr(int cpu) 308171854Sdes{ 309171933Sdes uint64_t msr; 310171854Sdes 311172674Snetchild thread_lock(curthread); 312172674Snetchild sched_bind(curthread, cpu); 313172674Snetchild thread_unlock(curthread); 314172674Snetchild 315171854Sdes /* 316172674Snetchild * The digital temperature reading is located at bit 16 317172674Snetchild * of MSR_THERM_STATUS. 318172674Snetchild * 319172674Snetchild * There is a bit on that MSR that indicates whether the 320172674Snetchild * temperature is valid or not. 321172674Snetchild * 322172674Snetchild * The temperature is computed by subtracting the temperature 323172674Snetchild * reading by Tj(max). 324171854Sdes */ 325172674Snetchild msr = rdmsr(MSR_THERM_STATUS); 326171854Sdes 327172674Snetchild thread_lock(curthread); 328172674Snetchild sched_unbind(curthread); 329172674Snetchild thread_unlock(curthread); 330172674Snetchild 331225339Sdelphij return (msr); 332225339Sdelphij} 333171854Sdes 334225339Sdelphijstatic void 335225339Sdelphijcoretemp_clear_thermal_msr(int cpu) 336225339Sdelphij{ 337225339Sdelphij thread_lock(curthread); 338225339Sdelphij sched_bind(curthread, cpu); 339225339Sdelphij thread_unlock(curthread); 340225339Sdelphij 341225339Sdelphij wrmsr(MSR_THERM_STATUS, 0); 342225339Sdelphij 343225339Sdelphij thread_lock(curthread); 344225339Sdelphij sched_unbind(curthread); 345225339Sdelphij thread_unlock(curthread); 346225339Sdelphij} 347225339Sdelphij 348225339Sdelphijstatic int 349225339Sdelphijcoretemp_get_val_sysctl(SYSCTL_HANDLER_ARGS) 350225339Sdelphij{ 351225339Sdelphij device_t dev; 352225339Sdelphij uint64_t msr; 353225339Sdelphij int val, tmp; 354225339Sdelphij struct coretemp_softc *sc; 355225339Sdelphij enum therm_info type; 356225339Sdelphij char stemp[16]; 357225339Sdelphij 358225339Sdelphij dev = (device_t) arg1; 359225339Sdelphij msr = coretemp_get_thermal_msr(device_get_unit(dev)); 360225339Sdelphij sc = device_get_softc(dev); 361225339Sdelphij type = arg2; 362225339Sdelphij 363225339Sdelphij if (((msr >> THERM_STATUS_VALID_SHIFT) & THERM_STATUS_VALID_MASK) != 1) { 364225339Sdelphij val = -1; 365225339Sdelphij } else { 366225339Sdelphij switch (type) { 367225339Sdelphij case CORETEMP_TEMP: 368225339Sdelphij tmp = (msr >> THERM_STATUS_TEMP_SHIFT) & 369225339Sdelphij THERM_STATUS_TEMP_MASK; 370225339Sdelphij val = (sc->sc_tjmax - tmp) * 10 + TZ_ZEROC; 371225339Sdelphij break; 372225339Sdelphij case CORETEMP_DELTA: 373225339Sdelphij val = (msr >> THERM_STATUS_TEMP_SHIFT) & 374225339Sdelphij THERM_STATUS_TEMP_MASK; 375225339Sdelphij break; 376225339Sdelphij case CORETEMP_RESOLUTION: 377225339Sdelphij val = (msr >> THERM_STATUS_RES_SHIFT) & 378225339Sdelphij THERM_STATUS_RES_MASK; 379225339Sdelphij break; 380225339Sdelphij case CORETEMP_TJMAX: 381225339Sdelphij val = sc->sc_tjmax * 10 + TZ_ZEROC; 382225339Sdelphij break; 383225339Sdelphij } 384225339Sdelphij } 385225339Sdelphij 386225339Sdelphij if (msr & THERM_STATUS_LOG) { 387225662Sattilio coretemp_clear_thermal_msr(device_get_unit(dev)); 388225339Sdelphij sc->sc_throttle_log = 1; 389225339Sdelphij 390171933Sdes /* 391225339Sdelphij * Check for Critical Temperature Status and Critical 392225339Sdelphij * Temperature Log. It doesn't really matter if the 393225339Sdelphij * current temperature is invalid because the "Critical 394225339Sdelphij * Temperature Log" bit will tell us if the Critical 395225339Sdelphij * Temperature has * been reached in past. It's not 396225339Sdelphij * directly related to the current temperature. 397225339Sdelphij * 398225339Sdelphij * If we reach a critical level, allow devctl(4) 399225339Sdelphij * to catch this and shutdown the system. 400171933Sdes */ 401225339Sdelphij if (msr & THERM_STATUS) { 402225339Sdelphij tmp = (msr >> THERM_STATUS_TEMP_SHIFT) & 403225339Sdelphij THERM_STATUS_TEMP_MASK; 404225339Sdelphij tmp = (sc->sc_tjmax - tmp) * 10 + TZ_ZEROC; 405225339Sdelphij device_printf(dev, "critical temperature detected, " 406225339Sdelphij "suggest system shutdown\n"); 407225339Sdelphij snprintf(stemp, sizeof(stemp), "%d", tmp); 408225339Sdelphij devctl_notify("coretemp", "Thermal", stemp, 409225339Sdelphij "notify=0xcc"); 410225339Sdelphij } 411171854Sdes } 412171854Sdes 413225339Sdelphij return (sysctl_handle_int(oidp, &val, 0, req)); 414171854Sdes} 415171854Sdes 416172674Snetchildstatic int 417225339Sdelphijcoretemp_throttle_log_sysctl(SYSCTL_HANDLER_ARGS) 418171854Sdes{ 419225339Sdelphij device_t dev; 420225339Sdelphij uint64_t msr; 421225339Sdelphij int error, val; 422225339Sdelphij struct coretemp_softc *sc; 423171854Sdes 424225339Sdelphij dev = (device_t) arg1; 425225339Sdelphij msr = coretemp_get_thermal_msr(device_get_unit(dev)); 426225339Sdelphij sc = device_get_softc(dev); 427171854Sdes 428225662Sattilio if (msr & THERM_STATUS_LOG) { 429225662Sattilio coretemp_clear_thermal_msr(device_get_unit(dev)); 430225339Sdelphij sc->sc_throttle_log = 1; 431225662Sattilio } 432225339Sdelphij 433225339Sdelphij val = sc->sc_throttle_log; 434225339Sdelphij 435225339Sdelphij error = sysctl_handle_int(oidp, &val, 0, req); 436225339Sdelphij 437225339Sdelphij if (error || !req->newptr) 438225339Sdelphij return (error); 439225339Sdelphij else if (val != 0) 440225339Sdelphij return (EINVAL); 441225339Sdelphij 442225339Sdelphij coretemp_clear_thermal_msr(device_get_unit(dev)); 443225339Sdelphij sc->sc_throttle_log = 0; 444225339Sdelphij 445225339Sdelphij return (0); 446171854Sdes} 447