powerd.c revision 193161
1/*-
2 * Copyright (c) 2004 Colin Percival
3 * Copyright (c) 2005 Nate Lawson
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted providing that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/usr.sbin/powerd/powerd.c 193161 2009-05-31 10:27:24Z nwhitehorn $");
30
31#include <sys/param.h>
32#include <sys/ioctl.h>
33#include <sys/sysctl.h>
34#include <sys/resource.h>
35#include <sys/socket.h>
36#include <sys/time.h>
37#include <sys/un.h>
38
39#include <err.h>
40#include <errno.h>
41#include <fcntl.h>
42#include <libutil.h>
43#include <signal.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48
49#ifdef USE_APM
50#include <machine/apm_bios.h>
51#endif
52
/* Defaults for the tunables; main() copies them before parsing options. */
#define DEFAULT_ACTIVE_PERCENT	75	/* Initial cpu_running_mark (-r) */
#define DEFAULT_IDLE_PERCENT	50	/* Initial cpu_idle_mark (-i) */
#define DEFAULT_POLL_INTERVAL	250	/* Poll interval in milliseconds */
56
/*
 * Frequency-selection policy.  One policy is chosen per power source with
 * the -a, -b and -n options (see parse_mode()).
 */
typedef enum {
	MODE_MIN,		/* always run at the lowest listed frequency */
	MODE_ADAPTIVE,		/* follow the measured CPU load */
	MODE_HIADAPTIVE,	/* follow load, raising at half the thresholds */
	MODE_MAX,		/* always run at the highest listed frequency */
} modes_t;
63
/* Power source as last determined by acline_read(). */
typedef enum {
	SRC_AC,			/* running on AC line power */
	SRC_BATTERY,		/* running on battery */
	SRC_UNKNOWN,		/* AC line state could not be read */
} power_src_t;
69
/*
 * Printable names of the power sources, indexed by power_src_t.  Despite
 * the name, this table is not related to modes_t; main() indexes it with
 * acline_status for its verbose messages.
 */
const char *modes[] = {
	"AC",
	"battery",
	"unknown"
};
75
/* Sources of AC line information and the devd notification socket. */
#define ACPIAC		"hw.acpi.acline"	/* sysctl tried first */
#define PMUAC		"dev.pmu.0.acline"	/* sysctl tried on powerpc */
#define APMDEV		"/dev/apm"		/* device opened with USE_APM */
#define DEVDPIPE	"/var/run/devd.pipe"	/* socket path for devd */
#define DEVCTL_MAXBUF	1024			/* devd read buffer size */
81
/*
 * Forward declarations.  get_freq() and get_freq_id() are not listed here
 * because they are defined before their first use.
 */
static int	read_usage_times(int *load);
static int	read_freqs(int *numfreqs, int **freqs, int **power);
static int	set_freq(int freq);
static void	acline_init(void);
static void	acline_read(void);
static int	devd_init(void);
static void	devd_close(void);
static void	handle_sigs(int sig);
static void	parse_mode(char *arg, int *mode, int ch);
static void	usage(void);
92
/* Sysctl data structures. */
static int	cp_times_mib[2];	/* MIB for kern.cp_times */
static int	freq_mib[4];		/* MIB for dev.cpu.0.freq */
static int	levels_mib[4];		/* MIB for dev.cpu.0.freq_levels */
static int	acline_mib[4];		/* MIB for ACPIAC or PMUAC */
static size_t	acline_mib_len;		/* number of valid acline_mib entries */

/* Configuration */
static int	cpu_running_mark;	/* -r: load percent to speed up at */
static int	cpu_idle_mark;		/* -i: load percent to slow down at */
static int	poll_ival;		/* -p: ms; main() scales it to usec */
static int	vflag;			/* -v: verbose, stay in foreground */

/* Set by handle_sigs(); polled once per iteration of the main loop. */
static volatile sig_atomic_t exit_requested;
/* Power source as of the last acline_read(). */
static power_src_t acline_status;
/* Mechanism currently used by acline_read() to get the AC line state. */
static enum {
	ac_none,
	ac_sysctl,
	ac_acpi_devd,
#ifdef USE_APM
	ac_apm,
#endif
} acline_mode;
#ifdef USE_APM
static int	apm_fd = -1;		/* descriptor for APMDEV, -1 if closed */
#endif
static int	devd_pipe = -1;		/* socket to devd, -1 if not connected */

#define DEVD_RETRY_INTERVAL 60 /* seconds */
/* Time of the most recent attempt to connect to devd. */
static struct timeval tried_devd;
123
/*
 * Sample kern.cp_times and report the CPU load since the previous call.
 *
 * load: if non-NULL, receives the sum over all CPUs of the per-CPU busy
 *       percentage (100 minus the idle share of the elapsed ticks).  The
 *       value can therefore exceed 100 on multi-CPU systems.  CPUs whose
 *       counters did not advance are skipped.  Pass NULL to only prime
 *       the saved sample.
 *
 * Returns 0 on success and non-zero on failure (the sysctl() return value
 * or an errno value when memory could not be allocated).
 *
 * The sample buffers are allocated on the first call and kept for the
 * lifetime of the process.
 */
static int
read_usage_times(int *load)
{
	static long *cp_times = NULL, *cp_times_old = NULL;
	static int ncpus = 0;
	size_t cp_times_len;
	int error, cpu, i, total;

	if (cp_times == NULL) {
		long *cur, *prev;

		/* Ask the kernel how large the per-CPU counter array is. */
		cp_times_len = 0;
		error = sysctl(cp_times_mib, 2, NULL, &cp_times_len, NULL, 0);
		if (error)
			return (error);
		if ((cur = malloc(cp_times_len)) == NULL)
			return (errno);
		/*
		 * Zero the "previous" sample so that it holds defined
		 * values even if the first real read below fails and a
		 * later call computes a delta against it.
		 */
		if ((prev = calloc(1, cp_times_len)) == NULL) {
			/* free() may modify errno; save it first. */
			error = errno;
			free(cur);
			return (error);
		}
		cp_times = cur;
		cp_times_old = prev;
		ncpus = cp_times_len / (sizeof(long) * CPUSTATES);
	}

	cp_times_len = sizeof(long) * CPUSTATES * ncpus;
	error = sysctl(cp_times_mib, 2, cp_times, &cp_times_len, NULL, 0);
	if (error)
		return (error);

	if (load) {
		*load = 0;
		for (cpu = 0; cpu < ncpus; cpu++) {
			total = 0;
			for (i = 0; i < CPUSTATES; i++) {
				total += cp_times[cpu * CPUSTATES + i] -
				    cp_times_old[cpu * CPUSTATES + i];
			}
			/* No ticks elapsed on this CPU; avoid dividing by 0. */
			if (total == 0)
				continue;
			*load += 100 - (cp_times[cpu * CPUSTATES + CP_IDLE] -
			    cp_times_old[cpu * CPUSTATES + CP_IDLE]) * 100 / total;
		}
	}

	/* Remember this sample as the baseline for the next call. */
	memcpy(cp_times_old, cp_times, cp_times_len);

	return (0);
}
171
/*
 * Read and parse dev.cpu.0.freq_levels, a space-separated list of
 * "frequency/power" pairs.
 *
 * numfreqs: receives the number of pairs.
 * freqs:    receives a malloc()ed array holding the first number of each
 *           pair; the caller frees it.
 * power:    receives a malloc()ed array holding the second number of each
 *           pair; the caller frees it.
 *
 * Returns 0 on success.  On failure returns -1, releases every
 * allocation made here, and leaves *freqs and *power untouched.
 */
static int
read_freqs(int *numfreqs, int **freqs, int **power)
{
	char *freqstr, *p, *q;
	int *flist = NULL, *plist = NULL;
	int i, count;
	size_t len = 0;

	if (sysctl(levels_mib, 4, NULL, &len, NULL, 0))
		return (-1);
	/* One spare byte so the string can always be terminated below. */
	if ((freqstr = malloc(len + 1)) == NULL)
		return (-1);
	if (sysctl(levels_mib, 4, freqstr, &len, NULL, 0))
		goto fail;
	freqstr[len] = '\0';

	/* N separators delimit N + 1 entries. */
	count = 1;
	for (p = freqstr; *p != '\0'; p++)
		if (*p == ' ')
			count++;

	if ((flist = malloc(count * sizeof(int))) == NULL)
		goto fail;
	if ((plist = malloc(count * sizeof(int))) == NULL)
		goto fail;

	for (i = 0, p = freqstr; i < count; i++) {
		q = strchr(p, ' ');
		if (q != NULL)
			*q = '\0';
		if (sscanf(p, "%d/%d", &flist[i], &plist[i]) != 2)
			goto fail;
		/* The last entry has no trailing separator to step over. */
		if (q == NULL)
			break;
		p = q + 1;
	}

	free(freqstr);
	*numfreqs = count;
	*freqs = flist;
	*power = plist;
	return (0);

fail:
	free(plist);
	free(flist);
	free(freqstr);
	return (-1);
}
216
/*
 * Return the current value of dev.cpu.0.freq, or 0 if it cannot be read.
 * A warning is printed on failure only in verbose mode.
 */
static int
get_freq(void)
{
	int cur;
	size_t sz = sizeof(cur);

	if (sysctl(freq_mib, 4, &cur, &sz, NULL, 0) == 0)
		return (cur);

	if (vflag)
		warn("error reading current CPU frequency");
	return (0);
}
231
/*
 * Write freq to dev.cpu.0.freq.
 *
 * Returns 0 on success, and also when the write is refused with EPERM,
 * which is deliberately not treated as an error.  Returns -1 on any
 * other failure.
 */
static int
set_freq(int freq)
{
	int rc;

	rc = sysctl(freq_mib, 4, NULL, NULL, &freq, sizeof(freq));
	if (rc == 0)
		return (0);

	return (errno == EPERM ? 0 : -1);
}
243
/*
 * Map a frequency to an index into freqs.
 *
 * Scans from index 1 and stops at the first entry that is lower than
 * freq, returning the index just before it.  When no such entry exists
 * the last index is returned; when numfreqs is 1 or less the result is 0.
 * The callers pass a list ordered from highest to lowest, in which case
 * the result is the slowest entry that is still >= freq (or index 0 when
 * freq exceeds every entry).
 */
static int
get_freq_id(int freq, int *freqs, int numfreqs)
{
	int idx;

	for (idx = 1; idx < numfreqs && freqs[idx] >= freq; idx++)
		continue;

	return (idx - 1);
}
256
/*
 * Pick the mechanism used to read the AC line status.  The ACPI sysctl is
 * tried first, then the PMU sysctl (powerpc only), then the APM device
 * (only when built with USE_APM).  If none is available a warning is
 * printed and acline_mode is set to ac_none.
 */
static void
acline_init(void)
{
	acline_mib_len = 4;

	if (sysctlnametomib(ACPIAC, acline_mib, &acline_mib_len) == 0) {
		acline_mode = ac_sysctl;
		if (vflag)
			warnx("using sysctl for AC line status");
		return;
	}
#if __powerpc__
	if (sysctlnametomib(PMUAC, acline_mib, &acline_mib_len) == 0) {
		acline_mode = ac_sysctl;
		if (vflag)
			warnx("using sysctl for AC line status");
		return;
	}
#endif
#ifdef USE_APM
	apm_fd = open(APMDEV, O_RDONLY);
	if (apm_fd >= 0) {
		if (vflag)
			warnx("using APM for AC line status");
		acline_mode = ac_apm;
		return;
	}
#endif
	/* Nothing usable was found. */
	warnx("unable to determine AC line status");
	acline_mode = ac_none;
}
287
/*
 * Refresh acline_status using whichever mechanism acline_mode selects.
 *
 * - ac_acpi_devd: read pending text from the devd socket and look for an
 *   ACPI ACAD notification.  If the connection is lost, switch to
 *   ac_sysctl and fall through to the sysctl read in the same call.
 * - ac_sysctl: read the AC line sysctl; on failure the status becomes
 *   SRC_UNKNOWN.
 * - ac_apm (USE_APM only): query the APM device; on failure close it and
 *   give up on AC line detection.
 *
 * While in ac_sysctl mode, a connection to devd is attempted at most once
 * every DEVD_RETRY_INTERVAL seconds.
 */
static void
acline_read(void)
{
	if (acline_mode == ac_acpi_devd) {
		char buf[DEVCTL_MAXBUF], *ptr;
		ssize_t rlen;
		unsigned int notify;	/* %x stores through unsigned int * */

		/* Leave room for the terminator added below. */
		rlen = read(devd_pipe, buf, sizeof(buf) - 1);
		if (rlen == 0 || (rlen < 0 && errno != EWOULDBLOCK)) {
			if (vflag)
				warnx("lost devd connection, switching to sysctl");
			devd_close();
			acline_mode = ac_sysctl;
			/* FALLTHROUGH to the sysctl read below. */
		}
		if (rlen > 0) {
			/*
			 * read() does not terminate the data; strstr() and
			 * sscanf() need a proper C string.
			 */
			buf[rlen] = '\0';
			if ((ptr = strstr(buf, "system=ACPI")) != NULL &&
			    (ptr = strstr(ptr, "subsystem=ACAD")) != NULL &&
			    (ptr = strstr(ptr, "notify=")) != NULL &&
			    sscanf(ptr, "notify=%x", &notify) == 1)
				acline_status = (notify ? SRC_AC : SRC_BATTERY);
		}
	}
	if (acline_mode == ac_sysctl) {
		int acline;
		size_t len;

		len = sizeof(acline);
		if (sysctl(acline_mib, acline_mib_len, &acline, &len,
		    NULL, 0) == 0)
			acline_status = (acline ? SRC_AC : SRC_BATTERY);
		else
			acline_status = SRC_UNKNOWN;
	}
#ifdef USE_APM
	if (acline_mode == ac_apm) {
		struct apm_info info;

		if (ioctl(apm_fd, APMIO_GETINFO, &info) == 0) {
			acline_status = (info.ai_acline ? SRC_AC : SRC_BATTERY);
		} else {
			close(apm_fd);
			apm_fd = -1;
			acline_mode = ac_none;
			acline_status = SRC_UNKNOWN;
		}
	}
#endif
	/* try to (re)connect to devd */
	if (acline_mode == ac_sysctl) {
		struct timeval now;

		gettimeofday(&now, NULL);
		if (now.tv_sec > tried_devd.tv_sec + DEVD_RETRY_INTERVAL) {
			if (devd_init() >= 0) {
				if (vflag)
					warnx("using devd for AC line status");
				acline_mode = ac_acpi_devd;
			}
			tried_devd = now;
		}
	}
}
351
352static int
353devd_init(void)
354{
355	struct sockaddr_un devd_addr;
356
357	bzero(&devd_addr, sizeof(devd_addr));
358	if ((devd_pipe = socket(PF_LOCAL, SOCK_STREAM, 0)) < 0) {
359		if (vflag)
360			warn("%s(): socket()", __func__);
361		return (-1);
362	}
363
364	devd_addr.sun_family = PF_LOCAL;
365	strlcpy(devd_addr.sun_path, DEVDPIPE, sizeof(devd_addr.sun_path));
366	if (connect(devd_pipe, (struct sockaddr *)&devd_addr,
367	    sizeof(devd_addr)) == -1) {
368		if (vflag)
369			warn("%s(): connect()", __func__);
370		close(devd_pipe);
371		devd_pipe = -1;
372		return (-1);
373	}
374
375	if (fcntl(devd_pipe, F_SETFL, O_NONBLOCK) == -1) {
376		if (vflag)
377			warn("%s(): fcntl()", __func__);
378		close(devd_pipe);
379		return (-1);
380	}
381
382	return (devd_pipe);
383}
384
385static void
386devd_close(void)
387{
388
389	close(devd_pipe);
390	devd_pipe = -1;
391}
392
393static void
394parse_mode(char *arg, int *mode, int ch)
395{
396
397	if (strcmp(arg, "minimum") == 0 || strcmp(arg, "min") == 0)
398		*mode = MODE_MIN;
399	else if (strcmp(arg, "maximum") == 0 || strcmp(arg, "max") == 0)
400		*mode = MODE_MAX;
401	else if (strcmp(arg, "adaptive") == 0 || strcmp(arg, "adp") == 0)
402		*mode = MODE_ADAPTIVE;
403	else if (strcmp(arg, "hiadaptive") == 0 || strcmp(arg, "hadp") == 0)
404		*mode = MODE_HIADAPTIVE;
405	else
406		errx(1, "bad option: -%c %s", (char)ch, optarg);
407}
408
409static void
410handle_sigs(int __unused sig)
411{
412
413	exit_requested = 1;
414}
415
/* Print the command-line synopsis on stderr and exit with status 1. */
static void
usage(void)
{
	(void)fprintf(stderr,
"usage: powerd [-v] [-a mode] [-b mode] [-i %%] [-n mode] [-p ival] [-r %%] [-P pidfile]\n");

	exit(1);
}
424
/*
 * powerd entry point.
 *
 * Parses the options, looks up the sysctl MIBs, and (unless -v is given)
 * writes a pid file and detaches.  It then loops: wait one poll interval
 * (or until devd has something to say), refresh the AC line status, pick
 * the mode configured for that power source, and adjust the CPU frequency
 * accordingly.  On SIGINT or SIGTERM the loop ends, the frequency read at
 * startup is written back and the pid file is removed.
 */
int
main(int argc, char * argv[])
{
	struct timeval timeout;
	fd_set fdset, efdset;
	int nfds;
	struct pidfh *pfh = NULL;
	const char *pidfile = NULL;
	int freq, curfreq, initfreq, *freqs, i, j, *mwatts, numfreqs, load;
	int ch, mode, mode_ac, mode_battery, mode_none;
	uint64_t mjoules_used;
	size_t len;

	/*
	 * Defaults: hiadaptive on AC power or when the power source is
	 * unknown, adaptive on battery.
	 */
	mode_ac = mode_none = MODE_HIADAPTIVE;
	mode_battery = MODE_ADAPTIVE;
	cpu_running_mark = DEFAULT_ACTIVE_PERCENT;
	cpu_idle_mark = DEFAULT_IDLE_PERCENT;
	poll_ival = DEFAULT_POLL_INTERVAL;
	mjoules_used = 0;
	vflag = 0;

	/* User must be root to control frequencies. */
	if (geteuid() != 0)
		errx(1, "must be root to run");

	while ((ch = getopt(argc, argv, "a:b:i:n:p:P:r:v")) != -1)
		switch (ch) {
		case 'a':
			parse_mode(optarg, &mode_ac, ch);
			break;
		case 'b':
			parse_mode(optarg, &mode_battery, ch);
			break;
		case 'i':
			cpu_idle_mark = atoi(optarg);
			if (cpu_idle_mark < 0 || cpu_idle_mark > 100) {
				warnx("%d is not a valid percent",
				    cpu_idle_mark);
				usage();
			}
			break;
		case 'n':
			parse_mode(optarg, &mode_none, ch);
			break;
		case 'p':
			poll_ival = atoi(optarg);
			if (poll_ival < 5) {
				warnx("poll interval is in units of ms");
				usage();
			}
			break;
		case 'P':
			pidfile = optarg;
			break;
		case 'r':
			cpu_running_mark = atoi(optarg);
			if (cpu_running_mark <= 0 || cpu_running_mark > 100) {
				warnx("%d is not a valid percent",
				    cpu_running_mark);
				usage();
			}
			break;
		case 'v':
			vflag = 1;
			break;
		default:
			usage();
		}

	mode = mode_none;

	/* Poll interval is given in ms; keep it in microseconds from here. */
	poll_ival *= 1000;

	/* Look up various sysctl MIBs. */
	len = 2;
	if (sysctlnametomib("kern.cp_times", cp_times_mib, &len))
		err(1, "lookup kern.cp_times");
	len = 4;
	if (sysctlnametomib("dev.cpu.0.freq", freq_mib, &len))
		err(1, "lookup freq");
	len = 4;
	if (sysctlnametomib("dev.cpu.0.freq_levels", levels_mib, &len))
		err(1, "lookup freq_levels");

	/* Check if we can read the load and supported freqs. */
	if (read_usage_times(NULL))
		err(1, "read_usage_times");
	if (read_freqs(&numfreqs, &freqs, &mwatts))
		err(1, "error reading supported CPU frequencies");

	/* Run in the background unless in verbose mode. */
	if (!vflag) {
		pid_t otherpid;

		pfh = pidfile_open(pidfile, 0600, &otherpid);
		if (pfh == NULL) {
			if (errno == EEXIST) {
				errx(1, "powerd already running, pid: %d",
				    otherpid);
			}
			/* Not fatal: keep going without a pid file. */
			warn("cannot open pid file");
		}
		if (daemon(0, 0) != 0) {
			warn("cannot enter daemon mode, exiting");
			pidfile_remove(pfh);
			exit(EXIT_FAILURE);

		}
		pidfile_write(pfh);
	}

	/* Decide whether to use ACPI or APM to read the AC line status. */
	acline_init();

	/*
	 * Exit cleanly on signals.
	 */
	signal(SIGINT, handle_sigs);
	signal(SIGTERM, handle_sigs);

	freq = initfreq = get_freq();
	if (freq < 1)
		freq = 1;
	/* Main loop. */
	for (;;) {
		/*
		 * Sleep for one poll interval, waking early if devd has
		 * data or an exceptional condition pending.  select() takes
		 * restrict-qualified set pointers, so the read and except
		 * sets must be distinct objects.
		 */
		FD_ZERO(&fdset);
		FD_ZERO(&efdset);
		if (devd_pipe >= 0) {
			FD_SET(devd_pipe, &fdset);
			FD_SET(devd_pipe, &efdset);
			nfds = devd_pipe + 1;
		} else {
			nfds = 0;
		}
		timeout.tv_sec = poll_ival / 1000000;
		timeout.tv_usec = poll_ival % 1000000;
		select(nfds, &fdset, NULL, &efdset, &timeout);

		/* If the user requested we quit, print some statistics. */
		if (exit_requested) {
			if (vflag && mjoules_used != 0)
				/*
				 * Take the modulus on the full 64-bit value
				 * before narrowing it for printing.
				 */
				printf("total joules used: %u.%03u\n",
				    (u_int)(mjoules_used / 1000),
				    (u_int)(mjoules_used % 1000));
			break;
		}

		/* Read the current AC status and record the mode. */
		acline_read();
		switch (acline_status) {
		case SRC_AC:
			mode = mode_ac;
			break;
		case SRC_BATTERY:
			mode = mode_battery;
			break;
		case SRC_UNKNOWN:
			mode = mode_none;
			break;
		default:
			errx(1, "invalid AC line status %d", acline_status);
		}

		/* Read the current frequency. */
		if ((curfreq = get_freq()) == 0)
			continue;

		i = get_freq_id(curfreq, freqs, numfreqs);

		if (vflag) {
			/* Keep a sum of all power actually used. */
			if (mwatts[i] != -1)
				mjoules_used +=
				    (mwatts[i] * (poll_ival / 1000)) / 1000;
		}

		/* Always switch to the lowest frequency in min mode. */
		if (mode == MODE_MIN) {
			freq = freqs[numfreqs - 1];
			if (curfreq != freq) {
				if (vflag) {
					printf("now operating on %s power; "
					    "changing frequency to %d MHz\n",
					    modes[acline_status], freq);
				}
				if (set_freq(freq) != 0) {
					warn("error setting CPU freq %d",
					    freq);
					continue;
				}
			}
			continue;
		}

		/* Always switch to the highest frequency in max mode. */
		if (mode == MODE_MAX) {
			freq = freqs[0];
			if (curfreq != freq) {
				if (vflag) {
					printf("now operating on %s power; "
					    "changing frequency to %d MHz\n",
					    modes[acline_status], freq);
				}
				if (set_freq(freq) != 0) {
					warn("error setting CPU freq %d",
					    freq);
					continue;
				}
			}
			continue;
		}

		/* Adaptive mode; get the current CPU usage times. */
		if (read_usage_times(&load)) {
			if (vflag)
				warn("read_usage_times() failed");
			continue;
		}

		/*
		 * "freq" is the wanted frequency carried over between
		 * iterations; it is scaled up or down here and then mapped
		 * onto a supported level below.
		 */
		if (mode == MODE_ADAPTIVE) {
			if (load > cpu_running_mark) {
				if (load > 95 || load > cpu_running_mark * 2)
					freq *= 2;
				else
					freq = freq * load / cpu_running_mark;
				if (freq > freqs[0])
					freq = freqs[0];
			} else if (load < cpu_idle_mark &&
			    curfreq * load < freqs[get_freq_id(
			    freq * 7 / 8, freqs, numfreqs)] *
			    cpu_running_mark) {
				freq = freq * 7 / 8;
				if (freq < freqs[numfreqs - 1])
					freq = freqs[numfreqs - 1];
			}
		} else { /* MODE_HIADAPTIVE */
			if (load > cpu_running_mark / 2) {
				if (load > 95 || load > cpu_running_mark)
					freq *= 4;
				else
					freq = freq * load * 2 / cpu_running_mark;
				if (freq > freqs[0] * 2)
					freq = freqs[0] * 2;
			} else if (load < cpu_idle_mark / 2 &&
			    curfreq * load < freqs[get_freq_id(
			    freq * 31 / 32, freqs, numfreqs)] *
			    cpu_running_mark / 2) {
				freq = freq * 31 / 32;
				if (freq < freqs[numfreqs - 1])
					freq = freqs[numfreqs - 1];
			}
		}
		if (vflag) {
		    printf("load %3d%%, current freq %4d MHz (%2d), wanted freq %4d MHz\n",
			load, curfreq, i, freq);
		}
		j = get_freq_id(freq, freqs, numfreqs);
		if (i != j) {
			if (vflag) {
				printf("changing clock"
				    " speed from %d MHz to %d MHz\n",
				    freqs[i], freqs[j]);
			}
			if (set_freq(freqs[j]))
				warn("error setting CPU frequency %d",
				    freqs[j]);
		}
	}
	/* Put back the frequency that was in effect at startup. */
	if (set_freq(initfreq))
		warn("error setting CPU frequency %d", initfreq);
	free(freqs);
	free(mwatts);
	devd_close();
	if (!vflag)
		pidfile_remove(pfh);

	exit(0);
}
703