/* local_apic.c revision 262141 */
1/*-
2 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
3 * Copyright (c) 1996, by Steve Passe
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. The name of the developer may NOT be used to endorse or promote products
12 *    derived from this software without specific prior written permission.
13 * 3. Neither the name of the author nor the names of any co-contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30/*
31 * Local APIC support on Pentium and later processors.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/sys/x86/x86/local_apic.c 262141 2014-02-18 01:15:32Z jhb $");
36
37#include "opt_atpic.h"
38#include "opt_hwpmc_hooks.h"
39#include "opt_kdtrace.h"
40
41#include "opt_ddb.h"
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/bus.h>
46#include <sys/kernel.h>
47#include <sys/lock.h>
48#include <sys/mutex.h>
49#include <sys/pcpu.h>
50#include <sys/proc.h>
51#include <sys/sched.h>
52#include <sys/smp.h>
53#include <sys/timeet.h>
54
55#include <vm/vm.h>
56#include <vm/pmap.h>
57
58#include <x86/apicreg.h>
59#include <machine/cpu.h>
60#include <machine/cputypes.h>
61#include <machine/frame.h>
62#include <machine/intr_machdep.h>
63#include <machine/apicvar.h>
64#include <x86/mca.h>
65#include <machine/md_var.h>
66#include <machine/smp.h>
67#include <machine/specialreg.h>
68
69#ifdef DDB
70#include <sys/interrupt.h>
71#include <ddb/ddb.h>
72#endif
73
/*
 * IDT gate descriptor types and code segment selector used when installing
 * APIC interrupt handlers.  amd64 uses 64-bit interrupt gates throughout;
 * i386 distinguishes interrupt gates (SDT_APIC) from trap gates (SDT_APICT)
 * and needs an explicit kernel code segment selector.
 */
#ifdef __amd64__
#define	SDT_APIC	SDT_SYSIGT
#define	SDT_APICT	SDT_SYSIGT
#define	GSEL_APIC	0
#else
#define	SDT_APIC	SDT_SYS386IGT
#define	SDT_APICT	SDT_SYS386TGT
#define	GSEL_APIC	GSEL(GCODE_SEL, SEL_KPL)
#endif

/* Sanity checks on IDT vectors. */
CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
CTASSERT(APIC_LOCAL_INTS == 240);
CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);

/*
 * Magic IRQ values for the timer and syscalls.  These are pseudo-IRQ
 * numbers above the range of real I/O interrupt sources; they are stored
 * in la_ioint_irqs[] to mark vectors that are in use but do not map to an
 * interrupt source (see lapic_create() and apic_idt_to_irq()).
 */
#define	IRQ_TIMER	(NUM_IO_INTS + 1)
#define	IRQ_SYSCALL	(NUM_IO_INTS + 2)
#define	IRQ_DTRACE_RET	(NUM_IO_INTS + 3)
#define	IRQ_EVTCHN	(NUM_IO_INTS + 4)
95
96/*
97 * Support for local APICs.  Local APICs manage interrupts on each
98 * individual processor as opposed to I/O APICs which receive interrupts
99 * from I/O devices and then forward them on to the local APICs.
100 *
101 * Local APICs can also send interrupts to each other thus providing the
102 * mechanism for IPIs.
103 */
104
/*
 * Software description of one local vector table (LVT) entry.  The fields
 * mirror the bits lvt_mode() programs into the hardware LVT register:
 * trigger mode, polarity, mask, delivery mode, and vector.
 */
struct lvt {
	u_int lvt_edgetrigger:1;	/* 1 = edge trigger, 0 = level */
	u_int lvt_activehi:1;		/* 1 = active high, 0 = active low */
	u_int lvt_masked:1;		/* 1 = interrupt masked */
	u_int lvt_active:1;		/* per-APIC override of global default */
	u_int lvt_mode:16;		/* APIC_LVT_DM_* delivery mode */
	u_int lvt_vector:8;		/* IDT vector for DM_FIXED mode */
};
113
/*
 * Per-CPU local APIC state, indexed by APIC ID in lapics[] below.
 * Entries are populated by lapic_create() and consulted for LVT
 * programming, timer state, and IDT vector -> IRQ translation.
 */
struct lapic {
	struct lvt la_lvts[APIC_LVT_MAX + 1];	/* per-APIC LVT overrides */
	u_int la_id:8;			/* local APIC ID */
	u_int la_cluster:4;		/* logical destination cluster */
	u_int la_cluster_id:2;		/* intra-cluster ID */
	u_int la_present:1;		/* entry is valid */
	u_long *la_timer_count;		/* intrcnt slot for the LAPIC timer */
	u_long la_timer_period;		/* programmed timer count */
	u_int la_timer_mode;		/* 0 = off, 1 = periodic, 2 = one-shot */
	uint32_t lvt_timer_cache;	/* cached timer LVT register value */
	/* Include IDT_SYSCALL to make indexing easier. */
	int la_ioint_irqs[APIC_NUM_IOINTS + 1];	/* vector -> IRQ, -1 = free */
} static lapics[MAX_APIC_ID + 1];
127
/*
 * Global defaults for local APIC LVT entries.  lapic_create() copies these
 * into each per-APIC structure with lvt_active cleared; a per-APIC entry
 * only overrides the default once lvt_active is set (see lvt_mode()).
 * Field order matches struct lvt: edgetrigger, activehi, masked, active,
 * mode, vector.
 */
static struct lvt lvts[APIC_LVT_MAX + 1] = {
	{ 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 },	/* LINT0: masked ExtINT */
	{ 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 },	/* LINT1: NMI */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT },	/* Timer */
	{ 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT },	/* Error */
	{ 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 },	/* PMC */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT },	/* Thermal */
	{ 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT },	/* CMCI */
};
138
/*
 * Interrupt service routines for I/O interrupts, one handler per block of
 * 32 vectors.  Indexed by vector / 32 in apic_enable_vector(); the first
 * 32 vectors are CPU exceptions and have no APIC ISR.
 */
static inthand_t *ioint_handlers[] = {
	NULL,			/* 0 - 31 */
	IDTVEC(apic_isr1),	/* 32 - 63 */
	IDTVEC(apic_isr2),	/* 64 - 95 */
	IDTVEC(apic_isr3),	/* 96 - 127 */
	IDTVEC(apic_isr4),	/* 128 - 159 */
	IDTVEC(apic_isr5),	/* 160 - 191 */
	IDTVEC(apic_isr6),	/* 192 - 223 */
	IDTVEC(apic_isr7),	/* 224 - 255 */
};
149
150
/*
 * Divide Configuration Register encodings for power-of-two timer divisors
 * 1..128; indexed by ffs(divisor) - 1 in lapic_timer_set_divisor().
 */
static u_int32_t lapic_timer_divisors[] = {
	APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16,
	APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128
};

extern inthand_t IDTVEC(rsvd);

/* Virtual address of the memory-mapped local APIC register window. */
volatile lapic_t *lapic;
vm_paddr_t lapic_paddr;
/* Timer divisor chosen by calibration in lapic_et_start(). */
static u_long lapic_timer_divisor;
/* Event timer backed by the per-CPU LAPIC timer. */
static struct eventtimer lapic_et;

static void	lapic_enable(void);
static void	lapic_resume(struct pic *pic, bool suspend_cancelled);
static void	lapic_timer_oneshot(struct lapic *,
		    u_int count, int enable_int);
static void	lapic_timer_periodic(struct lapic *,
		    u_int count, int enable_int);
static void	lapic_timer_stop(struct lapic *);
static void	lapic_timer_set_divisor(u_int divisor);
static uint32_t	lvt_mode(struct lapic *la, u_int pin, uint32_t value);
static int	lapic_et_start(struct eventtimer *et,
    sbintime_t first, sbintime_t period);
static int	lapic_et_stop(struct eventtimer *et);

struct pic lapic_pic = { .pic_resume = lapic_resume };
177
/*
 * Compute the hardware LVT register value for pin 'pin' of local APIC
 * 'la', starting from the current register contents 'value'.  The trigger
 * mode, polarity, mask, delivery mode, and vector fields are rebuilt from
 * the per-APIC override for the pin if one is active, otherwise from the
 * global defaults in lvts[].  Panics on an unsupported delivery mode.
 */
static uint32_t
lvt_mode(struct lapic *la, u_int pin, uint32_t value)
{
	struct lvt *lvt;

	KASSERT(pin <= APIC_LVT_MAX, ("%s: pin %u out of range", __func__, pin));
	/* A per-APIC entry overrides the global default only when active. */
	if (la->la_lvts[pin].lvt_active)
		lvt = &la->la_lvts[pin];
	else
		lvt = &lvts[pin];

	/* Clear every field we are about to rebuild. */
	value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM |
	    APIC_LVT_VECTOR);
	if (lvt->lvt_edgetrigger == 0)
		value |= APIC_LVT_TM;
	if (lvt->lvt_activehi == 0)
		value |= APIC_LVT_IIPP_INTALO;
	if (lvt->lvt_masked)
		value |= APIC_LVT_M;
	value |= lvt->lvt_mode;
	switch (lvt->lvt_mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		/* These delivery modes only support edge triggering. */
		if (!lvt->lvt_edgetrigger) {
			printf("lapic%u: Forcing LINT%u to edge trigger\n",
			    la->la_id, pin);
			value |= APIC_LVT_TM;
		}
		/* Use a vector of 0. */
		break;
	case APIC_LVT_DM_FIXED:
		value |= lvt->lvt_vector;
		break;
	default:
		panic("bad APIC LVT delivery mode: %#x\n", value);
	}
	return (value);
}
218
219/*
220 * Map the local APIC and setup necessary interrupt vectors.
221 */
void
lapic_init(vm_paddr_t addr)
{
	u_int regs[4];
	int i, arat;

	/* Map the local APIC and setup the spurious interrupt handler. */
	KASSERT(trunc_page(addr) == addr,
	    ("local APIC not aligned on a page boundary"));
	lapic_paddr = addr;
	lapic = pmap_mapdev(addr, sizeof(lapic_t));
	setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL,
	    GSEL_APIC);

	/* Perform basic initialization of the BSP's local APIC. */
	lapic_enable();

	/* Set BSP's per-CPU local APIC ID. */
	PCPU_SET(apic_id, lapic_id());

	/* Local APIC timer interrupt. */
	setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC);

	/* Local APIC error interrupt. */
	setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC);

	/* XXX: Thermal interrupt */

	/* Local APIC CMCI. */
	setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_APICT, SEL_KPL, GSEL_APIC);

	/*
	 * Register the LAPIC timer as an event timer unless disabled via
	 * the hint "hint.apic.0.clock=0".
	 */
	if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
		arat = 0;
		/* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */
		if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) {
			do_cpuid(0x06, regs);
			if ((regs[0] & CPUTPM1_ARAT) != 0)
				arat = 1;
		}
		bzero(&lapic_et, sizeof(lapic_et));
		lapic_et.et_name = "LAPIC";
		lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
		    ET_FLAGS_PERCPU;
		lapic_et.et_quality = 600;
		/* Without ARAT the timer stops in deep C-states. */
		if (!arat) {
			lapic_et.et_flags |= ET_FLAGS_C3STOP;
			lapic_et.et_quality -= 200;
		}
		lapic_et.et_frequency = 0;
		/* We don't know frequency yet, so trying to guess. */
		lapic_et.et_min_period = 0x00001000LL;
		lapic_et.et_max_period = SBT_1S;
		lapic_et.et_start = lapic_et_start;
		lapic_et.et_stop = lapic_et_stop;
		lapic_et.et_priv = NULL;
		et_register(&lapic_et);
	}
}
280
281/*
282 * Create a local APIC instance.
283 */
284void
285lapic_create(u_int apic_id, int boot_cpu)
286{
287	int i;
288
289	if (apic_id > MAX_APIC_ID) {
290		printf("APIC: Ignoring local APIC with ID %d\n", apic_id);
291		if (boot_cpu)
292			panic("Can't ignore BSP");
293		return;
294	}
295	KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u",
296	    apic_id));
297
298	/*
299	 * Assume no local LVT overrides and a cluster of 0 and
300	 * intra-cluster ID of 0.
301	 */
302	lapics[apic_id].la_present = 1;
303	lapics[apic_id].la_id = apic_id;
304	for (i = 0; i <= APIC_LVT_MAX; i++) {
305		lapics[apic_id].la_lvts[i] = lvts[i];
306		lapics[apic_id].la_lvts[i].lvt_active = 0;
307	}
308	for (i = 0; i <= APIC_NUM_IOINTS; i++)
309	    lapics[apic_id].la_ioint_irqs[i] = -1;
310	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
311	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
312	    IRQ_TIMER;
313#ifdef KDTRACE_HOOKS
314	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
315	    IRQ_DTRACE_RET;
316#endif
317#ifdef XENHVM
318	lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN;
319#endif
320
321
322#ifdef SMP
323	cpu_add(apic_id, boot_cpu);
324#endif
325}
326
327/*
328 * Dump contents of local APIC registers
329 */
void
lapic_dump(const char* str)
{
	uint32_t maxlvt;

	/* The version register reports how many LVT entries exist. */
	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	printf("cpu%d %s:\n", PCPU_GET(cpuid), str);
	printf("     ID: 0x%08x   VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n",
	    lapic->id, lapic->version, lapic->ldr, lapic->dfr);
	printf("  lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n",
	    lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr);
	printf("  timer: 0x%08x therm: 0x%08x err: 0x%08x",
	    lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error);
	/* PMC and CMCI LVTs exist only on newer APICs. */
	if (maxlvt >= APIC_LVT_PMC)
		printf(" pmc: 0x%08x", lapic->lvt_pcint);
	printf("\n");
	if (maxlvt >= APIC_LVT_CMCI)
		printf("   cmci: 0x%08x\n", lapic->lvt_cmci);
}
349
/*
 * Program the current CPU's local APIC: TPR, spurious vector, all LVT
 * entries, and (if previously configured) the timer.  Called on the BSP
 * during boot, on the APs during startup, and again on resume.  'boot' is
 * non-zero only the first time, to register the timer interrupt counter.
 * Runs with interrupts disabled.
 */
void
lapic_setup(int boot)
{
	struct lapic *la;
	u_int32_t maxlvt;
	register_t saveintr;
	char buf[MAXCOMLEN + 1];

	la = &lapics[lapic_id()];
	KASSERT(la->la_present, ("missing APIC structure"));
	saveintr = intr_disable();
	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;

	/* Initialize the TPR to allow all interrupts. */
	lapic_set_tpr(0);

	/* Setup spurious vector and enable the local APIC. */
	lapic_enable();

	/* Program LINT[01] LVT entries. */
	lapic->lvt_lint0 = lvt_mode(la, APIC_LVT_LINT0, lapic->lvt_lint0);
	lapic->lvt_lint1 = lvt_mode(la, APIC_LVT_LINT1, lapic->lvt_lint1);

	/* Program the PMC LVT entry if present. */
	if (maxlvt >= APIC_LVT_PMC)
		lapic->lvt_pcint = lvt_mode(la, APIC_LVT_PMC, lapic->lvt_pcint);

	/* Program timer LVT and setup handler. */
	la->lvt_timer_cache = lapic->lvt_timer =
	    lvt_mode(la, APIC_LVT_TIMER, lapic->lvt_timer);
	if (boot) {
		snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid));
		intrcnt_add(buf, &la->la_timer_count);
	}

	/* Setup the timer if configured. */
	if (la->la_timer_mode != 0) {
		KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor",
		    lapic_id()));
		lapic_timer_set_divisor(lapic_timer_divisor);
		/* la_timer_mode: 1 = periodic, 2 = one-shot. */
		if (la->la_timer_mode == 1)
			lapic_timer_periodic(la, la->la_timer_period, 1);
		else
			lapic_timer_oneshot(la, la->la_timer_period, 1);
	}

	/* Program error LVT and clear any existing errors. */
	lapic->lvt_error = lvt_mode(la, APIC_LVT_ERROR, lapic->lvt_error);
	lapic->esr = 0;

	/* XXX: Thermal LVT */

	/* Program the CMCI LVT entry if present. */
	if (maxlvt >= APIC_LVT_CMCI)
		lapic->lvt_cmci = lvt_mode(la, APIC_LVT_CMCI, lapic->lvt_cmci);

	intr_restore(saveintr);
}
408
/*
 * Unmask the performance-counter LVT entry on the current CPU so PMC
 * interrupts can be delivered again.
 */
void
lapic_reenable_pmc(void)
{
#ifdef HWPMC_HOOKS
	uint32_t lvt;

	/* Clear the mask bit, preserving the rest of the LVT register. */
	lvt = lapic->lvt_pcint;
	lvt &= ~APIC_LVT_M;
	lapic->lvt_pcint = lvt;
#endif
}
420
#ifdef HWPMC_HOOKS
/*
 * Reprogram the PMC LVT on the current CPU from the (possibly updated)
 * global defaults; used as a smp_rendezvous() callback, hence the unused
 * 'dummy' argument.
 */
static void
lapic_update_pmc(void *dummy)
{
	struct lapic *la;

	la = &lapics[lapic_id()];
	lapic->lvt_pcint = lvt_mode(la, APIC_LVT_PMC, lapic->lvt_pcint);
}
#endif
431
/*
 * Unmask the PMC LVT globally and reprogram it on every CPU.  Returns 1
 * on success, 0 if the local APIC or its PMC LVT entry is not present
 * (or if HWPMC_HOOKS is not compiled in).
 */
int
lapic_enable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (lapic == NULL)
		return (0);

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return (0);

	/* Unmask in the global defaults; APs pick this up via lvt_mode(). */
	lvts[APIC_LVT_PMC].lvt_masked = 0;

#ifdef SMP
	/*
	 * If hwpmc was loaded at boot time then the APs may not be
	 * started yet.  In that case, don't forward the request to
	 * them as they will program the lvt when they start.
	 */
	if (smp_started)
		smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
	else
#endif
		lapic_update_pmc(NULL);
	return (1);
#else
	return (0);
#endif
}
465
/*
 * Mask the PMC LVT globally and reprogram it on every CPU; the inverse of
 * lapic_enable_pmc().  No-op if the local APIC or PMC LVT is absent.
 */
void
lapic_disable_pmc(void)
{
#ifdef HWPMC_HOOKS
	u_int32_t maxlvt;

	/* Fail if the local APIC is not present. */
	if (lapic == NULL)
		return;

	/* Fail if the PMC LVT is not present. */
	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
	if (maxlvt < APIC_LVT_PMC)
		return;

	/* Mask in the global defaults; each CPU re-reads them below. */
	lvts[APIC_LVT_PMC].lvt_masked = 1;

#ifdef SMP
	/* The APs should always be started when hwpmc is unloaded. */
	KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
#endif
	smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
#endif
}
490
/*
 * Event timer start callback.  On first use the LAPIC timer frequency is
 * calibrated against DELAY(), doubling the divisor until the counter does
 * not wrap within one second.  'period' != 0 selects periodic mode;
 * otherwise a one-shot is armed to expire after 'first'.
 */
static int
lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period)
{
	struct lapic *la;
	u_long value;

	la = &lapics[PCPU_GET(apic_id)];
	if (et->et_frequency == 0) {
		/* Start off with a divisor of 2 (power on reset default). */
		lapic_timer_divisor = 2;
		/* Try to calibrate the local APIC timer. */
		do {
			lapic_timer_set_divisor(lapic_timer_divisor);
			lapic_timer_oneshot(la, APIC_TIMER_MAX_COUNT, 0);
			DELAY(1000000);
			/* Counts elapsed in one second == frequency in Hz. */
			value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer;
			if (value != APIC_TIMER_MAX_COUNT)
				break;
			lapic_timer_divisor <<= 1;
		} while (lapic_timer_divisor <= 128);
		if (lapic_timer_divisor > 128)
			panic("lapic: Divisor too big");
		if (bootverbose)
			printf("lapic: Divisor %lu, Frequency %lu Hz\n",
			    lapic_timer_divisor, value);
		et->et_frequency = value;
		/* Min/max periods as 32.32 fixed-point sbintime values. */
		et->et_min_period = (0x00000002LLU << 32) / et->et_frequency;
		et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency;
	}
	/* Divisor was only programmed by calibration; redo it if idle. */
	if (la->la_timer_mode == 0)
		lapic_timer_set_divisor(lapic_timer_divisor);
	if (period != 0) {
		la->la_timer_mode = 1;
		/* Convert the 32.32 fixed-point interval to timer counts. */
		la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 32;
		lapic_timer_periodic(la, la->la_timer_period, 1);
	} else {
		la->la_timer_mode = 2;
		la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 32;
		lapic_timer_oneshot(la, la->la_timer_period, 1);
	}
	return (0);
}
533
534static int
535lapic_et_stop(struct eventtimer *et)
536{
537	struct lapic *la = &lapics[PCPU_GET(apic_id)];
538
539	la->la_timer_mode = 0;
540	lapic_timer_stop(la);
541	return (0);
542}
543
544void
545lapic_disable(void)
546{
547	uint32_t value;
548
549	/* Software disable the local APIC. */
550	value = lapic->svr;
551	value &= ~APIC_SVR_SWEN;
552	lapic->svr = value;
553}
554
555static void
556lapic_enable(void)
557{
558	u_int32_t value;
559
560	/* Program the spurious vector to enable the local APIC. */
561	value = lapic->svr;
562	value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS);
563	value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT);
564	lapic->svr = value;
565}
566
567/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic, bool suspend_cancelled)
{

	/* Full reprogramming; boot == 0 skips intrcnt registration. */
	lapic_setup(0);
}
574
/* Return the local APIC ID of the current CPU from the ID register. */
int
lapic_id(void)
{

	KASSERT(lapic != NULL, ("local APIC is not mapped"));
	return (lapic->id >> APIC_ID_SHIFT);
}
582
583int
584lapic_intr_pending(u_int vector)
585{
586	volatile u_int32_t *irr;
587
588	/*
589	 * The IRR registers are an array of 128-bit registers each of
590	 * which only describes 32 interrupts in the low 32 bits..  Thus,
591	 * we divide the vector by 32 to get the 128-bit index.  We then
592	 * multiply that index by 4 to get the equivalent index from
593	 * treating the IRR as an array of 32-bit registers.  Finally, we
594	 * modulus the vector by 32 to determine the individual bit to
595	 * test.
596	 */
597	irr = &lapic->irr0;
598	return (irr[(vector / 32) * 4] & 1 << (vector % 32));
599}
600
/*
 * Record the logical destination (cluster and intra-cluster ID) for local
 * APIC 'apic_id'.  Only updates the software state; the hardware LDR is
 * programmed elsewhere.
 */
void
lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
{
	struct lapic *la;

	KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist",
	    __func__, apic_id));
	KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big",
	    __func__, cluster));
	KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID,
	    ("%s: intra cluster id %u too big", __func__, cluster_id));
	la = &lapics[apic_id];
	la->la_cluster = cluster;
	la->la_cluster_id = cluster_id;
}
616
617int
618lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked)
619{
620
621	if (pin > APIC_LVT_MAX)
622		return (EINVAL);
623	if (apic_id == APIC_ID_ALL) {
624		lvts[pin].lvt_masked = masked;
625		if (bootverbose)
626			printf("lapic:");
627	} else {
628		KASSERT(lapics[apic_id].la_present,
629		    ("%s: missing APIC %u", __func__, apic_id));
630		lapics[apic_id].la_lvts[pin].lvt_masked = masked;
631		lapics[apic_id].la_lvts[pin].lvt_active = 1;
632		if (bootverbose)
633			printf("lapic%u:", apic_id);
634	}
635	if (bootverbose)
636		printf(" LINT%u %s\n", pin, masked ? "masked" : "unmasked");
637	return (0);
638}
639
/*
 * Set the delivery mode of LVT pin 'pin', either in the global defaults
 * (apic_id == APIC_ID_ALL) or as an active per-APIC override.  The
 * trigger mode, polarity, and mask are forced to values appropriate for
 * the mode.  Returns EINVAL for an out-of-range pin; panics on an
 * unsupported delivery mode.
 */
int
lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode)
{
	struct lvt *lvt;

	if (pin > APIC_LVT_MAX)
		return (EINVAL);
	if (apic_id == APIC_ID_ALL) {
		lvt = &lvts[pin];
		if (bootverbose)
			printf("lapic:");
	} else {
		KASSERT(lapics[apic_id].la_present,
		    ("%s: missing APIC %u", __func__, apic_id));
		lvt = &lapics[apic_id].la_lvts[pin];
		lvt->lvt_active = 1;
		if (bootverbose)
			printf("lapic%u:", apic_id);
	}
	lvt->lvt_mode = mode;
	switch (mode) {
	case APIC_LVT_DM_NMI:
	case APIC_LVT_DM_SMI:
	case APIC_LVT_DM_INIT:
	case APIC_LVT_DM_EXTINT:
		/* These modes require edge trigger and active-high. */
		lvt->lvt_edgetrigger = 1;
		lvt->lvt_activehi = 1;
		/* ExtINT starts masked so the 8259A is not hot by default. */
		if (mode == APIC_LVT_DM_EXTINT)
			lvt->lvt_masked = 1;
		else
			lvt->lvt_masked = 0;
		break;
	default:
		panic("Unsupported delivery mode: 0x%x\n", mode);
	}
	if (bootverbose) {
		printf(" Routing ");
		switch (mode) {
		case APIC_LVT_DM_NMI:
			printf("NMI");
			break;
		case APIC_LVT_DM_SMI:
			printf("SMI");
			break;
		case APIC_LVT_DM_INIT:
			printf("INIT");
			break;
		case APIC_LVT_DM_EXTINT:
			printf("ExtINT");
			break;
		}
		printf(" -> LINT%u\n", pin);
	}
	return (0);
}
695
696int
697lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol)
698{
699
700	if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM)
701		return (EINVAL);
702	if (apic_id == APIC_ID_ALL) {
703		lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH);
704		if (bootverbose)
705			printf("lapic:");
706	} else {
707		KASSERT(lapics[apic_id].la_present,
708		    ("%s: missing APIC %u", __func__, apic_id));
709		lapics[apic_id].la_lvts[pin].lvt_active = 1;
710		lapics[apic_id].la_lvts[pin].lvt_activehi =
711		    (pol == INTR_POLARITY_HIGH);
712		if (bootverbose)
713			printf("lapic%u:", apic_id);
714	}
715	if (bootverbose)
716		printf(" LINT%u polarity: %s\n", pin,
717		    pol == INTR_POLARITY_HIGH ? "high" : "low");
718	return (0);
719}
720
721int
722lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger)
723{
724
725	if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM)
726		return (EINVAL);
727	if (apic_id == APIC_ID_ALL) {
728		lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE);
729		if (bootverbose)
730			printf("lapic:");
731	} else {
732		KASSERT(lapics[apic_id].la_present,
733		    ("%s: missing APIC %u", __func__, apic_id));
734		lapics[apic_id].la_lvts[pin].lvt_edgetrigger =
735		    (trigger == INTR_TRIGGER_EDGE);
736		lapics[apic_id].la_lvts[pin].lvt_active = 1;
737		if (bootverbose)
738			printf("lapic%u:", apic_id);
739	}
740	if (bootverbose)
741		printf(" LINT%u trigger: %s\n", pin,
742		    trigger == INTR_TRIGGER_EDGE ? "edge" : "level");
743	return (0);
744}
745
746/*
747 * Adjust the TPR of the current CPU so that it blocks all interrupts below
748 * the passed in vector.
749 */
void
lapic_set_tpr(u_int vector)
{
#ifdef CHEAP_TPR
	/* The whole register is the priority field; write it directly. */
	lapic->tpr = vector;
#else
	u_int32_t tpr;

	/* Preserve any reserved bits outside the priority field. */
	tpr = lapic->tpr & ~APIC_TPR_PRIO;
	tpr |= vector;
	lapic->tpr = tpr;
#endif
}
763
/* Signal end-of-interrupt; any value written to the EOI register works. */
void
lapic_eoi(void)
{

	lapic->eoi = 0;
}
770
771void
772lapic_handle_intr(int vector, struct trapframe *frame)
773{
774	struct intsrc *isrc;
775
776	isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id),
777	    vector));
778	intr_execute_handlers(isrc, frame);
779}
780
/*
 * LAPIC timer interrupt entry: account the tick and invoke the event
 * timer callback (if the event timer is active) with intr_frame state
 * saved/restored around it.
 */
void
lapic_handle_timer(struct trapframe *frame)
{
	struct lapic *la;
	struct trapframe *oldframe;
	struct thread *td;

	/* Send EOI first thing. */
	lapic_eoi();

#if defined(SMP) && !defined(SCHED_ULE)
	/*
	 * Don't do any accounting for the disabled HTT cores, since it
	 * will provide misleading numbers for the userland.
	 *
	 * No locking is necessary here, since even if we lose the race
	 * when hlt_cpus_mask changes it is not a big deal, really.
	 *
	 * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
	 * and unlike other schedulers it actually schedules threads to
	 * those CPUs.
	 */
	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
		return;
#endif

	/* Look up our local APIC structure for the tick counters. */
	la = &lapics[PCPU_GET(apic_id)];
	(*la->la_timer_count)++;
	critical_enter();
	if (lapic_et.et_active) {
		td = curthread;
		/* Nest like an interrupt frame for the callback's benefit. */
		td->td_intr_nesting_level++;
		oldframe = td->td_intr_frame;
		td->td_intr_frame = frame;
		lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg);
		td->td_intr_frame = oldframe;
		td->td_intr_nesting_level--;
	}
	critical_exit();
}
822
/*
 * Program the timer Divide Configuration Register.  'divisor' must be a
 * power of two between 1 and 128 (the entries in lapic_timer_divisors[]).
 */
static void
lapic_timer_set_divisor(u_int divisor)
{

	KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor));
	KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) /
	    sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor));
	/* ffs() maps 1,2,4,...,128 onto table indexes 0..7. */
	lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1];
}
832
/*
 * Arm the LAPIC timer in one-shot mode for 'count' ticks, unmasking its
 * interrupt if 'enable_int' is set.  The LVT must be written before the
 * initial-count register, which starts the timer.
 */
static void
lapic_timer_oneshot(struct lapic *la, u_int count, int enable_int)
{
	u_int32_t value;

	value = la->lvt_timer_cache;
	value &= ~APIC_LVTT_TM;
	value |= APIC_LVTT_TM_ONE_SHOT;
	if (enable_int)
		value &= ~APIC_LVT_M;
	lapic->lvt_timer = value;
	lapic->icr_timer = count;
}
846
/*
 * Arm the LAPIC timer in periodic mode with a period of 'count' ticks,
 * unmasking its interrupt if 'enable_int' is set.  The LVT must be
 * written before the initial-count register, which starts the timer.
 */
static void
lapic_timer_periodic(struct lapic *la, u_int count, int enable_int)
{
	u_int32_t value;

	value = la->lvt_timer_cache;
	value &= ~APIC_LVTT_TM;
	value |= APIC_LVTT_TM_PERIODIC;
	if (enable_int)
		value &= ~APIC_LVT_M;
	lapic->lvt_timer = value;
	lapic->icr_timer = count;
}
860
/* Mask the LAPIC timer interrupt, effectively stopping deliveries. */
static void
lapic_timer_stop(struct lapic *la)
{
	u_int32_t value;

	value = la->lvt_timer_cache;
	value &= ~APIC_LVTT_TM;
	value |= APIC_LVT_M;
	lapic->lvt_timer = value;
}
871
/* CMCI interrupt entry: acknowledge and hand off to the MCA code. */
void
lapic_handle_cmc(void)
{

	lapic_eoi();
	cmc_intr();
}
879
880/*
881 * Called from the mca_init() to activate the CMC interrupt if this CPU is
882 * responsible for monitoring any MC banks for CMC events.  Since mca_init()
883 * is called prior to lapic_setup() during boot, this just needs to unmask
884 * this CPU's LVT_CMCI entry.
885 */
void
lapic_enable_cmc(void)
{
	u_int apic_id;

#ifdef DEV_ATPIC
	/* With the ATPIC compiled in, the LAPIC may not be mapped at all. */
	if (lapic == NULL)
		return;
#endif
	apic_id = PCPU_GET(apic_id);
	KASSERT(lapics[apic_id].la_present,
	    ("%s: missing APIC %u", __func__, apic_id));
	/* Unmask as a per-APIC override; lapic_setup() programs it later. */
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0;
	lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1;
	if (bootverbose)
		printf("lapic%u: CMCI unmasked\n", apic_id);
}
903
/* APIC error interrupt entry: latch, report, and acknowledge the error. */
void
lapic_handle_error(void)
{
	u_int32_t esr;

	/*
	 * Read the contents of the error status register.  Write to
	 * the register first before reading from it to force the APIC
	 * to update its value to indicate any errors that have
	 * occurred since the previous write to the register.
	 */
	lapic->esr = 0;
	esr = lapic->esr;

	printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr);
	lapic_eoi();
}
921
922u_int
923apic_cpuid(u_int apic_id)
924{
925#ifdef SMP
926	return apic_cpuids[apic_id];
927#else
928	return 0;
929#endif
930}
931
932/* Request a free IDT vector to be used by the specified IRQ. */
u_int
apic_alloc_vector(u_int apic_id, u_int irq)
{
	u_int vector;

	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));

	/*
	 * Search for a free vector.  Currently we just use a very simple
	 * algorithm to find the first free vector.
	 */
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
		if (lapics[apic_id].la_ioint_irqs[vector] != -1)
			continue;
		/* Claim the slot and return the absolute IDT vector. */
		lapics[apic_id].la_ioint_irqs[vector] = irq;
		mtx_unlock_spin(&icu_lock);
		return (vector + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	/* 0 indicates failure: no IDT vector available. */
	return (0);
}
955
956/*
957 * Request 'count' free contiguous IDT vectors to be used by 'count'
958 * IRQs.  'count' must be a power of two and the vectors will be
959 * aligned on a boundary of 'align'.  If the request cannot be
960 * satisfied, 0 is returned.
961 */
u_int
apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
{
	u_int first, run, vector;

	KASSERT(powerof2(count), ("bad count"));
	KASSERT(powerof2(align), ("bad align"));
	KASSERT(align >= count, ("align < count"));
#ifdef INVARIANTS
	for (run = 0; run < count; run++)
		KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u",
		    irqs[run], run));
#endif

	/*
	 * Search for 'count' free vectors.  As with apic_alloc_vector(),
	 * this just uses a simple first fit algorithm.
	 */
	run = 0;	/* length of the current run of free vectors */
	first = 0;	/* first vector of the current run */
	mtx_lock_spin(&icu_lock);
	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {

		/* Vector is in use, end run. */
		if (lapics[apic_id].la_ioint_irqs[vector] != -1) {
			run = 0;
			first = 0;
			continue;
		}

		/* Start a new run if run == 0 and vector is aligned. */
		if (run == 0) {
			if ((vector & (align - 1)) != 0)
				continue;
			first = vector;
		}
		run++;

		/* Keep looping if the run isn't long enough yet. */
		if (run < count)
			continue;

		/*
		 * Found a run, assign IRQs and return the first vector.
		 * Note 'vector' is reused as the assignment index here.
		 */
		for (vector = 0; vector < count; vector++)
			lapics[apic_id].la_ioint_irqs[first + vector] =
			    irqs[vector];
		mtx_unlock_spin(&icu_lock);
		return (first + APIC_IO_INTS);
	}
	mtx_unlock_spin(&icu_lock);
	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
	return (0);
}
1015
1016/*
1017 * Enable a vector for a particular apic_id.  Since all lapics share idt
1018 * entries and ioint_handlers this enables the vector on all lapics.  lapics
1019 * which do not have the vector configured would report spurious interrupts
1020 * should it fire.
1021 */
void
apic_enable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	/* Install the shared ISR for this vector's 32-vector block. */
	setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL,
	    GSEL_APIC);
}
1036
void
apic_disable_vector(u_int apic_id, u_int vector)
{

	KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif
	KASSERT(ioint_handlers[vector / 32] != NULL,
	    ("No ISR handler for vector %u", vector));
#ifdef notyet
	/*
	 * We can not currently clear the idt entry because other cpus
	 * may have a valid vector at this offset.
	 */
	setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC);
#endif
}
1056
1057/* Release an APIC vector when it's no longer in use. */
void
apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
	struct thread *td;

	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
	    ("Vector %u does not map to an IRQ line", vector));
	KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq));
	KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] ==
	    irq, ("IRQ mismatch"));
#ifdef KDTRACE_HOOKS
	KASSERT(vector != IDT_DTRACE_RET,
	    ("Attempt to overwrite DTrace entry"));
#endif

	/*
	 * Bind us to the cpu that owned the vector before freeing it so
	 * we don't lose an interrupt delivery race.
	 */
	td = curthread;
	if (!rebooting) {	/* binding is unsafe during shutdown */
		thread_lock(td);
		if (sched_is_bound(td))
			panic("apic_free_vector: Thread already bound.\n");
		sched_bind(td, apic_cpuid(apic_id));
		thread_unlock(td);
	}
	/* Mark the vector free under the same lock allocators take. */
	mtx_lock_spin(&icu_lock);
	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1;
	mtx_unlock_spin(&icu_lock);
	if (!rebooting) {
		thread_lock(td);
		sched_unbind(td);
		thread_unlock(td);
	}
}
1095
1096/* Map an IDT vector (APIC) to an IRQ (interrupt source). */
1097u_int
1098apic_idt_to_irq(u_int apic_id, u_int vector)
1099{
1100	int irq;
1101
1102	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
1103	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
1104	    ("Vector %u does not map to an IRQ line", vector));
1105#ifdef KDTRACE_HOOKS
1106	KASSERT(vector != IDT_DTRACE_RET,
1107	    ("Attempt to overwrite DTrace entry"));
1108#endif
1109	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
1110	if (irq < 0)
1111		irq = 0;
1112	return (irq);
1113}
1114
1115#ifdef DDB
1116/*
1117 * Dump data about APIC IDT vector mappings.
1118 */
1119DB_SHOW_COMMAND(apic, db_show_apic)
1120{
1121	struct intsrc *isrc;
1122	int i, verbose;
1123	u_int apic_id;
1124	u_int irq;
1125
1126	if (strcmp(modif, "vv") == 0)
1127		verbose = 2;
1128	else if (strcmp(modif, "v") == 0)
1129		verbose = 1;
1130	else
1131		verbose = 0;
1132	for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) {
1133		if (lapics[apic_id].la_present == 0)
1134			continue;
1135		db_printf("Interrupts bound to lapic %u\n", apic_id);
1136		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
1137			irq = lapics[apic_id].la_ioint_irqs[i];
1138			if (irq == -1 || irq == IRQ_SYSCALL)
1139				continue;
1140#ifdef KDTRACE_HOOKS
1141			if (irq == IRQ_DTRACE_RET)
1142				continue;
1143#endif
1144#ifdef XENHVM
1145			if (irq == IRQ_EVTCHN)
1146				continue;
1147#endif
1148			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
1149			if (irq == IRQ_TIMER)
1150				db_printf("lapic timer\n");
1151			else if (irq < NUM_IO_INTS) {
1152				isrc = intr_lookup_source(irq);
1153				if (isrc == NULL || verbose == 0)
1154					db_printf("IRQ %u\n", irq);
1155				else
1156					db_dump_intr_event(isrc->is_event,
1157					    verbose == 2);
1158			} else
1159				db_printf("IRQ %u ???\n", irq);
1160		}
1161	}
1162}
1163
/*
 * Print the set bits of a 32-bit mask as two-digit hex vector numbers,
 * preceded by "prefix:".  "base" is the vector number of bit 0.  Prints
 * nothing at all when no bits are set.
 */
static void
dump_mask(const char *prefix, uint32_t v, int base)
{
	int i, first;

	first = 1;
	for (i = 0; i < 32; i++)
		/*
		 * Use an unsigned constant: "1 << 31" would left-shift a
		 * signed int into the sign bit, which is undefined
		 * behavior in C.
		 */
		if (v & (1u << i)) {
			if (first) {
				db_printf("%s:", prefix);
				first = 0;
			}
			db_printf(" %02x", base + i);
		}
	if (!first)
		db_printf("\n");
}
1181
/* Show info from the lapic regs for this CPU. */
DB_SHOW_COMMAND(lapic, db_show_lapic)
{
	uint32_t v;

	db_printf("lapic ID = %d\n", lapic_id());
	/* Version register: low nibble pair is version, high byte max LVT. */
	v = lapic->version;
	db_printf("version  = %d.%d\n", (v & APIC_VER_VERSION) >> 4,
	    v & 0xf);
	db_printf("max LVT  = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT);
	/* Spurious vector register also holds the APIC enable bit. */
	v = lapic->svr;
	db_printf("SVR      = %02x (%s)\n", v & APIC_SVR_VECTOR,
	    v & APIC_SVR_ENABLE ? "enabled" : "disabled");
	db_printf("TPR      = %02x\n", lapic->tpr);

/*
 * Helper: dump one 32-bit word of the ISR/TMR/IRR register arrays.
 * Pastes the prefix and index into both the label and the lapic field
 * name; each word covers 32 consecutive vectors starting at index * 32.
 */
#define dump_field(prefix, index)					\
	dump_mask(__XSTRING(prefix ## index), lapic->prefix ## index,	\
	    index * 32)

	db_printf("In-service Interrupts:\n");
	dump_field(isr, 0);
	dump_field(isr, 1);
	dump_field(isr, 2);
	dump_field(isr, 3);
	dump_field(isr, 4);
	dump_field(isr, 5);
	dump_field(isr, 6);
	dump_field(isr, 7);

	db_printf("TMR Interrupts:\n");
	dump_field(tmr, 0);
	dump_field(tmr, 1);
	dump_field(tmr, 2);
	dump_field(tmr, 3);
	dump_field(tmr, 4);
	dump_field(tmr, 5);
	dump_field(tmr, 6);
	dump_field(tmr, 7);

	db_printf("IRR Interrupts:\n");
	dump_field(irr, 0);
	dump_field(irr, 1);
	dump_field(irr, 2);
	dump_field(irr, 3);
	dump_field(irr, 4);
	dump_field(irr, 5);
	dump_field(irr, 6);
	dump_field(irr, 7);

#undef dump_field
}
1233#endif
1234
1235/*
1236 * APIC probing support code.  This includes code to manage enumerators.
1237 */
1238
1239static SLIST_HEAD(, apic_enumerator) enumerators =
1240	SLIST_HEAD_INITIALIZER(enumerators);
1241static struct apic_enumerator *best_enum;
1242
1243void
1244apic_register_enumerator(struct apic_enumerator *enumerator)
1245{
1246#ifdef INVARIANTS
1247	struct apic_enumerator *apic_enum;
1248
1249	SLIST_FOREACH(apic_enum, &enumerators, apic_next) {
1250		if (apic_enum == enumerator)
1251			panic("%s: Duplicate register of %s", __func__,
1252			    enumerator->apic_name);
1253	}
1254#endif
1255	SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next);
1256}
1257
1258/*
1259 * We have to look for CPU's very, very early because certain subsystems
1260 * want to know how many CPU's we have extremely early on in the boot
1261 * process.
1262 */
1263static void
1264apic_init(void *dummy __unused)
1265{
1266	struct apic_enumerator *enumerator;
1267#ifndef __amd64__
1268	uint64_t apic_base;
1269#endif
1270	int retval, best;
1271
1272	/* We only support built in local APICs. */
1273	if (!(cpu_feature & CPUID_APIC))
1274		return;
1275
1276	/* Don't probe if APIC mode is disabled. */
1277	if (resource_disabled("apic", 0))
1278		return;
1279
1280	/* Probe all the enumerators to find the best match. */
1281	best_enum = NULL;
1282	best = 0;
1283	SLIST_FOREACH(enumerator, &enumerators, apic_next) {
1284		retval = enumerator->apic_probe();
1285		if (retval > 0)
1286			continue;
1287		if (best_enum == NULL || best < retval) {
1288			best_enum = enumerator;
1289			best = retval;
1290		}
1291	}
1292	if (best_enum == NULL) {
1293		if (bootverbose)
1294			printf("APIC: Could not find any APICs.\n");
1295#ifndef DEV_ATPIC
1296		panic("running without device atpic requires a local APIC");
1297#endif
1298		return;
1299	}
1300
1301	if (bootverbose)
1302		printf("APIC: Using the %s enumerator.\n",
1303		    best_enum->apic_name);
1304
1305#ifndef __amd64__
1306	/*
1307	 * To work around an errata, we disable the local APIC on some
1308	 * CPUs during early startup.  We need to turn the local APIC back
1309	 * on on such CPUs now.
1310	 */
1311	if (cpu == CPU_686 && cpu_vendor_id == CPU_VENDOR_INTEL &&
1312	    (cpu_id & 0xff0) == 0x610) {
1313		apic_base = rdmsr(MSR_APICBASE);
1314		apic_base |= APICBASE_ENABLED;
1315		wrmsr(MSR_APICBASE, apic_base);
1316	}
1317#endif
1318
1319	/* Probe the CPU's in the system. */
1320	retval = best_enum->apic_probe_cpus();
1321	if (retval != 0)
1322		printf("%s: Failed to probe CPUs: returned %d\n",
1323		    best_enum->apic_name, retval);
1324
1325}
1326SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL);
1327
1328/*
1329 * Setup the local APIC.  We have to do this prior to starting up the APs
1330 * in the SMP case.
1331 */
1332static void
1333apic_setup_local(void *dummy __unused)
1334{
1335	int retval;
1336
1337	if (best_enum == NULL)
1338		return;
1339
1340	/* Initialize the local APIC. */
1341	retval = best_enum->apic_setup_local();
1342	if (retval != 0)
1343		printf("%s: Failed to setup the local APIC: returned %d\n",
1344		    best_enum->apic_name, retval);
1345}
1346SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL);
1347
1348/*
1349 * Setup the I/O APICs.
1350 */
1351static void
1352apic_setup_io(void *dummy __unused)
1353{
1354	int retval;
1355
1356	if (best_enum == NULL)
1357		return;
1358
1359	/*
1360	 * Local APIC must be registered before other PICs and pseudo PICs
1361	 * for proper suspend/resume order.
1362	 */
1363#ifndef XEN
1364	intr_register_pic(&lapic_pic);
1365#endif
1366
1367	retval = best_enum->apic_setup_io();
1368	if (retval != 0)
1369		printf("%s: Failed to setup I/O APICs: returned %d\n",
1370		    best_enum->apic_name, retval);
1371#ifdef XEN
1372	return;
1373#endif
1374	/*
1375	 * Finish setting up the local APIC on the BSP once we know how to
1376	 * properly program the LINT pins.
1377	 */
1378	lapic_setup(1);
1379	if (bootverbose)
1380		lapic_dump("BSP");
1381
1382	/* Enable the MSI "pic". */
1383	msi_init();
1384}
1385SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL);
1386
1387#ifdef SMP
1388/*
1389 * Inter Processor Interrupt functions.  The lapic_ipi_*() functions are
1390 * private to the MD code.  The public interface for the rest of the
1391 * kernel is defined in mp_machdep.c.
1392 */
1393int
1394lapic_ipi_wait(int delay)
1395{
1396	int x, incr;
1397
1398	/*
1399	 * Wait delay loops for IPI to be sent.  This is highly bogus
1400	 * since this is sensitive to CPU clock speed.  If delay is
1401	 * -1, we wait forever.
1402	 */
1403	if (delay == -1) {
1404		incr = 0;
1405		delay = 1;
1406	} else
1407		incr = 1;
1408	for (x = 0; x < delay; x += incr) {
1409		if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
1410			return (1);
1411		ia32_pause();
1412	}
1413	return (0);
1414}
1415
/*
 * Program the ICR with an already-encoded low word and (optionally) a
 * destination APIC ID, and dispatch the IPI.  Interrupts are disabled
 * around the two-register write so the pair is not torn by a nested
 * IPI from an interrupt handler.
 */
void
lapic_ipi_raw(register_t icrlo, u_int dest)
{
	register_t value, saveintr;

	/* XXX: Need more sanity checking of icrlo? */
	KASSERT(lapic != NULL, ("%s called too early", __func__));
	KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
	    ("%s: invalid dest field", __func__));
	KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0,
	    ("%s: reserved bits set in ICR LO register", __func__));

	/* Set destination in ICR HI register if it is being used. */
	saveintr = intr_disable();
	if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) {
		value = lapic->icr_hi;
		value &= ~APIC_ID_MASK;
		value |= dest << APIC_ID_SHIFT;
		lapic->icr_hi = value;
	}

	/*
	 * Program the contents of the IPI and dispatch it.  Writing
	 * icr_lo is what actually sends the IPI, so it must come last.
	 */
	value = lapic->icr_lo;
	value &= APIC_ICRLO_RESV_MASK;
	value |= icrlo;
	lapic->icr_lo = value;
	intr_restore(saveintr);
}
1444
/* Spin counts for lapic_ipi_wait(): before sending and after delivery. */
#define	BEFORE_SPIN	1000000
#ifdef DETECT_DEADLOCK
#define	AFTER_SPIN	1000
#endif
1449
/*
 * Send a fixed or NMI IPI for the given vector to the destination
 * encoded by "dest" (self, all, all-but-self, or a specific APIC ID).
 */
void
lapic_ipi_vectored(u_int vector, int dest)
{
	register_t icrlo, destfield;

	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
	    ("%s: invalid vector %d", __func__, vector));

	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE;

	/*
	 * IPI_STOP_HARD is just a "fake" vector used to send a NMI.
	 * Use special rules regarding NMI if passed, otherwise specify
	 * the vector.
	 */
	if (vector == IPI_STOP_HARD)
		icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT;
	else
		icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT;
	destfield = 0;
	switch (dest) {
	case APIC_IPI_DEST_SELF:
		icrlo |= APIC_DEST_SELF;
		break;
	case APIC_IPI_DEST_ALL:
		icrlo |= APIC_DEST_ALLISELF;
		break;
	case APIC_IPI_DEST_OTHERS:
		icrlo |= APIC_DEST_ALLESELF;
		break;
	default:
		/* A specific APIC ID goes into the ICR HI destination field. */
		KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0,
		    ("%s: invalid destination 0x%x", __func__, dest));
		destfield = dest;
	}

	/* Wait for an earlier IPI to finish. */
	if (!lapic_ipi_wait(BEFORE_SPIN)) {
		if (panicstr != NULL)
			return;
		else
			panic("APIC: Previous IPI is stuck");
	}

	lapic_ipi_raw(icrlo, destfield);

#ifdef DETECT_DEADLOCK
	/* Wait for IPI to be delivered. */
	if (!lapic_ipi_wait(AFTER_SPIN)) {
#ifdef needsattention
		/*
		 * XXX FIXME:
		 *
		 * The above function waits for the message to actually be
		 * delivered.  It breaks out after an arbitrary timeout
		 * since the message should eventually be delivered (at
		 * least in theory) and that if it wasn't we would catch
		 * the failure with the check above when the next IPI is
		 * sent.
		 *
		 * We could skip this wait entirely, EXCEPT it probably
		 * protects us from other routines that assume that the
		 * message was delivered and acted upon when this function
		 * returns.
		 */
		printf("APIC: IPI might be stuck\n");
#else /* !needsattention */
		/* Wait until message is sent without a timeout. */
		while (lapic->icr_lo & APIC_DELSTAT_PEND)
			ia32_pause();
#endif /* needsattention */
	}
#endif /* DETECT_DEADLOCK */
}
1524#endif /* SMP */
1525