vlapic.c revision 266339
1/*-
2 * Copyright (c) 2011 NetApp, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD: stable/10/sys/amd64/vmm/io/vlapic.c 266339 2014-05-17 19:11:08Z jhb $
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/io/vlapic.c 266339 2014-05-17 19:11:08Z jhb $");
31
32#include <sys/param.h>
33#include <sys/lock.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/mutex.h>
37#include <sys/systm.h>
38#include <sys/smp.h>
39
40#include <x86/specialreg.h>
41#include <x86/apicreg.h>
42
43#include <machine/clock.h>
44#include <machine/smp.h>
45
46#include <machine/vmm.h>
47
48#include "vmm_ipi.h"
49#include "vmm_lapic.h"
50#include "vmm_ktr.h"
51#include "vmm_stat.h"
52
53#include "vlapic.h"
54#include "vlapic_priv.h"
55#include "vioapic.h"
56
57#define	PRIO(x)			((x) >> 4)
58
59#define VLAPIC_VERSION		(16)
60
61#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
62
63/*
64 * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
65 * vlapic_callout_handler() and vcpu accesses to:
66 * - timer_freq_bt, timer_period_bt, timer_fire_bt
67 * - timer LVT register
68 */
69#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
70#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
71#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))
72
73#define VLAPIC_BUS_FREQ	tsc_freq
74
75static __inline uint32_t
76vlapic_get_id(struct vlapic *vlapic)
77{
78
79	if (x2apic(vlapic))
80		return (vlapic->vcpuid);
81	else
82		return (vlapic->vcpuid << 24);
83}
84
85static uint32_t
86x2apic_ldr(struct vlapic *vlapic)
87{
88	int apicid;
89	uint32_t ldr;
90
91	apicid = vlapic_get_id(vlapic);
92	ldr = 1 << (apicid & 0xf);
93	ldr |= (apicid & 0xffff0) << 12;
94	return (ldr);
95}
96
/*
 * Handle a guest write to the Destination Format Register (DFR).
 *
 * The DFR does not exist in x2APIC mode so the write is ignored and the
 * register is forced to zero.  In xAPIC mode only the model bits are
 * writable; the reserved bits are forced to ones.
 */
void
vlapic_dfr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	if (x2apic(vlapic)) {
		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
		    lapic->dfr);
		lapic->dfr = 0;
		return;
	}

	/* Keep only the model bits and set the reserved bits. */
	lapic->dfr &= APIC_DFR_MODEL_MASK;
	lapic->dfr |= APIC_DFR_RESERVED;

	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
	else
		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
}
120
/*
 * Handle a guest write to the Logical Destination Register (LDR).
 *
 * In x2APIC mode the LDR is read-only so the architecturally derived
 * value is restored on any write attempt.  In xAPIC mode the reserved
 * low bits are cleared.
 */
void
vlapic_ldr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;

	/* LDR is read-only in x2apic mode */
	if (x2apic(vlapic)) {
		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
		    lapic->ldr);
		lapic->ldr = x2apic_ldr(vlapic);
	} else {
		lapic->ldr &= ~APIC_LDR_RESERVED;
		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
	}
}
138
/*
 * Handle a guest write to the local APIC ID register.
 */
void
vlapic_id_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	/*
	 * We don't allow the ID register to be modified so reset it back to
	 * its default value.
	 */
	lapic = vlapic->apic_page;
	lapic->id = vlapic_get_id(vlapic);
}
151
/*
 * Convert the Divide Configuration Register (DCR) encoding into the
 * timer frequency divisor.  Only bits 0, 1 and 3 of the DCR are
 * significant, hence the 0xB mask.  Panics on an encoding that does
 * not match any of the architecturally defined values.
 */
static int
vlapic_timer_divisor(uint32_t dcr)
{
	switch (dcr & 0xB) {
	case APIC_TDCR_1:
		return (1);
	case APIC_TDCR_2:
		return (2);
	case APIC_TDCR_4:
		return (4);
	case APIC_TDCR_8:
		return (8);
	case APIC_TDCR_16:
		return (16);
	case APIC_TDCR_32:
		return (32);
	case APIC_TDCR_64:
		return (64);
	case APIC_TDCR_128:
		return (128);
	default:
		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
	}
}
176
#if 0
/*
 * Debugging helper (compiled out): print the vector, delivery status and
 * mask fields of a single LVT register.
 */
static inline void
vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
{
	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
	    *lvt & APIC_LVTT_M);
}
#endif
186
/*
 * Return the current value of the timer's Current Count Register (CCR).
 *
 * The CCR is computed from the time remaining until the callout fires;
 * it reads as 0 once the timer has expired (or was never armed).
 */
static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
	struct bintime bt_now, bt_rem;
	struct LAPIC *lapic;
	uint32_t ccr;

	ccr = 0;
	lapic = vlapic->apic_page;

	VLAPIC_TIMER_LOCK(vlapic);
	if (callout_active(&vlapic->callout)) {
		/*
		 * If the timer is scheduled to expire in the future then
		 * compute the value of 'ccr' based on the remaining time.
		 */
		binuptime(&bt_now);
		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
			bt_rem = vlapic->timer_fire_bt;
			bintime_sub(&bt_rem, &bt_now);
			/* Convert the remaining bintime into timer ticks. */
			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
		}
	}
	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
	    "icr_timer is %#x", ccr, lapic->icr_timer));
	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
	    ccr, lapic->icr_timer);
	VLAPIC_TIMER_UNLOCK(vlapic);
	return (ccr);
}
218
/*
 * Handle a guest write to the timer Divide Configuration Register (DCR)
 * by recomputing the effective timer frequency and period.
 */
void
vlapic_dcr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	int divisor;

	lapic = vlapic->apic_page;
	VLAPIC_TIMER_LOCK(vlapic);

	divisor = vlapic_timer_divisor(lapic->dcr_timer);
	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
	    lapic->dcr_timer, divisor);

	/*
	 * Update the timer frequency and the timer period.
	 *
	 * XXX changes to the frequency divider will not take effect until
	 * the timer is reloaded.
	 */
	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);

	VLAPIC_TIMER_UNLOCK(vlapic);
}
244
/*
 * Handle a guest write to the Error Status Register (ESR): latch the
 * accumulated pending error bits into the guest-visible ESR and clear
 * the pending set.
 */
void
vlapic_esr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->esr = vlapic->esr_pending;
	vlapic->esr_pending = 0;
}
254
/*
 * Mark 'vector' as pending in the IRR of this vlapic.
 *
 * Returns 0 if the interrupt was ignored because the vlapic is software
 * disabled, 1 otherwise (including the illegal-vector case, which is
 * recorded in the ESR instead of the IRR).
 */
int
vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
{
	struct LAPIC *lapic;
	uint32_t *irrptr, *tmrptr, mask;
	int idx;

	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));

	lapic = vlapic->apic_page;
	if (!(lapic->svr & APIC_SVR_ENABLE)) {
		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
		    "interrupt %d", vector);
		return (0);
	}

	if (vector < 16) {
		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
		    vector);
		return (1);
	}

	/* Defer to a hardware-assisted implementation if one is installed. */
	if (vlapic->ops.set_intr_ready)
		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));

	/* Each IRR word is 32 bits wide but 16-byte aligned in the page. */
	idx = (vector / 32) * 4;
	mask = 1 << (vector % 32);

	irrptr = &lapic->irr0;
	atomic_set_int(&irrptr[idx], mask);

	/*
	 * Verify that the trigger-mode of the interrupt matches with
	 * the vlapic TMR registers.
	 */
	tmrptr = &lapic->tmr0;
	KASSERT((tmrptr[idx] & mask) == (level ? mask : 0),
	    ("vlapic TMR[%d] is 0x%08x but interrupt is %s-triggered",
	    idx / 4, tmrptr[idx], level ? "level" : "edge"));

	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
	return (1);
}
299
300static __inline uint32_t *
301vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
302{
303	struct LAPIC	*lapic = vlapic->apic_page;
304	int 		 i;
305
306	switch (offset) {
307	case APIC_OFFSET_CMCI_LVT:
308		return (&lapic->lvt_cmci);
309	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
310		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
311		return ((&lapic->lvt_timer) + i);;
312	default:
313		panic("vlapic_get_lvt: invalid LVT\n");
314	}
315}
316
/*
 * Map an APIC register offset for an LVT register to the corresponding
 * index into the 'lvt_last' shadow array.  Asserts that the offset is a
 * valid LVT register offset.
 */
static __inline int
lvt_off_to_idx(uint32_t offset)
{
	int index;

	switch (offset) {
	case APIC_OFFSET_CMCI_LVT:
		index = APIC_LVT_CMCI;
		break;
	case APIC_OFFSET_TIMER_LVT:
		index = APIC_LVT_TIMER;
		break;
	case APIC_OFFSET_THERM_LVT:
		index = APIC_LVT_THERMAL;
		break;
	case APIC_OFFSET_PERF_LVT:
		index = APIC_LVT_PMC;
		break;
	case APIC_OFFSET_LINT0_LVT:
		index = APIC_LVT_LINT0;
		break;
	case APIC_OFFSET_LINT1_LVT:
		index = APIC_LVT_LINT1;
		break;
	case APIC_OFFSET_ERROR_LVT:
		index = APIC_LVT_ERROR;
		break;
	default:
		index = -1;
		break;
	}
	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
	    "invalid lvt index %d for offset %#x", index, offset));

	return (index);
}
353
354static __inline uint32_t
355vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
356{
357	int idx;
358	uint32_t val;
359
360	idx = lvt_off_to_idx(offset);
361	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
362	return (val);
363}
364
/*
 * Handle a guest write to an LVT register.
 *
 * Read-only and reserved bits are masked off according to which LVT
 * register is being written, the sanitized value is stored back into
 * the APIC page, and the 'lvt_last' shadow copy is updated with a
 * release store (paired with the acquire load in vlapic_get_lvt()).
 */
void
vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
{
	uint32_t *lvtptr, mask, val;
	struct LAPIC *lapic;
	int idx;

	lapic = vlapic->apic_page;
	lvtptr = vlapic_get_lvtptr(vlapic, offset);
	val = *lvtptr;
	idx = lvt_off_to_idx(offset);

	/* Writes while software-disabled force the mask bit on. */
	if (!(lapic->svr & APIC_SVR_ENABLE))
		val |= APIC_LVT_M;
	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
	switch (offset) {
	case APIC_OFFSET_TIMER_LVT:
		mask |= APIC_LVTT_TM;
		break;
	case APIC_OFFSET_ERROR_LVT:
		break;
	case APIC_OFFSET_LINT0_LVT:
	case APIC_OFFSET_LINT1_LVT:
		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
		/* FALLTHROUGH */
	default:
		mask |= APIC_LVT_DM;
		break;
	}
	val &= mask;
	*lvtptr = val;
	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
}
398
399static void
400vlapic_mask_lvts(struct vlapic *vlapic)
401{
402	struct LAPIC *lapic = vlapic->apic_page;
403
404	lapic->lvt_cmci |= APIC_LVT_M;
405	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
406
407	lapic->lvt_timer |= APIC_LVT_M;
408	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
409
410	lapic->lvt_thermal |= APIC_LVT_M;
411	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
412
413	lapic->lvt_pcint |= APIC_LVT_M;
414	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
415
416	lapic->lvt_lint0 |= APIC_LVT_M;
417	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
418
419	lapic->lvt_lint1 |= APIC_LVT_M;
420	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
421
422	lapic->lvt_error |= APIC_LVT_M;
423	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
424}
425
/*
 * Deliver the interrupt described by an LVT register value to this vcpu.
 *
 * Returns 1 if a fixed interrupt or NMI was delivered, 0 if the LVT is
 * masked, the vector is illegal, or the delivery mode is unsupported.
 */
static int
vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
{
	uint32_t vec, mode;

	if (lvt & APIC_LVT_M)
		return (0);

	vec = lvt & APIC_LVT_VECTOR;
	mode = lvt & APIC_LVT_DM;

	switch (mode) {
	case APIC_LVT_DM_FIXED:
		if (vec < 16) {
			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
			return (0);
		}
		if (vlapic_set_intr_ready(vlapic, vec, false))
			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
		break;
	case APIC_LVT_DM_NMI:
		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
		break;
	default:
		// Other modes ignored
		return (0);
	}
	return (1);
}
455
#if 1
/*
 * Debugging helper: dump the ISR registers and the in-service vector
 * stack.  Used by the consistency checks in vlapic_update_ppr() before
 * panicking.
 */
static void
dump_isrvec_stk(struct vlapic *vlapic)
{
	int i;
	uint32_t *isrptr;

	isrptr = &vlapic->apic_page->isr0;
	for (i = 0; i < 8; i++)
		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);

	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
}
#endif
471
472/*
473 * Algorithm adopted from section "Interrupt, Task and Processor Priority"
474 * in Intel Architecture Manual Vol 3a.
475 */
static void
vlapic_update_ppr(struct vlapic *vlapic)
{
	int isrvec, tpr, ppr;

	/*
	 * Note that the value on the stack at index 0 is always 0.
	 *
	 * This is a placeholder for the value of ISRV when none of the
	 * bits is set in the ISRx registers.
	 */
	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
	tpr = vlapic->apic_page->tpr;

#if 1
	{
		/*
		 * Sanity-check the isrvec stack against the ISRx registers;
		 * compiled in unconditionally for now.
		 */
		int i, lastprio, curprio, vector, idx;
		uint32_t *isrptr;

		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
			panic("isrvec_stk is corrupted: %d", isrvec);

		/*
		 * Make sure that the priority of the nested interrupts is
		 * always increasing.
		 */
		lastprio = -1;
		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
			curprio = PRIO(vlapic->isrvec_stk[i]);
			if (curprio <= lastprio) {
				dump_isrvec_stk(vlapic);
				panic("isrvec_stk does not satisfy invariant");
			}
			lastprio = curprio;
		}

		/*
		 * Make sure that each bit set in the ISRx registers has a
		 * corresponding entry on the isrvec stack.
		 */
		i = 1;
		isrptr = &vlapic->apic_page->isr0;
		for (vector = 0; vector < 256; vector++) {
			idx = (vector / 32) * 4;
			if (isrptr[idx] & (1 << (vector % 32))) {
				if (i > vlapic->isrvec_stk_top ||
				    vlapic->isrvec_stk[i] != vector) {
					dump_isrvec_stk(vlapic);
					panic("ISR and isrvec_stk out of sync");
				}
				i++;
			}
		}
	}
#endif

	/* PPR = max(TPR, priority of the highest in-service vector). */
	if (PRIO(tpr) >= PRIO(isrvec))
		ppr = tpr;
	else
		ppr = isrvec & 0xf0;

	vlapic->apic_page->ppr = ppr;
	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
}
540
/*
 * Handle an EOI from the guest: clear the highest-priority bit set in
 * the ISR, pop the in-service vector stack, recompute the PPR and, for
 * level-triggered interrupts, forward the EOI to the I/O APIC.
 */
static void
vlapic_process_eoi(struct vlapic *vlapic)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*isrptr, *tmrptr;
	int		i, idx, bitpos, vector;

	isrptr = &lapic->isr0;
	tmrptr = &lapic->tmr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor, so ISR0 (vectors 0-31) is never scanned.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		bitpos = fls(isrptr[idx]);
		if (bitpos-- != 0) {
			if (vlapic->isrvec_stk_top <= 0) {
				panic("invalid vlapic isrvec_stk_top %d",
				      vlapic->isrvec_stk_top);
			}
			isrptr[idx] &= ~(1 << bitpos);
			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
			vlapic->isrvec_stk_top--;
			vlapic_update_ppr(vlapic);
			/* Level-triggered EOIs propagate to the vioapic. */
			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
				vector = i * 32 + bitpos;
				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
				    vector);
			}
			return;
		}
	}
}
576
577static __inline int
578vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
579{
580
581	return (lvt & mask);
582}
583
/*
 * Return non-zero if the timer LVT is configured in periodic mode.
 */
static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);

	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
}
593
594static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");
595
/*
 * Record an APIC error and deliver the error LVT interrupt.
 *
 * The 'esr_firing' flag guards against unbounded recursion: firing the
 * error LVT can itself fail with an illegal vector, which calls back
 * into this function.
 */
void
vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
{
	uint32_t lvt;

	vlapic->esr_pending |= mask;
	if (vlapic->esr_firing)
		return;
	vlapic->esr_firing = 1;

	// The error LVT always uses the fixed delivery mode.
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
	}
	vlapic->esr_firing = 0;
}
613
614static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");
615
/*
 * Deliver the timer LVT interrupt.  Must be called with the vlapic
 * timer lock held (asserted below).
 */
static void
vlapic_fire_timer(struct vlapic *vlapic)
{
	uint32_t lvt;

	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));

	// The timer LVT always uses the fixed delivery mode.
	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
	}
}
629
630static VMM_STAT(VLAPIC_INTR_CMC,
631    "corrected machine check interrupts generated by vlapic");
632
/*
 * Deliver the corrected machine check (CMCI) LVT interrupt.
 */
void
vlapic_fire_cmci(struct vlapic *vlapic)
{
	uint32_t lvt;

	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
	}
}
643
644static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
645    "lvts triggered");
646
/*
 * Trigger the LVT entry identified by 'vector' (which is actually an
 * APIC_LVT_* index, not an interrupt vector).  The timer and error
 * entries always use fixed delivery mode; the others use whatever
 * delivery mode the guest programmed.
 *
 * Returns 0 on success or EINVAL for an unknown LVT index.
 */
int
vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
{
	uint32_t lvt;

	switch (vector) {
	case APIC_LVT_LINT0:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
		break;
	case APIC_LVT_LINT1:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
		break;
	case APIC_LVT_TIMER:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_ERROR:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
		lvt |= APIC_LVT_DM_FIXED;
		break;
	case APIC_LVT_PMC:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
		break;
	case APIC_LVT_THERMAL:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
		break;
	case APIC_LVT_CMCI:
		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
		break;
	default:
		return (EINVAL);
	}
	if (vlapic_fire_lvt(vlapic, lvt)) {
		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
		    LVTS_TRIGGERRED, vector, 1);
	}
	return (0);
}
685
686static void
687vlapic_callout_handler(void *arg)
688{
689	struct vlapic *vlapic;
690	struct bintime bt, btnow;
691	sbintime_t rem_sbt;
692
693	vlapic = arg;
694
695	VLAPIC_TIMER_LOCK(vlapic);
696	if (callout_pending(&vlapic->callout))	/* callout was reset */
697		goto done;
698
699	if (!callout_active(&vlapic->callout))	/* callout was stopped */
700		goto done;
701
702	callout_deactivate(&vlapic->callout);
703
704	vlapic_fire_timer(vlapic);
705
706	if (vlapic_periodic_timer(vlapic)) {
707		binuptime(&btnow);
708		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
709		    ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx",
710		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
711		    vlapic->timer_fire_bt.frac));
712
713		/*
714		 * Compute the delta between when the timer was supposed to
715		 * fire and the present time.
716		 */
717		bt = btnow;
718		bintime_sub(&bt, &vlapic->timer_fire_bt);
719
720		rem_sbt = bttosbt(vlapic->timer_period_bt);
721		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
722			/*
723			 * Adjust the time until the next countdown downward
724			 * to account for the lost time.
725			 */
726			rem_sbt -= bttosbt(bt);
727		} else {
728			/*
729			 * If the delta is greater than the timer period then
730			 * just reset our time base instead of trying to catch
731			 * up.
732			 */
733			vlapic->timer_fire_bt = btnow;
734			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
735			    "usecs, period is %lu usecs - resetting time base",
736			    bttosbt(bt) / SBT_1US,
737			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
738		}
739
740		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
741		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
742		    vlapic_callout_handler, vlapic, 0);
743	}
744done:
745	VLAPIC_TIMER_UNLOCK(vlapic);
746}
747
/*
 * Handle a guest write to the timer Initial Count Register (ICR).
 *
 * A non-zero count arms the callout to fire one period from now; a zero
 * count stops the timer.
 */
void
vlapic_icrtmr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	sbintime_t sbt;
	uint32_t icr_timer;

	VLAPIC_TIMER_LOCK(vlapic);

	lapic = vlapic->apic_page;
	icr_timer = lapic->icr_timer;

	/* Recompute the timer period from the current frequency and count. */
	vlapic->timer_period_bt = vlapic->timer_freq_bt;
	bintime_mul(&vlapic->timer_period_bt, icr_timer);

	if (icr_timer != 0) {
		binuptime(&vlapic->timer_fire_bt);
		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);

		sbt = bttosbt(vlapic->timer_period_bt);
		callout_reset_sbt(&vlapic->callout, sbt, 0,
		    vlapic_callout_handler, vlapic, 0);
	} else
		callout_stop(&vlapic->callout);

	VLAPIC_TIMER_UNLOCK(vlapic);
}
775
776/*
777 * This function populates 'dmask' with the set of vcpus that match the
778 * addressing specified by the (dest, phys, lowprio) tuple.
779 *
780 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
781 * or xAPIC (8-bit) destination field.
782 */
static void
vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
    bool lowprio, bool x2apic_dest)
{
	struct vlapic *vlapic;
	uint32_t dfr, ldr, ldest, cluster;
	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
	cpuset_t amask;
	int vcpuid;

	if ((x2apic_dest && dest == 0xffffffff) ||
	    (!x2apic_dest && dest == 0xff)) {
		/*
		 * Broadcast in both logical and physical modes.
		 */
		*dmask = vm_active_cpus(vm);
		return;
	}

	if (phys) {
		/*
		 * Physical mode: destination is APIC ID.
		 */
		CPU_ZERO(dmask);
		vcpuid = vm_apicid2vcpuid(vm, dest);
		if (vcpuid < VM_MAXCPU)
			CPU_SET(vcpuid, dmask);
	} else {
		/*
		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
		 * bitmask. This model is only available in the xAPIC mode.
		 */
		mda_flat_ldest = dest & 0xff;

		/*
		 * In the "Cluster Model" the MDA is used to identify a
		 * specific cluster and a set of APICs in that cluster.
		 */
		if (x2apic_dest) {
			mda_cluster_id = dest >> 16;
			mda_cluster_ldest = dest & 0xffff;
		} else {
			mda_cluster_id = (dest >> 4) & 0xf;
			mda_cluster_ldest = dest & 0xf;
		}

		/*
		 * Logical mode: match each APIC that has a bit set
		 * in its LDR that matches a bit in the ldest.
		 */
		CPU_ZERO(dmask);
		amask = vm_active_cpus(vm);
		while ((vcpuid = CPU_FFS(&amask)) != 0) {
			vcpuid--;
			CPU_CLR(vcpuid, &amask);

			vlapic = vm_lapic(vm, vcpuid);
			dfr = vlapic->apic_page->dfr;
			ldr = vlapic->apic_page->ldr;

			if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_FLAT) {
				ldest = ldr >> 24;
				mda_ldest = mda_flat_ldest;
			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
			    APIC_DFR_MODEL_CLUSTER) {
				/*
				 * The LDR layout differs between x2APIC
				 * (16-bit cluster / 16-bit bitmask) and
				 * xAPIC (4-bit cluster / 4-bit bitmask).
				 */
				if (x2apic(vlapic)) {
					cluster = ldr >> 16;
					ldest = ldr & 0xffff;
				} else {
					cluster = ldr >> 28;
					ldest = (ldr >> 24) & 0xf;
				}
				if (cluster != mda_cluster_id)
					continue;
				mda_ldest = mda_cluster_ldest;
			} else {
				/*
				 * Guest has configured a bad logical
				 * model for this vcpu - skip it.
				 */
				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
				    "model %x - cannot deliver interrupt", dfr);
				continue;
			}

			if ((mda_ldest & ldest) != 0) {
				CPU_SET(vcpuid, dmask);
				if (lowprio)
					break;
			}
		}
	}
}
877
878static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
879
/*
 * Handle a guest write to the low half of the Interrupt Command
 * Register (ICR), which triggers an IPI.
 *
 * Fixed and NMI IPIs are handled entirely in the kernel.  INIT and
 * STARTUP IPIs from the BSP are used to drive the AP boot-state machine;
 * a STARTUP that spins up an AP sets '*retu' and fills in a
 * VM_EXITCODE_SPINUP_AP exit so userland can start the vcpu.
 *
 * Returns 0 when the IPI was handled in the kernel and 1 when the
 * caller must return to userland.
 */
int
vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
{
	int i;
	bool phys;
	cpuset_t dmask;
	uint64_t icrval;
	uint32_t dest, vec, mode;
	struct vlapic *vlapic2;
	struct vm_exit *vmexit;
	struct LAPIC *lapic;

	lapic = vlapic->apic_page;
	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;

	/* The destination field is 32 bits in x2APIC mode, 8 in xAPIC. */
	if (x2apic(vlapic))
		dest = icrval >> 32;
	else
		dest = icrval >> (32 + 24);
	vec = icrval & APIC_VECTOR_MASK;
	mode = icrval & APIC_DELMODE_MASK;

	if (mode == APIC_DELMODE_FIXED && vec < 16) {
		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
		return (0);
	}

	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);

	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
		/* Compute the set of destination vcpus. */
		switch (icrval & APIC_DEST_MASK) {
		case APIC_DEST_DESTFLD:
			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
			    x2apic(vlapic));
			break;
		case APIC_DEST_SELF:
			CPU_SETOF(vlapic->vcpuid, &dmask);
			break;
		case APIC_DEST_ALLISELF:
			dmask = vm_active_cpus(vlapic->vm);
			break;
		case APIC_DEST_ALLESELF:
			dmask = vm_active_cpus(vlapic->vm);
			CPU_CLR(vlapic->vcpuid, &dmask);
			break;
		default:
			CPU_ZERO(&dmask);	/* satisfy gcc */
			break;
		}

		while ((i = CPU_FFS(&dmask)) != 0) {
			i--;
			CPU_CLR(i, &dmask);
			if (mode == APIC_DELMODE_FIXED) {
				lapic_intr_edge(vlapic->vm, i, vec);
				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
						    IPIS_SENT, i, 1);
				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
				    "to vcpuid %d", vec, i);
			} else {
				vm_inject_nmi(vlapic->vm, i);
				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
				    "to vcpuid %d", i);
			}
		}

		return (0);	/* handled completely in the kernel */
	}

	if (mode == APIC_DELMODE_INIT) {
		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
			return (0);

		/* Only the BSP may INIT another vcpu. */
		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/* move from INIT to waiting-for-SIPI state */
			if (vlapic2->boot_state == BS_INIT) {
				vlapic2->boot_state = BS_SIPI;
			}

			return (0);
		}
	}

	if (mode == APIC_DELMODE_STARTUP) {
		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
			vlapic2 = vm_lapic(vlapic->vm, dest);

			/*
			 * Ignore SIPIs in any state other than wait-for-SIPI
			 */
			if (vlapic2->boot_state != BS_SIPI)
				return (0);

			/*
			 * XXX this assumes that the startup IPI always succeeds
			 */
			vlapic2->boot_state = BS_RUNNING;
			vm_activate_cpu(vlapic2->vm, dest);

			/* Hand the AP spin-up off to userland. */
			*retu = true;
			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
			vmexit->u.spinup_ap.vcpu = dest;
			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;

			return (0);
		}
	}

	/*
	 * This will cause a return to userland.
	 */
	return (1);
}
999
/*
 * Return 1 if there is a pending interrupt in the IRR whose priority
 * exceeds the current PPR (storing its vector in '*vecptr' if non-NULL),
 * 0 otherwise.
 */
int
vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	int	  	 idx, i, bitpos, vector;
	uint32_t	*irrptr, val;

	/* Defer to a hardware-assisted implementation if one is installed. */
	if (vlapic->ops.pending_intr)
		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));

	irrptr = &lapic->irr0;

	/*
	 * The x86 architecture reserves the first 32 vectors for use
	 * by the processor, so IRR0 (vectors 0-31) is never scanned.
	 */
	for (i = 7; i > 0; i--) {
		idx = i * 4;
		val = atomic_load_acq_int(&irrptr[idx]);
		bitpos = fls(val);
		if (bitpos != 0) {
			vector = i * 32 + (bitpos - 1);
			if (PRIO(vector) > PRIO(lapic->ppr)) {
				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
				if (vecptr != NULL)
					*vecptr = vector;
				return (1);
			} else
				break;
		}
	}
	return (0);
}
1033
/*
 * Called when the vcpu accepts interrupt 'vector': move it from the IRR
 * to the ISR, push it on the in-service vector stack and recompute the
 * PPR.
 */
void
vlapic_intr_accepted(struct vlapic *vlapic, int vector)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*irrptr, *isrptr;
	int		idx, stk_top;

	/* Defer to a hardware-assisted implementation if one is installed. */
	if (vlapic->ops.intr_accepted)
		return ((*vlapic->ops.intr_accepted)(vlapic, vector));

	/*
	 * clear the ready bit for vector being accepted in irr
	 * and set the vector as in service in isr.
	 */
	idx = (vector / 32) * 4;

	irrptr = &lapic->irr0;
	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");

	isrptr = &lapic->isr0;
	isrptr[idx] |= 1 << (vector % 32);
	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");

	/*
	 * Update the PPR
	 */
	vlapic->isrvec_stk_top++;

	stk_top = vlapic->isrvec_stk_top;
	if (stk_top >= ISRVEC_STK_SIZE)
		panic("isrvec_stk_top overflow %d", stk_top);

	vlapic->isrvec_stk[stk_top] = vector;
	vlapic_update_ppr(vlapic);
}
1070
/*
 * Handle a guest write to the Spurious Vector Register (SVR).
 *
 * Transitions of the APIC_SVR_ENABLE bit stop or restart the timer and
 * mask the LVTs on software-disable.
 */
void
vlapic_svr_write_handler(struct vlapic *vlapic)
{
	struct LAPIC *lapic;
	uint32_t old, new, changed;

	lapic = vlapic->apic_page;

	new = lapic->svr;
	old = vlapic->svr_last;
	vlapic->svr_last = new;

	changed = old ^ new;
	if ((changed & APIC_SVR_ENABLE) != 0) {
		if ((new & APIC_SVR_ENABLE) == 0) {
			/*
			 * The apic is now disabled so stop the apic timer
			 * and mask all the LVT entries.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
			VLAPIC_TIMER_LOCK(vlapic);
			callout_stop(&vlapic->callout);
			VLAPIC_TIMER_UNLOCK(vlapic);
			vlapic_mask_lvts(vlapic);
		} else {
			/*
			 * The apic is now enabled so restart the apic timer
			 * if it is configured in periodic mode.
			 */
			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
			if (vlapic_periodic_timer(vlapic))
				vlapic_icrtmr_write_handler(vlapic);
		}
	}
}
1106
/*
 * Emulate a guest read of the APIC register at 'offset', storing the
 * result in '*data'.  Offsets past the register page and unknown
 * offsets read as zero.  Always returns 0; '*retu' is unused here.
 */
int
vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*reg;
	int		 i;

	/* NOTE(review): bound uses '>' not '>='; looks off-by-one but the
	 * switch below only matches known offsets - confirm intent. */
	if (offset > sizeof(*lapic)) {
		*data = 0;
		goto done;
	}

	/* Align to the 32-bit register word. */
	offset &= ~3;
	switch(offset)
	{
		case APIC_OFFSET_ID:
			*data = lapic->id;
			break;
		case APIC_OFFSET_VER:
			*data = lapic->version;
			break;
		case APIC_OFFSET_TPR:
			*data = lapic->tpr;
			break;
		case APIC_OFFSET_APR:
			*data = lapic->apr;
			break;
		case APIC_OFFSET_PPR:
			*data = lapic->ppr;
			break;
		case APIC_OFFSET_EOI:
			*data = lapic->eoi;
			break;
		case APIC_OFFSET_LDR:
			*data = lapic->ldr;
			break;
		case APIC_OFFSET_DFR:
			*data = lapic->dfr;
			break;
		case APIC_OFFSET_SVR:
			*data = lapic->svr;
			break;
		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
			i = (offset - APIC_OFFSET_ISR0) >> 2;
			reg = &lapic->isr0;
			*data = *(reg + i);
			break;
		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
			i = (offset - APIC_OFFSET_TMR0) >> 2;
			reg = &lapic->tmr0;
			*data = *(reg + i);
			break;
		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
			i = (offset - APIC_OFFSET_IRR0) >> 2;
			reg = &lapic->irr0;
			*data = atomic_load_acq_int(reg + i);
			break;
		case APIC_OFFSET_ESR:
			*data = lapic->esr;
			break;
		case APIC_OFFSET_ICR_LOW:
			*data = lapic->icr_lo;
			/* x2APIC reads the ICR as a single 64-bit MSR. */
			if (x2apic(vlapic))
				*data |= (uint64_t)lapic->icr_hi << 32;
			break;
		case APIC_OFFSET_ICR_HI:
			*data = lapic->icr_hi;
			break;
		case APIC_OFFSET_CMCI_LVT:
		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
			*data = vlapic_get_lvt(vlapic, offset);
#ifdef INVARIANTS
			reg = vlapic_get_lvtptr(vlapic, offset);
			KASSERT(*data == *reg, ("inconsistent lvt value at "
			    "offset %#lx: %#lx/%#x", offset, *data, *reg));
#endif
			break;
		case APIC_OFFSET_TIMER_ICR:
			*data = lapic->icr_timer;
			break;
		case APIC_OFFSET_TIMER_CCR:
			*data = vlapic_get_ccr(vlapic);
			break;
		case APIC_OFFSET_TIMER_DCR:
			*data = lapic->dcr_timer;
			break;
		case APIC_OFFSET_RRR:
		default:
			*data = 0;
			break;
	}
done:
	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
	return 0;
}
1202
/*
 * Emulate a write of 'data' to the local APIC register at 'offset'.
 *
 * Writes to read-only or unimplemented registers are silently ignored.
 * '*retu' may be set by the ICR-low handler (the only consumer of it
 * here).  Returns 0, or whatever vlapic_icrlo_write_handler() returns.
 */
int
vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu)
{
	struct LAPIC	*lapic = vlapic->apic_page;
	uint32_t	*regptr;
	int		retval;

	/* Callers must present a 16-byte aligned offset within the page. */
	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
	    ("vlapic_write: invalid offset %#lx", offset));

	VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data);

	if (offset > sizeof(*lapic)) {
		return 0;
	}

	retval = 0;
	switch(offset)
	{
		case APIC_OFFSET_ID:
			lapic->id = data;
			vlapic_id_write_handler(vlapic);
			break;
		case APIC_OFFSET_TPR:
			/* Only the low byte of the TPR is kept. */
			lapic->tpr = data & 0xff;
			/* A TPR change can alter the processor priority. */
			vlapic_update_ppr(vlapic);
			break;
		case APIC_OFFSET_EOI:
			/* The written value is ignored; EOI is an action. */
			vlapic_process_eoi(vlapic);
			break;
		case APIC_OFFSET_LDR:
			lapic->ldr = data;
			vlapic_ldr_write_handler(vlapic);
			break;
		case APIC_OFFSET_DFR:
			lapic->dfr = data;
			vlapic_dfr_write_handler(vlapic);
			break;
		case APIC_OFFSET_SVR:
			lapic->svr = data;
			vlapic_svr_write_handler(vlapic);
			break;
		case APIC_OFFSET_ICR_LOW:
			lapic->icr_lo = data;
			/*
			 * In x2apic mode a single 64-bit write supplies
			 * both halves of the ICR.
			 */
			if (x2apic(vlapic))
				lapic->icr_hi = data >> 32;
			retval = vlapic_icrlo_write_handler(vlapic, retu);
			break;
		case APIC_OFFSET_ICR_HI:
			lapic->icr_hi = data;
			break;
		case APIC_OFFSET_CMCI_LVT:
		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
			/* Store the raw value, then let the handler fix it up. */
			regptr = vlapic_get_lvtptr(vlapic, offset);
			*regptr = data;
			vlapic_lvt_write_handler(vlapic, offset);
			break;
		case APIC_OFFSET_TIMER_ICR:
			lapic->icr_timer = data;
			/* (Re)arm the timer with the new initial count. */
			vlapic_icrtmr_write_handler(vlapic);
			break;

		case APIC_OFFSET_TIMER_DCR:
			lapic->dcr_timer = data;
			vlapic_dcr_write_handler(vlapic);
			break;

		case APIC_OFFSET_ESR:
			/* The written value is ignored; see the handler. */
			vlapic_esr_write_handler(vlapic);
			break;
		case APIC_OFFSET_VER:
		case APIC_OFFSET_APR:
		case APIC_OFFSET_PPR:
		case APIC_OFFSET_RRR:
		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
		case APIC_OFFSET_TIMER_CCR:
		default:
			// Read only.
			break;
	}

	return (retval);
}
1288
1289static void
1290vlapic_reset(struct vlapic *vlapic)
1291{
1292	struct LAPIC *lapic;
1293
1294	lapic = vlapic->apic_page;
1295	bzero(lapic, sizeof(struct LAPIC));
1296
1297	lapic->id = vlapic_get_id(vlapic);
1298	lapic->version = VLAPIC_VERSION;
1299	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
1300	lapic->dfr = 0xffffffff;
1301	lapic->svr = APIC_SVR_VECTOR;
1302	vlapic_mask_lvts(vlapic);
1303	vlapic_reset_tmr(vlapic);
1304
1305	lapic->dcr_timer = 0;
1306	vlapic_dcr_write_handler(vlapic);
1307
1308	if (vlapic->vcpuid == 0)
1309		vlapic->boot_state = BS_RUNNING;	/* BSP */
1310	else
1311		vlapic->boot_state = BS_INIT;		/* AP */
1312
1313	vlapic->svr_last = lapic->svr;
1314}
1315
1316void
1317vlapic_init(struct vlapic *vlapic)
1318{
1319	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
1320	KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU,
1321	    ("vlapic_init: vcpuid is not initialized"));
1322	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
1323	    "initialized"));
1324
1325	/*
1326	 * If the vlapic is configured in x2apic mode then it will be
1327	 * accessed in the critical section via the MSR emulation code.
1328	 *
1329	 * Therefore the timer mutex must be a spinlock because blockable
1330	 * mutexes cannot be acquired in a critical section.
1331	 */
1332	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
1333	callout_init(&vlapic->callout, 1);
1334
1335	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
1336
1337	if (vlapic->vcpuid == 0)
1338		vlapic->msr_apicbase |= APICBASE_BSP;
1339
1340	vlapic_reset(vlapic);
1341}
1342
/*
 * Tear down a vlapic: wait for any in-flight timer callout to finish.
 * Note that the timer mutex is not destroyed here.
 */
void
vlapic_cleanup(struct vlapic *vlapic)
{

	callout_drain(&vlapic->callout);
}
1349
/*
 * Return the current value of the virtual IA32_APIC_BASE MSR.
 */
uint64_t
vlapic_get_apicbase(struct vlapic *vlapic)
{

	return (vlapic->msr_apicbase);
}
1356
1357void
1358vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
1359{
1360	struct LAPIC *lapic;
1361	enum x2apic_state state;
1362	uint64_t old;
1363	int err;
1364
1365	err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state);
1366	if (err)
1367		panic("vlapic_set_apicbase: err %d fetching x2apic state", err);
1368
1369	if (state == X2APIC_DISABLED)
1370		new &= ~APICBASE_X2APIC;
1371
1372	old = vlapic->msr_apicbase;
1373	vlapic->msr_apicbase = new;
1374
1375	/*
1376	 * If the vlapic is switching between xAPIC and x2APIC modes then
1377	 * reset the mode-dependent registers.
1378	 */
1379	if ((old ^ new) & APICBASE_X2APIC) {
1380		lapic = vlapic->apic_page;
1381		lapic->id = vlapic_get_id(vlapic);
1382		if (x2apic(vlapic)) {
1383			lapic->ldr = x2apic_ldr(vlapic);
1384			lapic->dfr = 0;
1385		} else {
1386			lapic->ldr = 0;
1387			lapic->dfr = 0xffffffff;
1388		}
1389	}
1390}
1391
1392void
1393vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1394{
1395	struct vlapic *vlapic;
1396
1397	vlapic = vm_lapic(vm, vcpuid);
1398
1399	if (state == X2APIC_DISABLED)
1400		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
1401}
1402
1403void
1404vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
1405    int delmode, int vec)
1406{
1407	bool lowprio;
1408	int vcpuid;
1409	cpuset_t dmask;
1410
1411	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
1412		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
1413		return;
1414	}
1415	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
1416
1417	/*
1418	 * We don't provide any virtual interrupt redirection hardware so
1419	 * all interrupts originating from the ioapic or MSI specify the
1420	 * 'dest' in the legacy xAPIC format.
1421	 */
1422	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);
1423
1424	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
1425		vcpuid--;
1426		CPU_CLR(vcpuid, &dmask);
1427		lapic_set_intr(vm, vcpuid, vec, level);
1428	}
1429}
1430
1431void
1432vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
1433{
1434	/*
1435	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
1436	 *
1437	 * This is done by leveraging features like Posted Interrupts (Intel)
1438	 * Doorbell MSR (AMD AVIC) that avoid a VM exit.
1439	 *
1440	 * If neither of these features are available then fallback to
1441	 * sending an IPI to 'hostcpu'.
1442	 */
1443	if (vlapic->ops.post_intr)
1444		(*vlapic->ops.post_intr)(vlapic, hostcpu);
1445	else
1446		ipi_cpu(hostcpu, ipinum);
1447}
1448
1449bool
1450vlapic_enabled(struct vlapic *vlapic)
1451{
1452	struct LAPIC *lapic = vlapic->apic_page;
1453
1454	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
1455	    (lapic->svr & APIC_SVR_ENABLE) != 0)
1456		return (true);
1457	else
1458		return (false);
1459}
1460
1461static void
1462vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
1463{
1464	struct LAPIC *lapic;
1465	uint32_t *tmrptr, mask;
1466	int idx;
1467
1468	lapic = vlapic->apic_page;
1469	tmrptr = &lapic->tmr0;
1470	idx = (vector / 32) * 4;
1471	mask = 1 << (vector % 32);
1472	if (level)
1473		tmrptr[idx] |= mask;
1474	else
1475		tmrptr[idx] &= ~mask;
1476
1477	if (vlapic->ops.set_tmr != NULL)
1478		(*vlapic->ops.set_tmr)(vlapic, vector, level);
1479}
1480
1481void
1482vlapic_reset_tmr(struct vlapic *vlapic)
1483{
1484	int vector;
1485
1486	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");
1487
1488	for (vector = 0; vector <= 255; vector++)
1489		vlapic_set_tmr(vlapic, vector, false);
1490}
1491
1492void
1493vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
1494    int delmode, int vector)
1495{
1496	cpuset_t dmask;
1497	bool lowprio;
1498
1499	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));
1500
1501	/*
1502	 * A level trigger is valid only for fixed and lowprio delivery modes.
1503	 */
1504	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
1505		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
1506		    "delivery-mode %d", delmode);
1507		return;
1508	}
1509
1510	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
1511	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);
1512
1513	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
1514		return;
1515
1516	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
1517	vlapic_set_tmr(vlapic, vector, true);
1518}
1519