vlapic.c revision 270070
152284Sobrien/*-
2169689Skan * Copyright (c) 2011 NetApp, Inc.
352284Sobrien * All rights reserved.
490075Sobrien *
552284Sobrien * Redistribution and use in source and binary forms, with or without
690075Sobrien * modification, are permitted provided that the following conditions
790075Sobrien * are met:
890075Sobrien * 1. Redistributions of source code must retain the above copyright
990075Sobrien *    notice, this list of conditions and the following disclaimer.
1052284Sobrien * 2. Redistributions in binary form must reproduce the above copyright
1190075Sobrien *    notice, this list of conditions and the following disclaimer in the
1290075Sobrien *    documentation and/or other materials provided with the distribution.
1390075Sobrien *
1490075Sobrien * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
1552284Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1652284Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1790075Sobrien * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18169689Skan * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19169689Skan * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2052284Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2152284Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2252284Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23132718Skan * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24132718Skan * SUCH DAMAGE.
2552284Sobrien *
2652284Sobrien * $FreeBSD: stable/10/sys/amd64/vmm/io/vlapic.c 270070 2014-08-17 00:52:07Z grehan $
2790075Sobrien */
28169689Skan
2952284Sobrien#include <sys/cdefs.h>
3052284Sobrien__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/io/vlapic.c 270070 2014-08-17 00:52:07Z grehan $");
3152284Sobrien
3252284Sobrien#include <sys/param.h>
3352284Sobrien#include <sys/lock.h>
3452284Sobrien#include <sys/kernel.h>
3552284Sobrien#include <sys/malloc.h>
36132718Skan#include <sys/mutex.h>
3752284Sobrien#include <sys/systm.h>
3890075Sobrien#include <sys/smp.h>
3952284Sobrien
4052284Sobrien#include <x86/specialreg.h>
4152284Sobrien#include <x86/apicreg.h>
4252284Sobrien
4352284Sobrien#include <machine/clock.h>
4452284Sobrien#include <machine/smp.h>
45132718Skan
4652284Sobrien#include <machine/vmm.h>
4752284Sobrien
4852284Sobrien#include "vmm_ipi.h"
4952284Sobrien#include "vmm_lapic.h"
5052284Sobrien#include "vmm_ktr.h"
5152284Sobrien#include "vmm_stat.h"
52117395Skan
53117395Skan#include "vlapic.h"
54117395Skan#include "vlapic_priv.h"
55117395Skan#include "vioapic.h"
56117395Skan
/* Priority class of a vector or TPR value: the value shifted right by 4. */
#define	PRIO(x)			((x) >> 4)

/* Version value used for the emulated local APIC. */
#define VLAPIC_VERSION		(16)

/* Evaluates to 1 if APICBASE_X2APIC is set in 'msr_apicbase', else 0. */
#define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) != 0)

/*
 * 'vlapic->timer_mtx' is a spin mutex that serializes
 * vlapic_callout_handler() against vcpu accesses to:
 * - timer_freq_bt, timer_period_bt, timer_fire_bt
 * - the timer LVT register
 */
#define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
#define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))

/*
 * APIC timer frequency:
 * - arbitrary, but picked to be in the ballpark of contemporary hardware.
 * - a power of two so that no precision is lost when it is converted to a
 *   bintime.
 */
#define VLAPIC_BUS_FREQ		(128 * 1024 * 1024)
79117395Skan
80117395Skanstatic __inline uint32_t
81117395Skanvlapic_get_id(struct vlapic *vlapic)
82117395Skan{
83117395Skan
84117395Skan	if (x2apic(vlapic))
85117395Skan		return (vlapic->vcpuid);
86117395Skan	else
87117395Skan		return (vlapic->vcpuid << 24);
88117395Skan}
89117395Skan
/*
 * Derive the logical destination register value from the APIC ID:
 * bits 3:0 of the ID select a single bit in the low 16 bits of the result
 * and bits 19:4 of the ID are placed in bits 31:16.
 *
 * All arithmetic is done on unsigned 32-bit values so that the left shifts
 * can never overflow a signed int.
 */
static uint32_t
x2apic_ldr(struct vlapic *vlapic)
{
	uint32_t apicid, ldr;

	apicid = vlapic_get_id(vlapic);
	ldr = 1U << (apicid & 0xf);
	ldr |= (apicid & 0xffff0) << 12;
	return (ldr);
}
101117395Skan
102117395Skanvoid
103117395Skanvlapic_dfr_write_handler(struct vlapic *vlapic)
104117395Skan{
105117395Skan	struct LAPIC *lapic;
106117395Skan
107169689Skan	lapic = vlapic->apic_page;
108169689Skan	if (x2apic(vlapic)) {
109169689Skan		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
110169689Skan		    lapic->dfr);
111169689Skan		lapic->dfr = 0;
112169689Skan		return;
113169689Skan	}
114169689Skan
115169689Skan	lapic->dfr &= APIC_DFR_MODEL_MASK;
116169689Skan	lapic->dfr |= APIC_DFR_RESERVED;
117169689Skan
118169689Skan	if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
119169689Skan		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
120169689Skan	else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER)
121169689Skan		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
122169689Skan	else
123169689Skan		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
124169689Skan}
125169689Skan
126169689Skanvoid
127169689Skanvlapic_ldr_write_handler(struct vlapic *vlapic)
128169689Skan{
129169689Skan	struct LAPIC *lapic;
130169689Skan
131169689Skan	lapic = vlapic->apic_page;
132169689Skan
13352284Sobrien	/* LDR is read-only in x2apic mode */
13452284Sobrien	if (x2apic(vlapic)) {
13552284Sobrien		VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
136132718Skan		    lapic->ldr);
13752284Sobrien		lapic->ldr = x2apic_ldr(vlapic);
13890075Sobrien	} else {
13952284Sobrien		lapic->ldr &= ~APIC_LDR_RESERVED;
14052284Sobrien		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
14152284Sobrien	}
14252284Sobrien}
14352284Sobrien
14452284Sobrienvoid
14552284Sobrienvlapic_id_write_handler(struct vlapic *vlapic)
14652284Sobrien{
14752284Sobrien	struct LAPIC *lapic;
14852284Sobrien
14952284Sobrien	/*
15052284Sobrien	 * We don't allow the ID register to be modified so reset it back to
15152284Sobrien	 * its default value.
15252284Sobrien	 */
15352284Sobrien	lapic = vlapic->apic_page;
15452284Sobrien	lapic->id = vlapic_get_id(vlapic);
15552284Sobrien}
15652284Sobrien
15752284Sobrienstatic int
15852284Sobrienvlapic_timer_divisor(uint32_t dcr)
15952284Sobrien{
160132718Skan	switch (dcr & 0xB) {
16152284Sobrien	case APIC_TDCR_1:
16290075Sobrien		return (1);
16352284Sobrien	case APIC_TDCR_2:
16452284Sobrien		return (2);
16590075Sobrien	case APIC_TDCR_4:
16652284Sobrien		return (4);
16752284Sobrien	case APIC_TDCR_8:
16852284Sobrien		return (8);
16952284Sobrien	case APIC_TDCR_16:
17052284Sobrien		return (16);
17152284Sobrien	case APIC_TDCR_32:
17252284Sobrien		return (32);
17352284Sobrien	case APIC_TDCR_64:
17452284Sobrien		return (64);
17552284Sobrien	case APIC_TDCR_128:
17652284Sobrien		return (128);
17752284Sobrien	default:
178132718Skan		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
17952284Sobrien	}
18090075Sobrien}
18152284Sobrien
18252284Sobrien#if 0
18390075Sobrienstatic inline void
18490075Sobrienvlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
185132718Skan{
18690075Sobrien	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
18790075Sobrien	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
18890075Sobrien	    *lvt & APIC_LVTT_M);
189117395Skan}
19052284Sobrien#endif
19152284Sobrien
19252284Sobrienstatic uint32_t
193132718Skanvlapic_get_ccr(struct vlapic *vlapic)
19452284Sobrien{
195132718Skan	struct bintime bt_now, bt_rem;
19652284Sobrien	struct LAPIC *lapic;
19752284Sobrien	uint32_t ccr;
19890075Sobrien
19952284Sobrien	ccr = 0;
20052284Sobrien	lapic = vlapic->apic_page;
201132718Skan
20252284Sobrien	VLAPIC_TIMER_LOCK(vlapic);
20390075Sobrien	if (callout_active(&vlapic->callout)) {
20490075Sobrien		/*
205132718Skan		 * If the timer is scheduled to expire in the future then
20690075Sobrien		 * compute the value of 'ccr' based on the remaining time.
20790075Sobrien		 */
20890075Sobrien		binuptime(&bt_now);
20990075Sobrien		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
21090075Sobrien			bt_rem = vlapic->timer_fire_bt;
21152284Sobrien			bintime_sub(&bt_rem, &bt_now);
21252284Sobrien			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
21352284Sobrien			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
21452284Sobrien		}
21552284Sobrien	}
216132718Skan	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
21752284Sobrien	    "icr_timer is %#x", ccr, lapic->icr_timer));
21890075Sobrien	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
21952284Sobrien	    ccr, lapic->icr_timer);
22052284Sobrien	VLAPIC_TIMER_UNLOCK(vlapic);
22152284Sobrien	return (ccr);
22252284Sobrien}
22352284Sobrien
22490075Sobrienvoid
22552284Sobrienvlapic_dcr_write_handler(struct vlapic *vlapic)
22652284Sobrien{
227132718Skan	struct LAPIC *lapic;
22852284Sobrien	int divisor;
22990075Sobrien
23052284Sobrien	lapic = vlapic->apic_page;
23152284Sobrien	VLAPIC_TIMER_LOCK(vlapic);
23252284Sobrien
23352284Sobrien	divisor = vlapic_timer_divisor(lapic->dcr_timer);
23452284Sobrien	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
23552284Sobrien	    lapic->dcr_timer, divisor);
23652284Sobrien
237117395Skan	/*
23852284Sobrien	 * Update the timer frequency and the timer period.
239117395Skan	 *
240132718Skan	 * XXX changes to the frequency divider will not take effect until
24152284Sobrien	 * the timer is reloaded.
242117395Skan	 */
243117395Skan	FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
244117395Skan	vlapic->timer_period_bt = vlapic->timer_freq_bt;
245117395Skan	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
246117395Skan
247117395Skan	VLAPIC_TIMER_UNLOCK(vlapic);
24852284Sobrien}
249117395Skan
25052284Sobrienvoid
25190075Sobrienvlapic_esr_write_handler(struct vlapic *vlapic)
252117395Skan{
253117395Skan	struct LAPIC *lapic;
25452284Sobrien
25590075Sobrien	lapic = vlapic->apic_page;
256117395Skan	lapic->esr = vlapic->esr_pending;
25752284Sobrien	vlapic->esr_pending = 0;
25852284Sobrien}
259117395Skan
260132718Skanint
261117395Skanvlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
262117395Skan{
263117395Skan	struct LAPIC *lapic;
264117395Skan	uint32_t *irrptr, *tmrptr, mask;
265117395Skan	int idx;
266117395Skan
267117395Skan	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));
268117395Skan
269117395Skan	lapic = vlapic->apic_page;
270117395Skan	if (!(lapic->svr & APIC_SVR_ENABLE)) {
271117395Skan		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
27252284Sobrien		    "interrupt %d", vector);
27352284Sobrien		return (0);
27452284Sobrien	}
275132718Skan
27652284Sobrien	if (vector < 16) {
277117395Skan		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
278117395Skan		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
279117395Skan		    vector);
280132718Skan		return (1);
28152284Sobrien	}
282117395Skan
283117395Skan	if (vlapic->ops.set_intr_ready)
284132718Skan		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
285132718Skan
286132718Skan	idx = (vector / 32) * 4;
287132718Skan	mask = 1 << (vector % 32);
288132718Skan
289132718Skan	irrptr = &lapic->irr0;
29052284Sobrien	atomic_set_int(&irrptr[idx], mask);
29152284Sobrien
29252284Sobrien	/*
29390075Sobrien	 * Verify that the trigger-mode of the interrupt matches with
29452284Sobrien	 * the vlapic TMR registers.
29552284Sobrien	 */
296132718Skan	tmrptr = &lapic->tmr0;
29752284Sobrien	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
298117395Skan		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
299117395Skan		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
300117395Skan		    level ? "level" : "edge");
301117395Skan	}
302117395Skan
303132718Skan	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
304117395Skan	return (1);
305169689Skan}
306117395Skan
307117395Skanstatic __inline uint32_t *
308117395Skanvlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
309117395Skan{
310117395Skan	struct LAPIC	*lapic = vlapic->apic_page;
31152284Sobrien	int 		 i;
312117395Skan
313117395Skan	switch (offset) {
314117395Skan	case APIC_OFFSET_CMCI_LVT:
315117395Skan		return (&lapic->lvt_cmci);
316117395Skan	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
31752284Sobrien		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
31852284Sobrien		return ((&lapic->lvt_timer) + i);;
319169689Skan	default:
320169689Skan		panic("vlapic_get_lvt: invalid LVT\n");
321169689Skan	}
322169689Skan}
323169689Skan
324169689Skanstatic __inline int
325169689Skanlvt_off_to_idx(uint32_t offset)
326169689Skan{
327169689Skan	int index;
328169689Skan
329169689Skan	switch (offset) {
330169689Skan	case APIC_OFFSET_CMCI_LVT:
331169689Skan		index = APIC_LVT_CMCI;
332169689Skan		break;
333169689Skan	case APIC_OFFSET_TIMER_LVT:
334169689Skan		index = APIC_LVT_TIMER;
335169689Skan		break;
336169689Skan	case APIC_OFFSET_THERM_LVT:
33790075Sobrien		index = APIC_LVT_THERMAL;
338117395Skan		break;
33952284Sobrien	case APIC_OFFSET_PERF_LVT:
340117395Skan		index = APIC_LVT_PMC;
341132718Skan		break;
34252284Sobrien	case APIC_OFFSET_LINT0_LVT:
343117395Skan		index = APIC_LVT_LINT0;
344117395Skan		break;
345117395Skan	case APIC_OFFSET_LINT1_LVT:
346117395Skan		index = APIC_LVT_LINT1;
347117395Skan		break;
34852284Sobrien	case APIC_OFFSET_ERROR_LVT:
349117395Skan		index = APIC_LVT_ERROR;
35052284Sobrien		break;
35190075Sobrien	default:
352132718Skan		index = -1;
353117395Skan		break;
35452284Sobrien	}
35590075Sobrien	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
356117395Skan	    "invalid lvt index %d for offset %#x", index, offset));
35752284Sobrien
35890075Sobrien	return (index);
359117395Skan}
360132718Skan
361117395Skanstatic __inline uint32_t
362117395Skanvlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
363117395Skan{
364117395Skan	int idx;
365117395Skan	uint32_t val;
366117395Skan
367117395Skan	idx = lvt_off_to_idx(offset);
368117395Skan	val = atomic_load_acq_32(&vlapic->lvt_last[idx]);
369117395Skan	return (val);
370117395Skan}
37190075Sobrien
372117395Skanvoid
37390075Sobrienvlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
374117395Skan{
375132718Skan	uint32_t *lvtptr, mask, val;
37690075Sobrien	struct LAPIC *lapic;
377117395Skan	int idx;
378117395Skan
379117395Skan	lapic = vlapic->apic_page;
380117395Skan	lvtptr = vlapic_get_lvtptr(vlapic, offset);
381117395Skan	val = *lvtptr;
382117395Skan	idx = lvt_off_to_idx(offset);
383117395Skan
38490075Sobrien	if (!(lapic->svr & APIC_SVR_ENABLE))
38590075Sobrien		val |= APIC_LVT_M;
386132718Skan	mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
387117395Skan	switch (offset) {
38890075Sobrien	case APIC_OFFSET_TIMER_LVT:
389117395Skan		mask |= APIC_LVTT_TM;
390117395Skan		break;
39190075Sobrien	case APIC_OFFSET_ERROR_LVT:
39290075Sobrien		break;
393117395Skan	case APIC_OFFSET_LINT0_LVT:
394132718Skan	case APIC_OFFSET_LINT1_LVT:
395117395Skan		mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
396117395Skan		/* FALLTHROUGH */
397117395Skan	default:
398117395Skan		mask |= APIC_LVT_DM;
399117395Skan		break;
400117395Skan	}
401117395Skan	val &= mask;
402117395Skan	*lvtptr = val;
403117395Skan	atomic_store_rel_32(&vlapic->lvt_last[idx], val);
404117395Skan}
40552284Sobrien
406117395Skanstatic void
40752284Sobrienvlapic_mask_lvts(struct vlapic *vlapic)
408117395Skan{
409132718Skan	struct LAPIC *lapic = vlapic->apic_page;
41052284Sobrien
411117395Skan	lapic->lvt_cmci |= APIC_LVT_M;
412117395Skan	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT);
413117395Skan
414117395Skan	lapic->lvt_timer |= APIC_LVT_M;
415117395Skan	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT);
41652284Sobrien
417117395Skan	lapic->lvt_thermal |= APIC_LVT_M;
41852284Sobrien	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT);
41990075Sobrien
420132718Skan	lapic->lvt_pcint |= APIC_LVT_M;
421117395Skan	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT);
42252284Sobrien
42390075Sobrien	lapic->lvt_lint0 |= APIC_LVT_M;
424117395Skan	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT);
42552284Sobrien
42652284Sobrien	lapic->lvt_lint1 |= APIC_LVT_M;
427117395Skan	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT);
428132718Skan
429117395Skan	lapic->lvt_error |= APIC_LVT_M;
430117395Skan	vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT);
431117395Skan}
432117395Skan
433117395Skanstatic int
43490075Sobrienvlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
435117395Skan{
436117395Skan	uint32_t vec, mode;
437117395Skan
438117395Skan	if (lvt & APIC_LVT_M)
439117395Skan		return (0);
440117395Skan
441117395Skan	vec = lvt & APIC_LVT_VECTOR;
442132718Skan	mode = lvt & APIC_LVT_DM;
44390075Sobrien
444117395Skan	switch (mode) {
44590075Sobrien	case APIC_LVT_DM_FIXED:
44690075Sobrien		if (vec < 16) {
447117395Skan			vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
44890075Sobrien			return (0);
449117395Skan		}
45090075Sobrien		if (vlapic_set_intr_ready(vlapic, vec, false))
451117395Skan			vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
45290075Sobrien		break;
45390075Sobrien	case APIC_LVT_DM_NMI:
45452284Sobrien		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
455117395Skan		break;
45652284Sobrien	case APIC_LVT_DM_EXTINT:
457117395Skan		vm_inject_extint(vlapic->vm, vlapic->vcpuid);
458132718Skan		break;
45952284Sobrien	default:
460117395Skan		// Other modes ignored
461117395Skan		return (0);
462117395Skan	}
463117395Skan	return (1);
464117395Skan}
465117395Skan
46652284Sobrien#if 1
467117395Skanstatic void
46852284Sobriendump_isrvec_stk(struct vlapic *vlapic)
46990075Sobrien{
470117395Skan	int i;
471117395Skan	uint32_t *isrptr;
47252284Sobrien
47390075Sobrien	isrptr = &vlapic->apic_page->isr0;
474117395Skan	for (i = 0; i < 8; i++)
47552284Sobrien		printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
47652284Sobrien
477117395Skan	for (i = 0; i <= vlapic->isrvec_stk_top; i++)
478132718Skan		printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]);
479117395Skan}
480117395Skan#endif
481117395Skan
482117395Skan/*
483117395Skan * Algorithm adopted from section "Interrupt, Task and Processor Priority"
484117395Skan * in Intel Architecture Manual Vol 3a.
485117395Skan */
486117395Skanstatic void
487117395Skanvlapic_update_ppr(struct vlapic *vlapic)
488117395Skan{
489117395Skan	int isrvec, tpr, ppr;
49090075Sobrien
491117395Skan	/*
49252284Sobrien	 * Note that the value on the stack at index 0 is always 0.
493117395Skan	 *
494132718Skan	 * This is a placeholder for the value of ISRV when none of the
49552284Sobrien	 * bits is set in the ISRx registers.
496117395Skan	 */
497117395Skan	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
498117395Skan	tpr = vlapic->apic_page->tpr;
499117395Skan
500117395Skan#if 1
501117395Skan	{
50252284Sobrien		int i, lastprio, curprio, vector, idx;
503117395Skan		uint32_t *isrptr;
50452284Sobrien
50590075Sobrien		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
506117395Skan			panic("isrvec_stk is corrupted: %d", isrvec);
507117395Skan
50852284Sobrien		/*
50990075Sobrien		 * Make sure that the priority of the nested interrupts is
510117395Skan		 * always increasing.
51152284Sobrien		 */
51252284Sobrien		lastprio = -1;
513117395Skan		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
514132718Skan			curprio = PRIO(vlapic->isrvec_stk[i]);
515117395Skan			if (curprio <= lastprio) {
516117395Skan				dump_isrvec_stk(vlapic);
517117395Skan				panic("isrvec_stk does not satisfy invariant");
518117395Skan			}
519117395Skan			lastprio = curprio;
520117395Skan		}
521117395Skan
522117395Skan		/*
523117395Skan		 * Make sure that each bit set in the ISRx registers has a
524117395Skan		 * corresponding entry on the isrvec stack.
525117395Skan		 */
52690075Sobrien		i = 1;
52790075Sobrien		isrptr = &vlapic->apic_page->isr0;
52890075Sobrien		for (vector = 0; vector < 256; vector++) {
52952284Sobrien			idx = (vector / 32) * 4;
53052284Sobrien			if (isrptr[idx] & (1 << (vector % 32))) {
531132718Skan				if (i > vlapic->isrvec_stk_top ||
53252284Sobrien				    vlapic->isrvec_stk[i] != vector) {
53390075Sobrien					dump_isrvec_stk(vlapic);
53490075Sobrien					panic("ISR and isrvec_stk out of sync");
53590075Sobrien				}
536169689Skan				i++;
53752284Sobrien			}
538169689Skan		}
53990075Sobrien	}
540169689Skan#endif
54190075Sobrien
542117395Skan	if (PRIO(tpr) >= PRIO(isrvec))
543169689Skan		ppr = tpr;
54490075Sobrien	else
54590075Sobrien		ppr = isrvec & 0xf0;
54652284Sobrien
54752284Sobrien	vlapic->apic_page->ppr = ppr;
54890075Sobrien	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
54990075Sobrien}
55090075Sobrien
551169689Skanstatic void
55290075Sobrienvlapic_process_eoi(struct vlapic *vlapic)
55390075Sobrien{
55490075Sobrien	struct LAPIC	*lapic = vlapic->apic_page;
55552284Sobrien	uint32_t	*isrptr, *tmrptr;
556169689Skan	int		i, idx, bitpos, vector;
55790075Sobrien
55890075Sobrien	isrptr = &lapic->isr0;
55990075Sobrien	tmrptr = &lapic->tmr0;
56090075Sobrien
56190075Sobrien	/*
56290075Sobrien	 * The x86 architecture reserves the the first 32 vectors for use
56390075Sobrien	 * by the processor.
56490075Sobrien	 */
56590075Sobrien	for (i = 7; i > 0; i--) {
56690075Sobrien		idx = i * 4;
56790075Sobrien		bitpos = fls(isrptr[idx]);
56890075Sobrien		if (bitpos-- != 0) {
56990075Sobrien			if (vlapic->isrvec_stk_top <= 0) {
57090075Sobrien				panic("invalid vlapic isrvec_stk_top %d",
571132718Skan				      vlapic->isrvec_stk_top);
57290075Sobrien			}
57390075Sobrien			isrptr[idx] &= ~(1 << bitpos);
57490075Sobrien			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
57590075Sobrien			vlapic->isrvec_stk_top--;
576169689Skan			vlapic_update_ppr(vlapic);
57790075Sobrien			if ((tmrptr[idx] & (1 << bitpos)) != 0) {
578169689Skan				vector = i * 32 + bitpos;
57952284Sobrien				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
580169689Skan				    vector);
58190075Sobrien			}
582117395Skan			return;
58390075Sobrien		}
58490075Sobrien	}
58552284Sobrien}
58652284Sobrien
/* Extract the bits of 'lvt' selected by 'mask' (no shifting is applied). */
static __inline int
vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{
	uint32_t field = lvt & mask;

	return (field);
}
59390075Sobrien
59490075Sobrienstatic __inline int
59552284Sobrienvlapic_periodic_timer(struct vlapic *vlapic)
596169689Skan{
59790075Sobrien	uint32_t lvt;
59890075Sobrien
59990075Sobrien	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
60090075Sobrien
60190075Sobrien	return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC));
60290075Sobrien}
60390075Sobrien
60490075Sobrienstatic VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");
60590075Sobrien
60690075Sobrienvoid
60790075Sobrienvlapic_set_error(struct vlapic *vlapic, uint32_t mask)
60890075Sobrien{
60990075Sobrien	uint32_t lvt;
61090075Sobrien
611132718Skan	vlapic->esr_pending |= mask;
61290075Sobrien	if (vlapic->esr_firing)
61390075Sobrien		return;
61490075Sobrien	vlapic->esr_firing = 1;
61590075Sobrien
616169689Skan	// The error LVT always uses the fixed delivery mode.
61790075Sobrien	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
618169689Skan	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
61952284Sobrien		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1);
620169689Skan	}
62190075Sobrien	vlapic->esr_firing = 0;
622117395Skan}
62352284Sobrien
62490075Sobrienstatic VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");
62590075Sobrien
62690075Sobrienstatic void
62752284Sobrienvlapic_fire_timer(struct vlapic *vlapic)
628169689Skan{
62990075Sobrien	uint32_t lvt;
63090075Sobrien
631169689Skan	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));
63290075Sobrien
63390075Sobrien	// The timer LVT always uses the fixed delivery mode.
63490075Sobrien	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
63552284Sobrien	if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
636169689Skan		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
63790075Sobrien	}
63890075Sobrien}
63990075Sobrien
64090075Sobrienstatic VMM_STAT(VLAPIC_INTR_CMC,
64190075Sobrien    "corrected machine check interrupts generated by vlapic");
64290075Sobrien
64390075Sobrienvoid
64490075Sobrienvlapic_fire_cmci(struct vlapic *vlapic)
64552284Sobrien{
64652284Sobrien	uint32_t lvt;
64790075Sobrien
64890075Sobrien	lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
64952284Sobrien	if (vlapic_fire_lvt(vlapic, lvt)) {
65052284Sobrien		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
651132718Skan	}
65252284Sobrien}
65390075Sobrien
65490075Sobrienstatic VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
65590075Sobrien    "lvts triggered");
656169689Skan
65752284Sobrienint
658169689Skanvlapic_trigger_lvt(struct vlapic *vlapic, int vector)
65952284Sobrien{
660169689Skan	uint32_t lvt;
66190075Sobrien
662117395Skan	if (vlapic_enabled(vlapic) == false) {
66352284Sobrien		/*
66490075Sobrien		 * When the local APIC is global/hardware disabled,
66552284Sobrien		 * LINT[1:0] pins are configured as INTR and NMI pins,
66652284Sobrien		 * respectively.
66752284Sobrien		*/
668169689Skan		switch (vector) {
66990075Sobrien			case APIC_LVT_LINT0:
67090075Sobrien				vm_inject_extint(vlapic->vm, vlapic->vcpuid);
671169689Skan				break;
67290075Sobrien			case APIC_LVT_LINT1:
67390075Sobrien				vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
67490075Sobrien				break;
67552284Sobrien			default:
676169689Skan				break;
67790075Sobrien		}
67890075Sobrien		return (0);
679117395Skan	}
68090075Sobrien
68190075Sobrien	switch (vector) {
68290075Sobrien	case APIC_LVT_LINT0:
68390075Sobrien		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT);
68490075Sobrien		break;
68590075Sobrien	case APIC_LVT_LINT1:
68690075Sobrien		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT);
68790075Sobrien		break;
68890075Sobrien	case APIC_LVT_TIMER:
68990075Sobrien		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
69090075Sobrien		lvt |= APIC_LVT_DM_FIXED;
691132718Skan		break;
69290075Sobrien	case APIC_LVT_ERROR:
693169689Skan		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT);
694169689Skan		lvt |= APIC_LVT_DM_FIXED;
69590075Sobrien		break;
696169689Skan	case APIC_LVT_PMC:
697169689Skan		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT);
69890075Sobrien		break;
69990075Sobrien	case APIC_LVT_THERMAL:
70090075Sobrien		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT);
70190075Sobrien		break;
70290075Sobrien	case APIC_LVT_CMCI:
70390075Sobrien		lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
704132718Skan		break;
70590075Sobrien	default:
70690075Sobrien		return (EINVAL);
70790075Sobrien	}
70890075Sobrien	if (vlapic_fire_lvt(vlapic, lvt)) {
70990075Sobrien		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
71052284Sobrien		    LVTS_TRIGGERRED, vector, 1);
71190075Sobrien	}
71252284Sobrien	return (0);
71390075Sobrien}
71490075Sobrien
71590075Sobrienstatic void
71690075Sobrienvlapic_callout_handler(void *arg)
71790075Sobrien{
71852284Sobrien	struct vlapic *vlapic;
71990075Sobrien	struct bintime bt, btnow;
72090075Sobrien	sbintime_t rem_sbt;
72190075Sobrien
72290075Sobrien	vlapic = arg;
72352284Sobrien
72490075Sobrien	VLAPIC_TIMER_LOCK(vlapic);
72590075Sobrien	if (callout_pending(&vlapic->callout))	/* callout was reset */
72690075Sobrien		goto done;
72790075Sobrien
72852284Sobrien	if (!callout_active(&vlapic->callout))	/* callout was stopped */
72990075Sobrien		goto done;
73090075Sobrien
73152284Sobrien	callout_deactivate(&vlapic->callout);
73252284Sobrien
73352284Sobrien	vlapic_fire_timer(vlapic);
734132718Skan
73552284Sobrien	if (vlapic_periodic_timer(vlapic)) {
73690075Sobrien		binuptime(&btnow);
73790075Sobrien		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
73890075Sobrien		    ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx",
73952284Sobrien		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
74052284Sobrien		    vlapic->timer_fire_bt.frac));
74152284Sobrien
74290075Sobrien		/*
74390075Sobrien		 * Compute the delta between when the timer was supposed to
74490075Sobrien		 * fire and the present time.
74590075Sobrien		 */
74690075Sobrien		bt = btnow;
74790075Sobrien		bintime_sub(&bt, &vlapic->timer_fire_bt);
74890075Sobrien
74990075Sobrien		rem_sbt = bttosbt(vlapic->timer_period_bt);
75090075Sobrien		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
75152284Sobrien			/*
75252284Sobrien			 * Adjust the time until the next countdown downward
75352284Sobrien			 * to account for the lost time.
75452284Sobrien			 */
755132718Skan			rem_sbt -= bttosbt(bt);
75690075Sobrien		} else {
75790075Sobrien			/*
75890075Sobrien			 * If the delta is greater than the timer period then
759117395Skan			 * just reset our time base instead of trying to catch
76090075Sobrien			 * up.
76190075Sobrien			 */
76290075Sobrien			vlapic->timer_fire_bt = btnow;
76390075Sobrien			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
76490075Sobrien			    "usecs, period is %lu usecs - resetting time base",
76590075Sobrien			    bttosbt(bt) / SBT_1US,
766117395Skan			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
76790075Sobrien		}
76890075Sobrien
76990075Sobrien		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
770117395Skan		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
771117395Skan		    vlapic_callout_handler, vlapic, 0);
77290075Sobrien	}
77390075Sobriendone:
774117395Skan	VLAPIC_TIMER_UNLOCK(vlapic);
77590075Sobrien}
77690075Sobrien
77790075Sobrienvoid
778132718Skanvlapic_icrtmr_write_handler(struct vlapic *vlapic)
779117395Skan{
780117395Skan	struct LAPIC *lapic;
781117395Skan	sbintime_t sbt;
782117395Skan	uint32_t icr_timer;
783117395Skan
784132718Skan	VLAPIC_TIMER_LOCK(vlapic);
785132718Skan
78652284Sobrien	lapic = vlapic->apic_page;
78752284Sobrien	icr_timer = lapic->icr_timer;
78852284Sobrien
78952284Sobrien	vlapic->timer_period_bt = vlapic->timer_freq_bt;
79052284Sobrien	bintime_mul(&vlapic->timer_period_bt, icr_timer);
79152284Sobrien
79252284Sobrien	if (icr_timer != 0) {
79352284Sobrien		binuptime(&vlapic->timer_fire_bt);
79452284Sobrien		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
79590075Sobrien
79652284Sobrien		sbt = bttosbt(vlapic->timer_period_bt);
79752284Sobrien		callout_reset_sbt(&vlapic->callout, sbt, 0,
798		    vlapic_callout_handler, vlapic, 0);
799	} else
800		callout_stop(&vlapic->callout);
801
802	VLAPIC_TIMER_UNLOCK(vlapic);
803}
804
805/*
806 * This function populates 'dmask' with the set of vcpus that match the
807 * addressing specified by the (dest, phys, lowprio) tuple.
808 *
809 * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
810 * or xAPIC (8-bit) destination field.
811 */
812static void
813vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
814    bool lowprio, bool x2apic_dest)
815{
816	struct vlapic *vlapic;
817	uint32_t dfr, ldr, ldest, cluster;
818	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
819	cpuset_t amask;
820	int vcpuid;
821
822	if ((x2apic_dest && dest == 0xffffffff) ||
823	    (!x2apic_dest && dest == 0xff)) {
824		/*
825		 * Broadcast in both logical and physical modes.
826		 */
827		*dmask = vm_active_cpus(vm);
828		return;
829	}
830
831	if (phys) {
832		/*
833		 * Physical mode: destination is APIC ID.
834		 */
835		CPU_ZERO(dmask);
836		vcpuid = vm_apicid2vcpuid(vm, dest);
837		if (vcpuid < VM_MAXCPU)
838			CPU_SET(vcpuid, dmask);
839	} else {
840		/*
841		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
842		 * bitmask. This model is only avilable in the xAPIC mode.
843		 */
844		mda_flat_ldest = dest & 0xff;
845
846		/*
847		 * In the "Cluster Model" the MDA is used to identify a
848		 * specific cluster and a set of APICs in that cluster.
849		 */
850		if (x2apic_dest) {
851			mda_cluster_id = dest >> 16;
852			mda_cluster_ldest = dest & 0xffff;
853		} else {
854			mda_cluster_id = (dest >> 4) & 0xf;
855			mda_cluster_ldest = dest & 0xf;
856		}
857
858		/*
859		 * Logical mode: match each APIC that has a bit set
860		 * in it's LDR that matches a bit in the ldest.
861		 */
862		CPU_ZERO(dmask);
863		amask = vm_active_cpus(vm);
864		while ((vcpuid = CPU_FFS(&amask)) != 0) {
865			vcpuid--;
866			CPU_CLR(vcpuid, &amask);
867
868			vlapic = vm_lapic(vm, vcpuid);
869			dfr = vlapic->apic_page->dfr;
870			ldr = vlapic->apic_page->ldr;
871
872			if ((dfr & APIC_DFR_MODEL_MASK) ==
873			    APIC_DFR_MODEL_FLAT) {
874				ldest = ldr >> 24;
875				mda_ldest = mda_flat_ldest;
876			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
877			    APIC_DFR_MODEL_CLUSTER) {
878				if (x2apic(vlapic)) {
879					cluster = ldr >> 16;
880					ldest = ldr & 0xffff;
881				} else {
882					cluster = ldr >> 28;
883					ldest = (ldr >> 24) & 0xf;
884				}
885				if (cluster != mda_cluster_id)
886					continue;
887				mda_ldest = mda_cluster_ldest;
888			} else {
889				/*
890				 * Guest has configured a bad logical
891				 * model for this vcpu - skip it.
892				 */
893				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
894				    "model %x - cannot deliver interrupt", dfr);
895				continue;
896			}
897
898			if ((mda_ldest & ldest) != 0) {
899				CPU_SET(vcpuid, dmask);
900				if (lowprio)
901					break;
902			}
903		}
904	}
905}
906
907static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
908
909int
910vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
911{
912	int i;
913	bool phys;
914	cpuset_t dmask;
915	uint64_t icrval;
916	uint32_t dest, vec, mode;
917	struct vlapic *vlapic2;
918	struct vm_exit *vmexit;
919	struct LAPIC *lapic;
920
921	lapic = vlapic->apic_page;
922	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
923	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
924
925	if (x2apic(vlapic))
926		dest = icrval >> 32;
927	else
928		dest = icrval >> (32 + 24);
929	vec = icrval & APIC_VECTOR_MASK;
930	mode = icrval & APIC_DELMODE_MASK;
931
932	if (mode == APIC_DELMODE_FIXED && vec < 16) {
933		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
934		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
935		return (0);
936	}
937
938	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
939
940	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
941		switch (icrval & APIC_DEST_MASK) {
942		case APIC_DEST_DESTFLD:
943			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
944			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
945			    x2apic(vlapic));
946			break;
947		case APIC_DEST_SELF:
948			CPU_SETOF(vlapic->vcpuid, &dmask);
949			break;
950		case APIC_DEST_ALLISELF:
951			dmask = vm_active_cpus(vlapic->vm);
952			break;
953		case APIC_DEST_ALLESELF:
954			dmask = vm_active_cpus(vlapic->vm);
955			CPU_CLR(vlapic->vcpuid, &dmask);
956			break;
957		default:
958			CPU_ZERO(&dmask);	/* satisfy gcc */
959			break;
960		}
961
962		while ((i = CPU_FFS(&dmask)) != 0) {
963			i--;
964			CPU_CLR(i, &dmask);
965			if (mode == APIC_DELMODE_FIXED) {
966				lapic_intr_edge(vlapic->vm, i, vec);
967				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
968						    IPIS_SENT, i, 1);
969				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
970				    "to vcpuid %d", vec, i);
971			} else {
972				vm_inject_nmi(vlapic->vm, i);
973				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
974				    "to vcpuid %d", i);
975			}
976		}
977
978		return (0);	/* handled completely in the kernel */
979	}
980
981	if (mode == APIC_DELMODE_INIT) {
982		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
983			return (0);
984
985		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
986			vlapic2 = vm_lapic(vlapic->vm, dest);
987
988			/* move from INIT to waiting-for-SIPI state */
989			if (vlapic2->boot_state == BS_INIT) {
990				vlapic2->boot_state = BS_SIPI;
991			}
992
993			return (0);
994		}
995	}
996
997	if (mode == APIC_DELMODE_STARTUP) {
998		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
999			vlapic2 = vm_lapic(vlapic->vm, dest);
1000
1001			/*
1002			 * Ignore SIPIs in any state other than wait-for-SIPI
1003			 */
1004			if (vlapic2->boot_state != BS_SIPI)
1005				return (0);
1006
1007			vlapic2->boot_state = BS_RUNNING;
1008
1009			*retu = true;
1010			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
1011			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
1012			vmexit->u.spinup_ap.vcpu = dest;
1013			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
1014
1015			return (0);
1016		}
1017	}
1018
1019	/*
1020	 * This will cause a return to userland.
1021	 */
1022	return (1);
1023}
1024
1025void
1026vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
1027{
1028	int vec;
1029
1030	KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));
1031
1032	vec = val & 0xff;
1033	lapic_intr_edge(vlapic->vm, vlapic->vcpuid, vec);
1034	vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT,
1035	    vlapic->vcpuid, 1);
1036	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
1037}
1038
1039int
1040vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
1041{
1042	struct LAPIC	*lapic = vlapic->apic_page;
1043	int	  	 idx, i, bitpos, vector;
1044	uint32_t	*irrptr, val;
1045
1046	if (vlapic->ops.pending_intr)
1047		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));
1048
1049	irrptr = &lapic->irr0;
1050
1051	/*
1052	 * The x86 architecture reserves the the first 32 vectors for use
1053	 * by the processor.
1054	 */
1055	for (i = 7; i > 0; i--) {
1056		idx = i * 4;
1057		val = atomic_load_acq_int(&irrptr[idx]);
1058		bitpos = fls(val);
1059		if (bitpos != 0) {
1060			vector = i * 32 + (bitpos - 1);
1061			if (PRIO(vector) > PRIO(lapic->ppr)) {
1062				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
1063				if (vecptr != NULL)
1064					*vecptr = vector;
1065				return (1);
1066			} else
1067				break;
1068		}
1069	}
1070	return (0);
1071}
1072
1073void
1074vlapic_intr_accepted(struct vlapic *vlapic, int vector)
1075{
1076	struct LAPIC	*lapic = vlapic->apic_page;
1077	uint32_t	*irrptr, *isrptr;
1078	int		idx, stk_top;
1079
1080	if (vlapic->ops.intr_accepted)
1081		return ((*vlapic->ops.intr_accepted)(vlapic, vector));
1082
1083	/*
1084	 * clear the ready bit for vector being accepted in irr
1085	 * and set the vector as in service in isr.
1086	 */
1087	idx = (vector / 32) * 4;
1088
1089	irrptr = &lapic->irr0;
1090	atomic_clear_int(&irrptr[idx], 1 << (vector % 32));
1091	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");
1092
1093	isrptr = &lapic->isr0;
1094	isrptr[idx] |= 1 << (vector % 32);
1095	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");
1096
1097	/*
1098	 * Update the PPR
1099	 */
1100	vlapic->isrvec_stk_top++;
1101
1102	stk_top = vlapic->isrvec_stk_top;
1103	if (stk_top >= ISRVEC_STK_SIZE)
1104		panic("isrvec_stk_top overflow %d", stk_top);
1105
1106	vlapic->isrvec_stk[stk_top] = vector;
1107	vlapic_update_ppr(vlapic);
1108}
1109
1110void
1111vlapic_svr_write_handler(struct vlapic *vlapic)
1112{
1113	struct LAPIC *lapic;
1114	uint32_t old, new, changed;
1115
1116	lapic = vlapic->apic_page;
1117
1118	new = lapic->svr;
1119	old = vlapic->svr_last;
1120	vlapic->svr_last = new;
1121
1122	changed = old ^ new;
1123	if ((changed & APIC_SVR_ENABLE) != 0) {
1124		if ((new & APIC_SVR_ENABLE) == 0) {
1125			/*
1126			 * The apic is now disabled so stop the apic timer
1127			 * and mask all the LVT entries.
1128			 */
1129			VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
1130			VLAPIC_TIMER_LOCK(vlapic);
1131			callout_stop(&vlapic->callout);
1132			VLAPIC_TIMER_UNLOCK(vlapic);
1133			vlapic_mask_lvts(vlapic);
1134		} else {
1135			/*
1136			 * The apic is now enabled so restart the apic timer
1137			 * if it is configured in periodic mode.
1138			 */
1139			VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
1140			if (vlapic_periodic_timer(vlapic))
1141				vlapic_icrtmr_write_handler(vlapic);
1142		}
1143	}
1144}
1145
1146int
1147vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
1148    uint64_t *data, bool *retu)
1149{
1150	struct LAPIC	*lapic = vlapic->apic_page;
1151	uint32_t	*reg;
1152	int		 i;
1153
1154	/* Ignore MMIO accesses in x2APIC mode */
1155	if (x2apic(vlapic) && mmio_access) {
1156		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
1157		    offset);
1158		*data = 0;
1159		goto done;
1160	}
1161
1162	if (!x2apic(vlapic) && !mmio_access) {
1163		/*
1164		 * XXX Generate GP fault for MSR accesses in xAPIC mode
1165		 */
1166		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
1167		    "xAPIC mode", offset);
1168		*data = 0;
1169		goto done;
1170	}
1171
1172	if (offset > sizeof(*lapic)) {
1173		*data = 0;
1174		goto done;
1175	}
1176
1177	offset &= ~3;
1178	switch(offset)
1179	{
1180		case APIC_OFFSET_ID:
1181			*data = lapic->id;
1182			break;
1183		case APIC_OFFSET_VER:
1184			*data = lapic->version;
1185			break;
1186		case APIC_OFFSET_TPR:
1187			*data = lapic->tpr;
1188			break;
1189		case APIC_OFFSET_APR:
1190			*data = lapic->apr;
1191			break;
1192		case APIC_OFFSET_PPR:
1193			*data = lapic->ppr;
1194			break;
1195		case APIC_OFFSET_EOI:
1196			*data = lapic->eoi;
1197			break;
1198		case APIC_OFFSET_LDR:
1199			*data = lapic->ldr;
1200			break;
1201		case APIC_OFFSET_DFR:
1202			*data = lapic->dfr;
1203			break;
1204		case APIC_OFFSET_SVR:
1205			*data = lapic->svr;
1206			break;
1207		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
1208			i = (offset - APIC_OFFSET_ISR0) >> 2;
1209			reg = &lapic->isr0;
1210			*data = *(reg + i);
1211			break;
1212		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
1213			i = (offset - APIC_OFFSET_TMR0) >> 2;
1214			reg = &lapic->tmr0;
1215			*data = *(reg + i);
1216			break;
1217		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
1218			i = (offset - APIC_OFFSET_IRR0) >> 2;
1219			reg = &lapic->irr0;
1220			*data = atomic_load_acq_int(reg + i);
1221			break;
1222		case APIC_OFFSET_ESR:
1223			*data = lapic->esr;
1224			break;
1225		case APIC_OFFSET_ICR_LOW:
1226			*data = lapic->icr_lo;
1227			if (x2apic(vlapic))
1228				*data |= (uint64_t)lapic->icr_hi << 32;
1229			break;
1230		case APIC_OFFSET_ICR_HI:
1231			*data = lapic->icr_hi;
1232			break;
1233		case APIC_OFFSET_CMCI_LVT:
1234		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
1235			*data = vlapic_get_lvt(vlapic, offset);
1236#ifdef INVARIANTS
1237			reg = vlapic_get_lvtptr(vlapic, offset);
1238			KASSERT(*data == *reg, ("inconsistent lvt value at "
1239			    "offset %#lx: %#lx/%#x", offset, *data, *reg));
1240#endif
1241			break;
1242		case APIC_OFFSET_TIMER_ICR:
1243			*data = lapic->icr_timer;
1244			break;
1245		case APIC_OFFSET_TIMER_CCR:
1246			*data = vlapic_get_ccr(vlapic);
1247			break;
1248		case APIC_OFFSET_TIMER_DCR:
1249			*data = lapic->dcr_timer;
1250			break;
1251		case APIC_OFFSET_SELF_IPI:
1252			/*
1253			 * XXX generate a GP fault if vlapic is in x2apic mode
1254			 */
1255			*data = 0;
1256			break;
1257		case APIC_OFFSET_RRR:
1258		default:
1259			*data = 0;
1260			break;
1261	}
1262done:
1263	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
1264	return 0;
1265}
1266
1267int
1268vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
1269    uint64_t data, bool *retu)
1270{
1271	struct LAPIC	*lapic = vlapic->apic_page;
1272	uint32_t	*regptr;
1273	int		retval;
1274
1275	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
1276	    ("vlapic_write: invalid offset %#lx", offset));
1277
1278	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
1279	    offset, data);
1280
1281	if (offset > sizeof(*lapic))
1282		return (0);
1283
1284	/* Ignore MMIO accesses in x2APIC mode */
1285	if (x2apic(vlapic) && mmio_access) {
1286		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
1287		    "in x2APIC mode", data, offset);
1288		return (0);
1289	}
1290
1291	/*
1292	 * XXX Generate GP fault for MSR accesses in xAPIC mode
1293	 */
1294	if (!x2apic(vlapic) && !mmio_access) {
1295		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
1296		    "in xAPIC mode", data, offset);
1297		return (0);
1298	}
1299
1300	retval = 0;
1301	switch(offset)
1302	{
1303		case APIC_OFFSET_ID:
1304			lapic->id = data;
1305			vlapic_id_write_handler(vlapic);
1306			break;
1307		case APIC_OFFSET_TPR:
1308			lapic->tpr = data & 0xff;
1309			vlapic_update_ppr(vlapic);
1310			break;
1311		case APIC_OFFSET_EOI:
1312			vlapic_process_eoi(vlapic);
1313			break;
1314		case APIC_OFFSET_LDR:
1315			lapic->ldr = data;
1316			vlapic_ldr_write_handler(vlapic);
1317			break;
1318		case APIC_OFFSET_DFR:
1319			lapic->dfr = data;
1320			vlapic_dfr_write_handler(vlapic);
1321			break;
1322		case APIC_OFFSET_SVR:
1323			lapic->svr = data;
1324			vlapic_svr_write_handler(vlapic);
1325			break;
1326		case APIC_OFFSET_ICR_LOW:
1327			lapic->icr_lo = data;
1328			if (x2apic(vlapic))
1329				lapic->icr_hi = data >> 32;
1330			retval = vlapic_icrlo_write_handler(vlapic, retu);
1331			break;
1332		case APIC_OFFSET_ICR_HI:
1333			lapic->icr_hi = data;
1334			break;
1335		case APIC_OFFSET_CMCI_LVT:
1336		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
1337			regptr = vlapic_get_lvtptr(vlapic, offset);
1338			*regptr = data;
1339			vlapic_lvt_write_handler(vlapic, offset);
1340			break;
1341		case APIC_OFFSET_TIMER_ICR:
1342			lapic->icr_timer = data;
1343			vlapic_icrtmr_write_handler(vlapic);
1344			break;
1345
1346		case APIC_OFFSET_TIMER_DCR:
1347			lapic->dcr_timer = data;
1348			vlapic_dcr_write_handler(vlapic);
1349			break;
1350
1351		case APIC_OFFSET_ESR:
1352			vlapic_esr_write_handler(vlapic);
1353			break;
1354
1355		case APIC_OFFSET_SELF_IPI:
1356			if (x2apic(vlapic))
1357				vlapic_self_ipi_handler(vlapic, data);
1358			break;
1359
1360		case APIC_OFFSET_VER:
1361		case APIC_OFFSET_APR:
1362		case APIC_OFFSET_PPR:
1363		case APIC_OFFSET_RRR:
1364		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
1365		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
1366		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
1367		case APIC_OFFSET_TIMER_CCR:
1368		default:
1369			// Read only.
1370			break;
1371	}
1372
1373	return (retval);
1374}
1375
1376static void
1377vlapic_reset(struct vlapic *vlapic)
1378{
1379	struct LAPIC *lapic;
1380
1381	lapic = vlapic->apic_page;
1382	bzero(lapic, sizeof(struct LAPIC));
1383
1384	lapic->id = vlapic_get_id(vlapic);
1385	lapic->version = VLAPIC_VERSION;
1386	lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
1387	lapic->dfr = 0xffffffff;
1388	lapic->svr = APIC_SVR_VECTOR;
1389	vlapic_mask_lvts(vlapic);
1390	vlapic_reset_tmr(vlapic);
1391
1392	lapic->dcr_timer = 0;
1393	vlapic_dcr_write_handler(vlapic);
1394
1395	if (vlapic->vcpuid == 0)
1396		vlapic->boot_state = BS_RUNNING;	/* BSP */
1397	else
1398		vlapic->boot_state = BS_INIT;		/* AP */
1399
1400	vlapic->svr_last = lapic->svr;
1401}
1402
1403void
1404vlapic_init(struct vlapic *vlapic)
1405{
1406	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
1407	KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU,
1408	    ("vlapic_init: vcpuid is not initialized"));
1409	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
1410	    "initialized"));
1411
1412	/*
1413	 * If the vlapic is configured in x2apic mode then it will be
1414	 * accessed in the critical section via the MSR emulation code.
1415	 *
1416	 * Therefore the timer mutex must be a spinlock because blockable
1417	 * mutexes cannot be acquired in a critical section.
1418	 */
1419	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
1420	callout_init(&vlapic->callout, 1);
1421
1422	vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
1423
1424	if (vlapic->vcpuid == 0)
1425		vlapic->msr_apicbase |= APICBASE_BSP;
1426
1427	vlapic_reset(vlapic);
1428}
1429
1430void
1431vlapic_cleanup(struct vlapic *vlapic)
1432{
1433
1434	callout_drain(&vlapic->callout);
1435}
1436
1437uint64_t
1438vlapic_get_apicbase(struct vlapic *vlapic)
1439{
1440
1441	return (vlapic->msr_apicbase);
1442}
1443
1444int
1445vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
1446{
1447
1448	if (vlapic->msr_apicbase != new) {
1449		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
1450		    "not supported", vlapic->msr_apicbase, new);
1451		return (-1);
1452	}
1453
1454	return (0);
1455}
1456
1457void
1458vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
1459{
1460	struct vlapic *vlapic;
1461	struct LAPIC *lapic;
1462
1463	vlapic = vm_lapic(vm, vcpuid);
1464
1465	if (state == X2APIC_DISABLED)
1466		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
1467	else
1468		vlapic->msr_apicbase |= APICBASE_X2APIC;
1469
1470	/*
1471	 * Reset the local APIC registers whose values are mode-dependent.
1472	 *
1473	 * XXX this works because the APIC mode can be changed only at vcpu
1474	 * initialization time.
1475	 */
1476	lapic = vlapic->apic_page;
1477	lapic->id = vlapic_get_id(vlapic);
1478	if (x2apic(vlapic)) {
1479		lapic->ldr = x2apic_ldr(vlapic);
1480		lapic->dfr = 0;
1481	} else {
1482		lapic->ldr = 0;
1483		lapic->dfr = 0xffffffff;
1484	}
1485
1486	if (state == X2APIC_ENABLED) {
1487		if (vlapic->ops.enable_x2apic_mode)
1488			(*vlapic->ops.enable_x2apic_mode)(vlapic);
1489	}
1490}
1491
1492void
1493vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
1494    int delmode, int vec)
1495{
1496	bool lowprio;
1497	int vcpuid;
1498	cpuset_t dmask;
1499
1500	if (delmode != IOART_DELFIXED &&
1501	    delmode != IOART_DELLOPRI &&
1502	    delmode != IOART_DELEXINT) {
1503		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
1504		return;
1505	}
1506	lowprio = (delmode == IOART_DELLOPRI);
1507
1508	/*
1509	 * We don't provide any virtual interrupt redirection hardware so
1510	 * all interrupts originating from the ioapic or MSI specify the
1511	 * 'dest' in the legacy xAPIC format.
1512	 */
1513	vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false);
1514
1515	while ((vcpuid = CPU_FFS(&dmask)) != 0) {
1516		vcpuid--;
1517		CPU_CLR(vcpuid, &dmask);
1518		if (delmode == IOART_DELEXINT) {
1519			vm_inject_extint(vm, vcpuid);
1520		} else {
1521			lapic_set_intr(vm, vcpuid, vec, level);
1522		}
1523	}
1524}
1525
1526void
1527vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
1528{
1529	/*
1530	 * Post an interrupt to the vcpu currently running on 'hostcpu'.
1531	 *
1532	 * This is done by leveraging features like Posted Interrupts (Intel)
1533	 * Doorbell MSR (AMD AVIC) that avoid a VM exit.
1534	 *
1535	 * If neither of these features are available then fallback to
1536	 * sending an IPI to 'hostcpu'.
1537	 */
1538	if (vlapic->ops.post_intr)
1539		(*vlapic->ops.post_intr)(vlapic, hostcpu);
1540	else
1541		ipi_cpu(hostcpu, ipinum);
1542}
1543
1544bool
1545vlapic_enabled(struct vlapic *vlapic)
1546{
1547	struct LAPIC *lapic = vlapic->apic_page;
1548
1549	if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
1550	    (lapic->svr & APIC_SVR_ENABLE) != 0)
1551		return (true);
1552	else
1553		return (false);
1554}
1555
1556static void
1557vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
1558{
1559	struct LAPIC *lapic;
1560	uint32_t *tmrptr, mask;
1561	int idx;
1562
1563	lapic = vlapic->apic_page;
1564	tmrptr = &lapic->tmr0;
1565	idx = (vector / 32) * 4;
1566	mask = 1 << (vector % 32);
1567	if (level)
1568		tmrptr[idx] |= mask;
1569	else
1570		tmrptr[idx] &= ~mask;
1571
1572	if (vlapic->ops.set_tmr != NULL)
1573		(*vlapic->ops.set_tmr)(vlapic, vector, level);
1574}
1575
1576void
1577vlapic_reset_tmr(struct vlapic *vlapic)
1578{
1579	int vector;
1580
1581	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");
1582
1583	for (vector = 0; vector <= 255; vector++)
1584		vlapic_set_tmr(vlapic, vector, false);
1585}
1586
1587void
1588vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
1589    int delmode, int vector)
1590{
1591	cpuset_t dmask;
1592	bool lowprio;
1593
1594	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));
1595
1596	/*
1597	 * A level trigger is valid only for fixed and lowprio delivery modes.
1598	 */
1599	if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) {
1600		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
1601		    "delivery-mode %d", delmode);
1602		return;
1603	}
1604
1605	lowprio = (delmode == APIC_DELMODE_LOWPRIO);
1606	vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false);
1607
1608	if (!CPU_ISSET(vlapic->vcpuid, &dmask))
1609		return;
1610
1611	VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
1612	vlapic_set_tmr(vlapic, vector, true);
1613}
1614