11556Srgrimes/*-
21556Srgrimes * Copyright (c) 2011 NetApp, Inc.
31556Srgrimes * All rights reserved.
41556Srgrimes *
51556Srgrimes * Redistribution and use in source and binary forms, with or without
61556Srgrimes * modification, are permitted provided that the following conditions
71556Srgrimes * are met:
81556Srgrimes * 1. Redistributions of source code must retain the above copyright
91556Srgrimes *    notice, this list of conditions and the following disclaimer.
101556Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111556Srgrimes *    notice, this list of conditions and the following disclaimer in the
121556Srgrimes *    documentation and/or other materials provided with the distribution.
131556Srgrimes *
141556Srgrimes * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
151556Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
161556Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
171556Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
181556Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
191556Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
201556Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
211556Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
221556Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
231556Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
241556Srgrimes * SUCH DAMAGE.
251556Srgrimes *
261556Srgrimes * $FreeBSD$
271556Srgrimes */
281556Srgrimes
291556Srgrimes#include <sys/cdefs.h>
301556Srgrimes__FBSDID("$FreeBSD$");
311556Srgrimes
321556Srgrimes#include <sys/param.h>
331556Srgrimes#include <sys/pcpu.h>
3436150Scharnier#include <sys/systm.h>
3536150Scharnier#include <sys/cpuset.h>
3636150Scharnier
371556Srgrimes#include <machine/clock.h>
3899110Sobrien#include <machine/cpufunc.h>
3999110Sobrien#include <machine/md_var.h>
401556Srgrimes#include <machine/segments.h>
41100437Stjr#include <machine/specialreg.h>
4217987Speter
43102576Skeramida#include <machine/vmm.h>
4417987Speter
45153091Sstefanf#include "vmm_host.h"
4645266Scracauer#include "x86.h"
4753891Scracauer
/*
 * Highest hypervisor CPUID leaf reported to the guest.  Requests in the
 * 0x40000000 range that are not handled as extended leaves are clamped to
 * this value, and it is returned in %eax for leaf 0x40000000.
 */
#define	CPUID_VM_HIGH		0x40000000

/*
 * Signature copied into %ebx, %ecx and %edx (four bytes each) for leaf
 * 0x40000000.  The array is exactly 12 bytes long, so the string literal's
 * terminating NUL is not stored.
 */
static const char bhyve_id[12] = "bhyve bhyve ";

/* Count of CPUID requests that reached the pass-through default case. */
static uint64_t bhyve_xcpuids;
531556Srgrimes
541556Srgrimesint
551556Srgrimesx86_emulate_cpuid(struct vm *vm, int vcpu_id,
561556Srgrimes		  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
571556Srgrimes{
581556Srgrimes	const struct xsave_limits *limits;
591556Srgrimes	uint64_t cr4;
601556Srgrimes	int error, enable_invpcid;
611556Srgrimes	unsigned int 	func, regs[4];
621556Srgrimes	enum x2apic_state x2apic_state;
631556Srgrimes
641556Srgrimes	/*
651556Srgrimes	 * Requests for invalid CPUID levels should map to the highest
661556Srgrimes	 * available level instead.
671556Srgrimes	 */
681556Srgrimes	if (cpu_exthigh != 0 && *eax >= 0x80000000) {
691556Srgrimes		if (*eax > cpu_exthigh)
7017987Speter			*eax = cpu_exthigh;
711556Srgrimes	} else if (*eax >= 0x40000000) {
7217987Speter		if (*eax > CPUID_VM_HIGH)
731556Srgrimes			*eax = CPUID_VM_HIGH;
7417987Speter	} else if (*eax > cpu_high) {
751556Srgrimes		*eax = cpu_high;
761556Srgrimes	}
77201053Sjilles
781556Srgrimes	func = *eax;
791556Srgrimes
801556Srgrimes	/*
81193169Sstefanf	 * In general the approach used for CPU topology is to
821556Srgrimes	 * advertise a flat topology where all CPUs are packages with
831556Srgrimes	 * no multi-core or SMT.
841556Srgrimes	 */
851556Srgrimes	switch (func) {
861556Srgrimes		/*
8717987Speter		 * Pass these through to the guest
881556Srgrimes		 */
891556Srgrimes		case CPUID_0000_0000:
90149933Sstefanf		case CPUID_0000_0002:
91149933Sstefanf		case CPUID_0000_0003:
9290111Simp		case CPUID_8000_0000:
9390111Simp		case CPUID_8000_0002:
9490111Simp		case CPUID_8000_0003:
9590111Simp		case CPUID_8000_0004:
9690111Simp		case CPUID_8000_0006:
9790111Simp		case CPUID_8000_0008:
981556Srgrimes			cpuid_count(*eax, *ecx, regs);
991556Srgrimes			break;
1001556Srgrimes
1011556Srgrimes		case CPUID_8000_0001:
1021556Srgrimes			/*
1031556Srgrimes			 * Hide rdtscp/ia32_tsc_aux until we know how
1041556Srgrimes			 * to deal with them.
1051556Srgrimes			 */
1061556Srgrimes			cpuid_count(*eax, *ecx, regs);
1071556Srgrimes			regs[3] &= ~AMDID_RDTSCP;
1081556Srgrimes			break;
1091556Srgrimes
1101556Srgrimes		case CPUID_8000_0007:
1111556Srgrimes			cpuid_count(*eax, *ecx, regs);
1121556Srgrimes			/*
1131556Srgrimes			 * If the host TSCs are not synchronized across
1141556Srgrimes			 * physical cpus then we cannot advertise an
1151556Srgrimes			 * invariant tsc to a vcpu.
1161556Srgrimes			 *
1171556Srgrimes			 * XXX This still falls short because the vcpu
1181556Srgrimes			 * can observe the TSC moving backwards as it
1191556Srgrimes			 * migrates across physical cpus. But at least
1201556Srgrimes			 * it should discourage the guest from using the
12146684Skris			 * TSC to keep track of time.
1221556Srgrimes			 */
1231556Srgrimes			if (!smp_tsc)
12417987Speter				regs[3] &= ~AMDPM_TSC_INVARIANT;
12590111Simp			break;
1261556Srgrimes
1271556Srgrimes		case CPUID_0000_0001:
1281556Srgrimes			do_cpuid(1, regs);
1291556Srgrimes
1301556Srgrimes			error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
1311556Srgrimes			if (error) {
1321556Srgrimes				panic("x86_emulate_cpuid: error %d "
1331556Srgrimes				      "fetching x2apic state", error);
1341556Srgrimes			}
1351556Srgrimes
1361556Srgrimes			/*
1371556Srgrimes			 * Override the APIC ID only in ebx
1381556Srgrimes			 */
1391556Srgrimes			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
1401556Srgrimes			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
1411556Srgrimes
1421556Srgrimes			/*
1431556Srgrimes			 * Don't expose VMX, SpeedStep or TME capability.
1441556Srgrimes			 * Advertise x2APIC capability and Hypervisor guest.
1451556Srgrimes			 */
146193169Sstefanf			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
1471556Srgrimes
1481556Srgrimes			regs[2] |= CPUID2_HV;
1491556Srgrimes
1501556Srgrimes			if (x2apic_state != X2APIC_DISABLED)
1511556Srgrimes				regs[2] |= CPUID2_X2APIC;
1521556Srgrimes			else
1531556Srgrimes				regs[2] &= ~CPUID2_X2APIC;
1541556Srgrimes
1551556Srgrimes			/*
1561556Srgrimes			 * Only advertise CPUID2_XSAVE in the guest if
157193169Sstefanf			 * the host is using XSAVE.
15890111Simp			 */
1591556Srgrimes			if (!(regs[2] & CPUID2_OSXSAVE))
1601556Srgrimes				regs[2] &= ~CPUID2_XSAVE;
161194128Sjilles
1621556Srgrimes			/*
163194128Sjilles			 * If CPUID2_XSAVE is being advertised and the
164194128Sjilles			 * guest has set CR4_XSAVE, set
1651556Srgrimes			 * CPUID2_OSXSAVE.
1661556Srgrimes			 */
1671556Srgrimes			regs[2] &= ~CPUID2_OSXSAVE;
168194128Sjilles			if (regs[2] & CPUID2_XSAVE) {
169194128Sjilles				error = vm_get_register(vm, vcpu_id,
170194128Sjilles				    VM_REG_GUEST_CR4, &cr4);
171194128Sjilles				if (error)
172194128Sjilles					panic("x86_emulate_cpuid: error %d "
173194128Sjilles					      "fetching %%cr4", error);
1741556Srgrimes				if (cr4 & CR4_XSAVE)
1751556Srgrimes					regs[2] |= CPUID2_OSXSAVE;
1761556Srgrimes			}
1771556Srgrimes
178194128Sjilles			/*
179194128Sjilles			 * Hide monitor/mwait until we know how to deal with
1801556Srgrimes			 * these instructions.
1811556Srgrimes			 */
1821556Srgrimes			regs[2] &= ~CPUID2_MON;
1831556Srgrimes
1841556Srgrimes                        /*
1851556Srgrimes			 * Hide the performance and debug features.
1861556Srgrimes			 */
1871556Srgrimes			regs[2] &= ~CPUID2_PDCM;
1881556Srgrimes
18990111Simp			/*
19017987Speter			 * No TSC deadline support in the APIC yet
191149927Sstefanf			 */
192149927Sstefanf			regs[2] &= ~CPUID2_TSCDLT;
193149927Sstefanf
1941556Srgrimes			/*
1951556Srgrimes			 * Hide thermal monitoring
1961556Srgrimes			 */
1971556Srgrimes			regs[3] &= ~(CPUID_ACPI | CPUID_TM);
1981556Srgrimes
19917987Speter			/*
2001556Srgrimes			 * Machine check handling is done in the host.
20117987Speter			 * Hide MTRR capability.
202149802Sstefanf			 */
2031556Srgrimes			regs[3] &= ~(CPUID_MCA | CPUID_MCE | CPUID_MTRR);
2041556Srgrimes
205149932Sstefanf                        /*
2061556Srgrimes                        * Hide the debug store capability.
2071556Srgrimes                        */
2081556Srgrimes			regs[3] &= ~CPUID_DS;
2091556Srgrimes
2101556Srgrimes			/*
2111556Srgrimes			 * Disable multi-core.
21218754Ssteve			 */
2131556Srgrimes			regs[1] &= ~CPUID_HTT_CORES;
21418754Ssteve			regs[3] &= ~CPUID_HTT;
2151556Srgrimes			break;
2161556Srgrimes
2171556Srgrimes		case CPUID_0000_0004:
2181556Srgrimes			do_cpuid(4, regs);
2191556Srgrimes
2201556Srgrimes			/*
2211556Srgrimes			 * Do not expose topology.
2221556Srgrimes			 *
2231556Srgrimes			 * The maximum number of processor cores in
2241556Srgrimes			 * this physical processor package and the
2251556Srgrimes			 * maximum number of threads sharing this
2261556Srgrimes			 * cache are encoded with "plus 1" encoding.
2271556Srgrimes			 * Adding one to the value in this register
2281556Srgrimes			 * field to obtains the actual value.
2291556Srgrimes			 *
2301556Srgrimes			 * Therefore 0 for both indicates 1 core per
231149927Sstefanf			 * package and no cache sharing.
2321556Srgrimes			 */
2331556Srgrimes			regs[0] &= 0xffff8000;
2341556Srgrimes			break;
2351556Srgrimes
2361556Srgrimes		case CPUID_0000_0007:
2371556Srgrimes			regs[0] = 0;
2381556Srgrimes			regs[1] = 0;
2391556Srgrimes			regs[2] = 0;
24020425Ssteve			regs[3] = 0;
2411556Srgrimes
24217987Speter			/* leaf 0 */
2431556Srgrimes			if (*ecx == 0) {
24420425Ssteve				cpuid_count(*eax, *ecx, regs);
24520425Ssteve
2461556Srgrimes				/* Only leaf 0 is supported */
2471556Srgrimes				regs[0] = 0;
2481556Srgrimes
2491556Srgrimes				/*
250149933Sstefanf				 * Expose known-safe features.
2511556Srgrimes				 */
2521556Srgrimes				regs[1] &= (CPUID_STDEXT_FSGSBASE |
253149933Sstefanf				    CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
2541556Srgrimes				    CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2 |
2551556Srgrimes				    CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
2561556Srgrimes				    CPUID_STDEXT_AVX512F |
2571556Srgrimes				    CPUID_STDEXT_AVX512PF |
2581556Srgrimes				    CPUID_STDEXT_AVX512ER |
2591556Srgrimes				    CPUID_STDEXT_AVX512CD);
2601556Srgrimes				regs[2] = 0;
2611556Srgrimes				regs[3] = 0;
2621556Srgrimes
2631556Srgrimes				/* Advertise INVPCID if it is enabled. */
2641556Srgrimes				error = vm_get_capability(vm, vcpu_id,
2651556Srgrimes				    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
2661556Srgrimes				if (error == 0 && enable_invpcid)
2671556Srgrimes					regs[1] |= CPUID_STDEXT_INVPCID;
2681556Srgrimes			}
269149927Sstefanf			break;
2701556Srgrimes
2711556Srgrimes		case CPUID_0000_0006:
2721556Srgrimes		case CPUID_0000_000A:
273149927Sstefanf			/*
2741556Srgrimes			 * Handle the access, but report 0 for
2751556Srgrimes			 * all options
2761556Srgrimes			 */
2771556Srgrimes			regs[0] = 0;
2781556Srgrimes			regs[1] = 0;
2791556Srgrimes			regs[2] = 0;
2801556Srgrimes			regs[3] = 0;
2811556Srgrimes			break;
2821556Srgrimes
283149927Sstefanf		case CPUID_0000_000B:
2841556Srgrimes			/*
2851556Srgrimes			 * Processor topology enumeration
2861556Srgrimes			 */
2871556Srgrimes			regs[0] = 0;
2881556Srgrimes			regs[1] = 0;
289149933Sstefanf			regs[2] = *ecx & 0xff;
29017987Speter			regs[3] = vcpu_id;
2911556Srgrimes			break;
2921556Srgrimes
2931556Srgrimes		case CPUID_0000_000D:
2941556Srgrimes			limits = vmm_get_xsave_limits();
2951556Srgrimes			if (!limits->xsave_enabled) {
2961556Srgrimes				regs[0] = 0;
2971556Srgrimes				regs[1] = 0;
2981556Srgrimes				regs[2] = 0;
2991556Srgrimes				regs[3] = 0;
3001556Srgrimes				break;
3011556Srgrimes			}
3021556Srgrimes
3031556Srgrimes			cpuid_count(*eax, *ecx, regs);
3041556Srgrimes			switch (*ecx) {
3051556Srgrimes			case 0:
3061556Srgrimes				/*
3071556Srgrimes				 * Only permit the guest to use bits
3081556Srgrimes				 * that are active in the host in
3091556Srgrimes				 * %xcr0.  Also, claim that the
3101556Srgrimes				 * maximum save area size is
3111556Srgrimes				 * equivalent to the host's current
3121556Srgrimes				 * save area size.  Since this runs
313149933Sstefanf				 * "inside" of vmrun(), it runs with
3141556Srgrimes				 * the guest's xcr0, so the current
3151556Srgrimes				 * save area size is correct as-is.
3161556Srgrimes				 */
3171556Srgrimes				regs[0] &= limits->xcr0_allowed;
3181556Srgrimes				regs[2] = limits->xsave_max_size;
3191556Srgrimes				regs[3] &= (limits->xcr0_allowed >> 32);
3201556Srgrimes				break;
3211556Srgrimes			case 1:
3221556Srgrimes				/* Only permit XSAVEOPT. */
3231556Srgrimes				regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
3241556Srgrimes				regs[1] = 0;
325149933Sstefanf				regs[2] = 0;
32617987Speter				regs[3] = 0;
3271556Srgrimes				break;
3281556Srgrimes			default:
3291556Srgrimes				/*
3301556Srgrimes				 * If the leaf is for a permitted feature,
3311556Srgrimes				 * pass through as-is, otherwise return
3321556Srgrimes				 * all zeroes.
3331556Srgrimes				 */
3341556Srgrimes				if (!(limits->xcr0_allowed & (1ul << *ecx))) {
33517987Speter					regs[0] = 0;
3361556Srgrimes					regs[1] = 0;
3371556Srgrimes					regs[2] = 0;
3381556Srgrimes					regs[3] = 0;
3391556Srgrimes				}
3401556Srgrimes				break;
3411556Srgrimes			}
3421556Srgrimes			break;
3431556Srgrimes
3441556Srgrimes		case 0x40000000:
3451556Srgrimes			regs[0] = CPUID_VM_HIGH;
346149933Sstefanf			bcopy(bhyve_id, &regs[1], 4);
3471556Srgrimes			bcopy(bhyve_id + 4, &regs[2], 4);
3481556Srgrimes			bcopy(bhyve_id + 8, &regs[3], 4);
3491556Srgrimes			break;
3501556Srgrimes
3511556Srgrimes		default:
3521556Srgrimes			/*
3531556Srgrimes			 * The leaf value has already been clamped so
3541556Srgrimes			 * simply pass this through, keeping count of
3551556Srgrimes			 * how many unhandled leaf values have been seen.
3561556Srgrimes			 */
3571556Srgrimes			atomic_add_long(&bhyve_xcpuids, 1);
3581556Srgrimes			cpuid_count(*eax, *ecx, regs);
3591556Srgrimes			break;
3601556Srgrimes	}
3611556Srgrimes
3621556Srgrimes	*eax = regs[0];
3631556Srgrimes	*ebx = regs[1];
3641556Srgrimes	*ecx = regs[2];
36590111Simp	*edx = regs[3];
36617987Speter
3671556Srgrimes	return (1);
3681556Srgrimes}
3691556Srgrimes