vmx_msr.c revision 276349
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/intel/vmx_msr.c 276349 2014-12-28 21:27:13Z neel $
 */
281541Srgrimes
291541Srgrimes#include <sys/cdefs.h>
301541Srgrimes__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/intel/vmx_msr.c 276349 2014-12-28 21:27:13Z neel $");
311541Srgrimes
321541Srgrimes#include <sys/param.h>
331541Srgrimes#include <sys/systm.h>
341541Srgrimes#include <sys/cpuset.h>
351541Srgrimes
361541Srgrimes#include <machine/clock.h>
371541Srgrimes#include <machine/cpufunc.h>
381541Srgrimes#include <machine/md_var.h>
3944739Sjulian#include <machine/specialreg.h>
401541Srgrimes#include <machine/vmm.h>
411541Srgrimes
421541Srgrimes#include "vmx.h"
431541Srgrimes#include "vmx_msr.h"
441541Srgrimes
451541Srgrimesstatic boolean_t
461541Srgrimesvmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
471541Srgrimes{
481541Srgrimes
4933058Sbde	if (msr_val & (1UL << (bitpos + 32)))
5033058Sbde		return (TRUE);
519507Sdg	else
521541Srgrimes		return (FALSE);
5344739Sjulian}
5444739Sjulian
555455Sdgstatic boolean_t
565455Sdgvmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
5740286Sdg{
589507Sdg
599507Sdg	if ((msr_val & (1UL << bitpos)) == 0)
6043129Sdillon		return (TRUE);
6112767Sdyson	else
6242957Sdillon		return (FALSE);
6344739Sjulian}
641541Srgrimes
651541Srgrimesuint32_t
661541Srgrimesvmx_revision(void)
671541Srgrimes{
681541Srgrimes
691541Srgrimes	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
701541Srgrimes}
711541Srgrimes
721541Srgrimes/*
731541Srgrimes * Generate a bitmask to be used for the VMCS execution control fields.
741541Srgrimes *
751541Srgrimes * The caller specifies what bits should be set to one in 'ones_mask'
761541Srgrimes * and what bits should be set to zero in 'zeros_mask'. The don't-care
771541Srgrimes * bits are set to the default value. The default values are obtained
781541Srgrimes * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
791541Srgrimes * VMX Capabilities".
801541Srgrimes *
811541Srgrimes * Returns zero on success and non-zero on error.
8234206Sdyson */
8334206Sdysonint
8434206Sdysonvmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
851541Srgrimes	       uint32_t zeros_mask, uint32_t *retval)
8630354Sphk{
8730354Sphk	int i;
8830354Sphk	uint64_t val, trueval;
8930354Sphk	boolean_t true_ctls_avail, one_allowed, zero_allowed;
9030354Sphk
919759Sbde	/* We cannot ask the same bit to be set to both '1' and '0' */
929759Sbde	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
9342957Sdillon		return (EINVAL);
949759Sbde
9540286Sdg	if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
967090Sbde		true_ctls_avail = TRUE;
979507Sdg	else
9842957Sdillon		true_ctls_avail = FALSE;
9942957Sdillon
1005455Sdg	val = rdmsr(ctl_reg);
1019507Sdg	if (true_ctls_avail)
1025455Sdg		trueval = rdmsr(true_ctl_reg);		/* step c */
1035455Sdg	else
1045455Sdg		trueval = val;				/* step a */
1055455Sdg
1065455Sdg	for (i = 0; i < 32; i++) {
10744739Sjulian		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
10844739Sjulian		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
10944739Sjulian
11044739Sjulian		KASSERT(one_allowed || zero_allowed,
11144739Sjulian			("invalid zero/one setting for bit %d of ctl 0x%0x, "
11242957Sdillon			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
11342957Sdillon
11442957Sdillon		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
11542957Sdillon			if (ones_mask & (1 << i))
11642957Sdillon				return (EINVAL);
11742957Sdillon			*retval &= ~(1 << i);
11842957Sdillon		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
11942957Sdillon			if (zeros_mask & (1 << i))
12042957Sdillon				return (EINVAL);
12142957Sdillon			*retval |= 1 << i;
12242957Sdillon		} else {
12343129Sdillon			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
12442957Sdillon				*retval &= ~(1 << i);
12542957Sdillon			else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
12642957Sdillon				*retval |= 1 << i;
12742957Sdillon			else if (!true_ctls_avail)
12842957Sdillon				*retval &= ~(1 << i);	/* b(iii) */
12942957Sdillon			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
13042957Sdillon				*retval &= ~(1 << i);
13143129Sdillon			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
13243129Sdillon				*retval |= 1 << i;
13342957Sdillon			else {
13442957Sdillon				panic("vmx_set_ctlreg: unable to determine "
13542957Sdillon				      "correct value of ctl bit %d for msr "
13642957Sdillon				      "0x%0x and true msr 0x%0x", i, ctl_reg,
13742957Sdillon				      true_ctl_reg);
13842957Sdillon			}
13942957Sdillon		}
14042957Sdillon	}
14142957Sdillon
14242957Sdillon	return (0);
14342957Sdillon}
14442957Sdillon
14542957Sdillonvoid
14642957Sdillonmsr_bitmap_initialize(char *bitmap)
14742957Sdillon{
14842957Sdillon
14942957Sdillon	memset(bitmap, 0xff, PAGE_SIZE);
15042957Sdillon}
15142957Sdillon
15242957Sdillonint
15342957Sdillonmsr_bitmap_change_access(char *bitmap, u_int msr, int access)
15442957Sdillon{
15542957Sdillon	int byte, bit;
15642957Sdillon
15742957Sdillon	if (msr <= 0x00001FFF)
15842957Sdillon		byte = msr / 8;
15942957Sdillon	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
1601541Srgrimes		byte = 1024 + (msr - 0xC0000000) / 8;
1611541Srgrimes	else
1625455Sdg		return (EINVAL);
163
164	bit = msr & 0x7;
165
166	if (access & MSR_BITMAP_ACCESS_READ)
167		bitmap[byte] &= ~(1 << bit);
168	else
169		bitmap[byte] |= 1 << bit;
170
171	byte += 2048;
172	if (access & MSR_BITMAP_ACCESS_WRITE)
173		bitmap[byte] &= ~(1 << bit);
174	else
175		bitmap[byte] |= 1 << bit;
176
177	return (0);
178}
179
180static uint64_t misc_enable;
181static uint64_t platform_info;
182static uint64_t turbo_ratio_limit;
183static uint64_t host_msrs[GUEST_MSR_NUM];
184
185static bool
186nehalem_cpu(void)
187{
188	u_int family, model;
189
190	/*
191	 * The family:model numbers belonging to the Nehalem microarchitecture
192	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
193	 */
194	family = CPUID_TO_FAMILY(cpu_id);
195	model = CPUID_TO_MODEL(cpu_id);
196	if (family == 0x6) {
197		switch (model) {
198		case 0x1A:
199		case 0x1E:
200		case 0x1F:
201		case 0x2E:
202			return (true);
203		default:
204			break;
205		}
206	}
207	return (false);
208}
209
210static bool
211westmere_cpu(void)
212{
213	u_int family, model;
214
215	/*
216	 * The family:model numbers belonging to the Westmere microarchitecture
217	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
218	 */
219	family = CPUID_TO_FAMILY(cpu_id);
220	model = CPUID_TO_MODEL(cpu_id);
221	if (family == 0x6) {
222		switch (model) {
223		case 0x25:
224		case 0x2C:
225			return (true);
226		default:
227			break;
228		}
229	}
230	return (false);
231}
232
233void
234vmx_msr_init(void)
235{
236	uint64_t bus_freq, ratio;
237	int i;
238
239	/*
240	 * It is safe to cache the values of the following MSRs because
241	 * they don't change based on curcpu, curproc or curthread.
242	 */
243	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
244	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
245	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
246	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
247
248	/*
249	 * Initialize emulated MSRs
250	 */
251	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
252	/*
253	 * Set mandatory bits
254	 *  11:   branch trace disabled
255	 *  12:   PEBS unavailable
256	 * Clear unsupported features
257	 *  16:   SpeedStep enable
258	 *  18:   enable MONITOR FSM
259	 */
260	misc_enable |= (1 << 12) | (1 << 11);
261	misc_enable &= ~((1 << 18) | (1 << 16));
262
263	if (nehalem_cpu() || westmere_cpu())
264		bus_freq = 133330000;		/* 133Mhz */
265	else
266		bus_freq = 100000000;		/* 100Mhz */
267
268	/*
269	 * XXXtime
270	 * The ratio should really be based on the virtual TSC frequency as
271	 * opposed to the host TSC.
272	 */
273	ratio = (tsc_freq / bus_freq) & 0xff;
274
275	/*
276	 * The register definition is based on the micro-architecture
277	 * but the following bits are always the same:
278	 * [15:8]  Maximum Non-Turbo Ratio
279	 * [28]    Programmable Ratio Limit for Turbo Mode
280	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
281	 * [47:40] Maximum Efficiency Ratio
282	 *
283	 * The other bits can be safely set to 0 on all
284	 * micro-architectures up to Haswell.
285	 */
286	platform_info = (ratio << 8) | (ratio << 40);
287
288	/*
289	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
290	 * dependent on the maximum cores per package supported by the micro-
291	 * architecture. For e.g., Westmere supports 6 cores per package and
292	 * uses the low 48 bits. Sandybridge support 8 cores per package and
293	 * uses up all 64 bits.
294	 *
295	 * However, the unused bits are reserved so we pretend that all bits
296	 * in this MSR are valid.
297	 */
298	for (i = 0; i < 8; i++)
299		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
300}
301
302void
303vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
304{
305	/*
306	 * The permissions bitmap is shared between all vcpus so initialize it
307	 * once when initializing the vBSP.
308	 */
309	if (vcpuid == 0) {
310		guest_msr_rw(vmx, MSR_LSTAR);
311		guest_msr_rw(vmx, MSR_CSTAR);
312		guest_msr_rw(vmx, MSR_STAR);
313		guest_msr_rw(vmx, MSR_SF_MASK);
314		guest_msr_rw(vmx, MSR_KGSBASE);
315	}
316	return;
317}
318
319void
320vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
321{
322	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
323
324	/* Save host MSRs (if any) and restore guest MSRs */
325	wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
326	wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
327	wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
328	wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
329	wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
330}
331
332void
333vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
334{
335	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
336
337	/* Save guest MSRs */
338	guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
339	guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
340	guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
341	guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
342	guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
343
344	/* Restore host MSRs */
345	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
346	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
347	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
348	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
349
350	/* MSR_KGSBASE will be restored on the way back to userspace */
351}
352
353int
354vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
355{
356	int error = 0;
357
358	switch (num) {
359	case MSR_IA32_MISC_ENABLE:
360		*val = misc_enable;
361		break;
362	case MSR_PLATFORM_INFO:
363		*val = platform_info;
364		break;
365	case MSR_TURBO_RATIO_LIMIT:
366	case MSR_TURBO_RATIO_LIMIT1:
367		*val = turbo_ratio_limit;
368		break;
369	default:
370		error = EINVAL;
371		break;
372	}
373	return (error);
374}
375
376int
377vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
378{
379	int error = 0;
380
381	switch (num) {
382	default:
383		error = EINVAL;
384		break;
385	}
386
387	return (error);
388}
389