vmx_msr.c revision 284900
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/amd64/vmm/intel/vmx_msr.c 284900 2015-06-28 03:22:26Z neel $
 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/10/sys/amd64/vmm/intel/vmx_msr.c 284900 2015-06-28 03:22:26Z neel $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34
35#include <machine/clock.h>
36#include <machine/cpufunc.h>
37#include <machine/md_var.h>
38#include <machine/specialreg.h>
39#include <machine/vmm.h>
40
41#include "vmx.h"
42#include "vmx_msr.h"
43
44static boolean_t
45vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
46{
47
48	if (msr_val & (1UL << (bitpos + 32)))
49		return (TRUE);
50	else
51		return (FALSE);
52}
53
54static boolean_t
55vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
56{
57
58	if ((msr_val & (1UL << bitpos)) == 0)
59		return (TRUE);
60	else
61		return (FALSE);
62}
63
64uint32_t
65vmx_revision(void)
66{
67
68	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
69}
70
71/*
72 * Generate a bitmask to be used for the VMCS execution control fields.
73 *
74 * The caller specifies what bits should be set to one in 'ones_mask'
75 * and what bits should be set to zero in 'zeros_mask'. The don't-care
76 * bits are set to the default value. The default values are obtained
77 * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
78 * VMX Capabilities".
79 *
80 * Returns zero on success and non-zero on error.
81 */
82int
83vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
84	       uint32_t zeros_mask, uint32_t *retval)
85{
86	int i;
87	uint64_t val, trueval;
88	boolean_t true_ctls_avail, one_allowed, zero_allowed;
89
90	/* We cannot ask the same bit to be set to both '1' and '0' */
91	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
92		return (EINVAL);
93
94	if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
95		true_ctls_avail = TRUE;
96	else
97		true_ctls_avail = FALSE;
98
99	val = rdmsr(ctl_reg);
100	if (true_ctls_avail)
101		trueval = rdmsr(true_ctl_reg);		/* step c */
102	else
103		trueval = val;				/* step a */
104
105	for (i = 0; i < 32; i++) {
106		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
107		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
108
109		KASSERT(one_allowed || zero_allowed,
110			("invalid zero/one setting for bit %d of ctl 0x%0x, "
111			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
112
113		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
114			if (ones_mask & (1 << i))
115				return (EINVAL);
116			*retval &= ~(1 << i);
117		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
118			if (zeros_mask & (1 << i))
119				return (EINVAL);
120			*retval |= 1 << i;
121		} else {
122			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
123				*retval &= ~(1 << i);
124			else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
125				*retval |= 1 << i;
126			else if (!true_ctls_avail)
127				*retval &= ~(1 << i);	/* b(iii) */
128			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
129				*retval &= ~(1 << i);
130			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
131				*retval |= 1 << i;
132			else {
133				panic("vmx_set_ctlreg: unable to determine "
134				      "correct value of ctl bit %d for msr "
135				      "0x%0x and true msr 0x%0x", i, ctl_reg,
136				      true_ctl_reg);
137			}
138		}
139	}
140
141	return (0);
142}
143
144void
145msr_bitmap_initialize(char *bitmap)
146{
147
148	memset(bitmap, 0xff, PAGE_SIZE);
149}
150
151int
152msr_bitmap_change_access(char *bitmap, u_int msr, int access)
153{
154	int byte, bit;
155
156	if (msr <= 0x00001FFF)
157		byte = msr / 8;
158	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
159		byte = 1024 + (msr - 0xC0000000) / 8;
160	else
161		return (EINVAL);
162
163	bit = msr & 0x7;
164
165	if (access & MSR_BITMAP_ACCESS_READ)
166		bitmap[byte] &= ~(1 << bit);
167	else
168		bitmap[byte] |= 1 << bit;
169
170	byte += 2048;
171	if (access & MSR_BITMAP_ACCESS_WRITE)
172		bitmap[byte] &= ~(1 << bit);
173	else
174		bitmap[byte] |= 1 << bit;
175
176	return (0);
177}
178
179static uint64_t misc_enable;
180static uint64_t platform_info;
181static uint64_t turbo_ratio_limit;
182static uint64_t host_msrs[GUEST_MSR_NUM];
183
184static bool
185nehalem_cpu(void)
186{
187	u_int family, model;
188
189	/*
190	 * The family:model numbers belonging to the Nehalem microarchitecture
191	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
192	 */
193	family = CPUID_TO_FAMILY(cpu_id);
194	model = CPUID_TO_MODEL(cpu_id);
195	if (family == 0x6) {
196		switch (model) {
197		case 0x1A:
198		case 0x1E:
199		case 0x1F:
200		case 0x2E:
201			return (true);
202		default:
203			break;
204		}
205	}
206	return (false);
207}
208
209static bool
210westmere_cpu(void)
211{
212	u_int family, model;
213
214	/*
215	 * The family:model numbers belonging to the Westmere microarchitecture
216	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
217	 */
218	family = CPUID_TO_FAMILY(cpu_id);
219	model = CPUID_TO_MODEL(cpu_id);
220	if (family == 0x6) {
221		switch (model) {
222		case 0x25:
223		case 0x2C:
224			return (true);
225		default:
226			break;
227		}
228	}
229	return (false);
230}
231
/*
 * Check whether 'val' is an acceptable value for the IA32_PAT MSR.
 *
 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
 *
 * The value is treated as eight one-byte fields, PA0 (lowest byte)
 * through PA7. A field is valid only if it is 0, 1, 4, 5, 6 or 7;
 * encodings 2, 3 and anything >= 8 are rejected.
 *
 * Returns true if all eight fields are valid, false otherwise.
 */
static bool
pat_valid(uint64_t val)
{
	int i;

	/* Consume one PAx byte per iteration, lowest byte first. */
	for (i = 0; i < 8; i++, val >>= 8) {
		switch (val & 0xff) {
		case 0:
		case 1:
		case 4:
		case 5:
		case 6:
		case 7:
			break;
		default:
			return (false);
		}
	}
	return (true);
}
250
251void
252vmx_msr_init(void)
253{
254	uint64_t bus_freq, ratio;
255	int i;
256
257	/*
258	 * It is safe to cache the values of the following MSRs because
259	 * they don't change based on curcpu, curproc or curthread.
260	 */
261	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
262	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
263	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
264	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
265
266	/*
267	 * Initialize emulated MSRs
268	 */
269	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
270	/*
271	 * Set mandatory bits
272	 *  11:   branch trace disabled
273	 *  12:   PEBS unavailable
274	 * Clear unsupported features
275	 *  16:   SpeedStep enable
276	 *  18:   enable MONITOR FSM
277	 */
278	misc_enable |= (1 << 12) | (1 << 11);
279	misc_enable &= ~((1 << 18) | (1 << 16));
280
281	if (nehalem_cpu() || westmere_cpu())
282		bus_freq = 133330000;		/* 133Mhz */
283	else
284		bus_freq = 100000000;		/* 100Mhz */
285
286	/*
287	 * XXXtime
288	 * The ratio should really be based on the virtual TSC frequency as
289	 * opposed to the host TSC.
290	 */
291	ratio = (tsc_freq / bus_freq) & 0xff;
292
293	/*
294	 * The register definition is based on the micro-architecture
295	 * but the following bits are always the same:
296	 * [15:8]  Maximum Non-Turbo Ratio
297	 * [28]    Programmable Ratio Limit for Turbo Mode
298	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
299	 * [47:40] Maximum Efficiency Ratio
300	 *
301	 * The other bits can be safely set to 0 on all
302	 * micro-architectures up to Haswell.
303	 */
304	platform_info = (ratio << 8) | (ratio << 40);
305
306	/*
307	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
308	 * dependent on the maximum cores per package supported by the micro-
309	 * architecture. For e.g., Westmere supports 6 cores per package and
310	 * uses the low 48 bits. Sandybridge support 8 cores per package and
311	 * uses up all 64 bits.
312	 *
313	 * However, the unused bits are reserved so we pretend that all bits
314	 * in this MSR are valid.
315	 */
316	for (i = 0; i < 8; i++)
317		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
318}
319
320void
321vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
322{
323	uint64_t *guest_msrs;
324
325	guest_msrs = vmx->guest_msrs[vcpuid];
326
327	/*
328	 * The permissions bitmap is shared between all vcpus so initialize it
329	 * once when initializing the vBSP.
330	 */
331	if (vcpuid == 0) {
332		guest_msr_rw(vmx, MSR_LSTAR);
333		guest_msr_rw(vmx, MSR_CSTAR);
334		guest_msr_rw(vmx, MSR_STAR);
335		guest_msr_rw(vmx, MSR_SF_MASK);
336		guest_msr_rw(vmx, MSR_KGSBASE);
337	}
338
339	/*
340	 * Initialize guest IA32_PAT MSR with default value after reset.
341	 */
342	guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
343	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
344	    PAT_VALUE(2, PAT_UNCACHED)		|
345	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
346	    PAT_VALUE(4, PAT_WRITE_BACK)	|
347	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
348	    PAT_VALUE(6, PAT_UNCACHED)		|
349	    PAT_VALUE(7, PAT_UNCACHEABLE);
350
351	return;
352}
353
354void
355vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
356{
357	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
358
359	/* Save host MSRs (if any) and restore guest MSRs */
360	wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
361	wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
362	wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
363	wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
364	wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
365}
366
367void
368vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
369{
370	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
371
372	/* Save guest MSRs */
373	guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
374	guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
375	guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
376	guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
377	guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
378
379	/* Restore host MSRs */
380	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
381	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
382	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
383	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
384
385	/* MSR_KGSBASE will be restored on the way back to userspace */
386}
387
388int
389vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
390{
391	const uint64_t *guest_msrs;
392	int error;
393
394	guest_msrs = vmx->guest_msrs[vcpuid];
395	error = 0;
396
397	switch (num) {
398	case MSR_MCG_CAP:
399	case MSR_MCG_STATUS:
400		*val = 0;
401		break;
402	case MSR_MTRRcap:
403	case MSR_MTRRdefType:
404	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
405	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
406	case MSR_MTRR64kBase:
407		*val = 0;
408		break;
409	case MSR_IA32_MISC_ENABLE:
410		*val = misc_enable;
411		break;
412	case MSR_PLATFORM_INFO:
413		*val = platform_info;
414		break;
415	case MSR_TURBO_RATIO_LIMIT:
416	case MSR_TURBO_RATIO_LIMIT1:
417		*val = turbo_ratio_limit;
418		break;
419	case MSR_PAT:
420		*val = guest_msrs[IDX_MSR_PAT];
421		break;
422	default:
423		error = EINVAL;
424		break;
425	}
426	return (error);
427}
428
429int
430vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
431{
432	uint64_t *guest_msrs;
433	uint64_t changed;
434	int error;
435
436	guest_msrs = vmx->guest_msrs[vcpuid];
437	error = 0;
438
439	switch (num) {
440	case MSR_MCG_CAP:
441	case MSR_MCG_STATUS:
442		break;		/* ignore writes */
443	case MSR_MTRRcap:
444		vm_inject_gp(vmx->vm, vcpuid);
445		break;
446	case MSR_MTRRdefType:
447	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
448	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
449	case MSR_MTRR64kBase:
450		break;		/* Ignore writes */
451	case MSR_IA32_MISC_ENABLE:
452		changed = val ^ misc_enable;
453		/*
454		 * If the host has disabled the NX feature then the guest
455		 * also cannot use it. However, a Linux guest will try to
456		 * enable the NX feature by writing to the MISC_ENABLE MSR.
457		 *
458		 * This can be safely ignored because the memory management
459		 * code looks at CPUID.80000001H:EDX.NX to check if the
460		 * functionality is actually enabled.
461		 */
462		changed &= ~(1UL << 34);
463
464		/*
465		 * Punt to userspace if any other bits are being modified.
466		 */
467		if (changed)
468			error = EINVAL;
469
470		break;
471	case MSR_PAT:
472		if (pat_valid(val))
473			guest_msrs[IDX_MSR_PAT] = val;
474		else
475			vm_inject_gp(vmx->vm, vcpuid);
476		break;
477	case MSR_TSC:
478		error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
479		break;
480	default:
481		error = EINVAL;
482		break;
483	}
484
485	return (error);
486}
487