hv_hv.c revision 303984
1279377Simp/*-
2279377Simp * Copyright (c) 2009-2012 Microsoft Corp.
3279377Simp * Copyright (c) 2012 NetApp Inc.
4279377Simp * Copyright (c) 2012 Citrix Inc.
5279377Simp * All rights reserved.
6279377Simp *
7279377Simp * Redistribution and use in source and binary forms, with or without
8279377Simp * modification, are permitted provided that the following conditions
9279377Simp * are met:
10279377Simp * 1. Redistributions of source code must retain the above copyright
11279377Simp *    notice unmodified, this list of conditions, and the following
12279377Simp *    disclaimer.
13279377Simp * 2. Redistributions in binary form must reproduce the above copyright
14279377Simp *    notice, this list of conditions and the following disclaimer in the
15279377Simp *    documentation and/or other materials provided with the distribution.
16279377Simp *
17279377Simp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18279377Simp * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19279377Simp * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20279377Simp * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21279377Simp * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22279377Simp * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23279377Simp * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24279377Simp * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25279377Simp * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26279377Simp * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27279377Simp */
28279377Simp
/**
 * Implements low-level interactions with Hyper-V/Azure
 */
32279377Simp#include <sys/cdefs.h>
33279377Simp__FBSDID("$FreeBSD: releng/10.3/sys/dev/hyperv/vmbus/hv_hv.c 303984 2016-08-12 04:01:16Z glebius $");
34279377Simp
35279377Simp#include <sys/param.h>
36279377Simp#include <sys/kernel.h>
37279377Simp#include <sys/malloc.h>
38279377Simp#include <sys/pcpu.h>
39279377Simp#include <sys/timetc.h>
40279377Simp#include <machine/bus.h>
41279377Simp#include <machine/md_var.h>
42#include <vm/vm.h>
43#include <vm/vm_param.h>
44#include <vm/pmap.h>
45
46
47#include "hv_vmbus_priv.h"
48
49#define HV_NANOSECONDS_PER_SEC		1000000000L
50
51
52static u_int hv_get_timecount(struct timecounter *tc);
53
54u_int	hyperv_features;
55u_int	hyperv_recommends;
56
57/**
58 * Globals
59 */
60hv_vmbus_context hv_vmbus_g_context = {
61	.syn_ic_initialized = FALSE,
62	.hypercall_page = NULL,
63};
64
65static struct timecounter hv_timecounter = {
66	hv_get_timecount, 0, ~0u, HV_NANOSECONDS_PER_SEC/100, "Hyper-V", HV_NANOSECONDS_PER_SEC/100
67};
68
69static u_int
70hv_get_timecount(struct timecounter *tc)
71{
72	u_int now = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
73	return (now);
74}
75
76/**
77 * @brief Query the cpuid for presence of windows hypervisor
78 */
79int
80hv_vmbus_query_hypervisor_presence(void)
81{
82	if (vm_guest != VM_GUEST_HV)
83		return (0);
84
85	return (hv_high >= HV_X64_CPUID_MIN && hv_high <= HV_X64_CPUID_MAX);
86}
87
88/**
89 * @brief Get version of the windows hypervisor
90 */
91static int
92hv_vmbus_get_hypervisor_version(void)
93{
94	u_int regs[4];
95	unsigned int maxLeaf;
96	unsigned int op;
97
98	/*
99	 * Its assumed that this is called after confirming that
100	 * Viridian is present
101	 * Query id and revision.
102	 */
103	op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION;
104	do_cpuid(op, regs);
105
106	maxLeaf = regs[0];
107	op = HV_CPU_ID_FUNCTION_HV_INTERFACE;
108	do_cpuid(op, regs);
109
110	if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) {
111	    op = HV_CPU_ID_FUNCTION_MS_HV_VERSION;
112	    do_cpuid(op, regs);
113	}
114	return (maxLeaf);
115}
116
117/**
118 * @brief Invoke the specified hypercall
119 */
120static uint64_t
121hv_vmbus_do_hypercall(uint64_t control, void* input, void* output)
122{
123#ifdef __x86_64__
124	uint64_t hv_status = 0;
125	uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
126	uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
127	volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page;
128
129	__asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8");
130	__asm__ __volatile__ ("call *%3" : "=a"(hv_status):
131				"c" (control), "d" (input_address),
132				"m" (hypercall_page));
133	return (hv_status);
134#else
135	uint32_t control_high = control >> 32;
136	uint32_t control_low = control & 0xFFFFFFFF;
137	uint32_t hv_status_high = 1;
138	uint32_t hv_status_low = 1;
139	uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
140	uint32_t input_address_high = input_address >> 32;
141	uint32_t input_address_low = input_address & 0xFFFFFFFF;
142	uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
143	uint32_t output_address_high = output_address >> 32;
144	uint32_t output_address_low = output_address & 0xFFFFFFFF;
145	volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page;
146
147	__asm__ __volatile__ ("call *%8" : "=d"(hv_status_high),
148				"=a"(hv_status_low) : "d" (control_high),
149				"a" (control_low), "b" (input_address_high),
150				"c" (input_address_low),
151				"D"(output_address_high),
152				"S"(output_address_low), "m" (hypercall_page));
153	return (hv_status_low | ((uint64_t)hv_status_high << 32));
154#endif /* __x86_64__ */
155}
156
157/**
158 *  @brief Main initialization routine.
159 *
160 *  This routine must be called
161 *  before any other routines in here are called
162 */
163int
164hv_vmbus_init(void)
165{
166	int					max_leaf;
167	hv_vmbus_x64_msr_hypercall_contents	hypercall_msr;
168	void* 					virt_addr = 0;
169
170	memset(
171	    hv_vmbus_g_context.syn_ic_event_page,
172	    0,
173	    sizeof(hv_vmbus_handle) * MAXCPU);
174
175	memset(
176	    hv_vmbus_g_context.syn_ic_msg_page,
177	    0,
178	    sizeof(hv_vmbus_handle) * MAXCPU);
179
180	if (vm_guest != VM_GUEST_HV)
181	    goto cleanup;
182
183	max_leaf = hv_vmbus_get_hypervisor_version();
184
185	/*
186	 * Write our OS info
187	 */
188	uint64_t os_guest_info = HV_FREEBSD_GUEST_ID;
189	wrmsr(HV_X64_MSR_GUEST_OS_ID, os_guest_info);
190	hv_vmbus_g_context.guest_id = os_guest_info;
191
192	/*
193	 * See if the hypercall page is already set
194	 */
195	hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL);
196	virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
197	KASSERT(virt_addr != NULL,
198	    ("Error VMBUS: malloc failed to allocate page during init!"));
199	if (virt_addr == NULL)
200	    goto cleanup;
201
202	hypercall_msr.u.enable = 1;
203	hypercall_msr.u.guest_physical_address =
204	    (hv_get_phys_addr(virt_addr) >> PAGE_SHIFT);
205	wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t);
206
207	/*
208	 * Confirm that hypercall page did get set up
209	 */
210	hypercall_msr.as_uint64_t = 0;
211	hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL);
212
213	if (!hypercall_msr.u.enable)
214	    goto cleanup;
215
216	hv_vmbus_g_context.hypercall_page = virt_addr;
217
218	hv_et_init();
219
220	return (0);
221
222	cleanup:
223	if (virt_addr != NULL) {
224	    if (hypercall_msr.u.enable) {
225		hypercall_msr.as_uint64_t = 0;
226		wrmsr(HV_X64_MSR_HYPERCALL,
227					hypercall_msr.as_uint64_t);
228	    }
229
230	    free(virt_addr, M_DEVBUF);
231	}
232	return (ENOTSUP);
233}
234
235/**
236 * @brief Cleanup routine, called normally during driver unloading or exiting
237 */
238void
239hv_vmbus_cleanup(void)
240{
241	hv_vmbus_x64_msr_hypercall_contents hypercall_msr;
242
243	if (hv_vmbus_g_context.guest_id == HV_FREEBSD_GUEST_ID) {
244	    if (hv_vmbus_g_context.hypercall_page != NULL) {
245		hypercall_msr.as_uint64_t = 0;
246		wrmsr(HV_X64_MSR_HYPERCALL,
247					hypercall_msr.as_uint64_t);
248		free(hv_vmbus_g_context.hypercall_page, M_DEVBUF);
249		hv_vmbus_g_context.hypercall_page = NULL;
250	    }
251	}
252}
253
254/**
255 * @brief Post a message using the hypervisor message IPC.
256 * (This involves a hypercall.)
257 */
258hv_vmbus_status
259hv_vmbus_post_msg_via_msg_ipc(
260	hv_vmbus_connection_id	connection_id,
261	hv_vmbus_msg_type	message_type,
262	void*			payload,
263	size_t			payload_size)
264{
265	struct alignedinput {
266	    uint64_t alignment8;
267	    hv_vmbus_input_post_message msg;
268	};
269
270	hv_vmbus_input_post_message*	aligned_msg;
271	hv_vmbus_status 		status;
272	size_t				addr;
273
274	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
275	    return (EMSGSIZE);
276
277	addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF,
278			    M_ZERO | M_NOWAIT);
279	KASSERT(addr != 0,
280	    ("Error VMBUS: malloc failed to allocate message buffer!"));
281	if (addr == 0)
282	    return (ENOMEM);
283
284	aligned_msg = (hv_vmbus_input_post_message*)
285	    (HV_ALIGN_UP(addr, HV_HYPERCALL_PARAM_ALIGN));
286
287	aligned_msg->connection_id = connection_id;
288	aligned_msg->message_type = message_type;
289	aligned_msg->payload_size = payload_size;
290	memcpy((void*) aligned_msg->payload, payload, payload_size);
291
292	status = hv_vmbus_do_hypercall(
293		    HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF;
294
295	free((void *) addr, M_DEVBUF);
296	return (status);
297}
298
299/**
300 * @brief Signal an event on the specified connection using the hypervisor
301 * event IPC. (This involves a hypercall.)
302 */
303hv_vmbus_status
304hv_vmbus_signal_event(void *con_id)
305{
306	hv_vmbus_status status;
307
308	status = hv_vmbus_do_hypercall(
309		    HV_CALL_SIGNAL_EVENT,
310		    con_id,
311		    0) & 0xFFFF;
312
313	return (status);
314}
315
316/**
317 * @brief hv_vmbus_synic_init
318 */
319void
320hv_vmbus_synic_init(void *arg)
321
322{
323	int			cpu;
324	uint64_t		hv_vcpu_index;
325	hv_vmbus_synic_simp	simp;
326	hv_vmbus_synic_siefp	siefp;
327	hv_vmbus_synic_scontrol sctrl;
328	hv_vmbus_synic_sint	shared_sint;
329	uint64_t		version;
330	hv_setup_args* 		setup_args = (hv_setup_args *)arg;
331
332	cpu = PCPU_GET(cpuid);
333
334	if (hv_vmbus_g_context.hypercall_page == NULL)
335	    return;
336
337	/*
338	 * TODO: Check the version
339	 */
340	version = rdmsr(HV_X64_MSR_SVERSION);
341
342	hv_vmbus_g_context.syn_ic_msg_page[cpu] =
343	    setup_args->page_buffers[2 * cpu];
344	hv_vmbus_g_context.syn_ic_event_page[cpu] =
345	    setup_args->page_buffers[2 * cpu + 1];
346
347	/*
348	 * Setup the Synic's message page
349	 */
350
351	simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP);
352	simp.u.simp_enabled = 1;
353	simp.u.base_simp_gpa = ((hv_get_phys_addr(
354	    hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT);
355
356	wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t);
357
358	/*
359	 * Setup the Synic's event page
360	 */
361	siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP);
362	siefp.u.siefp_enabled = 1;
363	siefp.u.base_siefp_gpa = ((hv_get_phys_addr(
364	    hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT);
365
366	wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
367
368	/*HV_SHARED_SINT_IDT_VECTOR + 0x20; */
369	shared_sint.as_uint64_t = 0;
370	shared_sint.u.vector = setup_args->vector;
371	shared_sint.u.masked = FALSE;
372	shared_sint.u.auto_eoi = TRUE;
373
374	wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
375	    shared_sint.as_uint64_t);
376
377	/* Enable the global synic bit */
378	sctrl.as_uint64_t = rdmsr(HV_X64_MSR_SCONTROL);
379	sctrl.u.enable = 1;
380
381	wrmsr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t);
382
383	hv_vmbus_g_context.syn_ic_initialized = TRUE;
384
385	/*
386	 * Set up the cpuid mapping from Hyper-V to FreeBSD.
387	 * The array is indexed using FreeBSD cpuid.
388	 */
389	hv_vcpu_index = rdmsr(HV_X64_MSR_VP_INDEX);
390	hv_vmbus_g_context.hv_vcpu_index[cpu] = (uint32_t)hv_vcpu_index;
391
392	return;
393}
394
395/**
396 * @brief Cleanup routine for hv_vmbus_synic_init()
397 */
398void hv_vmbus_synic_cleanup(void *arg)
399{
400	hv_vmbus_synic_sint	shared_sint;
401	hv_vmbus_synic_simp	simp;
402	hv_vmbus_synic_siefp	siefp;
403
404	if (!hv_vmbus_g_context.syn_ic_initialized)
405	    return;
406
407	shared_sint.as_uint64_t = rdmsr(
408	    HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT);
409
410	shared_sint.u.masked = 1;
411
412	/*
413	 * Disable the interrupt
414	 */
415	wrmsr(
416	    HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
417	    shared_sint.as_uint64_t);
418
419	simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP);
420	simp.u.simp_enabled = 0;
421	simp.u.base_simp_gpa = 0;
422
423	wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t);
424
425	siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP);
426	siefp.u.siefp_enabled = 0;
427	siefp.u.base_siefp_gpa = 0;
428
429	wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
430}
431
432static bool
433hyperv_identify(void)
434{
435	u_int regs[4];
436	unsigned int maxLeaf;
437	unsigned int op;
438
439	if (vm_guest != VM_GUEST_HV)
440		return (false);
441
442	op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION;
443	do_cpuid(op, regs);
444	maxLeaf = regs[0];
445	if (maxLeaf < HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS)
446		return (false);
447
448	op = HV_CPU_ID_FUNCTION_HV_INTERFACE;
449	do_cpuid(op, regs);
450	if (regs[0] != 0x31237648 /* HV#1 */)
451		return (false);
452
453	op = HV_CPU_ID_FUNCTION_MS_HV_FEATURES;
454	do_cpuid(op, regs);
455	if ((regs[0] & HV_FEATURE_MSR_HYPERCALL) == 0) {
456		/*
457		 * Hyper-V w/o Hypercall is impossible; someone
458		 * is faking Hyper-V.
459		 */
460		return (false);
461	}
462	hyperv_features = regs[0];
463
464	op = HV_CPU_ID_FUNCTION_MS_HV_VERSION;
465	do_cpuid(op, regs);
466	printf("Hyper-V Version: %d.%d.%d [SP%d]\n",
467	    regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]);
468
469	printf("  Features: 0x%b\n", hyperv_features,
470	    "\020"
471	    "\001VPRUNTIME"
472	    "\002TMREFCNT"
473	    "\003SYNCIC"
474	    "\004SYNCTM"
475	    "\005APIC"
476	    "\006HYERCALL"
477	    "\007VPINDEX"
478	    "\010RESET"
479	    "\011STATS"
480	    "\012REFTSC"
481	    "\013IDLE"
482	    "\014TMFREQ"
483	    "\015DEBUG");
484
485	op = HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION;
486	do_cpuid(op, regs);
487	hyperv_recommends = regs[0];
488	if (bootverbose)
489		printf("  Recommends: %08x %08x\n", regs[0], regs[1]);
490
491	op = HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS;
492	do_cpuid(op, regs);
493	if (bootverbose) {
494		printf("  Limits: Vcpu:%d Lcpu:%d Int:%d\n",
495		    regs[0], regs[1], regs[2]);
496	}
497
498	if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE) {
499		op = HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE;
500		do_cpuid(op, regs);
501		if (bootverbose) {
502			printf("  HW Features: %08x AMD: %08x\n",
503			    regs[0], regs[3]);
504		}
505	}
506
507	return (true);
508}
509
510static void
511hyperv_init(void *dummy __unused)
512{
513	if (!hyperv_identify())
514		return;
515
516	if (hyperv_features & HV_FEATURE_MSR_TIME_REFCNT) {
517		/* Register virtual timecount */
518		tc_init(&hv_timecounter);
519	}
520}
521SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init, NULL);
522