1240941Sneel/*-
2240941Sneel * Copyright (c) 2012 Sandvine, Inc.
3240941Sneel * Copyright (c) 2012 NetApp, Inc.
4240941Sneel * All rights reserved.
5240941Sneel *
6240941Sneel * Redistribution and use in source and binary forms, with or without
7240941Sneel * modification, are permitted provided that the following conditions
8240941Sneel * are met:
9240941Sneel * 1. Redistributions of source code must retain the above copyright
10240941Sneel *    notice, this list of conditions and the following disclaimer.
11240941Sneel * 2. Redistributions in binary form must reproduce the above copyright
12240941Sneel *    notice, this list of conditions and the following disclaimer in the
13240941Sneel *    documentation and/or other materials provided with the distribution.
14240941Sneel *
15250175Semaste * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16240941Sneel * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17240941Sneel * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18250175Semaste * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19240941Sneel * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20240941Sneel * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21240941Sneel * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22240941Sneel * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23240941Sneel * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24240941Sneel * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25240941Sneel * SUCH DAMAGE.
26240941Sneel *
27240941Sneel * $FreeBSD$
28240941Sneel */
29240941Sneel
30240941Sneel#include <sys/cdefs.h>
31240941Sneel__FBSDID("$FreeBSD$");
32240941Sneel
33243640Sneel#ifdef _KERNEL
34240941Sneel#include <sys/param.h>
35240941Sneel#include <sys/pcpu.h>
36240941Sneel#include <sys/systm.h>
37268976Sjhb#include <sys/proc.h>
38240941Sneel
39240941Sneel#include <vm/vm.h>
40240941Sneel#include <vm/pmap.h>
41240941Sneel
42240941Sneel#include <machine/vmparam.h>
43240941Sneel#include <machine/vmm.h>
44243640Sneel#else	/* !_KERNEL */
45243640Sneel#include <sys/types.h>
46243640Sneel#include <sys/errno.h>
47270159Sgrehan#include <sys/_iovec.h>
48240941Sneel
49243640Sneel#include <machine/vmm.h>
50240941Sneel
51268976Sjhb#include <assert.h>
52243640Sneel#include <vmmapi.h>
53268976Sjhb#define	KASSERT(exp,msg)	assert((exp))
54243640Sneel#endif	/* _KERNEL */
55240941Sneel
56268976Sjhb#include <machine/vmm_instruction_emul.h>
57268976Sjhb#include <x86/psl.h>
58268976Sjhb#include <x86/specialreg.h>
59268976Sjhb
/* struct vie_op.op_type: class of operation performed by the opcode */
enum {
	VIE_OP_TYPE_NONE = 0,
	VIE_OP_TYPE_MOV,	/* mov between register and memory */
	VIE_OP_TYPE_MOVSX,	/* mov with sign extension */
	VIE_OP_TYPE_MOVZX,	/* mov with zero extension */
	VIE_OP_TYPE_AND,
	VIE_OP_TYPE_OR,
	VIE_OP_TYPE_SUB,
	VIE_OP_TYPE_TWO_BYTE,	/* 0x0F escape; see two_byte_opcodes[] */
	VIE_OP_TYPE_PUSH,
	VIE_OP_TYPE_CMP,
	VIE_OP_TYPE_LAST
};
74243640Sneel
/* struct vie_op.op_flags */
#define	VIE_OP_F_IMM		(1 << 0)  /* 16/32-bit immediate operand */
#define	VIE_OP_F_IMM8		(1 << 1)  /* 8-bit immediate operand */
#define	VIE_OP_F_MOFFSET	(1 << 2)  /* 16/32/64-bit immediate moffset */
#define	VIE_OP_F_NO_MODRM	(1 << 3)  /* instruction has no ModRM byte */
80243640Sneel
/*
 * Opcodes that follow the 0x0F escape byte, indexed by the second
 * opcode byte.  Entries left zeroed decode as VIE_OP_TYPE_NONE.
 */
static const struct vie_op two_byte_opcodes[256] = {
	[0xB6] = {
		.op_byte = 0xB6,
		.op_type = VIE_OP_TYPE_MOVZX,	/* movzx reg, r/m8 */
	},
	[0xB7] = {
		.op_byte = 0xB7,
		.op_type = VIE_OP_TYPE_MOVZX,	/* movzx reg, r/m16 */
	},
	[0xBE] = {
		.op_byte = 0xBE,
		.op_type = VIE_OP_TYPE_MOVSX,	/* movsx reg, r/m8 */
	},
};
95267396Sjhb
96243640Sneelstatic const struct vie_op one_byte_opcodes[256] = {
97267396Sjhb	[0x0F] = {
98267396Sjhb		.op_byte = 0x0F,
99267396Sjhb		.op_type = VIE_OP_TYPE_TWO_BYTE
100267396Sjhb	},
101271659Sgrehan	[0x2B] = {
102271659Sgrehan		.op_byte = 0x2B,
103271659Sgrehan		.op_type = VIE_OP_TYPE_SUB,
104271659Sgrehan	},
105270159Sgrehan	[0x3B] = {
106270159Sgrehan		.op_byte = 0x3B,
107270159Sgrehan		.op_type = VIE_OP_TYPE_CMP,
108270159Sgrehan	},
109246108Sneel	[0x88] = {
110246108Sneel		.op_byte = 0x88,
111246108Sneel		.op_type = VIE_OP_TYPE_MOV,
112246108Sneel	},
113243640Sneel	[0x89] = {
114243640Sneel		.op_byte = 0x89,
115243640Sneel		.op_type = VIE_OP_TYPE_MOV,
116243640Sneel	},
117254964Sneel	[0x8A] = {
118254964Sneel		.op_byte = 0x8A,
119254964Sneel		.op_type = VIE_OP_TYPE_MOV,
120254964Sneel	},
121243640Sneel	[0x8B] = {
122243640Sneel		.op_byte = 0x8B,
123243640Sneel		.op_type = VIE_OP_TYPE_MOV,
124243640Sneel	},
125270159Sgrehan	[0xA1] = {
126270159Sgrehan		.op_byte = 0xA1,
127270159Sgrehan		.op_type = VIE_OP_TYPE_MOV,
128270159Sgrehan		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
129270159Sgrehan	},
130270159Sgrehan	[0xA3] = {
131270159Sgrehan		.op_byte = 0xA3,
132270159Sgrehan		.op_type = VIE_OP_TYPE_MOV,
133270159Sgrehan		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
134270159Sgrehan	},
135270159Sgrehan	[0xC6] = {
136270159Sgrehan		/* XXX Group 11 extended opcode - not just MOV */
137270159Sgrehan		.op_byte = 0xC6,
138270159Sgrehan		.op_type = VIE_OP_TYPE_MOV,
139270159Sgrehan		.op_flags = VIE_OP_F_IMM8,
140270159Sgrehan	},
141243640Sneel	[0xC7] = {
142243640Sneel		.op_byte = 0xC7,
143243640Sneel		.op_type = VIE_OP_TYPE_MOV,
144243640Sneel		.op_flags = VIE_OP_F_IMM,
145243640Sneel	},
146243640Sneel	[0x23] = {
147243640Sneel		.op_byte = 0x23,
148243640Sneel		.op_type = VIE_OP_TYPE_AND,
149243667Sgrehan	},
150243667Sgrehan	[0x81] = {
151243703Sgrehan		/* XXX Group 1 extended opcode - not just AND */
152243667Sgrehan		.op_byte = 0x81,
153243667Sgrehan		.op_type = VIE_OP_TYPE_AND,
154243667Sgrehan		.op_flags = VIE_OP_F_IMM,
155253585Sneel	},
156253585Sneel	[0x83] = {
157253585Sneel		/* XXX Group 1 extended opcode - not just OR */
158253585Sneel		.op_byte = 0x83,
159253585Sneel		.op_type = VIE_OP_TYPE_OR,
160253585Sneel		.op_flags = VIE_OP_F_IMM8,
161253585Sneel	},
162270159Sgrehan	[0xFF] = {
163270159Sgrehan		/* XXX Group 5 extended opcode - not just PUSH */
164270159Sgrehan		.op_byte = 0xFF,
165270159Sgrehan		.op_type = VIE_OP_TYPE_PUSH,
166270159Sgrehan	}
167243640Sneel};
168243640Sneel
/* struct vie.mod: values of the ModRM 'mod' field */
#define	VIE_MOD_INDIRECT		0	/* memory, no displacement */
#define	VIE_MOD_INDIRECT_DISP8		1	/* memory + disp8 */
#define	VIE_MOD_INDIRECT_DISP32		2	/* memory + disp32 */
#define	VIE_MOD_DIRECT			3	/* register operand */

/* struct vie.rm: 'rm' field encodings with special meaning */
#define	VIE_RM_SIB			4	/* SIB byte follows ModRM */
#define	VIE_RM_DISP32			5	/* disp32 (with mod == 0) */

#define	GB				(1024 * 1024 * 1024)
180243640Sneel
/*
 * Map of the 4-bit GPR number encoded by ModRM/REX (0 through 15) to
 * the corresponding vmm register identifier.
 */
static enum vm_reg_name gpr_map[16] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15
};
199240941Sneel
/* Mask covering the low N bytes, indexed by operand size in bytes. */
static uint64_t size2mask[] = {
	[1] = 0xff,
	[2] = 0xffff,
	[4] = 0xffffffff,
	[8] = 0xffffffffffffffff,
};
206243640Sneel
207243640Sneelstatic int
208243640Sneelvie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
209243640Sneel{
210243640Sneel	int error;
211243640Sneel
212243640Sneel	error = vm_get_register(vm, vcpuid, reg, rval);
213243640Sneel
214243640Sneel	return (error);
215243640Sneel}
216243640Sneel
217270159Sgrehanstatic void
218270159Sgrehanvie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr)
219246108Sneel{
220270159Sgrehan	*lhbr = 0;
221270159Sgrehan	*reg = gpr_map[vie->reg];
222246108Sneel
223246108Sneel	/*
224270159Sgrehan	 * 64-bit mode imposes limitations on accessing legacy high byte
225270159Sgrehan	 * registers (lhbr).
226246108Sneel	 *
227246108Sneel	 * The legacy high-byte registers cannot be addressed if the REX
228246108Sneel	 * prefix is present. In this case the values 4, 5, 6 and 7 of the
229246108Sneel	 * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
230246108Sneel	 *
231246108Sneel	 * If the REX prefix is not present then the values 4, 5, 6 and 7
232246108Sneel	 * of the 'ModRM:reg' field address the legacy high-byte registers,
233246108Sneel	 * %ah, %ch, %dh and %bh respectively.
234246108Sneel	 */
235246108Sneel	if (!vie->rex_present) {
236246108Sneel		if (vie->reg & 0x4) {
237270159Sgrehan			*lhbr = 1;
238270159Sgrehan			*reg = gpr_map[vie->reg & 0x3];
239246108Sneel		}
240246108Sneel	}
241270159Sgrehan}
242246108Sneel
243270159Sgrehanstatic int
244270159Sgrehanvie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
245270159Sgrehan{
246270159Sgrehan	uint64_t val;
247270159Sgrehan	int error, lhbr;
248270159Sgrehan	enum vm_reg_name reg;
249270159Sgrehan
250270159Sgrehan	vie_calc_bytereg(vie, &reg, &lhbr);
251246108Sneel	error = vm_get_register(vm, vcpuid, reg, &val);
252270159Sgrehan
253270159Sgrehan	/*
254270159Sgrehan	 * To obtain the value of a legacy high byte register shift the
255270159Sgrehan	 * base register right by 8 bits (%ah = %rax >> 8).
256270159Sgrehan	 */
257270159Sgrehan	if (lhbr)
258270159Sgrehan		*rval = val >> 8;
259270159Sgrehan	else
260270159Sgrehan		*rval = val;
261246108Sneel	return (error);
262246108Sneel}
263246108Sneel
264270159Sgrehanstatic int
265270159Sgrehanvie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte)
266270159Sgrehan{
267270159Sgrehan	uint64_t origval, val, mask;
268270159Sgrehan	int error, lhbr;
269270159Sgrehan	enum vm_reg_name reg;
270270159Sgrehan
271270159Sgrehan	vie_calc_bytereg(vie, &reg, &lhbr);
272270159Sgrehan	error = vm_get_register(vm, vcpuid, reg, &origval);
273270159Sgrehan	if (error == 0) {
274270159Sgrehan		val = byte;
275270159Sgrehan		mask = 0xff;
276270159Sgrehan		if (lhbr) {
277270159Sgrehan			/*
278270159Sgrehan			 * Shift left by 8 to store 'byte' in a legacy high
279270159Sgrehan			 * byte register.
280270159Sgrehan			 */
281270159Sgrehan			val <<= 8;
282270159Sgrehan			mask <<= 8;
283270159Sgrehan		}
284270159Sgrehan		val |= origval & ~mask;
285270159Sgrehan		error = vm_set_register(vm, vcpuid, reg, val);
286270159Sgrehan	}
287270159Sgrehan	return (error);
288270159Sgrehan}
289270159Sgrehan
290268976Sjhbint
291243640Sneelvie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
292243640Sneel		    uint64_t val, int size)
293243640Sneel{
294243640Sneel	int error;
295243640Sneel	uint64_t origval;
296243640Sneel
297243640Sneel	switch (size) {
298243640Sneel	case 1:
299243640Sneel	case 2:
300243640Sneel		error = vie_read_register(vm, vcpuid, reg, &origval);
301243640Sneel		if (error)
302243640Sneel			return (error);
303243640Sneel		val &= size2mask[size];
304243640Sneel		val |= origval & ~size2mask[size];
305243640Sneel		break;
306243640Sneel	case 4:
307243640Sneel		val &= 0xffffffffUL;
308243640Sneel		break;
309243640Sneel	case 8:
310243640Sneel		break;
311243640Sneel	default:
312243640Sneel		return (EINVAL);
313243640Sneel	}
314243640Sneel
315243640Sneel	error = vm_set_register(vm, vcpuid, reg, val);
316243640Sneel	return (error);
317243640Sneel}
318243640Sneel
319243640Sneel/*
320270159Sgrehan * Return the status flags that would result from doing (x - y).
321243640Sneel */
322270159Sgrehanstatic u_long
323270159Sgrehangetcc16(uint16_t x, uint16_t y)
324270159Sgrehan{
325270159Sgrehan	u_long rflags;
326270159Sgrehan
327270159Sgrehan	__asm __volatile("sub %1,%2; pushfq; popq %0" :
328270159Sgrehan	    "=r" (rflags) : "m" (y), "r" (x));
329270159Sgrehan	return (rflags);
330270159Sgrehan}
331270159Sgrehan
332270159Sgrehanstatic u_long
333270159Sgrehangetcc32(uint32_t x, uint32_t y)
334270159Sgrehan{
335270159Sgrehan	u_long rflags;
336270159Sgrehan
337270159Sgrehan	__asm __volatile("sub %1,%2; pushfq; popq %0" :
338270159Sgrehan	    "=r" (rflags) : "m" (y), "r" (x));
339270159Sgrehan	return (rflags);
340270159Sgrehan}
341270159Sgrehan
342270159Sgrehanstatic u_long
343270159Sgrehangetcc64(uint64_t x, uint64_t y)
344270159Sgrehan{
345270159Sgrehan	u_long rflags;
346270159Sgrehan
347270159Sgrehan	__asm __volatile("sub %1,%2; pushfq; popq %0" :
348270159Sgrehan	    "=r" (rflags) : "m" (y), "r" (x));
349270159Sgrehan	return (rflags);
350270159Sgrehan}
351270159Sgrehan
352270159Sgrehanstatic u_long
353270159Sgrehangetcc(int opsize, uint64_t x, uint64_t y)
354270159Sgrehan{
355270159Sgrehan	KASSERT(opsize == 2 || opsize == 4 || opsize == 8,
356270159Sgrehan	    ("getcc: invalid operand size %d", opsize));
357270159Sgrehan
358270159Sgrehan	if (opsize == 2)
359270159Sgrehan		return (getcc16(x, y));
360270159Sgrehan	else if (opsize == 4)
361270159Sgrehan		return (getcc32(x, y));
362270159Sgrehan	else
363270159Sgrehan		return (getcc64(x, y));
364270159Sgrehan}
365270159Sgrehan
/*
 * Emulate a MOV-family instruction that accessed the memory at 'gpa'.
 *
 * The decoded instruction is described by 'vie'.  The memory access
 * itself is performed through the 'memread'/'memwrite' callbacks,
 * with 'arg' passed through to them.  Returns 0 on success or an
 * errno value; EINVAL for opcodes this function does not handle.
 */
static int
emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint8_t byte;
	uint64_t val;

	size = vie->opsize;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x88:
		/*
		 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 88/r:	mov r/m8, r8
		 * REX + 88/r:	mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
		 */
		size = 1;	/* override for byte operation */
		error = vie_read_bytereg(vm, vcpuid, vie, &byte);
		if (error == 0)
			error = memwrite(vm, vcpuid, gpa, byte, size, arg);
		break;
	case 0x89:
		/*
		 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 89/r:	mov r/m16, r16
		 * 89/r:	mov r/m32, r32
		 * REX.W + 89/r	mov r/m64, r64
		 */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val);
		if (error == 0) {
			/* Only the low 'size' bytes are stored. */
			val &= size2mask[size];
			error = memwrite(vm, vcpuid, gpa, val, size, arg);
		}
		break;
	case 0x8A:
		/*
		 * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg)
		 * 8A/r:	mov r8, r/m8
		 * REX + 8A/r:	mov r8, r/m8
		 */
		size = 1;	/* override for byte operation */
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0)
			error = vie_write_bytereg(vm, vcpuid, vie, val);
		break;
	case 0x8B:
		/*
		 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
		 * 8B/r:	mov r16, r/m16
		 * 8B/r:	mov r32, r/m32
		 * REX.W 8B/r:	mov r64, r/m64
		 */
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0) {
			reg = gpr_map[vie->reg];
			error = vie_update_register(vm, vcpuid, reg, val, size);
		}
		break;
	case 0xA1:
		/*
		 * MOV from seg:moffset to AX/EAX/RAX
		 * A1:		mov AX, moffs16
		 * A1:		mov EAX, moffs32
		 * REX.W + A1:	mov RAX, moffs64
		 */
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0) {
			reg = VM_REG_GUEST_RAX;
			error = vie_update_register(vm, vcpuid, reg, val, size);
		}
		break;
	case 0xA3:
		/*
		 * MOV from AX/EAX/RAX to seg:moffset
		 * A3:		mov moffs16, AX
		 * A3:		mov moffs32, EAX
		 * REX.W + A3:	mov moffs64, RAX
		 */
		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
		if (error == 0) {
			val &= size2mask[size];
			error = memwrite(vm, vcpuid, gpa, val, size, arg);
		}
		break;
	case 0xC6:
		/*
		 * MOV from imm8 to mem (ModRM:r/m)
		 * C6/0		mov r/m8, imm8
		 * REX + C6/0	mov r/m8, imm8
		 */
		size = 1;	/* override for byte operation */
		error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg);
		break;
	case 0xC7:
		/*
		 * MOV from imm16/imm32 to mem (ModRM:r/m)
		 * C7/0		mov r/m16, imm16
		 * C7/0		mov r/m32, imm32
		 * REX.W + C7/0	mov r/m64, imm32 (sign-extended to 64-bits)
		 */
		val = vie->immediate & size2mask[size];
		error = memwrite(vm, vcpuid, gpa, val, size, arg);
		break;
	default:
		break;
	}

	return (error);
}
479243640Sneel
/*
 * Emulate MOVZX/MOVSX (0F B6, 0F B7, 0F BE): load a byte or word from
 * the memory at 'gpa' and store it, zero- or sign-extended to the
 * operand size, into the destination register (ModRM:reg).
 *
 * Returns 0 on success or an errno value; EINVAL for opcodes this
 * function does not handle.
 */
static int
emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	     mem_region_read_t memread, mem_region_write_t memwrite,
	     void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint64_t val;

	size = vie->opsize;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0xB6:
		/*
		 * MOV and zero extend byte from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F B6/r		movzx r16, r/m8
		 * 0F B6/r		movzx r32, r/m8
		 * REX.W + 0F B6/r	movzx r64, r/m8
		 */

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val, 1, arg);
		if (error)
			break;

		/* get the second operand */
		reg = gpr_map[vie->reg];

		/* zero-extend byte */
		val = (uint8_t)val;

		/* write the result */
		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	case 0xB7:
		/*
		 * MOV and zero extend word from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F B7/r		movzx r32, r/m16
		 * REX.W + 0F B7/r	movzx r64, r/m16
		 */
		error = memread(vm, vcpuid, gpa, &val, 2, arg);
		/*
		 * NOTE(review): this case returns directly on error while
		 * the other cases 'break'; the effect is identical because
		 * nothing follows the switch.
		 */
		if (error)
			return (error);

		reg = gpr_map[vie->reg];

		/* zero-extend word */
		val = (uint16_t)val;

		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	case 0xBE:
		/*
		 * MOV and sign extend byte from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F BE/r		movsx r16, r/m8
		 * 0F BE/r		movsx r32, r/m8
		 * REX.W + 0F BE/r	movsx r64, r/m8
		 */

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val, 1, arg);
		if (error)
			break;

		/* get the second operand */
		reg = gpr_map[vie->reg];

		/* sign extend byte */
		val = (int8_t)val;

		/* write the result */
		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	default:
		break;
	}
	return (error);
}
565267396Sjhb
566267396Sjhbstatic int
567243640Sneelemulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
568243640Sneel	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
569243640Sneel{
570243640Sneel	int error, size;
571243640Sneel	enum vm_reg_name reg;
572243640Sneel	uint64_t val1, val2;
573243640Sneel
574270159Sgrehan	size = vie->opsize;
575243640Sneel	error = EINVAL;
576243640Sneel
577243640Sneel	switch (vie->op.op_byte) {
578243640Sneel	case 0x23:
579243640Sneel		/*
580243640Sneel		 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
581243640Sneel		 * result in reg.
582243640Sneel		 *
583270159Sgrehan		 * 23/r		and r16, r/m16
584243640Sneel		 * 23/r		and r32, r/m32
585243640Sneel		 * REX.W + 23/r	and r64, r/m64
586243640Sneel		 */
587243640Sneel
588243640Sneel		/* get the first operand */
589243640Sneel		reg = gpr_map[vie->reg];
590243640Sneel		error = vie_read_register(vm, vcpuid, reg, &val1);
591243640Sneel		if (error)
592243640Sneel			break;
593243640Sneel
594243640Sneel		/* get the second operand */
595243640Sneel		error = memread(vm, vcpuid, gpa, &val2, size, arg);
596243640Sneel		if (error)
597243640Sneel			break;
598243640Sneel
599243640Sneel		/* perform the operation and write the result */
600243640Sneel		val1 &= val2;
601243640Sneel		error = vie_update_register(vm, vcpuid, reg, val1, size);
602243640Sneel		break;
603243667Sgrehan	case 0x81:
604243667Sgrehan		/*
605271659Sgrehan		 * AND/OR mem (ModRM:r/m) with immediate and store the
606253585Sneel		 * result in mem.
607243667Sgrehan		 *
608271659Sgrehan		 * AND: i = 4
609271659Sgrehan		 * OR:  i = 1
610271659Sgrehan		 * 81 /i		op r/m16, imm16
611271659Sgrehan		 * 81 /i		op r/m32, imm32
612271659Sgrehan		 * REX.W + 81 /i	op r/m64, imm32 sign-extended to 64
613243703Sgrehan		 *
614243667Sgrehan		 */
615243703Sgrehan
616243667Sgrehan		/* get the first operand */
617243667Sgrehan                error = memread(vm, vcpuid, gpa, &val1, size, arg);
618243667Sgrehan                if (error)
619243667Sgrehan			break;
620243667Sgrehan
621243667Sgrehan                /*
622271659Sgrehan                 * perform the operation with the pre-fetched immediate
623271659Sgrehan                 * operand and write the result
624271659Sgrehan                 */
625271659Sgrehan		switch (vie->reg & 7) {
626271659Sgrehan		case 0x4:
627271659Sgrehan			/* modrm:reg == b100, AND */
628271659Sgrehan			val1 &= vie->immediate;
629271659Sgrehan			break;
630271659Sgrehan		case 0x1:
631271659Sgrehan			/* modrm:reg == b001, OR */
632271659Sgrehan			val1 |= vie->immediate;
633271659Sgrehan			break;
634271659Sgrehan		default:
635271659Sgrehan			error = EINVAL;
636271659Sgrehan			break;
637271659Sgrehan		}
638271659Sgrehan		if (error)
639271659Sgrehan			break;
640271659Sgrehan
641271659Sgrehan		error = memwrite(vm, vcpuid, gpa, val1, size, arg);
642243667Sgrehan		break;
643243640Sneel	default:
644243640Sneel		break;
645243640Sneel	}
646243640Sneel	return (error);
647243640Sneel}
648243640Sneel
649253585Sneelstatic int
650253585Sneelemulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
651253585Sneel	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
652253585Sneel{
653253585Sneel	int error, size;
654253585Sneel	uint64_t val1;
655253585Sneel
656270159Sgrehan	size = vie->opsize;
657253585Sneel	error = EINVAL;
658253585Sneel
659253585Sneel	switch (vie->op.op_byte) {
660253585Sneel	case 0x83:
661253585Sneel		/*
662253585Sneel		 * OR mem (ModRM:r/m) with immediate and store the
663253585Sneel		 * result in mem.
664253585Sneel		 *
665270159Sgrehan		 * 83 /1		OR r/m16, imm8 sign-extended to 16
666270159Sgrehan		 * 83 /1		OR r/m32, imm8 sign-extended to 32
667270159Sgrehan		 * REX.W + 83/1		OR r/m64, imm8 sign-extended to 64
668253585Sneel		 *
669253585Sneel		 * Currently, only the OR operation of the 0x83 opcode
670253585Sneel		 * is implemented (ModRM:reg = b001).
671253585Sneel		 */
672253585Sneel		if ((vie->reg & 7) != 1)
673253585Sneel			break;
674253585Sneel
675253585Sneel		/* get the first operand */
676253585Sneel                error = memread(vm, vcpuid, gpa, &val1, size, arg);
677253585Sneel                if (error)
678253585Sneel			break;
679253585Sneel
680253585Sneel                /*
681253585Sneel		 * perform the operation with the pre-fetched immediate
682253585Sneel		 * operand and write the result
683253585Sneel		 */
684253585Sneel                val1 |= vie->immediate;
685253585Sneel                error = memwrite(vm, vcpuid, gpa, val1, size, arg);
686253585Sneel		break;
687253585Sneel	default:
688253585Sneel		break;
689253585Sneel	}
690253585Sneel	return (error);
691253585Sneel}
692253585Sneel
/* Arithmetic status flags in RFLAGS: CF, PF, AF, ZF, SF and OF. */
#define	RFLAGS_STATUS_BITS    (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)

/*
 * Emulate CMP (3B/r): compare the register operand (ModRM:reg) with
 * the memory operand at 'gpa' and update the guest's RFLAGS status
 * bits from the subtraction; neither operand is written.  Returns 0
 * on success or an errno value; EINVAL for unhandled opcodes.
 */
static int
emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	uint64_t op1, op2, rflags, rflags2;
	enum vm_reg_name reg;

	size = vie->opsize;
	switch (vie->op.op_byte) {
	case 0x3B:
		/*
		 * 3B/r		CMP r16, r/m16
		 * 3B/r		CMP r32, r/m32
		 * REX.W + 3B/r	CMP r64, r/m64
		 *
		 * Compare first operand (reg) with second operand (r/m) and
		 * set status flags in EFLAGS register. The comparison is
		 * performed by subtracting the second operand from the first
		 * operand and then setting the status flags.
		 */

		/* Get the first operand */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &op1);
		if (error)
			return (error);

		/* Get the second operand */
		error = memread(vm, vcpuid, gpa, &op2, size, arg);
		if (error)
			return (error);

		break;
	default:
		return (EINVAL);
	}
	/* Compute the flags of (op1 - op2) and merge only the status bits. */
	rflags2 = getcc(size, op1, op2);
	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
	if (error)
		return (error);
	rflags &= ~RFLAGS_STATUS_BITS;
	rflags |= rflags2 & RFLAGS_STATUS_BITS;

	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
	return (error);
}
742270159Sgrehan
/*
 * Emulate SUB (2B/r): subtract the memory operand at 'gpa' from the
 * register operand (ModRM:reg), write the difference back to the
 * register and update the guest's RFLAGS status bits.  Returns 0 on
 * success or an errno value; EINVAL for unhandled opcodes.
 */
static int
emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	uint64_t nval, rflags, rflags2, val1, val2;
	enum vm_reg_name reg;

	size = vie->opsize;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x2B:
		/*
		 * SUB r/m from r and store the result in r
		 *
		 * 2B/r            SUB r16, r/m16
		 * 2B/r            SUB r32, r/m32
		 * REX.W + 2B/r    SUB r64, r/m64
		 */

		/* get the first operand */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val1);
		if (error)
			break;

		/* get the second operand */
		error = memread(vm, vcpuid, gpa, &val2, size, arg);
		if (error)
			break;

		/* perform the operation and write the result */
		nval = val1 - val2;
		error = vie_update_register(vm, vcpuid, reg, nval, size);
		break;
	default:
		break;
	}

	/*
	 * On success, recompute the flags from the original operands
	 * (val1/val2 are valid only when error == 0) and merge only the
	 * arithmetic status bits into the guest's RFLAGS.
	 */
	if (!error) {
		rflags2 = getcc(size, val1, val2);
		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
		    &rflags);
		if (error)
			return (error);

		rflags &= ~RFLAGS_STATUS_BITS;
		rflags |= rflags2 & RFLAGS_STATUS_BITS;
		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
		    rflags, 8);
	}

	return (error);
}
798271659Sgrehan
799271659Sgrehanstatic int
800270159Sgrehanemulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
801270159Sgrehan    struct vm_guest_paging *paging, mem_region_read_t memread,
802270159Sgrehan    mem_region_write_t memwrite, void *arg)
803270159Sgrehan{
804270159Sgrehan#ifdef _KERNEL
805270159Sgrehan	struct vm_copyinfo copyinfo[2];
806270159Sgrehan#else
807270159Sgrehan	struct iovec copyinfo[2];
808270159Sgrehan#endif
809270159Sgrehan	struct seg_desc ss_desc;
810270159Sgrehan	uint64_t cr0, rflags, rsp, stack_gla, val;
811270159Sgrehan	int error, size, stackaddrsize;
812270159Sgrehan
813270159Sgrehan	/*
814270159Sgrehan	 * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
815270159Sgrehan	 *
816270159Sgrehan	 * PUSH is part of the group 5 extended opcodes and is identified
817270159Sgrehan	 * by ModRM:reg = b110.
818270159Sgrehan	 */
819270159Sgrehan	if ((vie->reg & 7) != 6)
820270159Sgrehan		return (EINVAL);
821270159Sgrehan
822270159Sgrehan	size = vie->opsize;
823270159Sgrehan	/*
824270159Sgrehan	 * From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1
825270159Sgrehan	 */
826270159Sgrehan	if (paging->cpu_mode == CPU_MODE_REAL) {
827270159Sgrehan		stackaddrsize = 2;
828270159Sgrehan	} else if (paging->cpu_mode == CPU_MODE_64BIT) {
829270159Sgrehan		/*
830270159Sgrehan		 * "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3
831270159Sgrehan		 * - Stack pointer size is always 64-bits.
832270159Sgrehan		 * - PUSH/POP of 32-bit values is not possible in 64-bit mode.
833270159Sgrehan		 * - 16-bit PUSH/POP is supported by using the operand size
834270159Sgrehan		 *   override prefix (66H).
835270159Sgrehan		 */
836270159Sgrehan		stackaddrsize = 8;
837270159Sgrehan		size = vie->opsize_override ? 2 : 8;
838270159Sgrehan	} else {
839270159Sgrehan		/*
840270159Sgrehan		 * In protected or compability mode the 'B' flag in the
841270159Sgrehan		 * stack-segment descriptor determines the size of the
842270159Sgrehan		 * stack pointer.
843270159Sgrehan		 */
844270159Sgrehan		error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc);
845270159Sgrehan		KASSERT(error == 0, ("%s: error %d getting SS descriptor",
846270159Sgrehan		    __func__, error));
847270159Sgrehan		if (SEG_DESC_DEF32(ss_desc.access))
848270159Sgrehan			stackaddrsize = 4;
849270159Sgrehan		else
850270159Sgrehan			stackaddrsize = 2;
851270159Sgrehan	}
852270159Sgrehan
853270159Sgrehan	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
854270159Sgrehan	KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
855270159Sgrehan
856270159Sgrehan	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
857270159Sgrehan	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
858270159Sgrehan
859270159Sgrehan	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
860270159Sgrehan	KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
861270159Sgrehan
862270159Sgrehan	rsp -= size;
863270159Sgrehan	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
864270159Sgrehan	    rsp, size, stackaddrsize, PROT_WRITE, &stack_gla)) {
865270159Sgrehan		vm_inject_ss(vm, vcpuid, 0);
866270159Sgrehan		return (0);
867270159Sgrehan	}
868270159Sgrehan
869270159Sgrehan	if (vie_canonical_check(paging->cpu_mode, stack_gla)) {
870270159Sgrehan		vm_inject_ss(vm, vcpuid, 0);
871270159Sgrehan		return (0);
872270159Sgrehan	}
873270159Sgrehan
874270159Sgrehan	if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) {
875270159Sgrehan		vm_inject_ac(vm, vcpuid, 0);
876270159Sgrehan		return (0);
877270159Sgrehan	}
878270159Sgrehan
879270159Sgrehan	error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, PROT_WRITE,
880270159Sgrehan	    copyinfo, nitems(copyinfo));
881270159Sgrehan	if (error == -1) {
882270159Sgrehan		/*
883270159Sgrehan		 * XXX cannot return a negative error value here because it
884270159Sgrehan		 * ends up being the return value of the VM_RUN() ioctl and
885270159Sgrehan		 * is interpreted as a pseudo-error (for e.g. ERESTART).
886270159Sgrehan		 */
887270159Sgrehan		return (EFAULT);
888270159Sgrehan	} else if (error == 1) {
889270159Sgrehan		/* Resume guest execution to handle page fault */
890270159Sgrehan		return (0);
891270159Sgrehan	}
892270159Sgrehan
893270159Sgrehan	error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
894270159Sgrehan	if (error == 0) {
895270159Sgrehan		vm_copyout(vm, vcpuid, &val, copyinfo, size);
896270159Sgrehan		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
897270159Sgrehan		    stackaddrsize);
898270159Sgrehan		KASSERT(error == 0, ("error %d updating rsp", error));
899270159Sgrehan	}
900270159Sgrehan#ifdef _KERNEL
901270159Sgrehan	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
902270159Sgrehan#endif
903270159Sgrehan	return (error);
904270159Sgrehan}
905270159Sgrehan
906243640Sneelint
907243640Sneelvmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
908270159Sgrehan    struct vm_guest_paging *paging, mem_region_read_t memread,
909270159Sgrehan    mem_region_write_t memwrite, void *memarg)
910243640Sneel{
911243640Sneel	int error;
912243640Sneel
913243640Sneel	if (!vie->decoded)
914243640Sneel		return (EINVAL);
915243640Sneel
916243640Sneel	switch (vie->op.op_type) {
917270159Sgrehan	case VIE_OP_TYPE_PUSH:
918270159Sgrehan		error = emulate_push(vm, vcpuid, gpa, vie, paging, memread,
919270159Sgrehan		    memwrite, memarg);
920270159Sgrehan		break;
921270159Sgrehan	case VIE_OP_TYPE_CMP:
922270159Sgrehan		error = emulate_cmp(vm, vcpuid, gpa, vie,
923270159Sgrehan				    memread, memwrite, memarg);
924270159Sgrehan		break;
925243640Sneel	case VIE_OP_TYPE_MOV:
926243640Sneel		error = emulate_mov(vm, vcpuid, gpa, vie,
927243640Sneel				    memread, memwrite, memarg);
928243640Sneel		break;
929267396Sjhb	case VIE_OP_TYPE_MOVSX:
930267396Sjhb	case VIE_OP_TYPE_MOVZX:
931267396Sjhb		error = emulate_movx(vm, vcpuid, gpa, vie,
932267396Sjhb				     memread, memwrite, memarg);
933267396Sjhb		break;
934243640Sneel	case VIE_OP_TYPE_AND:
935243640Sneel		error = emulate_and(vm, vcpuid, gpa, vie,
936243640Sneel				    memread, memwrite, memarg);
937243640Sneel		break;
938253585Sneel	case VIE_OP_TYPE_OR:
939253585Sneel		error = emulate_or(vm, vcpuid, gpa, vie,
940253585Sneel				    memread, memwrite, memarg);
941253585Sneel		break;
942271659Sgrehan	case VIE_OP_TYPE_SUB:
943271659Sgrehan		error = emulate_sub(vm, vcpuid, gpa, vie,
944271659Sgrehan				    memread, memwrite, memarg);
945271659Sgrehan		break;
946243640Sneel	default:
947243640Sneel		error = EINVAL;
948243640Sneel		break;
949243640Sneel	}
950243640Sneel
951243640Sneel	return (error);
952243640Sneel}
953243640Sneel
954268976Sjhbint
955268976Sjhbvie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
956268976Sjhb{
957268976Sjhb	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
958268976Sjhb	    ("%s: invalid size %d", __func__, size));
959268976Sjhb	KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl));
960268976Sjhb
961268976Sjhb	if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
962268976Sjhb		return (0);
963268976Sjhb
964268976Sjhb	return ((gla & (size - 1)) ? 1 : 0);
965268976Sjhb}
966268976Sjhb
967268976Sjhbint
968268976Sjhbvie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
969268976Sjhb{
970268976Sjhb	uint64_t mask;
971268976Sjhb
972268976Sjhb	if (cpu_mode != CPU_MODE_64BIT)
973268976Sjhb		return (0);
974268976Sjhb
975268976Sjhb	/*
976268976Sjhb	 * The value of the bit 47 in the 'gla' should be replicated in the
977268976Sjhb	 * most significant 16 bits.
978268976Sjhb	 */
979268976Sjhb	mask = ~((1UL << 48) - 1);
980268976Sjhb	if (gla & (1UL << 47))
981268976Sjhb		return ((gla & mask) != mask);
982268976Sjhb	else
983268976Sjhb		return ((gla & mask) != 0);
984268976Sjhb}
985268976Sjhb
986268976Sjhbuint64_t
987268976Sjhbvie_size2mask(int size)
988268976Sjhb{
989268976Sjhb	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
990268976Sjhb	    ("vie_size2mask: invalid size %d", size));
991268976Sjhb	return (size2mask[size]);
992268976Sjhb}
993268976Sjhb
/*
 * Compute the guest linear address for an access of 'length' bytes at
 * 'offset' within the segment 'seg' described by 'desc', performing the
 * segment-level protection and limit checks that the hardware would do.
 *
 * On success, returns 0 with the linear address in '*gla'. Returns -1
 * if any check fails; the caller is expected to inject the appropriate
 * fault (#GP or #SS) into the guest in that case.
 */
int
vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
    struct seg_desc *desc, uint64_t offset, int length, int addrsize,
    int prot, uint64_t *gla)
{
	uint64_t firstoff, low_limit, high_limit, segbase;
	int glasize, type;

	KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS,
	    ("%s: invalid segment %d", __func__, seg));
	KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
	    ("%s: invalid operand size %d", __func__, length));
	KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
	    ("%s: invalid prot %#x", __func__, prot));

	/* Remember the starting offset; the limit check below mutates it. */
	firstoff = offset;
	if (cpu_mode == CPU_MODE_64BIT) {
		KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address "
		    "size %d for cpu_mode %d", __func__, addrsize, cpu_mode));
		glasize = 8;
	} else {
		KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address "
		    "size %d for cpu mode %d", __func__, addrsize, cpu_mode));
		glasize = 4;
		/*
		 * If the segment selector is loaded with a NULL selector
		 * then the descriptor is unusable and attempting to use
		 * it results in a #GP(0).
		 */
		if (SEG_DESC_UNUSABLE(desc->access))
			return (-1);

		/*
		 * The processor generates a #NP exception when a segment
		 * register is loaded with a selector that points to a
		 * descriptor that is not present. If this was the case then
		 * it would have been checked before the VM-exit.
		 */
		KASSERT(SEG_DESC_PRESENT(desc->access),
		    ("segment %d not present: %#x", seg, desc->access));

		/*
		 * The descriptor type must indicate a code/data segment.
		 */
		type = SEG_DESC_TYPE(desc->access);
		KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
		    "descriptor type %#x", seg, type));

		if (prot & PROT_READ) {
			/* #GP on a read access to a exec-only code segment */
			if ((type & 0xA) == 0x8)
				return (-1);
		}

		if (prot & PROT_WRITE) {
			/*
			 * #GP on a write access to a code segment or a
			 * read-only data segment.
			 */
			if (type & 0x8)			/* code segment */
				return (-1);

			if ((type & 0xA) == 0)		/* read-only data seg */
				return (-1);
		}

		/*
		 * 'desc->limit' is fully expanded taking granularity into
		 * account.
		 */
		if ((type & 0xC) == 0x4) {
			/* expand-down data segment */
			low_limit = desc->limit + 1;
			high_limit = SEG_DESC_DEF32(desc->access) ?
			    0xffffffff : 0xffff;
		} else {
			/* code segment or expand-up data segment */
			low_limit = 0;
			high_limit = desc->limit;
		}

		/*
		 * Check every byte of the access against the segment
		 * limits, wrapping the offset at the address-size mask.
		 */
		while (length > 0) {
			offset &= vie_size2mask(addrsize);
			if (offset < low_limit || offset > high_limit)
				return (-1);
			offset++;
			length--;
		}
	}

	/*
	 * In 64-bit mode all segments except %fs and %gs have a segment
	 * base address of 0.
	 */
	if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
	    seg != VM_REG_GUEST_GS) {
		segbase = 0;
	} else {
		segbase = desc->base;
	}

	/*
	 * Truncate 'firstoff' to the effective address size before adding
	 * it to the segment base.
	 */
	firstoff &= vie_size2mask(addrsize);
	*gla = (segbase + firstoff) & vie_size2mask(glasize);
	return (0);
}
1103268976Sjhb
1104243640Sneel#ifdef _KERNEL
1105256072Sneelvoid
1106240941Sneelvie_init(struct vie *vie)
1107240941Sneel{
1108240941Sneel
1109240941Sneel	bzero(vie, sizeof(struct vie));
1110240941Sneel
1111240941Sneel	vie->base_register = VM_REG_LAST;
1112240941Sneel	vie->index_register = VM_REG_LAST;
1113240941Sneel}
1114240941Sneel
1115240941Sneelstatic int
1116268976Sjhbpf_error_code(int usermode, int prot, int rsvd, uint64_t pte)
1117240941Sneel{
1118268976Sjhb	int error_code = 0;
1119268976Sjhb
1120268976Sjhb	if (pte & PG_V)
1121268976Sjhb		error_code |= PGEX_P;
1122268976Sjhb	if (prot & VM_PROT_WRITE)
1123268976Sjhb		error_code |= PGEX_W;
1124268976Sjhb	if (usermode)
1125268976Sjhb		error_code |= PGEX_U;
1126268976Sjhb	if (rsvd)
1127268976Sjhb		error_code |= PGEX_RSV;
1128268976Sjhb	if (prot & VM_PROT_EXECUTE)
1129268976Sjhb		error_code |= PGEX_I;
1130268976Sjhb
1131268976Sjhb	return (error_code);
1132268976Sjhb}
1133268976Sjhb
1134268976Sjhbstatic void
1135268976Sjhbptp_release(void **cookie)
1136268976Sjhb{
1137268976Sjhb	if (*cookie != NULL) {
1138268976Sjhb		vm_gpa_release(*cookie);
1139268976Sjhb		*cookie = NULL;
1140268976Sjhb	}
1141268976Sjhb}
1142268976Sjhb
1143268976Sjhbstatic void *
1144268976Sjhbptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
1145268976Sjhb{
1146268976Sjhb	void *ptr;
1147268976Sjhb
1148268976Sjhb	ptp_release(cookie);
1149268976Sjhb	ptr = vm_gpa_hold(vm, ptpphys, len, VM_PROT_RW, cookie);
1150268976Sjhb	return (ptr);
1151268976Sjhb}
1152268976Sjhb
/*
 * Translate guest linear address 'gla' to a guest physical address by
 * walking the guest's page tables in software, emulating the hardware
 * MMU (including setting the accessed and dirty bits atomically).
 *
 * Returns 0 on success with the translation in '*gpa', 1 if a fault
 * (#GP or #PF) was injected into the guest, and -1 if a page table
 * page could not be mapped.
 */
int
vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa)
{
	int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
	u_int retries;
	uint64_t *ptpbase, ptpphys, pte, pgsize;
	uint32_t *ptpbase32, pte32;
	void *cookie;

	usermode = (paging->cpl == 3 ? 1 : 0);
	writable = prot & VM_PROT_WRITE;
	cookie = NULL;
	retval = 0;
	retries = 0;
restart:
	ptpphys = paging->cr3;		/* root of the page tables */
	ptp_release(&cookie);
	/* Be polite if we keep losing the A/D-bit cmpset races below. */
	if (retries++ > 0)
		maybe_yield();

	if (vie_canonical_check(paging->cpu_mode, gla)) {
		/*
		 * XXX assuming a non-stack reference otherwise a stack fault
		 * should be generated.
		 */
		vm_inject_gp(vm, vcpuid);
		goto fault;
	}

	if (paging->paging_mode == PAGING_MODE_FLAT) {
		/* Paging disabled: linear and physical are identical. */
		*gpa = gla;
		goto done;
	}

	if (paging->paging_mode == PAGING_MODE_32) {
		/* Non-PAE 32-bit paging: two levels of 32-bit PTEs. */
		nlevels = 2;
		while (--nlevels >= 0) {
			/* Zero out the lower 12 bits. */
			ptpphys &= ~0xfff;

			ptpbase32 = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);

			if (ptpbase32 == NULL)
				goto error;

			ptpshift = PAGE_SHIFT + nlevels * 10;
			ptpindex = (gla >> ptpshift) & 0x3FF;
			pgsize = 1UL << ptpshift;

			pte32 = ptpbase32[ptpindex];

			/* Not present, or a privilege/write violation? */
			if ((pte32 & PG_V) == 0 ||
			    (usermode && (pte32 & PG_U) == 0) ||
			    (writable && (pte32 & PG_RW) == 0)) {
				pfcode = pf_error_code(usermode, prot, 0,
				    pte32);
				vm_inject_pf(vm, vcpuid, pfcode, gla);
				goto fault;
			}

			/*
			 * Emulate the x86 MMU's management of the accessed
			 * and dirty flags. While the accessed flag is set
			 * at every level of the page table, the dirty flag
			 * is only set at the last level providing the guest
			 * physical address.
			 */
			if ((pte32 & PG_A) == 0) {
				if (atomic_cmpset_32(&ptpbase32[ptpindex],
				    pte32, pte32 | PG_A) == 0) {
					goto restart;
				}
			}

			/* XXX must be ignored if CR4.PSE=0 */
			if (nlevels > 0 && (pte32 & PG_PS) != 0)
				break;

			ptpphys = pte32;
		}

		/* Set the dirty bit in the page table entry if necessary */
		if (writable && (pte32 & PG_M) == 0) {
			if (atomic_cmpset_32(&ptpbase32[ptpindex],
			    pte32, pte32 | PG_M) == 0) {
				goto restart;
			}
		}

		/* Zero out the lower 'ptpshift' bits */
		pte32 >>= ptpshift; pte32 <<= ptpshift;
		*gpa = pte32 | (gla & (pgsize - 1));
		goto done;
	}

	if (paging->paging_mode == PAGING_MODE_PAE) {
		/*
		 * PAE: walk the 4-entry PDPT here, then fall into the
		 * common 64-bit walk below for the remaining two levels.
		 */
		/* Zero out the lower 5 bits and the upper 32 bits */
		ptpphys &= 0xffffffe0UL;

		ptpbase = ptp_hold(vm, ptpphys, sizeof(*ptpbase) * 4, &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpindex = (gla >> 30) & 0x3;

		pte = ptpbase[ptpindex];

		if ((pte & PG_V) == 0) {
			pfcode = pf_error_code(usermode, prot, 0, pte);
			vm_inject_pf(vm, vcpuid, pfcode, gla);
			goto fault;
		}

		ptpphys = pte;

		nlevels = 2;
	} else
		nlevels = 4;
	/* Common walk for long mode (4 levels) and PAE (2 remaining). */
	while (--nlevels >= 0) {
		/* Zero out the lower 12 bits and the upper 12 bits */
		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;

		ptpbase = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpshift = PAGE_SHIFT + nlevels * 9;
		ptpindex = (gla >> ptpshift) & 0x1FF;
		pgsize = 1UL << ptpshift;

		pte = ptpbase[ptpindex];

		/* Not present, or a privilege/write violation? */
		if ((pte & PG_V) == 0 ||
		    (usermode && (pte & PG_U) == 0) ||
		    (writable && (pte & PG_RW) == 0)) {
			pfcode = pf_error_code(usermode, prot, 0, pte);
			vm_inject_pf(vm, vcpuid, pfcode, gla);
			goto fault;
		}

		/* Set the accessed bit in the page table entry */
		if ((pte & PG_A) == 0) {
			if (atomic_cmpset_64(&ptpbase[ptpindex],
			    pte, pte | PG_A) == 0) {
				goto restart;
			}
		}

		if (nlevels > 0 && (pte & PG_PS) != 0) {
			/* Superpages larger than 1GB are reserved. */
			if (pgsize > 1 * GB) {
				pfcode = pf_error_code(usermode, prot, 1, pte);
				vm_inject_pf(vm, vcpuid, pfcode, gla);
				goto fault;
			}
			break;
		}

		ptpphys = pte;
	}

	/* Set the dirty bit in the page table entry if necessary */
	if (writable && (pte & PG_M) == 0) {
		if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0)
			goto restart;
	}

	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
	*gpa = pte | (gla & (pgsize - 1));
done:
	ptp_release(&cookie);
	return (retval);
error:
	retval = -1;
	goto done;
fault:
	retval = 1;
	goto done;
}
1333240941Sneel
1334240978Sneelint
1335270159Sgrehanvmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
1336268976Sjhb    uint64_t rip, int inst_length, struct vie *vie)
1337240941Sneel{
1338270159Sgrehan	struct vm_copyinfo copyinfo[2];
1339270159Sgrehan	int error, prot;
1340240941Sneel
1341240978Sneel	if (inst_length > VIE_INST_SIZE)
1342240978Sneel		panic("vmm_fetch_instruction: invalid length %d", inst_length);
1343240978Sneel
1344270159Sgrehan	prot = PROT_READ | PROT_EXEC;
1345270159Sgrehan	error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot,
1346270159Sgrehan	    copyinfo, nitems(copyinfo));
1347270159Sgrehan	if (error == 0) {
1348270159Sgrehan		vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
1349270159Sgrehan		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
1350270159Sgrehan		vie->num_valid = inst_length;
1351240941Sneel	}
1352270159Sgrehan	return (error);
1353240941Sneel}
1354240941Sneel
1355240941Sneelstatic int
1356240941Sneelvie_peek(struct vie *vie, uint8_t *x)
1357240941Sneel{
1358243640Sneel
1359240941Sneel	if (vie->num_processed < vie->num_valid) {
1360240941Sneel		*x = vie->inst[vie->num_processed];
1361240941Sneel		return (0);
1362240941Sneel	} else
1363240941Sneel		return (-1);
1364240941Sneel}
1365240941Sneel
1366240941Sneelstatic void
1367240941Sneelvie_advance(struct vie *vie)
1368240941Sneel{
1369240941Sneel
1370240941Sneel	vie->num_processed++;
1371240941Sneel}
1372240941Sneel
1373240941Sneelstatic int
1374270159Sgrehandecode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
1375240941Sneel{
1376240941Sneel	uint8_t x;
1377240941Sneel
1378270159Sgrehan	while (1) {
1379270159Sgrehan		if (vie_peek(vie, &x))
1380270159Sgrehan			return (-1);
1381240941Sneel
1382270159Sgrehan		if (x == 0x66)
1383270159Sgrehan			vie->opsize_override = 1;
1384270159Sgrehan		else if (x == 0x67)
1385270159Sgrehan			vie->addrsize_override = 1;
1386270159Sgrehan		else
1387270159Sgrehan			break;
1388270159Sgrehan
1389270159Sgrehan		vie_advance(vie);
1390270159Sgrehan	}
1391270159Sgrehan
1392270159Sgrehan	/*
1393270159Sgrehan	 * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2:
1394270159Sgrehan	 * - Only one REX prefix is allowed per instruction.
1395270159Sgrehan	 * - The REX prefix must immediately precede the opcode byte or the
1396270159Sgrehan	 *   escape opcode byte.
1397270159Sgrehan	 * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3)
1398270159Sgrehan	 *   the mandatory prefix must come before the REX prefix.
1399270159Sgrehan	 */
1400270159Sgrehan	if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) {
1401246108Sneel		vie->rex_present = 1;
1402240941Sneel		vie->rex_w = x & 0x8 ? 1 : 0;
1403240941Sneel		vie->rex_r = x & 0x4 ? 1 : 0;
1404240941Sneel		vie->rex_x = x & 0x2 ? 1 : 0;
1405240941Sneel		vie->rex_b = x & 0x1 ? 1 : 0;
1406240941Sneel		vie_advance(vie);
1407240941Sneel	}
1408240941Sneel
1409270159Sgrehan	/*
1410270159Sgrehan	 * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1
1411270159Sgrehan	 */
1412270159Sgrehan	if (cpu_mode == CPU_MODE_64BIT) {
1413270159Sgrehan		/*
1414270159Sgrehan		 * Default address size is 64-bits and default operand size
1415270159Sgrehan		 * is 32-bits.
1416270159Sgrehan		 */
1417270159Sgrehan		vie->addrsize = vie->addrsize_override ? 4 : 8;
1418270159Sgrehan		if (vie->rex_w)
1419270159Sgrehan			vie->opsize = 8;
1420270159Sgrehan		else if (vie->opsize_override)
1421270159Sgrehan			vie->opsize = 2;
1422270159Sgrehan		else
1423270159Sgrehan			vie->opsize = 4;
1424270159Sgrehan	} else if (cs_d) {
1425270159Sgrehan		/* Default address and operand sizes are 32-bits */
1426270159Sgrehan		vie->addrsize = vie->addrsize_override ? 2 : 4;
1427270159Sgrehan		vie->opsize = vie->opsize_override ? 2 : 4;
1428270159Sgrehan	} else {
1429270159Sgrehan		/* Default address and operand sizes are 16-bits */
1430270159Sgrehan		vie->addrsize = vie->addrsize_override ? 4 : 2;
1431270159Sgrehan		vie->opsize = vie->opsize_override ? 4 : 2;
1432270159Sgrehan	}
1433240941Sneel	return (0);
1434240941Sneel}
1435240941Sneel
1436240941Sneelstatic int
1437267396Sjhbdecode_two_byte_opcode(struct vie *vie)
1438267396Sjhb{
1439267396Sjhb	uint8_t x;
1440267396Sjhb
1441267396Sjhb	if (vie_peek(vie, &x))
1442267396Sjhb		return (-1);
1443267396Sjhb
1444267396Sjhb	vie->op = two_byte_opcodes[x];
1445267396Sjhb
1446267396Sjhb	if (vie->op.op_type == VIE_OP_TYPE_NONE)
1447267396Sjhb		return (-1);
1448267396Sjhb
1449267396Sjhb	vie_advance(vie);
1450267396Sjhb	return (0);
1451267396Sjhb}
1452267396Sjhb
1453267396Sjhbstatic int
1454240941Sneeldecode_opcode(struct vie *vie)
1455240941Sneel{
1456240941Sneel	uint8_t x;
1457240941Sneel
1458240941Sneel	if (vie_peek(vie, &x))
1459240941Sneel		return (-1);
1460240941Sneel
1461243640Sneel	vie->op = one_byte_opcodes[x];
1462240941Sneel
1463243640Sneel	if (vie->op.op_type == VIE_OP_TYPE_NONE)
1464243640Sneel		return (-1);
1465243640Sneel
1466240941Sneel	vie_advance(vie);
1467267396Sjhb
1468267396Sjhb	if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE)
1469267396Sjhb		return (decode_two_byte_opcode(vie));
1470267396Sjhb
1471243640Sneel	return (0);
1472240941Sneel}
1473240941Sneel
/*
 * Decode the ModR/M byte: extract the mod/reg/rm fields, determine the
 * base register and the number of trailing displacement bytes. Only
 * memory-operand addressing forms are accepted since this decoder is
 * used on the MMIO emulation path.
 *
 * Returns 0 on success, -1 on a malformed or unsupported encoding.
 */
static int
decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
{
	uint8_t x;

	/* Real-mode instruction decoding is not supported here. */
	if (cpu_mode == CPU_MODE_REAL)
		return (-1);

	/* Some opcodes (e.g. moffset MOVs) carry no ModR/M byte at all. */
	if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	/* ModR/M layout: mod[7:6], reg[5:3], rm[2:0]. */
	vie->mod = (x >> 6) & 0x3;
	vie->rm =  (x >> 0) & 0x7;
	vie->reg = (x >> 3) & 0x7;

	/*
	 * A direct addressing mode makes no sense in the context of an EPT
	 * fault. There has to be a memory access involved to cause the
	 * EPT fault.
	 */
	if (vie->mod == VIE_MOD_DIRECT)
		return (-1);

	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
		/*
		 * Table 2-5: Special Cases of REX Encodings
		 *
		 * mod=0, r/m=5 is used in the compatibility mode to
		 * indicate a disp32 without a base register.
		 *
		 * mod!=3, r/m=4 is used in the compatibility mode to
		 * indicate that the SIB byte is present.
		 *
		 * The 'b' bit in the REX prefix is don't care in
		 * this case.
		 */
	} else {
		vie->rm |= (vie->rex_b << 3);
	}

	vie->reg |= (vie->rex_r << 3);

	/* SIB */
	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
		goto done;

	vie->base_register = gpr_map[vie->rm];

	/* The 'mod' field selects the displacement size that follows. */
	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	case VIE_MOD_INDIRECT:
		if (vie->rm == VIE_RM_DISP32) {
			vie->disp_bytes = 4;
			/*
			 * Table 2-7. RIP-Relative Addressing
			 *
			 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
			 * whereas in compatibility mode it just implies disp32.
			 */

			if (cpu_mode == CPU_MODE_64BIT)
				vie->base_register = VM_REG_GUEST_RIP;
			else
				vie->base_register = VM_REG_LAST;
		}
		break;
	}

done:
	vie_advance(vie);

	return (0);
}
1556240941Sneel
/*
 * Decode the SIB (scale-index-base) byte, if the ModR/M byte indicated
 * that one follows. Determines the base and index registers, the scale
 * factor and the displacement size.
 *
 * Returns 0 on success (including when no SIB byte is present), -1 if
 * the instruction stream ran out.
 */
static int
decode_sib(struct vie *vie)
{
	uint8_t x;

	/* Proceed only if SIB byte is present */
	if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	/* De-construct the SIB byte */
	vie->ss = (x >> 6) & 0x3;
	vie->index = (x >> 3) & 0x7;
	vie->base = (x >> 0) & 0x7;

	/* Apply the REX prefix modifiers */
	vie->index |= vie->rex_x << 3;
	vie->base |= vie->rex_b << 3;

	/* The 'mod' field selects the displacement size that follows. */
	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	}

	if (vie->mod == VIE_MOD_INDIRECT &&
	    (vie->base == 5 || vie->base == 13)) {
		/*
		 * Special case when base register is unused if mod = 0
		 * and base = %rbp or %r13.
		 *
		 * Documented in:
		 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
		 * Table 2-5: Special Cases of REX Encodings
		 */
		vie->disp_bytes = 4;
	} else {
		vie->base_register = gpr_map[vie->base];
	}

	/*
	 * All encodings of 'index' are valid except for %rsp (4).
	 *
	 * Documented in:
	 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
	 * Table 2-5: Special Cases of REX Encodings
	 */
	if (vie->index != 4)
		vie->index_register = gpr_map[vie->index];

	/* 'scale' makes sense only in the context of an index register */
	if (vie->index_register < VM_REG_LAST)
		vie->scale = 1 << vie->ss;

	vie_advance(vie);

	return (0);
}
1620243640Sneel
1621243640Sneelstatic int
1622240941Sneeldecode_displacement(struct vie *vie)
1623240941Sneel{
1624240941Sneel	int n, i;
1625240941Sneel	uint8_t x;
1626240941Sneel
1627240941Sneel	union {
1628240941Sneel		char	buf[4];
1629240941Sneel		int8_t	signed8;
1630240941Sneel		int32_t	signed32;
1631240941Sneel	} u;
1632240941Sneel
1633240941Sneel	if ((n = vie->disp_bytes) == 0)
1634240941Sneel		return (0);
1635240941Sneel
1636240941Sneel	if (n != 1 && n != 4)
1637240941Sneel		panic("decode_displacement: invalid disp_bytes %d", n);
1638240941Sneel
1639240941Sneel	for (i = 0; i < n; i++) {
1640240941Sneel		if (vie_peek(vie, &x))
1641240941Sneel			return (-1);
1642240941Sneel
1643240941Sneel		u.buf[i] = x;
1644240941Sneel		vie_advance(vie);
1645240941Sneel	}
1646240941Sneel
1647240941Sneel	if (n == 1)
1648240941Sneel		vie->displacement = u.signed8;		/* sign-extended */
1649240941Sneel	else
1650240941Sneel		vie->displacement = u.signed32;		/* sign-extended */
1651240941Sneel
1652240941Sneel	return (0);
1653240941Sneel}
1654240941Sneel
1655240941Sneelstatic int
1656240941Sneeldecode_immediate(struct vie *vie)
1657240941Sneel{
1658240941Sneel	int i, n;
1659240941Sneel	uint8_t x;
1660240941Sneel	union {
1661240941Sneel		char	buf[4];
1662243640Sneel		int8_t	signed8;
1663270159Sgrehan		int16_t	signed16;
1664240941Sneel		int32_t	signed32;
1665240941Sneel	} u;
1666240941Sneel
1667255638Sneel	/* Figure out immediate operand size (if any) */
1668270159Sgrehan	if (vie->op.op_flags & VIE_OP_F_IMM) {
1669270159Sgrehan		/*
1670270159Sgrehan		 * Section 2.2.1.5 "Immediates", Intel SDM:
1671270159Sgrehan		 * In 64-bit mode the typical size of immediate operands
1672270159Sgrehan		 * remains 32-bits. When the operand size if 64-bits, the
1673270159Sgrehan		 * processor sign-extends all immediates to 64-bits prior
1674270159Sgrehan		 * to their use.
1675270159Sgrehan		 */
1676270159Sgrehan		if (vie->opsize == 4 || vie->opsize == 8)
1677270159Sgrehan			vie->imm_bytes = 4;
1678270159Sgrehan		else
1679270159Sgrehan			vie->imm_bytes = 2;
1680270159Sgrehan	} else if (vie->op.op_flags & VIE_OP_F_IMM8) {
1681255638Sneel		vie->imm_bytes = 1;
1682270159Sgrehan	}
1683255638Sneel
1684240941Sneel	if ((n = vie->imm_bytes) == 0)
1685240941Sneel		return (0);
1686240941Sneel
1687270159Sgrehan	KASSERT(n == 1 || n == 2 || n == 4,
1688270159Sgrehan	    ("%s: invalid number of immediate bytes: %d", __func__, n));
1689240941Sneel
1690240941Sneel	for (i = 0; i < n; i++) {
1691240941Sneel		if (vie_peek(vie, &x))
1692240941Sneel			return (-1);
1693240941Sneel
1694240941Sneel		u.buf[i] = x;
1695240941Sneel		vie_advance(vie);
1696240941Sneel	}
1697270159Sgrehan
1698270159Sgrehan	/* sign-extend the immediate value before use */
1699243640Sneel	if (n == 1)
1700270159Sgrehan		vie->immediate = u.signed8;
1701270159Sgrehan	else if (n == 2)
1702270159Sgrehan		vie->immediate = u.signed16;
1703243640Sneel	else
1704270159Sgrehan		vie->immediate = u.signed32;
1705240941Sneel
1706240941Sneel	return (0);
1707240941Sneel}
1708240941Sneel
1709270159Sgrehanstatic int
1710270159Sgrehandecode_moffset(struct vie *vie)
1711270159Sgrehan{
1712270159Sgrehan	int i, n;
1713270159Sgrehan	uint8_t x;
1714270159Sgrehan	union {
1715270159Sgrehan		char	buf[8];
1716270159Sgrehan		uint64_t u64;
1717270159Sgrehan	} u;
1718270159Sgrehan
1719270159Sgrehan	if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0)
1720270159Sgrehan		return (0);
1721270159Sgrehan
1722270159Sgrehan	/*
1723270159Sgrehan	 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
1724270159Sgrehan	 * The memory offset size follows the address-size of the instruction.
1725270159Sgrehan	 */
1726270159Sgrehan	n = vie->addrsize;
1727270159Sgrehan	KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n));
1728270159Sgrehan
1729270159Sgrehan	u.u64 = 0;
1730270159Sgrehan	for (i = 0; i < n; i++) {
1731270159Sgrehan		if (vie_peek(vie, &x))
1732270159Sgrehan			return (-1);
1733270159Sgrehan
1734270159Sgrehan		u.buf[i] = x;
1735270159Sgrehan		vie_advance(vie);
1736270159Sgrehan	}
1737270159Sgrehan	vie->displacement = u.u64;
1738270159Sgrehan	return (0);
1739270159Sgrehan}
1740270159Sgrehan
1741243640Sneel/*
1742252641Sneel * Verify that all the bytes in the instruction buffer were consumed.
1743252641Sneel */
1744252641Sneelstatic int
1745252641Sneelverify_inst_length(struct vie *vie)
1746252641Sneel{
1747252641Sneel
1748252641Sneel	if (vie->num_processed == vie->num_valid)
1749252641Sneel		return (0);
1750252641Sneel	else
1751252641Sneel		return (-1);
1752252641Sneel}
1753252641Sneel
1754252641Sneel/*
1755243640Sneel * Verify that the 'guest linear address' provided as collateral of the nested
1756243640Sneel * page table fault matches with our instruction decoding.
1757243640Sneel */
1758243640Sneelstatic int
1759243640Sneelverify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
1760243640Sneel{
1761243640Sneel	int error;
1762270159Sgrehan	uint64_t base, idx, gla2;
1763243640Sneel
1764248855Sneel	/* Skip 'gla' verification */
1765248855Sneel	if (gla == VIE_INVALID_GLA)
1766248855Sneel		return (0);
1767248855Sneel
1768243640Sneel	base = 0;
1769243640Sneel	if (vie->base_register != VM_REG_LAST) {
1770243640Sneel		error = vm_get_register(vm, cpuid, vie->base_register, &base);
1771243640Sneel		if (error) {
1772243640Sneel			printf("verify_gla: error %d getting base reg %d\n",
1773243640Sneel				error, vie->base_register);
1774243640Sneel			return (-1);
1775243640Sneel		}
1776249879Sgrehan
1777249879Sgrehan		/*
1778249879Sgrehan		 * RIP-relative addressing starts from the following
1779249879Sgrehan		 * instruction
1780249879Sgrehan		 */
1781249879Sgrehan		if (vie->base_register == VM_REG_GUEST_RIP)
1782249879Sgrehan			base += vie->num_valid;
1783243640Sneel	}
1784243640Sneel
1785243640Sneel	idx = 0;
1786243640Sneel	if (vie->index_register != VM_REG_LAST) {
1787243640Sneel		error = vm_get_register(vm, cpuid, vie->index_register, &idx);
1788243640Sneel		if (error) {
1789243640Sneel			printf("verify_gla: error %d getting index reg %d\n",
1790243640Sneel				error, vie->index_register);
1791243640Sneel			return (-1);
1792243640Sneel		}
1793243640Sneel	}
1794243640Sneel
1795270159Sgrehan	/* XXX assuming that the base address of the segment is 0 */
1796270159Sgrehan	gla2 = base + vie->scale * idx + vie->displacement;
1797270159Sgrehan	gla2 &= size2mask[vie->addrsize];
1798270159Sgrehan	if (gla != gla2) {
1799243640Sneel		printf("verify_gla mismatch: "
1800243640Sneel		       "base(0x%0lx), scale(%d), index(0x%0lx), "
1801270159Sgrehan		       "disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n",
1802270159Sgrehan		       base, vie->scale, idx, vie->displacement, gla, gla2);
1803243640Sneel		return (-1);
1804243640Sneel	}
1805243640Sneel
1806243640Sneel	return (0);
1807243640Sneel}
1808243640Sneel
1809240941Sneelint
1810267399Sjhbvmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
1811270159Sgrehan		       enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
1812240941Sneel{
1813243640Sneel
1814270159Sgrehan	if (decode_prefixes(vie, cpu_mode, cs_d))
1815270159Sgrehan		return (-1);
1816240941Sneel
1817240941Sneel	if (decode_opcode(vie))
1818240941Sneel		return (-1);
1819240941Sneel
1820267399Sjhb	if (decode_modrm(vie, cpu_mode))
1821240941Sneel		return (-1);
1822240941Sneel
1823243640Sneel	if (decode_sib(vie))
1824243640Sneel		return (-1);
1825243640Sneel
1826240941Sneel	if (decode_displacement(vie))
1827240941Sneel		return (-1);
1828270159Sgrehan
1829240941Sneel	if (decode_immediate(vie))
1830240941Sneel		return (-1);
1831240941Sneel
1832270159Sgrehan	if (decode_moffset(vie))
1833270159Sgrehan		return (-1);
1834270159Sgrehan
1835252641Sneel	if (verify_inst_length(vie))
1836252641Sneel		return (-1);
1837252641Sneel
1838243640Sneel	if (verify_gla(vm, cpuid, gla, vie))
1839243640Sneel		return (-1);
1840243640Sneel
1841243640Sneel	vie->decoded = 1;	/* success */
1842243640Sneel
1843240941Sneel	return (0);
1844240941Sneel}
1845243640Sneel#endif	/* _KERNEL */
1846