1240941Sneel/*-
2240941Sneel * Copyright (c) 2012 Sandvine, Inc.
3240941Sneel * Copyright (c) 2012 NetApp, Inc.
4240941Sneel * All rights reserved.
5240941Sneel *
6240941Sneel * Redistribution and use in source and binary forms, with or without
7240941Sneel * modification, are permitted provided that the following conditions
8240941Sneel * are met:
9240941Sneel * 1. Redistributions of source code must retain the above copyright
10240941Sneel *    notice, this list of conditions and the following disclaimer.
11240941Sneel * 2. Redistributions in binary form must reproduce the above copyright
12240941Sneel *    notice, this list of conditions and the following disclaimer in the
13240941Sneel *    documentation and/or other materials provided with the distribution.
14240941Sneel *
15250175Semaste * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16240941Sneel * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17240941Sneel * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18250175Semaste * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19240941Sneel * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20240941Sneel * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21240941Sneel * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22240941Sneel * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23240941Sneel * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24240941Sneel * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25240941Sneel * SUCH DAMAGE.
26240941Sneel *
27240941Sneel * $FreeBSD$
28240941Sneel */
29240941Sneel
30240941Sneel#include <sys/cdefs.h>
31240941Sneel__FBSDID("$FreeBSD$");
32240941Sneel
33243640Sneel#ifdef _KERNEL
34240941Sneel#include <sys/param.h>
35240941Sneel#include <sys/pcpu.h>
36240941Sneel#include <sys/systm.h>
37268976Sjhb#include <sys/proc.h>
38240941Sneel
39240941Sneel#include <vm/vm.h>
40240941Sneel#include <vm/pmap.h>
41240941Sneel
42240941Sneel#include <machine/vmparam.h>
43240941Sneel#include <machine/vmm.h>
44243640Sneel#else	/* !_KERNEL */
45243640Sneel#include <sys/types.h>
46243640Sneel#include <sys/errno.h>
47270159Sgrehan#include <sys/_iovec.h>
48240941Sneel
49243640Sneel#include <machine/vmm.h>
50240941Sneel
51268976Sjhb#include <assert.h>
52243640Sneel#include <vmmapi.h>
53268976Sjhb#define	KASSERT(exp,msg)	assert((exp))
54243640Sneel#endif	/* _KERNEL */
55240941Sneel
56268976Sjhb#include <machine/vmm_instruction_emul.h>
57268976Sjhb#include <x86/psl.h>
58268976Sjhb#include <x86/specialreg.h>
59268976Sjhb
/*
 * struct vie_op.op_type
 *
 * Classifies a decoded opcode so the emulation dispatch can select the
 * appropriate emulate_*() handler.  Unlisted table entries default to
 * VIE_OP_TYPE_NONE (zero).
 */
enum {
	VIE_OP_TYPE_NONE = 0,
	VIE_OP_TYPE_MOV,	/* register/memory/immediate moves */
	VIE_OP_TYPE_MOVSX,	/* move with sign extension */
	VIE_OP_TYPE_MOVZX,	/* move with zero extension */
	VIE_OP_TYPE_AND,
	VIE_OP_TYPE_OR,
	VIE_OP_TYPE_SUB,
	VIE_OP_TYPE_TWO_BYTE,	/* 0x0F escape into two_byte_opcodes[] */
	VIE_OP_TYPE_PUSH,
	VIE_OP_TYPE_CMP,
	VIE_OP_TYPE_POP,
	VIE_OP_TYPE_MOVS,	/* string move (MOVS/MOVSB) */
	VIE_OP_TYPE_GROUP1,	/* Group 1 extended opcodes (0x80/0x81/0x83) */
	VIE_OP_TYPE_STOS,	/* string store (STOS/STOSB) */
	VIE_OP_TYPE_BITTEST,	/* bit test operations (0F BA group) */
	VIE_OP_TYPE_LAST
};
79243640Sneel
/* struct vie_op.op_flags: per-opcode decode attributes */
#define	VIE_OP_F_IMM		(1 << 0)  /* 16/32-bit immediate operand */
#define	VIE_OP_F_IMM8		(1 << 1)  /* 8-bit immediate operand */
#define	VIE_OP_F_MOFFSET	(1 << 2)  /* 16/32/64-bit immediate moffset */
#define	VIE_OP_F_NO_MODRM	(1 << 3)  /* instruction has no ModRM byte */
#define	VIE_OP_F_NO_GLA_VERIFICATION (1 << 4)  /* skip linear addr check */
86243640Sneel
/*
 * Dispatch table for two-byte opcodes, indexed by the byte that follows
 * the 0x0F escape.  Entries not listed decode to VIE_OP_TYPE_NONE.
 */
static const struct vie_op two_byte_opcodes[256] = {
	[0xB6] = {
		/* MOVZX reg, r/m8 */
		.op_byte = 0xB6,
		.op_type = VIE_OP_TYPE_MOVZX,
	},
	[0xB7] = {
		/* MOVZX reg, r/m16 */
		.op_byte = 0xB7,
		.op_type = VIE_OP_TYPE_MOVZX,
	},
	[0xBA] = {
		/* bit test extended opcode group, imm8 operand */
		.op_byte = 0xBA,
		.op_type = VIE_OP_TYPE_BITTEST,
		.op_flags = VIE_OP_F_IMM8,
	},
	[0xBE] = {
		/* MOVSX reg, r/m8 */
		.op_byte = 0xBE,
		.op_type = VIE_OP_TYPE_MOVSX,
	},
};
106267396Sjhb
107243640Sneelstatic const struct vie_op one_byte_opcodes[256] = {
108267396Sjhb	[0x0F] = {
109267396Sjhb		.op_byte = 0x0F,
110267396Sjhb		.op_type = VIE_OP_TYPE_TWO_BYTE
111267396Sjhb	},
112271659Sgrehan	[0x2B] = {
113271659Sgrehan		.op_byte = 0x2B,
114271659Sgrehan		.op_type = VIE_OP_TYPE_SUB,
115271659Sgrehan	},
116284900Sneel	[0x39] = {
117284900Sneel		.op_byte = 0x39,
118284900Sneel		.op_type = VIE_OP_TYPE_CMP,
119284900Sneel	},
120270159Sgrehan	[0x3B] = {
121270159Sgrehan		.op_byte = 0x3B,
122270159Sgrehan		.op_type = VIE_OP_TYPE_CMP,
123270159Sgrehan	},
124246108Sneel	[0x88] = {
125246108Sneel		.op_byte = 0x88,
126246108Sneel		.op_type = VIE_OP_TYPE_MOV,
127246108Sneel	},
128243640Sneel	[0x89] = {
129243640Sneel		.op_byte = 0x89,
130243640Sneel		.op_type = VIE_OP_TYPE_MOV,
131243640Sneel	},
132254964Sneel	[0x8A] = {
133254964Sneel		.op_byte = 0x8A,
134254964Sneel		.op_type = VIE_OP_TYPE_MOV,
135254964Sneel	},
136243640Sneel	[0x8B] = {
137243640Sneel		.op_byte = 0x8B,
138243640Sneel		.op_type = VIE_OP_TYPE_MOV,
139243640Sneel	},
140270159Sgrehan	[0xA1] = {
141270159Sgrehan		.op_byte = 0xA1,
142270159Sgrehan		.op_type = VIE_OP_TYPE_MOV,
143270159Sgrehan		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
144270159Sgrehan	},
145270159Sgrehan	[0xA3] = {
146270159Sgrehan		.op_byte = 0xA3,
147270159Sgrehan		.op_type = VIE_OP_TYPE_MOV,
148270159Sgrehan		.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
149270159Sgrehan	},
150284894Sneel	[0xA4] = {
151284894Sneel		.op_byte = 0xA4,
152284894Sneel		.op_type = VIE_OP_TYPE_MOVS,
153284894Sneel		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
154284894Sneel	},
155284894Sneel	[0xA5] = {
156284894Sneel		.op_byte = 0xA5,
157284894Sneel		.op_type = VIE_OP_TYPE_MOVS,
158284894Sneel		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
159284894Sneel	},
160284899Sneel	[0xAA] = {
161284899Sneel		.op_byte = 0xAA,
162284899Sneel		.op_type = VIE_OP_TYPE_STOS,
163284899Sneel		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
164284899Sneel	},
165284899Sneel	[0xAB] = {
166284899Sneel		.op_byte = 0xAB,
167284899Sneel		.op_type = VIE_OP_TYPE_STOS,
168284899Sneel		.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
169284899Sneel	},
170270159Sgrehan	[0xC6] = {
171270159Sgrehan		/* XXX Group 11 extended opcode - not just MOV */
172270159Sgrehan		.op_byte = 0xC6,
173270159Sgrehan		.op_type = VIE_OP_TYPE_MOV,
174270159Sgrehan		.op_flags = VIE_OP_F_IMM8,
175270159Sgrehan	},
176243640Sneel	[0xC7] = {
177243640Sneel		.op_byte = 0xC7,
178243640Sneel		.op_type = VIE_OP_TYPE_MOV,
179243640Sneel		.op_flags = VIE_OP_F_IMM,
180243640Sneel	},
181243640Sneel	[0x23] = {
182243640Sneel		.op_byte = 0x23,
183243640Sneel		.op_type = VIE_OP_TYPE_AND,
184243667Sgrehan	},
185284900Sneel	[0x80] = {
186284900Sneel		/* Group 1 extended opcode */
187284900Sneel		.op_byte = 0x80,
188284900Sneel		.op_type = VIE_OP_TYPE_GROUP1,
189284900Sneel		.op_flags = VIE_OP_F_IMM8,
190284900Sneel	},
191243667Sgrehan	[0x81] = {
192284900Sneel		/* Group 1 extended opcode */
193243667Sgrehan		.op_byte = 0x81,
194284899Sneel		.op_type = VIE_OP_TYPE_GROUP1,
195243667Sgrehan		.op_flags = VIE_OP_F_IMM,
196253585Sneel	},
197253585Sneel	[0x83] = {
198284900Sneel		/* Group 1 extended opcode */
199253585Sneel		.op_byte = 0x83,
200284899Sneel		.op_type = VIE_OP_TYPE_GROUP1,
201253585Sneel		.op_flags = VIE_OP_F_IMM8,
202253585Sneel	},
203276349Sneel	[0x8F] = {
204276349Sneel		/* XXX Group 1A extended opcode - not just POP */
205276349Sneel		.op_byte = 0x8F,
206276349Sneel		.op_type = VIE_OP_TYPE_POP,
207276349Sneel	},
208270159Sgrehan	[0xFF] = {
209270159Sgrehan		/* XXX Group 5 extended opcode - not just PUSH */
210270159Sgrehan		.op_byte = 0xFF,
211270159Sgrehan		.op_type = VIE_OP_TYPE_PUSH,
212270159Sgrehan	}
213243640Sneel};
214243640Sneel
/* struct vie.mod: values of the ModRM 'mod' field */
#define	VIE_MOD_INDIRECT		0	/* [reg] */
#define	VIE_MOD_INDIRECT_DISP8		1	/* [reg] + disp8 */
#define	VIE_MOD_INDIRECT_DISP32		2	/* [reg] + disp32 */
#define	VIE_MOD_DIRECT			3	/* register operand */

/* struct vie.rm: special encodings of the ModRM 'rm' field */
#define	VIE_RM_SIB			4	/* SIB byte follows */
#define	VIE_RM_DISP32			5	/* disp32 (or RIP-relative) */

#define	GB				(1024 * 1024 * 1024)
226243640Sneel
/*
 * Map of the 4-bit x86 register encoding (ModRM reg/rm field, optionally
 * extended by a REX prefix bit) to VMM register identifiers.
 */
static enum vm_reg_name gpr_map[16] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15
};
245240941Sneel
/* Mask covering the low N bytes, indexed by operand size in bytes. */
static uint64_t size2mask[] = {
	[1] = 0xff,
	[2] = 0xffff,
	[4] = 0xffffffff,
	[8] = 0xffffffffffffffff,
};
252243640Sneel
253243640Sneelstatic int
254243640Sneelvie_read_register(void *vm, int vcpuid, enum vm_reg_name reg, uint64_t *rval)
255243640Sneel{
256243640Sneel	int error;
257243640Sneel
258243640Sneel	error = vm_get_register(vm, vcpuid, reg, rval);
259243640Sneel
260243640Sneel	return (error);
261243640Sneel}
262243640Sneel
263270159Sgrehanstatic void
264270159Sgrehanvie_calc_bytereg(struct vie *vie, enum vm_reg_name *reg, int *lhbr)
265246108Sneel{
266270159Sgrehan	*lhbr = 0;
267270159Sgrehan	*reg = gpr_map[vie->reg];
268246108Sneel
269246108Sneel	/*
270270159Sgrehan	 * 64-bit mode imposes limitations on accessing legacy high byte
271270159Sgrehan	 * registers (lhbr).
272246108Sneel	 *
273246108Sneel	 * The legacy high-byte registers cannot be addressed if the REX
274246108Sneel	 * prefix is present. In this case the values 4, 5, 6 and 7 of the
275246108Sneel	 * 'ModRM:reg' field address %spl, %bpl, %sil and %dil respectively.
276246108Sneel	 *
277246108Sneel	 * If the REX prefix is not present then the values 4, 5, 6 and 7
278246108Sneel	 * of the 'ModRM:reg' field address the legacy high-byte registers,
279246108Sneel	 * %ah, %ch, %dh and %bh respectively.
280246108Sneel	 */
281246108Sneel	if (!vie->rex_present) {
282246108Sneel		if (vie->reg & 0x4) {
283270159Sgrehan			*lhbr = 1;
284270159Sgrehan			*reg = gpr_map[vie->reg & 0x3];
285246108Sneel		}
286246108Sneel	}
287270159Sgrehan}
288246108Sneel
289270159Sgrehanstatic int
290270159Sgrehanvie_read_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t *rval)
291270159Sgrehan{
292270159Sgrehan	uint64_t val;
293270159Sgrehan	int error, lhbr;
294270159Sgrehan	enum vm_reg_name reg;
295270159Sgrehan
296270159Sgrehan	vie_calc_bytereg(vie, &reg, &lhbr);
297246108Sneel	error = vm_get_register(vm, vcpuid, reg, &val);
298270159Sgrehan
299270159Sgrehan	/*
300270159Sgrehan	 * To obtain the value of a legacy high byte register shift the
301270159Sgrehan	 * base register right by 8 bits (%ah = %rax >> 8).
302270159Sgrehan	 */
303270159Sgrehan	if (lhbr)
304270159Sgrehan		*rval = val >> 8;
305270159Sgrehan	else
306270159Sgrehan		*rval = val;
307246108Sneel	return (error);
308246108Sneel}
309246108Sneel
310270159Sgrehanstatic int
311270159Sgrehanvie_write_bytereg(void *vm, int vcpuid, struct vie *vie, uint8_t byte)
312270159Sgrehan{
313270159Sgrehan	uint64_t origval, val, mask;
314270159Sgrehan	int error, lhbr;
315270159Sgrehan	enum vm_reg_name reg;
316270159Sgrehan
317270159Sgrehan	vie_calc_bytereg(vie, &reg, &lhbr);
318270159Sgrehan	error = vm_get_register(vm, vcpuid, reg, &origval);
319270159Sgrehan	if (error == 0) {
320270159Sgrehan		val = byte;
321270159Sgrehan		mask = 0xff;
322270159Sgrehan		if (lhbr) {
323270159Sgrehan			/*
324270159Sgrehan			 * Shift left by 8 to store 'byte' in a legacy high
325270159Sgrehan			 * byte register.
326270159Sgrehan			 */
327270159Sgrehan			val <<= 8;
328270159Sgrehan			mask <<= 8;
329270159Sgrehan		}
330270159Sgrehan		val |= origval & ~mask;
331270159Sgrehan		error = vm_set_register(vm, vcpuid, reg, val);
332270159Sgrehan	}
333270159Sgrehan	return (error);
334270159Sgrehan}
335270159Sgrehan
336268976Sjhbint
337243640Sneelvie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
338243640Sneel		    uint64_t val, int size)
339243640Sneel{
340243640Sneel	int error;
341243640Sneel	uint64_t origval;
342243640Sneel
343243640Sneel	switch (size) {
344243640Sneel	case 1:
345243640Sneel	case 2:
346243640Sneel		error = vie_read_register(vm, vcpuid, reg, &origval);
347243640Sneel		if (error)
348243640Sneel			return (error);
349243640Sneel		val &= size2mask[size];
350243640Sneel		val |= origval & ~size2mask[size];
351243640Sneel		break;
352243640Sneel	case 4:
353243640Sneel		val &= 0xffffffffUL;
354243640Sneel		break;
355243640Sneel	case 8:
356243640Sneel		break;
357243640Sneel	default:
358243640Sneel		return (EINVAL);
359243640Sneel	}
360243640Sneel
361243640Sneel	error = vm_set_register(vm, vcpuid, reg, val);
362243640Sneel	return (error);
363243640Sneel}
364243640Sneel
/* Arithmetic status flags in %rflags. */
#define	RFLAGS_STATUS_BITS    (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)

/*
 * Return the status flags that would result from doing (x - y).
 *
 * The GETCC(sz) macro expands to a function getcc<sz>() that performs the
 * subtraction at the given operand width on the host CPU and captures the
 * resulting %rflags via pushfq/popq.  The trailing 'struct __hack' forces
 * a semicolon after each GETCC(...) instantiation below.
 */
#define	GETCC(sz)							\
static u_long								\
getcc##sz(uint##sz##_t x, uint##sz##_t y)				\
{									\
	u_long rflags;							\
									\
	__asm __volatile("sub %2,%1; pushfq; popq %0" :			\
	    "=r" (rflags), "+r" (x) : "m" (y));				\
	return (rflags);						\
} struct __hack

GETCC(8);
GETCC(16);
GETCC(32);
GETCC(64);
385270159Sgrehan
386270159Sgrehanstatic u_long
387270159Sgrehangetcc(int opsize, uint64_t x, uint64_t y)
388270159Sgrehan{
389276349Sneel	KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
390270159Sgrehan	    ("getcc: invalid operand size %d", opsize));
391270159Sgrehan
392276349Sneel	if (opsize == 1)
393276349Sneel		return (getcc8(x, y));
394276349Sneel	else if (opsize == 2)
395270159Sgrehan		return (getcc16(x, y));
396270159Sgrehan	else if (opsize == 4)
397270159Sgrehan		return (getcc32(x, y));
398270159Sgrehan	else
399270159Sgrehan		return (getcc64(x, y));
400270159Sgrehan}
401270159Sgrehan
/*
 * Emulate a MOV instruction whose memory operand resolved to guest
 * physical address 'gpa'.  The opcode in 'vie' selects the direction and
 * width; 'memread'/'memwrite' perform the actual MMIO accesses.  Returns
 * 0 on success, the error from the MMIO callback, or EINVAL for opcodes
 * not handled here.
 */
static int
emulate_mov(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint8_t byte;
	uint64_t val;

	size = vie->opsize;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0x88:
		/*
		 * MOV byte from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 88/r:	mov r/m8, r8
		 * REX + 88/r:	mov r/m8, r8 (%ah, %ch, %dh, %bh not available)
		 */
		size = 1;	/* override for byte operation */
		error = vie_read_bytereg(vm, vcpuid, vie, &byte);
		if (error == 0)
			error = memwrite(vm, vcpuid, gpa, byte, size, arg);
		break;
	case 0x89:
		/*
		 * MOV from reg (ModRM:reg) to mem (ModRM:r/m)
		 * 89/r:	mov r/m16, r16
		 * 89/r:	mov r/m32, r32
		 * REX.W + 89/r	mov r/m64, r64
		 */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &val);
		if (error == 0) {
			val &= size2mask[size];
			error = memwrite(vm, vcpuid, gpa, val, size, arg);
		}
		break;
	case 0x8A:
		/*
		 * MOV byte from mem (ModRM:r/m) to reg (ModRM:reg)
		 * 8A/r:	mov r8, r/m8
		 * REX + 8A/r:	mov r8, r/m8
		 */
		size = 1;	/* override for byte operation */
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0)
			error = vie_write_bytereg(vm, vcpuid, vie, val);
		break;
	case 0x8B:
		/*
		 * MOV from mem (ModRM:r/m) to reg (ModRM:reg)
		 * 8B/r:	mov r16, r/m16
		 * 8B/r:	mov r32, r/m32
		 * REX.W 8B/r:	mov r64, r/m64
		 */
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0) {
			reg = gpr_map[vie->reg];
			error = vie_update_register(vm, vcpuid, reg, val, size);
		}
		break;
	case 0xA1:
		/*
		 * MOV from seg:moffset to AX/EAX/RAX
		 * A1:		mov AX, moffs16
		 * A1:		mov EAX, moffs32
		 * REX.W + A1:	mov RAX, moffs64
		 */
		error = memread(vm, vcpuid, gpa, &val, size, arg);
		if (error == 0) {
			reg = VM_REG_GUEST_RAX;
			error = vie_update_register(vm, vcpuid, reg, val, size);
		}
		break;
	case 0xA3:
		/*
		 * MOV from AX/EAX/RAX to seg:moffset
		 * A3:		mov moffs16, AX
		 * A3:		mov moffs32, EAX
		 * REX.W + A3:	mov moffs64, RAX
		 */
		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
		if (error == 0) {
			val &= size2mask[size];
			error = memwrite(vm, vcpuid, gpa, val, size, arg);
		}
		break;
	case 0xC6:
		/*
		 * MOV from imm8 to mem (ModRM:r/m)
		 * C6/0		mov r/m8, imm8
		 * REX + C6/0	mov r/m8, imm8
		 */
		size = 1;	/* override for byte operation */
		error = memwrite(vm, vcpuid, gpa, vie->immediate, size, arg);
		break;
	case 0xC7:
		/*
		 * MOV from imm16/imm32 to mem (ModRM:r/m)
		 * C7/0		mov r/m16, imm16
		 * C7/0		mov r/m32, imm32
		 * REX.W + C7/0	mov r/m64, imm32 (sign-extended to 64-bits)
		 */
		val = vie->immediate & size2mask[size];
		error = memwrite(vm, vcpuid, gpa, val, size, arg);
		break;
	default:
		break;
	}

	return (error);
}
515243640Sneel
/*
 * Emulate MOVZX (0F B6, 0F B7) and MOVSX (0F BE): read a byte or word
 * from the memory operand at 'gpa', zero- or sign-extend it to the
 * instruction's operand size, and store it in the destination register.
 * Returns 0 on success, the error from 'memread', or EINVAL for opcodes
 * not handled here.
 */
static int
emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	     mem_region_read_t memread, mem_region_write_t memwrite,
	     void *arg)
{
	int error, size;
	enum vm_reg_name reg;
	uint64_t val;

	size = vie->opsize;
	error = EINVAL;

	switch (vie->op.op_byte) {
	case 0xB6:
		/*
		 * MOV and zero extend byte from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F B6/r		movzx r16, r/m8
		 * 0F B6/r		movzx r32, r/m8
		 * REX.W + 0F B6/r	movzx r64, r/m8
		 */

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val, 1, arg);
		if (error)
			break;

		/* get the second operand */
		reg = gpr_map[vie->reg];

		/* zero-extend byte */
		val = (uint8_t)val;

		/* write the result */
		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	case 0xB7:
		/*
		 * MOV and zero extend word from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F B7/r		movzx r32, r/m16
		 * REX.W + 0F B7/r	movzx r64, r/m16
		 */
		error = memread(vm, vcpuid, gpa, &val, 2, arg);
		if (error)
			return (error);

		reg = gpr_map[vie->reg];

		/* zero-extend word */
		val = (uint16_t)val;

		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	case 0xBE:
		/*
		 * MOV and sign extend byte from mem (ModRM:r/m) to
		 * reg (ModRM:reg).
		 *
		 * 0F BE/r		movsx r16, r/m8
		 * 0F BE/r		movsx r32, r/m8
		 * REX.W + 0F BE/r	movsx r64, r/m8
		 */

		/* get the first operand */
		error = memread(vm, vcpuid, gpa, &val, 1, arg);
		if (error)
			break;

		/* get the second operand */
		reg = gpr_map[vie->reg];

		/* sign extend byte */
		val = (int8_t)val;

		/* write the result */
		error = vie_update_register(vm, vcpuid, reg, val, size);
		break;
	default:
		break;
	}
	return (error);
}
601267396Sjhb
/*
 * Helper function to calculate and validate a linear address.
 *
 * Computes the linear address for the segment:offset pair ('seg', value of
 * 'gpr') into '*gla' and checks it for canonical form and alignment.
 *
 * Returns 0 in all visible paths; guest-visible failures are reported via
 * '*fault':  if a check fails the appropriate exception (#SS for stack
 * segment references, #GP otherwise, or #AC for alignment) is injected
 * into the guest and '*fault' is set to 1.  On success '*fault' is 0 and
 * '*gla' is valid.
 */
static int
get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
    int opsize, int addrsize, int prot, enum vm_reg_name seg,
    enum vm_reg_name gpr, uint64_t *gla, int *fault)
{
	struct seg_desc desc;
	uint64_t cr0, val, rflags;
	int error;

	/* %cr0 and %rflags feed the alignment check (CR0.AM / RFLAGS.AC). */
	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
	KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));

	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));

	error = vm_get_seg_desc(vm, vcpuid, seg, &desc);
	KASSERT(error == 0, ("%s: error %d getting segment descriptor %d",
	    __func__, error, seg));

	/* The offset within the segment comes from the given register. */
	error = vie_read_register(vm, vcpuid, gpr, &val);
	KASSERT(error == 0, ("%s: error %d getting register %d", __func__,
	    error, gpr));

	if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize,
	    addrsize, prot, gla)) {
		if (seg == VM_REG_GUEST_SS)
			vm_inject_ss(vm, vcpuid, 0);
		else
			vm_inject_gp(vm, vcpuid);
		goto guest_fault;
	}

	if (vie_canonical_check(paging->cpu_mode, *gla)) {
		if (seg == VM_REG_GUEST_SS)
			vm_inject_ss(vm, vcpuid, 0);
		else
			vm_inject_gp(vm, vcpuid);
		goto guest_fault;
	}

	if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
		vm_inject_ac(vm, vcpuid, 0);
		goto guest_fault;
	}

	*fault = 0;
	return (0);

guest_fault:
	*fault = 1;
	return (0);
}
657284894Sneel
/*
 * Emulate the MOVS/MOVSB string-move instructions (opcodes A4/A5),
 * optionally with a rep prefix.  One element of 'opsize' bytes is moved
 * per invocation; with a rep prefix %rcx is decremented and the
 * instruction is restarted until the count reaches zero.  The four
 * source/destination combinations (memory vs. mmio) are distinguished at
 * runtime via vm_copy_setup() as described in the table below.
 */
static int
emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
    struct vm_guest_paging *paging, mem_region_read_t memread,
    mem_region_write_t memwrite, void *arg)
{
#ifdef _KERNEL
	struct vm_copyinfo copyinfo[2];
#else
	struct iovec copyinfo[2];
#endif
	uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val;
	uint64_t rcx, rdi, rsi, rflags;
	int error, fault, opsize, seg, repeat;

	/* Opcode A4 is the byte variant (MOVSB); A5 uses the operand size. */
	opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
	val = 0;
	error = 0;

	/*
	 * XXX although the MOVS instruction is only supposed to be used with
	 * the "rep" prefix some guests like FreeBSD will use "repnz" instead.
	 *
	 * Empirically the "repnz" prefix has identical behavior to "rep"
	 * and the zero flag does not make a difference.
	 */
	repeat = vie->repz_present | vie->repnz_present;

	if (repeat) {
		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
		KASSERT(!error, ("%s: error %d getting rcx", __func__, error));

		/*
		 * The count register is %rcx, %ecx or %cx depending on the
		 * address size of the instruction.
		 */
		if ((rcx & vie_size2mask(vie->addrsize)) == 0) {
			error = 0;
			goto done;
		}
	}

	/*
	 *	Source		Destination	Comments
	 *	--------------------------------------------
	 * (1)  memory		memory		n/a
	 * (2)  memory		mmio		emulated
	 * (3)  mmio		memory		emulated
	 * (4)  mmio		mmio		emulated
	 *
	 * At this point we don't have sufficient information to distinguish
	 * between (2), (3) and (4). We use 'vm_copy_setup()' to tease this
	 * out because it will succeed only when operating on regular memory.
	 *
	 * XXX the emulation doesn't properly handle the case where 'gpa'
	 * is straddling the boundary between the normal memory and MMIO.
	 */

	/* The source is seg:%rsi, defaulting to %ds without an override. */
	seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
	error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
	    PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault);
	if (error || fault)
		goto done;

	error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ,
	    copyinfo, nitems(copyinfo), &fault);
	if (error == 0) {
		if (fault)
			goto done;	/* Resume guest to handle fault */

		/*
		 * case (2): read from system memory and write to mmio.
		 */
		vm_copyin(vm, vcpuid, copyinfo, &val, opsize);
		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
		error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
		if (error)
			goto done;
	} else {
		/*
		 * 'vm_copy_setup()' is expected to fail for cases (3) and (4)
		 * if 'srcaddr' is in the mmio space.
		 */

		/* The destination of MOVS is always %es:%rdi. */
		error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
		    PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr,
		    &fault);
		if (error || fault)
			goto done;

		error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
		    PROT_WRITE, copyinfo, nitems(copyinfo), &fault);
		if (error == 0) {
			if (fault)
				goto done;    /* Resume guest to handle fault */

			/*
			 * case (3): read from MMIO and write to system memory.
			 *
			 * A MMIO read can have side-effects so we
			 * commit to it only after vm_copy_setup() is
			 * successful. If a page-fault needs to be
			 * injected into the guest then it will happen
			 * before the MMIO read is attempted.
			 */
			error = memread(vm, vcpuid, gpa, &val, opsize, arg);
			if (error)
				goto done;

			vm_copyout(vm, vcpuid, &val, copyinfo, opsize);
			vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
		} else {
			/*
			 * Case (4): read from and write to mmio.
			 *
			 * Commit to the MMIO read/write (with potential
			 * side-effects) only after we are sure that the
			 * instruction is not going to be restarted due
			 * to address translation faults.
			 */
			error = vm_gla2gpa(vm, vcpuid, paging, srcaddr,
			    PROT_READ, &srcgpa, &fault);
			if (error || fault)
				goto done;

			error = vm_gla2gpa(vm, vcpuid, paging, dstaddr,
			   PROT_WRITE, &dstgpa, &fault);
			if (error || fault)
				goto done;

			error = memread(vm, vcpuid, srcgpa, &val, opsize, arg);
			if (error)
				goto done;

			error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg);
			if (error)
				goto done;
		}
	}

	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
	KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error));

	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
	KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));

	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));

	/* Advance or retreat %rsi/%rdi according to the direction flag. */
	if (rflags & PSL_D) {
		rsi -= opsize;
		rdi -= opsize;
	} else {
		rsi += opsize;
		rdi += opsize;
	}

	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSI, rsi,
	    vie->addrsize);
	KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error));

	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
	    vie->addrsize);
	KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));

	if (repeat) {
		rcx = rcx - 1;
		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
		    rcx, vie->addrsize);
		KASSERT(!error, ("%s: error %d updating rcx", __func__, error));

		/*
		 * Repeat the instruction if the count register is not zero.
		 */
		if ((rcx & vie_size2mask(vie->addrsize)) != 0)
			vm_restart_instruction(vm, vcpuid);
	}
done:
	KASSERT(error == 0 || error == EFAULT, ("%s: unexpected error %d",
	    __func__, error));
	return (error);
}
839284894Sneel
840284894Sneelstatic int
841284899Sneelemulate_stos(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
842284899Sneel    struct vm_guest_paging *paging, mem_region_read_t memread,
843284899Sneel    mem_region_write_t memwrite, void *arg)
844284899Sneel{
845284899Sneel	int error, opsize, repeat;
846284899Sneel	uint64_t val;
847284899Sneel	uint64_t rcx, rdi, rflags;
848284899Sneel
849284899Sneel	opsize = (vie->op.op_byte == 0xAA) ? 1 : vie->opsize;
850284899Sneel	repeat = vie->repz_present | vie->repnz_present;
851284899Sneel
852284899Sneel	if (repeat) {
853284899Sneel		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
854284899Sneel		KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
855284899Sneel
856284899Sneel		/*
857284899Sneel		 * The count register is %rcx, %ecx or %cx depending on the
858284899Sneel		 * address size of the instruction.
859284899Sneel		 */
860284899Sneel		if ((rcx & vie_size2mask(vie->addrsize)) == 0)
861284899Sneel			return (0);
862284899Sneel	}
863284899Sneel
864284899Sneel	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RAX, &val);
865284899Sneel	KASSERT(!error, ("%s: error %d getting rax", __func__, error));
866284899Sneel
867284899Sneel	error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
868284899Sneel	if (error)
869284899Sneel		return (error);
870284899Sneel
871284899Sneel	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
872284899Sneel	KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
873284899Sneel
874284899Sneel	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
875284899Sneel	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
876284899Sneel
877284899Sneel	if (rflags & PSL_D)
878284899Sneel		rdi -= opsize;
879284899Sneel	else
880284899Sneel		rdi += opsize;
881284899Sneel
882284899Sneel	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
883284899Sneel	    vie->addrsize);
884284899Sneel	KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
885284899Sneel
886284899Sneel	if (repeat) {
887284899Sneel		rcx = rcx - 1;
888284899Sneel		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
889284899Sneel		    rcx, vie->addrsize);
890284899Sneel		KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
891284899Sneel
892284899Sneel		/*
893284899Sneel		 * Repeat the instruction if the count register is not zero.
894284899Sneel		 */
895284899Sneel		if ((rcx & vie_size2mask(vie->addrsize)) != 0)
896284899Sneel			vm_restart_instruction(vm, vcpuid);
897284899Sneel	}
898284899Sneel
899284899Sneel	return (0);
900284899Sneel}
901284899Sneel
902284899Sneelstatic int
903243640Sneelemulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
904243640Sneel	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
905243640Sneel{
906243640Sneel	int error, size;
907243640Sneel	enum vm_reg_name reg;
908276349Sneel	uint64_t result, rflags, rflags2, val1, val2;
909243640Sneel
910270159Sgrehan	size = vie->opsize;
911243640Sneel	error = EINVAL;
912243640Sneel
913243640Sneel	switch (vie->op.op_byte) {
914243640Sneel	case 0x23:
915243640Sneel		/*
916243640Sneel		 * AND reg (ModRM:reg) and mem (ModRM:r/m) and store the
917243640Sneel		 * result in reg.
918243640Sneel		 *
919270159Sgrehan		 * 23/r		and r16, r/m16
920243640Sneel		 * 23/r		and r32, r/m32
921243640Sneel		 * REX.W + 23/r	and r64, r/m64
922243640Sneel		 */
923243640Sneel
924243640Sneel		/* get the first operand */
925243640Sneel		reg = gpr_map[vie->reg];
926243640Sneel		error = vie_read_register(vm, vcpuid, reg, &val1);
927243640Sneel		if (error)
928243640Sneel			break;
929243640Sneel
930243640Sneel		/* get the second operand */
931243640Sneel		error = memread(vm, vcpuid, gpa, &val2, size, arg);
932243640Sneel		if (error)
933243640Sneel			break;
934243640Sneel
935243640Sneel		/* perform the operation and write the result */
936276349Sneel		result = val1 & val2;
937276349Sneel		error = vie_update_register(vm, vcpuid, reg, result, size);
938243640Sneel		break;
939243667Sgrehan	case 0x81:
940284899Sneel	case 0x83:
941243667Sgrehan		/*
942284899Sneel		 * AND mem (ModRM:r/m) with immediate and store the
943253585Sneel		 * result in mem.
944243667Sgrehan		 *
945284899Sneel		 * 81 /4		and r/m16, imm16
946284899Sneel		 * 81 /4		and r/m32, imm32
947284899Sneel		 * REX.W + 81 /4	and r/m64, imm32 sign-extended to 64
948243703Sgrehan		 *
949284899Sneel		 * 83 /4		and r/m16, imm8 sign-extended to 16
950284899Sneel		 * 83 /4		and r/m32, imm8 sign-extended to 32
951284899Sneel		 * REX.W + 83/4		and r/m64, imm8 sign-extended to 64
952243667Sgrehan		 */
953243703Sgrehan
954243667Sgrehan		/* get the first operand */
955243667Sgrehan                error = memread(vm, vcpuid, gpa, &val1, size, arg);
956243667Sgrehan                if (error)
957243667Sgrehan			break;
958243667Sgrehan
959243667Sgrehan                /*
960284899Sneel		 * perform the operation with the pre-fetched immediate
961284899Sneel		 * operand and write the result
962284899Sneel		 */
963284899Sneel                result = val1 & vie->immediate;
964284899Sneel                error = memwrite(vm, vcpuid, gpa, result, size, arg);
965243667Sgrehan		break;
966243640Sneel	default:
967243640Sneel		break;
968243640Sneel	}
969276349Sneel	if (error)
970276349Sneel		return (error);
971276349Sneel
972276349Sneel	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
973276349Sneel	if (error)
974276349Sneel		return (error);
975276349Sneel
976276349Sneel	/*
977276349Sneel	 * OF and CF are cleared; the SF, ZF and PF flags are set according
978276349Sneel	 * to the result; AF is undefined.
979276349Sneel	 *
980276349Sneel	 * The updated status flags are obtained by subtracting 0 from 'result'.
981276349Sneel	 */
982276349Sneel	rflags2 = getcc(size, result, 0);
983276349Sneel	rflags &= ~RFLAGS_STATUS_BITS;
984276349Sneel	rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
985276349Sneel
986276349Sneel	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
987243640Sneel	return (error);
988243640Sneel}
989243640Sneel
990253585Sneelstatic int
991253585Sneelemulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
992253585Sneel	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
993253585Sneel{
994253585Sneel	int error, size;
995276349Sneel	uint64_t val1, result, rflags, rflags2;
996253585Sneel
997270159Sgrehan	size = vie->opsize;
998253585Sneel	error = EINVAL;
999253585Sneel
1000253585Sneel	switch (vie->op.op_byte) {
1001284899Sneel	case 0x81:
1002253585Sneel	case 0x83:
1003253585Sneel		/*
1004253585Sneel		 * OR mem (ModRM:r/m) with immediate and store the
1005253585Sneel		 * result in mem.
1006253585Sneel		 *
1007284899Sneel		 * 81 /1		or r/m16, imm16
1008284899Sneel		 * 81 /1		or r/m32, imm32
1009284899Sneel		 * REX.W + 81 /1	or r/m64, imm32 sign-extended to 64
1010253585Sneel		 *
1011284899Sneel		 * 83 /1		or r/m16, imm8 sign-extended to 16
1012284899Sneel		 * 83 /1		or r/m32, imm8 sign-extended to 32
1013284899Sneel		 * REX.W + 83/1		or r/m64, imm8 sign-extended to 64
1014253585Sneel		 */
1015253585Sneel
1016253585Sneel		/* get the first operand */
1017253585Sneel                error = memread(vm, vcpuid, gpa, &val1, size, arg);
1018253585Sneel                if (error)
1019253585Sneel			break;
1020253585Sneel
1021253585Sneel                /*
1022253585Sneel		 * perform the operation with the pre-fetched immediate
1023253585Sneel		 * operand and write the result
1024253585Sneel		 */
1025276349Sneel                result = val1 | vie->immediate;
1026276349Sneel                error = memwrite(vm, vcpuid, gpa, result, size, arg);
1027253585Sneel		break;
1028253585Sneel	default:
1029253585Sneel		break;
1030253585Sneel	}
1031276349Sneel	if (error)
1032276349Sneel		return (error);
1033276349Sneel
1034276349Sneel	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
1035276349Sneel	if (error)
1036276349Sneel		return (error);
1037276349Sneel
1038276349Sneel	/*
1039276349Sneel	 * OF and CF are cleared; the SF, ZF and PF flags are set according
1040276349Sneel	 * to the result; AF is undefined.
1041276349Sneel	 *
1042276349Sneel	 * The updated status flags are obtained by subtracting 0 from 'result'.
1043276349Sneel	 */
1044276349Sneel	rflags2 = getcc(size, result, 0);
1045276349Sneel	rflags &= ~RFLAGS_STATUS_BITS;
1046276349Sneel	rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
1047276349Sneel
1048276349Sneel	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
1049253585Sneel	return (error);
1050253585Sneel}
1051253585Sneel
/*
 * Emulate CMP with one memory operand: compute 'op1 - op2' and update the
 * status flags in %rflags.  Neither operand is modified.
 */
static int
emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
	int error, size;
	uint64_t regop, memop, op1, op2, rflags, rflags2;
	enum vm_reg_name reg;

	size = vie->opsize;
	switch (vie->op.op_byte) {
	case 0x39:
	case 0x3B:
		/*
		 * 39/r		CMP r/m16, r16
		 * 39/r		CMP r/m32, r32
		 * REX.W 39/r	CMP r/m64, r64
		 *
		 * 3B/r		CMP r16, r/m16
		 * 3B/r		CMP r32, r/m32
		 * REX.W + 3B/r	CMP r64, r/m64
		 *
		 * Compare the first operand with the second operand and
		 * set status flags in EFLAGS register. The comparison is
		 * performed by subtracting the second operand from the first
		 * operand and then setting the status flags.
		 */

		/* Get the register operand */
		reg = gpr_map[vie->reg];
		error = vie_read_register(vm, vcpuid, reg, &regop);
		if (error)
			return (error);

		/* Get the memory operand */
		error = memread(vm, vcpuid, gpa, &memop, size, arg);
		if (error)
			return (error);

		/*
		 * Opcode 0x3B has the register as the first (destination)
		 * operand; opcode 0x39 has the memory operand first.  The
		 * ordering matters for the sign of the subtraction.
		 */
		if (vie->op.op_byte == 0x3B) {
			op1 = regop;
			op2 = memop;
		} else {
			op1 = memop;
			op2 = regop;
		}
		rflags2 = getcc(size, op1, op2);
		break;
	case 0x80:
	case 0x81:
	case 0x83:
		/*
		 * 80 /7		cmp r/m8, imm8
		 * REX + 80 /7		cmp r/m8, imm8
		 *
		 * 81 /7		cmp r/m16, imm16
		 * 81 /7		cmp r/m32, imm32
		 * REX.W + 81 /7	cmp r/m64, imm32 sign-extended to 64
		 *
		 * 83 /7		cmp r/m16, imm8 sign-extended to 16
		 * 83 /7		cmp r/m32, imm8 sign-extended to 32
		 * REX.W + 83 /7	cmp r/m64, imm8 sign-extended to 64
		 *
		 * Compare mem (ModRM:r/m) with immediate and set
		 * status flags according to the results.  The
		 * comparison is performed by subtracting the
		 * immediate from the first operand and then setting
		 * the status flags.
		 *
		 */
		/* Opcode 0x80 operates on a byte regardless of 'opsize'. */
		if (vie->op.op_byte == 0x80)
			size = 1;

		/* get the first operand */
                error = memread(vm, vcpuid, gpa, &op1, size, arg);
		if (error)
			return (error);

		rflags2 = getcc(size, op1, vie->immediate);
		break;
	default:
		return (EINVAL);
	}
	/* Fold the computed status bits into the guest's %rflags. */
	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
	if (error)
		return (error);
	rflags &= ~RFLAGS_STATUS_BITS;
	rflags |= rflags2 & RFLAGS_STATUS_BITS;

	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
	return (error);
}
1143270159Sgrehan
1144270159Sgrehanstatic int
1145271659Sgrehanemulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
1146271659Sgrehan	    mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
1147271659Sgrehan{
1148271659Sgrehan	int error, size;
1149271659Sgrehan	uint64_t nval, rflags, rflags2, val1, val2;
1150271659Sgrehan	enum vm_reg_name reg;
1151271659Sgrehan
1152271659Sgrehan	size = vie->opsize;
1153271659Sgrehan	error = EINVAL;
1154271659Sgrehan
1155271659Sgrehan	switch (vie->op.op_byte) {
1156271659Sgrehan	case 0x2B:
1157271659Sgrehan		/*
1158271659Sgrehan		 * SUB r/m from r and store the result in r
1159271659Sgrehan		 *
1160271659Sgrehan		 * 2B/r            SUB r16, r/m16
1161271659Sgrehan		 * 2B/r            SUB r32, r/m32
1162271659Sgrehan		 * REX.W + 2B/r    SUB r64, r/m64
1163271659Sgrehan		 */
1164271659Sgrehan
1165271659Sgrehan		/* get the first operand */
1166271659Sgrehan		reg = gpr_map[vie->reg];
1167271659Sgrehan		error = vie_read_register(vm, vcpuid, reg, &val1);
1168271659Sgrehan		if (error)
1169271659Sgrehan			break;
1170271659Sgrehan
1171271659Sgrehan		/* get the second operand */
1172271659Sgrehan		error = memread(vm, vcpuid, gpa, &val2, size, arg);
1173271659Sgrehan		if (error)
1174271659Sgrehan			break;
1175271659Sgrehan
1176271659Sgrehan		/* perform the operation and write the result */
1177271659Sgrehan		nval = val1 - val2;
1178271659Sgrehan		error = vie_update_register(vm, vcpuid, reg, nval, size);
1179271659Sgrehan		break;
1180271659Sgrehan	default:
1181271659Sgrehan		break;
1182271659Sgrehan	}
1183271659Sgrehan
1184271659Sgrehan	if (!error) {
1185271659Sgrehan		rflags2 = getcc(size, val1, val2);
1186271659Sgrehan		error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
1187271659Sgrehan		    &rflags);
1188271659Sgrehan		if (error)
1189271659Sgrehan			return (error);
1190271659Sgrehan
1191271659Sgrehan		rflags &= ~RFLAGS_STATUS_BITS;
1192271659Sgrehan		rflags |= rflags2 & RFLAGS_STATUS_BITS;
1193271659Sgrehan		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
1194271659Sgrehan		    rflags, 8);
1195271659Sgrehan	}
1196271659Sgrehan
1197271659Sgrehan	return (error);
1198271659Sgrehan}
1199271659Sgrehan
/*
 * Common worker for PUSH/POP emulation when the non-stack operand is in
 * MMIO space.  The stack side of the transfer goes through guest linear
 * address translation and copyin/copyout; the MMIO side goes through the
 * caller-supplied memread/memwrite callbacks.
 */
static int
emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
    struct vm_guest_paging *paging, mem_region_read_t memread,
    mem_region_write_t memwrite, void *arg)
{
#ifdef _KERNEL
	struct vm_copyinfo copyinfo[2];
#else
	struct iovec copyinfo[2];
#endif
	struct seg_desc ss_desc;
	uint64_t cr0, rflags, rsp, stack_gla, val;
	int error, fault, size, stackaddrsize, pushop;

	val = 0;
	size = vie->opsize;
	pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 1 : 0;

	/*
	 * From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1
	 */
	if (paging->cpu_mode == CPU_MODE_REAL) {
		stackaddrsize = 2;
	} else if (paging->cpu_mode == CPU_MODE_64BIT) {
		/*
		 * "Stack Manipulation Instructions in 64-bit Mode", SDM, Vol 3
		 * - Stack pointer size is always 64-bits.
		 * - PUSH/POP of 32-bit values is not possible in 64-bit mode.
		 * - 16-bit PUSH/POP is supported by using the operand size
		 *   override prefix (66H).
		 */
		stackaddrsize = 8;
		size = vie->opsize_override ? 2 : 8;
	} else {
		/*
		 * In protected or compability mode the 'B' flag in the
		 * stack-segment descriptor determines the size of the
		 * stack pointer.
		 */
		error = vm_get_seg_desc(vm, vcpuid, VM_REG_GUEST_SS, &ss_desc);
		KASSERT(error == 0, ("%s: error %d getting SS descriptor",
		    __func__, error));
		if (SEG_DESC_DEF32(ss_desc.access))
			stackaddrsize = 4;
		else
			stackaddrsize = 2;
	}

	/* %cr0 and %rflags are needed below for the alignment check. */
	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
	KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));

	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));

	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
	KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
	if (pushop) {
		/* PUSH decrements %rsp before the write. */
		rsp -= size;
	}

	/*
	 * Any failure translating or validating the stack address is
	 * reported to the guest as an exception, not as an emulation
	 * error, so these paths return 0.
	 */
	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
	    rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ,
	    &stack_gla)) {
		vm_inject_ss(vm, vcpuid, 0);
		return (0);
	}

	if (vie_canonical_check(paging->cpu_mode, stack_gla)) {
		vm_inject_ss(vm, vcpuid, 0);
		return (0);
	}

	if (vie_alignment_check(paging->cpl, size, cr0, rflags, stack_gla)) {
		vm_inject_ac(vm, vcpuid, 0);
		return (0);
	}

	error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size,
	    pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo),
	    &fault);
	if (error || fault)
		return (error);

	if (pushop) {
		/* PUSH: read from MMIO, write to the guest stack. */
		error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
		if (error == 0)
			vm_copyout(vm, vcpuid, &val, copyinfo, size);
	} else {
		/* POP: read from the guest stack, write to MMIO. */
		vm_copyin(vm, vcpuid, copyinfo, &val, size);
		error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg);
		/* POP increments %rsp after the read. */
		rsp += size;
	}
	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));

	if (error == 0) {
		error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
		    stackaddrsize);
		KASSERT(error == 0, ("error %d updating rsp", error));
	}
	return (error);
}
1301270159Sgrehan
1302276349Sneelstatic int
1303276349Sneelemulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
1304276349Sneel    struct vm_guest_paging *paging, mem_region_read_t memread,
1305276349Sneel    mem_region_write_t memwrite, void *arg)
1306276349Sneel{
1307276349Sneel	int error;
1308276349Sneel
1309276349Sneel	/*
1310276349Sneel	 * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
1311276349Sneel	 *
1312276349Sneel	 * PUSH is part of the group 5 extended opcodes and is identified
1313276349Sneel	 * by ModRM:reg = b110.
1314276349Sneel	 */
1315276349Sneel	if ((vie->reg & 7) != 6)
1316276349Sneel		return (EINVAL);
1317276349Sneel
1318276349Sneel	error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread,
1319276349Sneel	    memwrite, arg);
1320276349Sneel	return (error);
1321276349Sneel}
1322276349Sneel
1323276349Sneelstatic int
1324276349Sneelemulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
1325276349Sneel    struct vm_guest_paging *paging, mem_region_read_t memread,
1326276349Sneel    mem_region_write_t memwrite, void *arg)
1327276349Sneel{
1328276349Sneel	int error;
1329276349Sneel
1330276349Sneel	/*
1331276349Sneel	 * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
1332276349Sneel	 *
1333276349Sneel	 * POP is part of the group 1A extended opcodes and is identified
1334276349Sneel	 * by ModRM:reg = b000.
1335276349Sneel	 */
1336276349Sneel	if ((vie->reg & 7) != 0)
1337276349Sneel		return (EINVAL);
1338276349Sneel
1339276349Sneel	error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread,
1340276349Sneel	    memwrite, arg);
1341276349Sneel	return (error);
1342276349Sneel}
1343276349Sneel
1344284899Sneelstatic int
1345284899Sneelemulate_group1(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
1346284899Sneel    struct vm_guest_paging *paging, mem_region_read_t memread,
1347284899Sneel    mem_region_write_t memwrite, void *memarg)
1348284899Sneel{
1349284899Sneel	int error;
1350284899Sneel
1351284899Sneel	switch (vie->reg & 7) {
1352284899Sneel	case 0x1:	/* OR */
1353284899Sneel		error = emulate_or(vm, vcpuid, gpa, vie,
1354284899Sneel		    memread, memwrite, memarg);
1355284899Sneel		break;
1356284899Sneel	case 0x4:	/* AND */
1357284899Sneel		error = emulate_and(vm, vcpuid, gpa, vie,
1358284899Sneel		    memread, memwrite, memarg);
1359284899Sneel		break;
1360284899Sneel	case 0x7:	/* CMP */
1361284899Sneel		error = emulate_cmp(vm, vcpuid, gpa, vie,
1362284899Sneel		    memread, memwrite, memarg);
1363284899Sneel		break;
1364284899Sneel	default:
1365284899Sneel		error = EINVAL;
1366284899Sneel		break;
1367284899Sneel	}
1368284899Sneel
1369284899Sneel	return (error);
1370284899Sneel}
1371284899Sneel
1372284900Sneelstatic int
1373284900Sneelemulate_bittest(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
1374284900Sneel    mem_region_read_t memread, mem_region_write_t memwrite, void *memarg)
1375284900Sneel{
1376284900Sneel	uint64_t val, rflags;
1377284900Sneel	int error, bitmask, bitoff;
1378284900Sneel
1379284900Sneel	/*
1380284900Sneel	 * 0F BA is a Group 8 extended opcode.
1381284900Sneel	 *
1382284900Sneel	 * Currently we only emulate the 'Bit Test' instruction which is
1383284900Sneel	 * identified by a ModR/M:reg encoding of 100b.
1384284900Sneel	 */
1385284900Sneel	if ((vie->reg & 7) != 4)
1386284900Sneel		return (EINVAL);
1387284900Sneel
1388284900Sneel	error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
1389284900Sneel	KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
1390284900Sneel
1391284900Sneel	error = memread(vm, vcpuid, gpa, &val, vie->opsize, memarg);
1392284900Sneel	if (error)
1393284900Sneel		return (error);
1394284900Sneel
1395284900Sneel	/*
1396284900Sneel	 * Intel SDM, Vol 2, Table 3-2:
1397284900Sneel	 * "Range of Bit Positions Specified by Bit Offset Operands"
1398284900Sneel	 */
1399284900Sneel	bitmask = vie->opsize * 8 - 1;
1400284900Sneel	bitoff = vie->immediate & bitmask;
1401284900Sneel
1402284900Sneel	/* Copy the bit into the Carry flag in %rflags */
1403284900Sneel	if (val & (1UL << bitoff))
1404284900Sneel		rflags |= PSL_C;
1405284900Sneel	else
1406284900Sneel		rflags &= ~PSL_C;
1407284900Sneel
1408284900Sneel	error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
1409284900Sneel	KASSERT(error == 0, ("%s: error %d updating rflags", __func__, error));
1410284900Sneel
1411284900Sneel	return (0);
1412284900Sneel}
1413284900Sneel
/*
 * Top-level dispatch for instruction emulation.  'vie' must already have
 * been decoded; the handler for its op_type performs the actual emulation
 * using the memread/memwrite callbacks for the MMIO operand at 'gpa'.
 * Returns 0 on success, EINVAL for undecoded/unhandled instructions, or
 * an error propagated from the handler.
 */
int
vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
    struct vm_guest_paging *paging, mem_region_read_t memread,
    mem_region_write_t memwrite, void *memarg)
{
	int error;

	if (!vie->decoded)
		return (EINVAL);

	switch (vie->op.op_type) {
	case VIE_OP_TYPE_GROUP1:
		error = emulate_group1(vm, vcpuid, gpa, vie, paging, memread,
		    memwrite, memarg);
		break;
	case VIE_OP_TYPE_POP:
		error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread,
		    memwrite, memarg);
		break;
	case VIE_OP_TYPE_PUSH:
		error = emulate_push(vm, vcpuid, gpa, vie, paging, memread,
		    memwrite, memarg);
		break;
	case VIE_OP_TYPE_CMP:
		error = emulate_cmp(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_MOV:
		error = emulate_mov(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_MOVSX:
	case VIE_OP_TYPE_MOVZX:
		error = emulate_movx(vm, vcpuid, gpa, vie,
				     memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_MOVS:
		error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
		    memwrite, memarg);
		break;
	case VIE_OP_TYPE_STOS:
		error = emulate_stos(vm, vcpuid, gpa, vie, paging, memread,
		    memwrite, memarg);
		break;
	case VIE_OP_TYPE_AND:
		error = emulate_and(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_OR:
		error = emulate_or(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_SUB:
		error = emulate_sub(vm, vcpuid, gpa, vie,
				    memread, memwrite, memarg);
		break;
	case VIE_OP_TYPE_BITTEST:
		error = emulate_bittest(vm, vcpuid, gpa, vie,
		    memread, memwrite, memarg);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
1481243640Sneel
1482268976Sjhbint
1483268976Sjhbvie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
1484268976Sjhb{
1485268976Sjhb	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
1486268976Sjhb	    ("%s: invalid size %d", __func__, size));
1487268976Sjhb	KASSERT(cpl >= 0 && cpl <= 3, ("%s: invalid cpl %d", __func__, cpl));
1488268976Sjhb
1489268976Sjhb	if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
1490268976Sjhb		return (0);
1491268976Sjhb
1492268976Sjhb	return ((gla & (size - 1)) ? 1 : 0);
1493268976Sjhb}
1494268976Sjhb
1495268976Sjhbint
1496268976Sjhbvie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
1497268976Sjhb{
1498268976Sjhb	uint64_t mask;
1499268976Sjhb
1500268976Sjhb	if (cpu_mode != CPU_MODE_64BIT)
1501268976Sjhb		return (0);
1502268976Sjhb
1503268976Sjhb	/*
1504268976Sjhb	 * The value of the bit 47 in the 'gla' should be replicated in the
1505268976Sjhb	 * most significant 16 bits.
1506268976Sjhb	 */
1507268976Sjhb	mask = ~((1UL << 48) - 1);
1508268976Sjhb	if (gla & (1UL << 47))
1509268976Sjhb		return ((gla & mask) != mask);
1510268976Sjhb	else
1511268976Sjhb		return ((gla & mask) != 0);
1512268976Sjhb}
1513268976Sjhb
/*
 * Return a mask with the low 'size' bytes set (e.g. 0xffff for size 2),
 * as provided by the size2mask[] lookup table.
 */
uint64_t
vie_size2mask(int size)
{
	KASSERT(size == 1 || size == 2 || size == 4 || size == 8,
	    ("vie_size2mask: invalid size %d", size));
	return (size2mask[size]);
}
1521268976Sjhb
/*
 * Compute the guest linear address for an access of 'length' bytes at
 * 'offset' within segment 'seg', storing the result in '*gla'.
 * Returns 0 on success and -1 when the access violates the segment's
 * protection or limit checks (the caller is expected to inject the
 * appropriate fault).
 */
int
vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
    struct seg_desc *desc, uint64_t offset, int length, int addrsize,
    int prot, uint64_t *gla)
{
	uint64_t firstoff, low_limit, high_limit, segbase;
	int glasize, type;

	KASSERT(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS,
	    ("%s: invalid segment %d", __func__, seg));
	KASSERT(length == 1 || length == 2 || length == 4 || length == 8,
	    ("%s: invalid operand size %d", __func__, length));
	KASSERT((prot & ~(PROT_READ | PROT_WRITE)) == 0,
	    ("%s: invalid prot %#x", __func__, prot));

	firstoff = offset;
	if (cpu_mode == CPU_MODE_64BIT) {
		KASSERT(addrsize == 4 || addrsize == 8, ("%s: invalid address "
		    "size %d for cpu_mode %d", __func__, addrsize, cpu_mode));
		glasize = 8;
	} else {
		KASSERT(addrsize == 2 || addrsize == 4, ("%s: invalid address "
		    "size %d for cpu mode %d", __func__, addrsize, cpu_mode));
		glasize = 4;
		/*
		 * If the segment selector is loaded with a NULL selector
		 * then the descriptor is unusable and attempting to use
		 * it results in a #GP(0).
		 */
		if (SEG_DESC_UNUSABLE(desc->access))
			return (-1);

		/*
		 * The processor generates a #NP exception when a segment
		 * register is loaded with a selector that points to a
		 * descriptor that is not present. If this was the case then
		 * it would have been checked before the VM-exit.
		 */
		KASSERT(SEG_DESC_PRESENT(desc->access),
		    ("segment %d not present: %#x", seg, desc->access));

		/*
		 * The descriptor type must indicate a code/data segment.
		 */
		type = SEG_DESC_TYPE(desc->access);
		KASSERT(type >= 16 && type <= 31, ("segment %d has invalid "
		    "descriptor type %#x", seg, type));

		if (prot & PROT_READ) {
			/* #GP on a read access to a exec-only code segment */
			if ((type & 0xA) == 0x8)
				return (-1);
		}

		if (prot & PROT_WRITE) {
			/*
			 * #GP on a write access to a code segment or a
			 * read-only data segment.
			 */
			if (type & 0x8)			/* code segment */
				return (-1);

			if ((type & 0xA) == 0)		/* read-only data seg */
				return (-1);
		}

		/*
		 * 'desc->limit' is fully expanded taking granularity into
		 * account.
		 */
		if ((type & 0xC) == 0x4) {
			/* expand-down data segment */
			low_limit = desc->limit + 1;
			high_limit = SEG_DESC_DEF32(desc->access) ?
			    0xffffffff : 0xffff;
		} else {
			/* code segment or expand-up data segment */
			low_limit = 0;
			high_limit = desc->limit;
		}

		/*
		 * Check every byte of the access against the segment
		 * limits, wrapping the offset at the address size.
		 */
		while (length > 0) {
			offset &= vie_size2mask(addrsize);
			if (offset < low_limit || offset > high_limit)
				return (-1);
			offset++;
			length--;
		}
	}

	/*
	 * In 64-bit mode all segments except %fs and %gs have a segment
	 * base address of 0.
	 */
	if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
	    seg != VM_REG_GUEST_GS) {
		segbase = 0;
	} else {
		segbase = desc->base;
	}

	/*
	 * Truncate 'firstoff' to the effective address size before adding
	 * it to the segment base.
	 */
	firstoff &= vie_size2mask(addrsize);
	*gla = (segbase + firstoff) & vie_size2mask(glasize);
	return (0);
}
1631268976Sjhb
1632243640Sneel#ifdef _KERNEL
1633256072Sneelvoid
1634276403Sneelvie_init(struct vie *vie, const char *inst_bytes, int inst_length)
1635240941Sneel{
1636276403Sneel	KASSERT(inst_length >= 0 && inst_length <= VIE_INST_SIZE,
1637276403Sneel	    ("%s: invalid instruction length (%d)", __func__, inst_length));
1638240941Sneel
1639240941Sneel	bzero(vie, sizeof(struct vie));
1640240941Sneel
1641240941Sneel	vie->base_register = VM_REG_LAST;
1642240941Sneel	vie->index_register = VM_REG_LAST;
1643284894Sneel	vie->segment_register = VM_REG_LAST;
1644276403Sneel
1645276403Sneel	if (inst_length) {
1646276403Sneel		bcopy(inst_bytes, vie->inst, inst_length);
1647276403Sneel		vie->num_valid = inst_length;
1648276403Sneel	}
1649240941Sneel}
1650240941Sneel
1651240941Sneelstatic int
1652268976Sjhbpf_error_code(int usermode, int prot, int rsvd, uint64_t pte)
1653240941Sneel{
1654268976Sjhb	int error_code = 0;
1655268976Sjhb
1656268976Sjhb	if (pte & PG_V)
1657268976Sjhb		error_code |= PGEX_P;
1658268976Sjhb	if (prot & VM_PROT_WRITE)
1659268976Sjhb		error_code |= PGEX_W;
1660268976Sjhb	if (usermode)
1661268976Sjhb		error_code |= PGEX_U;
1662268976Sjhb	if (rsvd)
1663268976Sjhb		error_code |= PGEX_RSV;
1664268976Sjhb	if (prot & VM_PROT_EXECUTE)
1665268976Sjhb		error_code |= PGEX_I;
1666268976Sjhb
1667268976Sjhb	return (error_code);
1668268976Sjhb}
1669268976Sjhb
1670268976Sjhbstatic void
1671268976Sjhbptp_release(void **cookie)
1672268976Sjhb{
1673268976Sjhb	if (*cookie != NULL) {
1674268976Sjhb		vm_gpa_release(*cookie);
1675268976Sjhb		*cookie = NULL;
1676268976Sjhb	}
1677268976Sjhb}
1678268976Sjhb
1679268976Sjhbstatic void *
1680295124Sgrehanptp_hold(struct vm *vm, int vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
1681268976Sjhb{
1682268976Sjhb	void *ptr;
1683268976Sjhb
1684268976Sjhb	ptp_release(cookie);
1685295124Sgrehan	ptr = vm_gpa_hold(vm, vcpu, ptpphys, len, VM_PROT_RW, cookie);
1686268976Sjhb	return (ptr);
1687268976Sjhb}
1688268976Sjhb
/*
 * Translate the guest linear address 'gla' to a guest physical address by
 * walking the guest's page tables in software, emulating the x86 MMU's
 * permission checks and its maintenance of the accessed/dirty bits.
 *
 * Returns 0 on success (*gpa holds the translation) or EFAULT if a page
 * table page could not be mapped.  If the walk results in an exception
 * being injected into the guest (#GP or #PF), *guest_fault is set to 1
 * and 0 is returned; the caller must check *guest_fault before using *gpa.
 *
 * The A/D bit updates use compare-and-set; on CAS failure the entire walk
 * is restarted from CR3 (with a maybe_yield() to avoid hogging the cpu).
 */
int
vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
    uint64_t gla, int prot, uint64_t *gpa, int *guest_fault)
{
	int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
	u_int retries;
	uint64_t *ptpbase, ptpphys, pte, pgsize;
	uint32_t *ptpbase32, pte32;
	void *cookie;

	*guest_fault = 0;

	usermode = (paging->cpl == 3 ? 1 : 0);
	writable = prot & VM_PROT_WRITE;
	cookie = NULL;
	retval = 0;
	retries = 0;
restart:
	ptpphys = paging->cr3;		/* root of the page tables */
	ptp_release(&cookie);
	if (retries++ > 0)
		maybe_yield();

	if (vie_canonical_check(paging->cpu_mode, gla)) {
		/*
		 * XXX assuming a non-stack reference otherwise a stack fault
		 * should be generated.
		 */
		vm_inject_gp(vm, vcpuid);
		goto fault;
	}

	/* With paging disabled, linear == physical. */
	if (paging->paging_mode == PAGING_MODE_FLAT) {
		*gpa = gla;
		goto done;
	}

	/* 2-level, 32-bit (non-PAE) paging: 4-byte PTEs, 10-bit indices. */
	if (paging->paging_mode == PAGING_MODE_32) {
		nlevels = 2;
		while (--nlevels >= 0) {
			/* Zero out the lower 12 bits. */
			ptpphys &= ~0xfff;

			ptpbase32 = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE,
			    &cookie);

			if (ptpbase32 == NULL)
				goto error;

			ptpshift = PAGE_SHIFT + nlevels * 10;
			ptpindex = (gla >> ptpshift) & 0x3FF;
			pgsize = 1UL << ptpshift;

			pte32 = ptpbase32[ptpindex];

			/* Not present, supervisor-only, or read-only: #PF. */
			if ((pte32 & PG_V) == 0 ||
			    (usermode && (pte32 & PG_U) == 0) ||
			    (writable && (pte32 & PG_RW) == 0)) {
				pfcode = pf_error_code(usermode, prot, 0,
				    pte32);
				vm_inject_pf(vm, vcpuid, pfcode, gla);
				goto fault;
			}

			/*
			 * Emulate the x86 MMU's management of the accessed
			 * and dirty flags. While the accessed flag is set
			 * at every level of the page table, the dirty flag
			 * is only set at the last level providing the guest
			 * physical address.
			 */
			if ((pte32 & PG_A) == 0) {
				if (atomic_cmpset_32(&ptpbase32[ptpindex],
				    pte32, pte32 | PG_A) == 0) {
					goto restart;
				}
			}

			/* XXX must be ignored if CR4.PSE=0 */
			if (nlevels > 0 && (pte32 & PG_PS) != 0)
				break;

			ptpphys = pte32;
		}

		/* Set the dirty bit in the page table entry if necessary */
		if (writable && (pte32 & PG_M) == 0) {
			if (atomic_cmpset_32(&ptpbase32[ptpindex],
			    pte32, pte32 | PG_M) == 0) {
				goto restart;
			}
		}

		/* Zero out the lower 'ptpshift' bits */
		pte32 >>= ptpshift; pte32 <<= ptpshift;
		*gpa = pte32 | (gla & (pgsize - 1));
		goto done;
	}

	/* PAE: walk the 4-entry page directory pointer table first. */
	if (paging->paging_mode == PAGING_MODE_PAE) {
		/* Zero out the lower 5 bits and the upper 32 bits */
		ptpphys &= 0xffffffe0UL;

		ptpbase = ptp_hold(vm, vcpuid, ptpphys, sizeof(*ptpbase) * 4,
		    &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpindex = (gla >> 30) & 0x3;

		pte = ptpbase[ptpindex];

		if ((pte & PG_V) == 0) {
			pfcode = pf_error_code(usermode, prot, 0, pte);
			vm_inject_pf(vm, vcpuid, pfcode, gla);
			goto fault;
		}

		ptpphys = pte;

		nlevels = 2;
	} else
		nlevels = 4;
	/* Common 64-bit-format walk: 8-byte PTEs, 9-bit indices per level. */
	while (--nlevels >= 0) {
		/* Zero out the lower 12 bits and the upper 12 bits */
		ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;

		ptpbase = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie);
		if (ptpbase == NULL)
			goto error;

		ptpshift = PAGE_SHIFT + nlevels * 9;
		ptpindex = (gla >> ptpshift) & 0x1FF;
		pgsize = 1UL << ptpshift;

		pte = ptpbase[ptpindex];

		if ((pte & PG_V) == 0 ||
		    (usermode && (pte & PG_U) == 0) ||
		    (writable && (pte & PG_RW) == 0)) {
			pfcode = pf_error_code(usermode, prot, 0, pte);
			vm_inject_pf(vm, vcpuid, pfcode, gla);
			goto fault;
		}

		/* Set the accessed bit in the page table entry */
		if ((pte & PG_A) == 0) {
			if (atomic_cmpset_64(&ptpbase[ptpindex],
			    pte, pte | PG_A) == 0) {
				goto restart;
			}
		}

		/* Superpage mapping: PG_PS is only valid above 1GB pages. */
		if (nlevels > 0 && (pte & PG_PS) != 0) {
			if (pgsize > 1 * GB) {
				pfcode = pf_error_code(usermode, prot, 1, pte);
				vm_inject_pf(vm, vcpuid, pfcode, gla);
				goto fault;
			}
			break;
		}

		ptpphys = pte;
	}

	/* Set the dirty bit in the page table entry if necessary */
	if (writable && (pte & PG_M) == 0) {
		if (atomic_cmpset_64(&ptpbase[ptpindex], pte, pte | PG_M) == 0)
			goto restart;
	}

	/* Zero out the lower 'ptpshift' bits and the upper 12 bits */
	pte >>= ptpshift; pte <<= (ptpshift + 12); pte >>= 12;
	*gpa = pte | (gla & (pgsize - 1));
done:
	ptp_release(&cookie);
	KASSERT(retval == 0 || retval == EFAULT, ("%s: unexpected retval %d",
	    __func__, retval));
	return (retval);
error:
	retval = EFAULT;
	goto done;
fault:
	*guest_fault = 1;
	goto done;
}
1875240941Sneel
1876240978Sneelint
1877270159Sgrehanvmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
1878284900Sneel    uint64_t rip, int inst_length, struct vie *vie, int *faultptr)
1879240941Sneel{
1880270159Sgrehan	struct vm_copyinfo copyinfo[2];
1881270159Sgrehan	int error, prot;
1882240941Sneel
1883240978Sneel	if (inst_length > VIE_INST_SIZE)
1884240978Sneel		panic("vmm_fetch_instruction: invalid length %d", inst_length);
1885240978Sneel
1886270159Sgrehan	prot = PROT_READ | PROT_EXEC;
1887270159Sgrehan	error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot,
1888284900Sneel	    copyinfo, nitems(copyinfo), faultptr);
1889284900Sneel	if (error || *faultptr)
1890284900Sneel		return (error);
1891284900Sneel
1892284900Sneel	vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length);
1893284900Sneel	vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
1894284900Sneel	vie->num_valid = inst_length;
1895284900Sneel	return (0);
1896240941Sneel}
1897240941Sneel
1898240941Sneelstatic int
1899240941Sneelvie_peek(struct vie *vie, uint8_t *x)
1900240941Sneel{
1901243640Sneel
1902240941Sneel	if (vie->num_processed < vie->num_valid) {
1903240941Sneel		*x = vie->inst[vie->num_processed];
1904240941Sneel		return (0);
1905240941Sneel	} else
1906240941Sneel		return (-1);
1907240941Sneel}
1908240941Sneel
1909240941Sneelstatic void
1910240941Sneelvie_advance(struct vie *vie)
1911240941Sneel{
1912240941Sneel
1913240941Sneel	vie->num_processed++;
1914240941Sneel}
1915240941Sneel
1916284894Sneelstatic bool
1917284894Sneelsegment_override(uint8_t x, int *seg)
1918284894Sneel{
1919284894Sneel
1920284894Sneel	switch (x) {
1921284894Sneel	case 0x2E:
1922284894Sneel		*seg = VM_REG_GUEST_CS;
1923284894Sneel		break;
1924284894Sneel	case 0x36:
1925284894Sneel		*seg = VM_REG_GUEST_SS;
1926284894Sneel		break;
1927284894Sneel	case 0x3E:
1928284894Sneel		*seg = VM_REG_GUEST_DS;
1929284894Sneel		break;
1930284894Sneel	case 0x26:
1931284894Sneel		*seg = VM_REG_GUEST_ES;
1932284894Sneel		break;
1933284894Sneel	case 0x64:
1934284894Sneel		*seg = VM_REG_GUEST_FS;
1935284894Sneel		break;
1936284894Sneel	case 0x65:
1937284894Sneel		*seg = VM_REG_GUEST_GS;
1938284894Sneel		break;
1939284894Sneel	default:
1940284894Sneel		return (false);
1941284894Sneel	}
1942284894Sneel	return (true);
1943284894Sneel}
1944284894Sneel
1945240941Sneelstatic int
1946270159Sgrehandecode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
1947240941Sneel{
1948240941Sneel	uint8_t x;
1949240941Sneel
1950270159Sgrehan	while (1) {
1951270159Sgrehan		if (vie_peek(vie, &x))
1952270159Sgrehan			return (-1);
1953240941Sneel
1954270159Sgrehan		if (x == 0x66)
1955270159Sgrehan			vie->opsize_override = 1;
1956270159Sgrehan		else if (x == 0x67)
1957270159Sgrehan			vie->addrsize_override = 1;
1958284894Sneel		else if (x == 0xF3)
1959284894Sneel			vie->repz_present = 1;
1960284894Sneel		else if (x == 0xF2)
1961284894Sneel			vie->repnz_present = 1;
1962284894Sneel		else if (segment_override(x, &vie->segment_register))
1963284894Sneel			vie->segment_override = 1;
1964270159Sgrehan		else
1965270159Sgrehan			break;
1966270159Sgrehan
1967270159Sgrehan		vie_advance(vie);
1968270159Sgrehan	}
1969270159Sgrehan
1970270159Sgrehan	/*
1971270159Sgrehan	 * From section 2.2.1, "REX Prefixes", Intel SDM Vol 2:
1972270159Sgrehan	 * - Only one REX prefix is allowed per instruction.
1973270159Sgrehan	 * - The REX prefix must immediately precede the opcode byte or the
1974270159Sgrehan	 *   escape opcode byte.
1975270159Sgrehan	 * - If an instruction has a mandatory prefix (0x66, 0xF2 or 0xF3)
1976270159Sgrehan	 *   the mandatory prefix must come before the REX prefix.
1977270159Sgrehan	 */
1978270159Sgrehan	if (cpu_mode == CPU_MODE_64BIT && x >= 0x40 && x <= 0x4F) {
1979246108Sneel		vie->rex_present = 1;
1980240941Sneel		vie->rex_w = x & 0x8 ? 1 : 0;
1981240941Sneel		vie->rex_r = x & 0x4 ? 1 : 0;
1982240941Sneel		vie->rex_x = x & 0x2 ? 1 : 0;
1983240941Sneel		vie->rex_b = x & 0x1 ? 1 : 0;
1984240941Sneel		vie_advance(vie);
1985240941Sneel	}
1986240941Sneel
1987270159Sgrehan	/*
1988270159Sgrehan	 * Section "Operand-Size And Address-Size Attributes", Intel SDM, Vol 1
1989270159Sgrehan	 */
1990270159Sgrehan	if (cpu_mode == CPU_MODE_64BIT) {
1991270159Sgrehan		/*
1992270159Sgrehan		 * Default address size is 64-bits and default operand size
1993270159Sgrehan		 * is 32-bits.
1994270159Sgrehan		 */
1995270159Sgrehan		vie->addrsize = vie->addrsize_override ? 4 : 8;
1996270159Sgrehan		if (vie->rex_w)
1997270159Sgrehan			vie->opsize = 8;
1998270159Sgrehan		else if (vie->opsize_override)
1999270159Sgrehan			vie->opsize = 2;
2000270159Sgrehan		else
2001270159Sgrehan			vie->opsize = 4;
2002270159Sgrehan	} else if (cs_d) {
2003270159Sgrehan		/* Default address and operand sizes are 32-bits */
2004270159Sgrehan		vie->addrsize = vie->addrsize_override ? 2 : 4;
2005270159Sgrehan		vie->opsize = vie->opsize_override ? 2 : 4;
2006270159Sgrehan	} else {
2007270159Sgrehan		/* Default address and operand sizes are 16-bits */
2008270159Sgrehan		vie->addrsize = vie->addrsize_override ? 4 : 2;
2009270159Sgrehan		vie->opsize = vie->opsize_override ? 4 : 2;
2010270159Sgrehan	}
2011240941Sneel	return (0);
2012240941Sneel}
2013240941Sneel
2014240941Sneelstatic int
2015267396Sjhbdecode_two_byte_opcode(struct vie *vie)
2016267396Sjhb{
2017267396Sjhb	uint8_t x;
2018267396Sjhb
2019267396Sjhb	if (vie_peek(vie, &x))
2020267396Sjhb		return (-1);
2021267396Sjhb
2022267396Sjhb	vie->op = two_byte_opcodes[x];
2023267396Sjhb
2024267396Sjhb	if (vie->op.op_type == VIE_OP_TYPE_NONE)
2025267396Sjhb		return (-1);
2026267396Sjhb
2027267396Sjhb	vie_advance(vie);
2028267396Sjhb	return (0);
2029267396Sjhb}
2030267396Sjhb
2031267396Sjhbstatic int
2032240941Sneeldecode_opcode(struct vie *vie)
2033240941Sneel{
2034240941Sneel	uint8_t x;
2035240941Sneel
2036240941Sneel	if (vie_peek(vie, &x))
2037240941Sneel		return (-1);
2038240941Sneel
2039243640Sneel	vie->op = one_byte_opcodes[x];
2040240941Sneel
2041243640Sneel	if (vie->op.op_type == VIE_OP_TYPE_NONE)
2042243640Sneel		return (-1);
2043243640Sneel
2044240941Sneel	vie_advance(vie);
2045267396Sjhb
2046267396Sjhb	if (vie->op.op_type == VIE_OP_TYPE_TWO_BYTE)
2047267396Sjhb		return (decode_two_byte_opcode(vie));
2048267396Sjhb
2049243640Sneel	return (0);
2050240941Sneel}
2051240941Sneel
/*
 * Decode the ModR/M byte: extract mod/reg/rm, apply the REX.r/REX.b
 * extensions, determine the base register and the number of displacement
 * bytes that follow.  Returns -1 on malformed or unsupported encodings.
 */
static int
decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
{
	uint8_t x;

	/* Some opcodes (e.g. moffset forms) carry no ModR/M byte at all. */
	if (vie->op.op_flags & VIE_OP_F_NO_MODRM)
		return (0);

	/* ModR/M decoding in real mode is not supported here. */
	if (cpu_mode == CPU_MODE_REAL)
		return (-1);

	if (vie_peek(vie, &x))
		return (-1);

	/* ModR/M layout: mod[7:6] reg[5:3] rm[2:0] */
	vie->mod = (x >> 6) & 0x3;
	vie->rm =  (x >> 0) & 0x7;
	vie->reg = (x >> 3) & 0x7;

	/*
	 * A direct addressing mode makes no sense in the context of an EPT
	 * fault. There has to be a memory access involved to cause the
	 * EPT fault.
	 */
	if (vie->mod == VIE_MOD_DIRECT)
		return (-1);

	if ((vie->mod == VIE_MOD_INDIRECT && vie->rm == VIE_RM_DISP32) ||
	    (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)) {
		/*
		 * Table 2-5: Special Cases of REX Encodings
		 *
		 * mod=0, r/m=5 is used in the compatibility mode to
		 * indicate a disp32 without a base register.
		 *
		 * mod!=3, r/m=4 is used in the compatibility mode to
		 * indicate that the SIB byte is present.
		 *
		 * The 'b' bit in the REX prefix is don't care in
		 * this case.
		 */
	} else {
		vie->rm |= (vie->rex_b << 3);
	}

	vie->reg |= (vie->rex_r << 3);

	/* SIB */
	if (vie->mod != VIE_MOD_DIRECT && vie->rm == VIE_RM_SIB)
		goto done;

	vie->base_register = gpr_map[vie->rm];

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	case VIE_MOD_INDIRECT:
		if (vie->rm == VIE_RM_DISP32) {
			vie->disp_bytes = 4;
			/*
			 * Table 2-7. RIP-Relative Addressing
			 *
			 * In 64-bit mode mod=00 r/m=101 implies [rip] + disp32
			 * whereas in compatibility mode it just implies disp32.
			 */

			if (cpu_mode == CPU_MODE_64BIT)
				vie->base_register = VM_REG_GUEST_RIP;
			else
				vie->base_register = VM_REG_LAST;
		}
		break;
	}

done:
	vie_advance(vie);

	return (0);
}
2134240941Sneel
/*
 * Decode the SIB (scale-index-base) byte, if the ModR/M byte indicated
 * one is present.  Fills in the base/index registers, the scale factor
 * and the displacement size implied by the encoding.
 */
static int
decode_sib(struct vie *vie)
{
	uint8_t x;

	/* Proceed only if SIB byte is present */
	if (vie->mod == VIE_MOD_DIRECT || vie->rm != VIE_RM_SIB)
		return (0);

	if (vie_peek(vie, &x))
		return (-1);

	/* De-construct the SIB byte */
	vie->ss = (x >> 6) & 0x3;
	vie->index = (x >> 3) & 0x7;
	vie->base = (x >> 0) & 0x7;

	/* Apply the REX prefix modifiers */
	vie->index |= vie->rex_x << 3;
	vie->base |= vie->rex_b << 3;

	switch (vie->mod) {
	case VIE_MOD_INDIRECT_DISP8:
		vie->disp_bytes = 1;
		break;
	case VIE_MOD_INDIRECT_DISP32:
		vie->disp_bytes = 4;
		break;
	}

	if (vie->mod == VIE_MOD_INDIRECT &&
	    (vie->base == 5 || vie->base == 13)) {
		/*
		 * Special case when base register is unused if mod = 0
		 * and base = %rbp or %r13.
		 *
		 * Documented in:
		 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
		 * Table 2-5: Special Cases of REX Encodings
		 */
		vie->disp_bytes = 4;
	} else {
		vie->base_register = gpr_map[vie->base];
	}

	/*
	 * All encodings of 'index' are valid except for %rsp (4).
	 *
	 * Documented in:
	 * Table 2-3: 32-bit Addressing Forms with the SIB Byte
	 * Table 2-5: Special Cases of REX Encodings
	 */
	if (vie->index != 4)
		vie->index_register = gpr_map[vie->index];

	/* 'scale' makes sense only in the context of an index register */
	if (vie->index_register < VM_REG_LAST)
		vie->scale = 1 << vie->ss;

	vie_advance(vie);

	return (0);
}
2198243640Sneel
2199243640Sneelstatic int
2200240941Sneeldecode_displacement(struct vie *vie)
2201240941Sneel{
2202240941Sneel	int n, i;
2203240941Sneel	uint8_t x;
2204240941Sneel
2205240941Sneel	union {
2206240941Sneel		char	buf[4];
2207240941Sneel		int8_t	signed8;
2208240941Sneel		int32_t	signed32;
2209240941Sneel	} u;
2210240941Sneel
2211240941Sneel	if ((n = vie->disp_bytes) == 0)
2212240941Sneel		return (0);
2213240941Sneel
2214240941Sneel	if (n != 1 && n != 4)
2215240941Sneel		panic("decode_displacement: invalid disp_bytes %d", n);
2216240941Sneel
2217240941Sneel	for (i = 0; i < n; i++) {
2218240941Sneel		if (vie_peek(vie, &x))
2219240941Sneel			return (-1);
2220240941Sneel
2221240941Sneel		u.buf[i] = x;
2222240941Sneel		vie_advance(vie);
2223240941Sneel	}
2224240941Sneel
2225240941Sneel	if (n == 1)
2226240941Sneel		vie->displacement = u.signed8;		/* sign-extended */
2227240941Sneel	else
2228240941Sneel		vie->displacement = u.signed32;		/* sign-extended */
2229240941Sneel
2230240941Sneel	return (0);
2231240941Sneel}
2232240941Sneel
2233240941Sneelstatic int
2234240941Sneeldecode_immediate(struct vie *vie)
2235240941Sneel{
2236240941Sneel	int i, n;
2237240941Sneel	uint8_t x;
2238240941Sneel	union {
2239240941Sneel		char	buf[4];
2240243640Sneel		int8_t	signed8;
2241270159Sgrehan		int16_t	signed16;
2242240941Sneel		int32_t	signed32;
2243240941Sneel	} u;
2244240941Sneel
2245255638Sneel	/* Figure out immediate operand size (if any) */
2246270159Sgrehan	if (vie->op.op_flags & VIE_OP_F_IMM) {
2247270159Sgrehan		/*
2248270159Sgrehan		 * Section 2.2.1.5 "Immediates", Intel SDM:
2249270159Sgrehan		 * In 64-bit mode the typical size of immediate operands
2250270159Sgrehan		 * remains 32-bits. When the operand size if 64-bits, the
2251270159Sgrehan		 * processor sign-extends all immediates to 64-bits prior
2252270159Sgrehan		 * to their use.
2253270159Sgrehan		 */
2254270159Sgrehan		if (vie->opsize == 4 || vie->opsize == 8)
2255270159Sgrehan			vie->imm_bytes = 4;
2256270159Sgrehan		else
2257270159Sgrehan			vie->imm_bytes = 2;
2258270159Sgrehan	} else if (vie->op.op_flags & VIE_OP_F_IMM8) {
2259255638Sneel		vie->imm_bytes = 1;
2260270159Sgrehan	}
2261255638Sneel
2262240941Sneel	if ((n = vie->imm_bytes) == 0)
2263240941Sneel		return (0);
2264240941Sneel
2265270159Sgrehan	KASSERT(n == 1 || n == 2 || n == 4,
2266270159Sgrehan	    ("%s: invalid number of immediate bytes: %d", __func__, n));
2267240941Sneel
2268240941Sneel	for (i = 0; i < n; i++) {
2269240941Sneel		if (vie_peek(vie, &x))
2270240941Sneel			return (-1);
2271240941Sneel
2272240941Sneel		u.buf[i] = x;
2273240941Sneel		vie_advance(vie);
2274240941Sneel	}
2275270159Sgrehan
2276270159Sgrehan	/* sign-extend the immediate value before use */
2277243640Sneel	if (n == 1)
2278270159Sgrehan		vie->immediate = u.signed8;
2279270159Sgrehan	else if (n == 2)
2280270159Sgrehan		vie->immediate = u.signed16;
2281243640Sneel	else
2282270159Sgrehan		vie->immediate = u.signed32;
2283240941Sneel
2284240941Sneel	return (0);
2285240941Sneel}
2286240941Sneel
2287270159Sgrehanstatic int
2288270159Sgrehandecode_moffset(struct vie *vie)
2289270159Sgrehan{
2290270159Sgrehan	int i, n;
2291270159Sgrehan	uint8_t x;
2292270159Sgrehan	union {
2293270159Sgrehan		char	buf[8];
2294270159Sgrehan		uint64_t u64;
2295270159Sgrehan	} u;
2296270159Sgrehan
2297270159Sgrehan	if ((vie->op.op_flags & VIE_OP_F_MOFFSET) == 0)
2298270159Sgrehan		return (0);
2299270159Sgrehan
2300270159Sgrehan	/*
2301270159Sgrehan	 * Section 2.2.1.4, "Direct Memory-Offset MOVs", Intel SDM:
2302270159Sgrehan	 * The memory offset size follows the address-size of the instruction.
2303270159Sgrehan	 */
2304270159Sgrehan	n = vie->addrsize;
2305270159Sgrehan	KASSERT(n == 2 || n == 4 || n == 8, ("invalid moffset bytes: %d", n));
2306270159Sgrehan
2307270159Sgrehan	u.u64 = 0;
2308270159Sgrehan	for (i = 0; i < n; i++) {
2309270159Sgrehan		if (vie_peek(vie, &x))
2310270159Sgrehan			return (-1);
2311270159Sgrehan
2312270159Sgrehan		u.buf[i] = x;
2313270159Sgrehan		vie_advance(vie);
2314270159Sgrehan	}
2315270159Sgrehan	vie->displacement = u.u64;
2316270159Sgrehan	return (0);
2317270159Sgrehan}
2318270159Sgrehan
2319243640Sneel/*
2320243640Sneel * Verify that the 'guest linear address' provided as collateral of the nested
2321243640Sneel * page table fault matches with our instruction decoding.
2322243640Sneel */
2323243640Sneelstatic int
2324295124Sgrehanverify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie,
2325295124Sgrehan    enum vm_cpu_mode cpu_mode)
2326243640Sneel{
2327243640Sneel	int error;
2328295124Sgrehan	uint64_t base, segbase, idx, gla2;
2329295124Sgrehan	enum vm_reg_name seg;
2330295124Sgrehan	struct seg_desc desc;
2331243640Sneel
2332248855Sneel	/* Skip 'gla' verification */
2333248855Sneel	if (gla == VIE_INVALID_GLA)
2334248855Sneel		return (0);
2335248855Sneel
2336243640Sneel	base = 0;
2337243640Sneel	if (vie->base_register != VM_REG_LAST) {
2338243640Sneel		error = vm_get_register(vm, cpuid, vie->base_register, &base);
2339243640Sneel		if (error) {
2340243640Sneel			printf("verify_gla: error %d getting base reg %d\n",
2341243640Sneel				error, vie->base_register);
2342243640Sneel			return (-1);
2343243640Sneel		}
2344249879Sgrehan
2345249879Sgrehan		/*
2346249879Sgrehan		 * RIP-relative addressing starts from the following
2347249879Sgrehan		 * instruction
2348249879Sgrehan		 */
2349249879Sgrehan		if (vie->base_register == VM_REG_GUEST_RIP)
2350284900Sneel			base += vie->num_processed;
2351243640Sneel	}
2352243640Sneel
2353243640Sneel	idx = 0;
2354243640Sneel	if (vie->index_register != VM_REG_LAST) {
2355243640Sneel		error = vm_get_register(vm, cpuid, vie->index_register, &idx);
2356243640Sneel		if (error) {
2357243640Sneel			printf("verify_gla: error %d getting index reg %d\n",
2358243640Sneel				error, vie->index_register);
2359243640Sneel			return (-1);
2360243640Sneel		}
2361243640Sneel	}
2362243640Sneel
2363295124Sgrehan	/*
2364295124Sgrehan	 * From "Specifying a Segment Selector", Intel SDM, Vol 1
2365295124Sgrehan	 *
2366295124Sgrehan	 * In 64-bit mode, segmentation is generally (but not
2367295124Sgrehan	 * completely) disabled.  The exceptions are the FS and GS
2368295124Sgrehan	 * segments.
2369295124Sgrehan	 *
2370295124Sgrehan	 * In legacy IA-32 mode, when the ESP or EBP register is used
2371295124Sgrehan	 * as the base, the SS segment is the default segment.  For
2372295124Sgrehan	 * other data references, except when relative to stack or
2373295124Sgrehan	 * string destination the DS segment is the default.  These
2374295124Sgrehan	 * can be overridden to allow other segments to be accessed.
2375295124Sgrehan	 */
2376295124Sgrehan	if (vie->segment_override)
2377295124Sgrehan		seg = vie->segment_register;
2378295124Sgrehan	else if (vie->base_register == VM_REG_GUEST_RSP ||
2379295124Sgrehan	    vie->base_register == VM_REG_GUEST_RBP)
2380295124Sgrehan		seg = VM_REG_GUEST_SS;
2381295124Sgrehan	else
2382295124Sgrehan		seg = VM_REG_GUEST_DS;
2383295124Sgrehan	if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
2384295124Sgrehan	    seg != VM_REG_GUEST_GS) {
2385295124Sgrehan		segbase = 0;
2386295124Sgrehan	} else {
2387295124Sgrehan		error = vm_get_seg_desc(vm, cpuid, seg, &desc);
2388295124Sgrehan		if (error) {
2389295124Sgrehan			printf("verify_gla: error %d getting segment"
2390295124Sgrehan			       " descriptor %d", error,
2391295124Sgrehan			       vie->segment_register);
2392295124Sgrehan			return (-1);
2393295124Sgrehan		}
2394295124Sgrehan		segbase = desc.base;
2395295124Sgrehan	}
2396295124Sgrehan
2397295124Sgrehan	gla2 = segbase + base + vie->scale * idx + vie->displacement;
2398270159Sgrehan	gla2 &= size2mask[vie->addrsize];
2399270159Sgrehan	if (gla != gla2) {
2400295124Sgrehan		printf("verify_gla mismatch: segbase(0x%0lx)"
2401243640Sneel		       "base(0x%0lx), scale(%d), index(0x%0lx), "
2402270159Sgrehan		       "disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n",
2403295124Sgrehan		       segbase, base, vie->scale, idx, vie->displacement,
2404295124Sgrehan		       gla, gla2);
2405243640Sneel		return (-1);
2406243640Sneel	}
2407243640Sneel
2408243640Sneel	return (0);
2409243640Sneel}
2410243640Sneel
2411240941Sneelint
2412267399Sjhbvmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
2413270159Sgrehan		       enum vm_cpu_mode cpu_mode, int cs_d, struct vie *vie)
2414240941Sneel{
2415243640Sneel
2416270159Sgrehan	if (decode_prefixes(vie, cpu_mode, cs_d))
2417270159Sgrehan		return (-1);
2418240941Sneel
2419240941Sneel	if (decode_opcode(vie))
2420240941Sneel		return (-1);
2421240941Sneel
2422267399Sjhb	if (decode_modrm(vie, cpu_mode))
2423240941Sneel		return (-1);
2424240941Sneel
2425243640Sneel	if (decode_sib(vie))
2426243640Sneel		return (-1);
2427243640Sneel
2428240941Sneel	if (decode_displacement(vie))
2429240941Sneel		return (-1);
2430270159Sgrehan
2431240941Sneel	if (decode_immediate(vie))
2432240941Sneel		return (-1);
2433240941Sneel
2434270159Sgrehan	if (decode_moffset(vie))
2435270159Sgrehan		return (-1);
2436270159Sgrehan
2437284894Sneel	if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) {
2438295124Sgrehan		if (verify_gla(vm, cpuid, gla, vie, cpu_mode))
2439284894Sneel			return (-1);
2440284894Sneel	}
2441243640Sneel
2442243640Sneel	vie->decoded = 1;	/* success */
2443243640Sneel
2444240941Sneel	return (0);
2445240941Sneel}
2446243640Sneel#endif	/* _KERNEL */
2447