/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/endian.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/stat.h>

#ifdef __aarch64__
#include <machine/armreg.h>
#endif
#include <machine/atomic.h>
#ifdef __amd64__
#include <machine/specialreg.h>
#endif
#include <machine/vmm.h>

#include <netinet/in.h>

#include <assert.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <vmmapi.h>

#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "gdb.h"
#include "mem.h"
#include "mevent.h"

#define	_PATH_GDB_XML		"/usr/share/bhyve/gdb"

/*
 * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
 * use SIGTRAP.
 */
#define	GDB_SIGNAL_TRAP		5

#if defined(__amd64__)
#define	GDB_BP_SIZE		1
#define	GDB_BP_INSTR		(uint8_t []){0xcc}
#define	GDB_PC_REGNAME		VM_REG_GUEST_RIP
#define	GDB_BREAKPOINT_CAP	VM_CAP_BPT_EXIT
#elif defined(__aarch64__)
#define	GDB_BP_SIZE		4
#define	GDB_BP_INSTR		(uint8_t []){0x00, 0x00, 0x20, 0xd4}
#define	GDB_PC_REGNAME		VM_REG_GUEST_PC
#define	GDB_BREAKPOINT_CAP	VM_CAP_BRK_EXIT
#else
#error "Unsupported architecture"
#endif

_Static_assert(sizeof(GDB_BP_INSTR) == GDB_BP_SIZE,
    "GDB_BP_INSTR has wrong size");

static void gdb_resume_vcpus(void);
static void check_command(int fd);

static struct mevent *read_event, *write_event;

static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
static pthread_mutex_t gdb_lock;
static pthread_cond_t idle_vcpus;
static bool first_stop, report_next_stop, swbreak_enabled;
static int xml_dfd = -1;

/*
 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
 * read buffer, 'start' is unused and 'len' contains the number of
 * valid bytes in the buffer.  For a write buffer, 'start' is set to
 * the index of the next byte in 'data' to send, and 'len' contains
 * the remaining number of valid bytes to send.
 */
struct io_buffer {
	uint8_t *data;
	size_t capacity;
	size_t start;
	size_t len;
};

struct breakpoint {
	uint64_t gpa;
	uint8_t shadow_inst[GDB_BP_SIZE];
	TAILQ_ENTRY(breakpoint) link;
};

/*
 * When a vCPU stops due to an event that should be reported to the
 * debugger, information about the event is stored in this structure.
 * The vCPU thread then sets 'stopped_vcpu' if it is not already set
 * and stops other vCPUs so the event can be reported.  The
 * report_stop() function reports the event for the 'stopped_vcpu'
 * vCPU.  When the debugger resumes execution via continue or step,
 * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
 * event handlers until the associated event is reported or disabled.
 *
 * An idle vCPU will have all of the boolean fields set to false.
 *
 * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
 * released to execute the stepped instruction.  When the vCPU reports
 * the stepping trap, 'stepped' is set.
 *
 * When a vCPU hits a breakpoint set by the debug server,
 * 'hit_swbreak' is set to true.
 */
struct vcpu_state {
	bool stepping;
	bool stepped;
	bool hit_swbreak;
};

static struct io_buffer cur_comm, cur_resp;
static uint8_t cur_csum;
static struct vmctx *ctx;
static int cur_fd = -1;
static TAILQ_HEAD(, breakpoint) breakpoints;
static struct vcpu_state *vcpu_state;
static struct vcpu **vcpus;
static int cur_vcpu, stopped_vcpu;
static bool gdb_active = false;

struct gdb_reg {
	enum vm_reg_name id;
	int size;
};

#ifdef __amd64__
static const struct gdb_reg gdb_regset[] = {
	{ .id = VM_REG_GUEST_RAX, .size = 8 },
	{ .id = VM_REG_GUEST_RBX, .size = 8 },
	{ .id = VM_REG_GUEST_RCX, .size = 8 },
	{ .id = VM_REG_GUEST_RDX, .size = 8 },
	{ .id = VM_REG_GUEST_RSI, .size = 8 },
	{ .id = VM_REG_GUEST_RDI, .size = 8 },
	{ .id = VM_REG_GUEST_RBP, .size = 8 },
	{ .id = VM_REG_GUEST_RSP, .size = 8 },
	{ .id = VM_REG_GUEST_R8, .size = 8 },
	{ .id = VM_REG_GUEST_R9, .size = 8 },
	{ .id = VM_REG_GUEST_R10, .size = 8 },
	{ .id = VM_REG_GUEST_R11, .size = 8 },
	{ .id = VM_REG_GUEST_R12, .size = 8 },
	{ .id = VM_REG_GUEST_R13, .size = 8 },
	{ .id = VM_REG_GUEST_R14, .size = 8 },
	{ .id = VM_REG_GUEST_R15, .size = 8 },
	{ .id = VM_REG_GUEST_RIP, .size = 8 },
	{ .id = VM_REG_GUEST_RFLAGS, .size = 4 },
	{ .id = VM_REG_GUEST_CS, .size = 4 },
	{ .id = VM_REG_GUEST_SS, .size = 4 },
	{ .id = VM_REG_GUEST_DS, .size = 4 },
	{ .id = VM_REG_GUEST_ES, .size = 4 },
	{ .id = VM_REG_GUEST_FS, .size = 4 },
	{ .id = VM_REG_GUEST_GS, .size = 4 },
	/*
	 * Registers past this point are not included in a reply to a 'g' query,
	 * to provide compatibility with debuggers that do not fetch a target
	 * description.  The debugger can query them individually with 'p' if it
	 * knows about them.
	 */
#define	GDB_REG_FIRST_EXT	VM_REG_GUEST_FS_BASE
	{ .id = VM_REG_GUEST_FS_BASE, .size = 8 },
	{ .id = VM_REG_GUEST_GS_BASE, .size = 8 },
	{ .id = VM_REG_GUEST_KGS_BASE, .size = 8 },
	{ .id = VM_REG_GUEST_CR0, .size = 8 },
	{ .id = VM_REG_GUEST_CR2, .size = 8 },
	{ .id = VM_REG_GUEST_CR3, .size = 8 },
	{ .id = VM_REG_GUEST_CR4, .size = 8 },
	{ .id = VM_REG_GUEST_TPR, .size = 8 },
	{ .id = VM_REG_GUEST_EFER, .size = 8 },
};
#else /* __aarch64__ */
static const struct gdb_reg gdb_regset[] = {
	{ .id = VM_REG_GUEST_X0, .size = 8 },
	{ .id = VM_REG_GUEST_X1, .size = 8 },
	{ .id = VM_REG_GUEST_X2, .size = 8 },
	{ .id = VM_REG_GUEST_X3, .size = 8 },
	{ .id = VM_REG_GUEST_X4, .size = 8 },
	{ .id = VM_REG_GUEST_X5, .size = 8 },
	{ .id = VM_REG_GUEST_X6, .size = 8 },
	{ .id = VM_REG_GUEST_X7, .size = 8 },
	{ .id = VM_REG_GUEST_X8, .size = 8 },
	{ .id = VM_REG_GUEST_X9, .size = 8 },
	{ .id = VM_REG_GUEST_X10, .size = 8 },
	{ .id = VM_REG_GUEST_X11, .size = 8 },
	{ .id = VM_REG_GUEST_X12, .size = 8 },
	{ .id = VM_REG_GUEST_X13, .size = 8 },
	{ .id = VM_REG_GUEST_X14, .size = 8 },
	{ .id = VM_REG_GUEST_X15, .size = 8 },
	{ .id = VM_REG_GUEST_X16, .size = 8 },
	{ .id = VM_REG_GUEST_X17, .size = 8 },
	{ .id = VM_REG_GUEST_X18, .size = 8 },
	{ .id = VM_REG_GUEST_X19, .size = 8 },
	{ .id = VM_REG_GUEST_X20, .size = 8 },
	{ .id = VM_REG_GUEST_X21, .size = 8 },
	{ .id = VM_REG_GUEST_X22, .size = 8 },
	{ .id = VM_REG_GUEST_X23, .size = 8 },
	{ .id = VM_REG_GUEST_X24, .size = 8 },
	{ .id = VM_REG_GUEST_X25, .size = 8 },
	{ .id = VM_REG_GUEST_X26, .size = 8 },
	{ .id = VM_REG_GUEST_X27, .size = 8 },
	{ .id = VM_REG_GUEST_X28, .size = 8 },
	{ .id = VM_REG_GUEST_X29, .size = 8 },
	{ .id = VM_REG_GUEST_LR, .size = 8 },
	{ .id = VM_REG_GUEST_SP, .size = 8 },
	{ .id = VM_REG_GUEST_PC, .size = 8 },
	{ .id = VM_REG_GUEST_CPSR, .size = 8 },
};
#endif

#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		logfile = fopen("/tmp/bhyve_gdb.log", "w");
		if (logfile == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
			fclose(logfile);
			logfile = NULL;
			return;
		}
#endif
		setlinebuf(logfile);
	}
	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
#define debug(...)
#endif

static void	remove_all_sw_breakpoints(void);

static int
guest_paging_info(struct vcpu *vcpu, struct vm_guest_paging *paging)
{
#ifdef __amd64__
	uint64_t regs[4];
	const int regset[4] = {
		VM_REG_GUEST_CR0,
		VM_REG_GUEST_CR3,
		VM_REG_GUEST_CR4,
		VM_REG_GUEST_EFER
	};

	if (vm_get_register_set(vcpu, nitems(regset), regset, regs) == -1)
		return (-1);

	/*
	 * For the debugger, always pretend to be the kernel (CPL 0),
	 * and if long-mode is enabled, always parse addresses as if
	 * in 64-bit mode.
	 */
	paging->cr3 = regs[1];
	paging->cpl = 0;
	if (regs[3] & EFER_LMA)
		paging->cpu_mode = CPU_MODE_64BIT;
	else if (regs[0] & CR0_PE)
		paging->cpu_mode = CPU_MODE_PROTECTED;
	else
		paging->cpu_mode = CPU_MODE_REAL;
	if (!(regs[0] & CR0_PG))
		paging->paging_mode = PAGING_MODE_FLAT;
	else if (!(regs[2] & CR4_PAE))
		paging->paging_mode = PAGING_MODE_32;
	else if (regs[3] & EFER_LME)
		paging->paging_mode = (regs[2] & CR4_LA57) ?
		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
	else
		paging->paging_mode = PAGING_MODE_PAE;
	return (0);
#else /* __aarch64__ */
	uint64_t regs[6];
	const int regset[6] = {
		VM_REG_GUEST_TTBR0_EL1,
		VM_REG_GUEST_TTBR1_EL1,
		VM_REG_GUEST_TCR_EL1,
		VM_REG_GUEST_TCR2_EL1,
		VM_REG_GUEST_SCTLR_EL1,
		VM_REG_GUEST_CPSR,
	};

	if (vm_get_register_set(vcpu, nitems(regset), regset, regs) == -1)
		return (-1);

	memset(paging, 0, sizeof(*paging));
	paging->ttbr0_addr = regs[0] & ~(TTBR_ASID_MASK | TTBR_CnP);
	paging->ttbr1_addr = regs[1] & ~(TTBR_ASID_MASK | TTBR_CnP);
	paging->tcr_el1 = regs[2];
	paging->tcr2_el1 = regs[3];
	paging->flags = regs[5] & (PSR_M_MASK | PSR_M_32);
	if ((regs[4] & SCTLR_M) != 0)
		paging->flags |= VM_GP_MMU_ENABLED;

	return (0);
#endif /* __aarch64__ */
}

/*
 * Map a guest virtual address to a physical address (for a given vcpu).
 * If a guest virtual address is valid, return 1.  If the address is
 * not valid, return 0.  If an error occurs obtaining the mapping,
 * return -1.
 */
static int
guest_vaddr2paddr(struct vcpu *vcpu, uint64_t vaddr, uint64_t *paddr)
{
	struct vm_guest_paging paging;
	int fault;

	if (guest_paging_info(vcpu, &paging) == -1)
		return (-1);

	/*
	 * Always use PROT_READ.  We really care if the VA is
	 * accessible, not if the current vCPU can write.
	 */
	if (vm_gla2gpa_nofault(vcpu, &paging, vaddr, PROT_READ, paddr,
	    &fault) == -1)
		return (-1);
	if (fault)
		return (0);
	return (1);
}

static uint64_t
guest_pc(struct vm_exit *vme)
{
#ifdef __amd64__
	return (vme->rip);
#else /* __aarch64__ */
	return (vme->pc);
#endif
}

static void
io_buffer_reset(struct io_buffer *io)
{

	io->start = 0;
	io->len = 0;
}

/* Available room for adding data. */
static size_t
io_buffer_avail(struct io_buffer *io)
{

	return (io->capacity - (io->start + io->len));
}

static uint8_t *
io_buffer_head(struct io_buffer *io)
{

	return (io->data + io->start);
}

static uint8_t *
io_buffer_tail(struct io_buffer *io)
{

	return (io->data + io->start + io->len);
}

static void
io_buffer_advance(struct io_buffer *io, size_t amount)
{

	assert(amount <= io->len);
	io->start += amount;
	io->len -= amount;
}

static void
io_buffer_consume(struct io_buffer *io, size_t amount)
{

	io_buffer_advance(io, amount);
	if (io->len == 0) {
		io->start = 0;
		return;
	}

	/*
	 * XXX: Consider making this move optional and compacting on a
	 * future read() before realloc().
	 */
	memmove(io->data, io_buffer_head(io), io->len);
	io->start = 0;
}

static void
io_buffer_grow(struct io_buffer *io, size_t newsize)
{
	uint8_t *new_data;
	size_t avail, new_cap;

	avail = io_buffer_avail(io);
	if (newsize <= avail)
		return;

	new_cap = io->capacity + (newsize - avail);
	new_data = realloc(io->data, new_cap);
	if (new_data == NULL)
		err(1, "Failed to grow GDB I/O buffer");
	io->data = new_data;
	io->capacity = new_cap;
}

static bool
response_pending(void)
{

	if (cur_resp.start == 0 && cur_resp.len == 0)
		return (false);
	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
		return (false);
	return (true);
}

static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	remove_all_sw_breakpoints();

	/* Clear any pending events. */
	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}

static uint8_t
hex_digit(uint8_t nibble)
{

	if (nibble <= 9)
		return (nibble + '0');
	else
		return (nibble + 'a' - 10);
}

static uint8_t
parse_digit(uint8_t v)
{

	if (v >= '0' && v <= '9')
		return (v - '0');
	if (v >= 'a' && v <= 'f')
		return (v - 'a' + 10);
	if (v >= 'A' && v <= 'F')
		return (v - 'A' + 10);
	return (0xF);
}

/* Parses big-endian hexadecimal. */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t v;

	v = 0;
	while (len > 0) {
		v <<= 4;
		v |= parse_digit(*p);
		p++;
		len--;
	}
	return (v);
}

static uint8_t
parse_byte(const uint8_t *p)
{

	return (parse_digit(p[0]) << 4 | parse_digit(p[1]));
}
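
/*
 * Illustrative values: parse_integer((const uint8_t *)"1a2b", 4)
 * yields 0x1a2b and parse_byte((const uint8_t *)"7f") yields 0x7f.
 * Invalid digits parse as 0xf rather than failing; callers are
 * responsible for validating input separately.
 */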

static void
send_pending_data(int fd)
{
	ssize_t nwritten;

	if (cur_resp.len == 0) {
		mevent_disable(write_event);
		return;
	}
	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
	if (nwritten == -1) {
		warn("Write to GDB socket failed");
		close_connection();
	} else {
		io_buffer_advance(&cur_resp, nwritten);
		if (cur_resp.len == 0)
			mevent_disable(write_event);
		else
			mevent_enable(write_event);
	}
}

/* Append a single character to the output buffer. */
static void
send_char(uint8_t data)
{
	io_buffer_grow(&cur_resp, 1);
	*io_buffer_tail(&cur_resp) = data;
	cur_resp.len++;
}

/* Append an array of bytes to the output buffer. */
static void
send_data(const uint8_t *data, size_t len)
{

	io_buffer_grow(&cur_resp, len);
	memcpy(io_buffer_tail(&cur_resp), data, len);
	cur_resp.len += len;
}

static void
format_byte(uint8_t v, uint8_t *buf)
{

	buf[0] = hex_digit(v >> 4);
	buf[1] = hex_digit(v & 0xf);
}

/*
 * Append a single byte (formatted as two hex characters) to the
 * output buffer.
 */
static void
send_byte(uint8_t v)
{
	uint8_t buf[2];

	format_byte(v, buf);
	send_data(buf, sizeof(buf));
}

static void
start_packet(void)
{

	send_char('$');
	cur_csum = 0;
}

static void
finish_packet(void)
{

	send_char('#');
	send_byte(cur_csum);
	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
}
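
/*
 * Framing example: a packet travels as '$<payload>#<ck>', where <ck>
 * is the sum of the payload bytes mod 256 as two hex digits.  The
 * "OK" reply is therefore sent as "$OK#9a" ('O' (0x4f) + 'K' (0x4b)
 * == 0x9a).  The peer acknowledges each packet with '+' or requests
 * a retransmit with '-'.
 */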

/*
 * Append a single character (for the packet payload) and update the
 * checksum.
 */
static void
append_char(uint8_t v)
{

	send_char(v);
	cur_csum += v;
}

/*
 * Append an array of bytes (for the packet payload) and update the
 * checksum.
 */
static void
append_packet_data(const uint8_t *data, size_t len)
{

	send_data(data, len);
	while (len > 0) {
		cur_csum += *data;
		data++;
		len--;
	}
}

static void
append_string(const char *str)
{

	append_packet_data(str, strlen(str));
}

static void
append_byte(uint8_t v)
{
	uint8_t buf[2];

	format_byte(v, buf);
	append_packet_data(buf, sizeof(buf));
}

static void
append_unsigned_native(uintmax_t value, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++) {
		append_byte(value);
		value >>= 8;
	}
}

static void
append_unsigned_be(uintmax_t value, size_t len)
{
	char buf[len * 2];
	size_t i;

	for (i = 0; i < len; i++) {
		format_byte(value, buf + (len - i - 1) * 2);
		value >>= 8;
	}
	append_packet_data(buf, sizeof(buf));
}
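
/*
 * Byte-order example: append_unsigned_native(0x1234, 2) emits "3412"
 * (least-significant byte first, matching the little-endian register
 * encoding gdb expects for these targets), while
 * append_unsigned_be(0x1234, 2) emits "1234" (used for protocol
 * integers such as thread ids).
 */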

static void
append_integer(unsigned int value)
{

	if (value == 0)
		append_char('0');
	else
		append_unsigned_be(value, (fls(value) + 7) / 8);
}

static void
append_asciihex(const char *str)
{

	while (*str != '\0') {
		append_byte(*str);
		str++;
	}
}

static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}

static void
send_error(int error)
{

	start_packet();
	append_char('E');
	append_byte(error);
	finish_packet();
}

static void
send_ok(void)
{

	start_packet();
	append_string("OK");
	finish_packet();
}

static int
parse_threadid(const uint8_t *data, size_t len)
{

	if (len == 1 && *data == '0')
		return (0);
	if (len == 2 && memcmp(data, "-1", 2) == 0)
		return (-1);
	if (len == 0)
		return (-2);
	return (parse_integer(data, len));
}

/*
 * Report the current stop event to the debugger.  If the stop is due
 * to an event triggered on a specific vCPU such as a breakpoint or
 * stepping trap, stopped_vcpu will be set to the vCPU triggering the
 * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
 * the reporting vCPU for vCPU events.
 */
static void
report_stop(bool set_cur_vcpu)
{
	struct vcpu_state *vs;

	start_packet();
	if (stopped_vcpu == -1) {
		append_char('S');
		append_byte(GDB_SIGNAL_TRAP);
	} else {
		vs = &vcpu_state[stopped_vcpu];
		if (set_cur_vcpu)
			cur_vcpu = stopped_vcpu;
		append_char('T');
		append_byte(GDB_SIGNAL_TRAP);
		append_string("thread:");
		append_integer(stopped_vcpu + 1);
		append_char(';');
		if (vs->hit_swbreak) {
			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
			if (swbreak_enabled)
				append_string("swbreak:;");
		} else if (vs->stepped)
			debug("$vCPU %d reporting step\n", stopped_vcpu);
		else
			debug("$vCPU %d reporting ???\n", stopped_vcpu);
	}
	finish_packet();
	report_next_stop = false;
}
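
/*
 * For example, when vCPU 0 hits a debug-server breakpoint and the
 * debugger negotiated the swbreak feature, the stop reply payload is
 * "T05thread:01;swbreak:;": signal 5 (SIGTRAP), thread id 1 (vCPU
 * ids are reported one-based), and the swbreak stop reason.
 */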

/*
 * If this stop is due to a vCPU event, clear that event to mark it as
 * acknowledged.
 */
static void
discard_stop(void)
{
	struct vcpu_state *vs;

	if (stopped_vcpu != -1) {
		vs = &vcpu_state[stopped_vcpu];
		vs->hit_swbreak = false;
		vs->stepped = false;
		stopped_vcpu = -1;
	}
	report_next_stop = true;
}

static void
gdb_finish_suspend_vcpus(void)
{

	if (first_stop) {
		first_stop = false;
		stopped_vcpu = -1;
	} else if (report_next_stop) {
		assert(!response_pending());
		report_stop(true);
		send_pending_data(cur_fd);
	}
}

/*
 * vCPU threads invoke this function whenever the vCPU enters the
 * debug server to pause or report an event.  vCPU threads wait here
 * as long as the debug server keeps them suspended.
 */
static void
_gdb_cpu_suspend(struct vcpu *vcpu, bool report_stop)
{
	int vcpuid = vcpu_id(vcpu);

	debug("$vCPU %d suspending\n", vcpuid);
	CPU_SET(vcpuid, &vcpus_waiting);
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	while (CPU_ISSET(vcpuid, &vcpus_suspended))
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpuid, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpuid);
}

/*
 * Requests vCPU single-stepping using a
 * VMEXIT suitable for the host platform.
 */
static int
_gdb_set_step(struct vcpu *vcpu, int val)
{
	int error;

#ifdef __amd64__
	/*
	 * If the MTRAP cap fails, we are running on an AMD host.
	 * In that case, we request DB exits caused by RFLAGS.TF.
	 */
	error = vm_set_capability(vcpu, VM_CAP_MTRAP_EXIT, val);
	if (error != 0)
		error = vm_set_capability(vcpu, VM_CAP_RFLAGS_TF, val);
	if (error == 0)
		(void)vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, val);
#else /* __aarch64__ */
	error = vm_set_capability(vcpu, VM_CAP_SS_EXIT, val);
	if (error == 0)
		error = vm_set_capability(vcpu, VM_CAP_MASK_HWINTR, val);
#endif
	return (error);
}

/*
 * Checks whether single-stepping is supported for a given vCPU.
 */
static int
_gdb_check_step(struct vcpu *vcpu)
{
#ifdef __amd64__
	int val;

	if (vm_get_capability(vcpu, VM_CAP_MTRAP_EXIT, &val) != 0) {
		if (vm_get_capability(vcpu, VM_CAP_RFLAGS_TF, &val) != 0)
			return (-1);
	}
#else /* __aarch64__ */
	(void)vcpu;
#endif
	return (0);
}

/*
 * Invoked at the start of a vCPU thread's execution to inform the
 * debug server about the new thread.
 */
void
gdb_cpu_add(struct vcpu *vcpu)
{
	int vcpuid;

	if (!gdb_active)
		return;
	vcpuid = vcpu_id(vcpu);
	debug("$vCPU %d starting\n", vcpuid);
	pthread_mutex_lock(&gdb_lock);
	assert(vcpuid < guest_ncpus);
	assert(vcpus[vcpuid] == NULL);
	vcpus[vcpuid] = vcpu;
	CPU_SET(vcpuid, &vcpus_active);
	if (!TAILQ_EMPTY(&breakpoints)) {
		vm_set_capability(vcpu, GDB_BREAKPOINT_CAP, 1);
		debug("$vCPU %d enabled breakpoint exits\n", vcpuid);
	}

	/*
	 * If a vcpu is added while vcpus are stopped, suspend the new
	 * vcpu so that it will pop back out with a debug exit before
	 * executing the first instruction.
	 */
	if (!CPU_EMPTY(&vcpus_suspended)) {
		CPU_SET(vcpuid, &vcpus_suspended);
		_gdb_cpu_suspend(vcpu, false);
	}
	pthread_mutex_unlock(&gdb_lock);
}

/*
 * Invoked by vCPU before resuming execution.  This enables stepping
 * if the vCPU is marked as stepping.
 */
static void
gdb_cpu_resume(struct vcpu *vcpu)
{
	struct vcpu_state *vs;
	int error;

	vs = &vcpu_state[vcpu_id(vcpu)];

	/*
	 * Any pending event should already be reported before
	 * resuming.
	 */
	assert(vs->hit_swbreak == false);
	assert(vs->stepped == false);
	if (vs->stepping) {
		error = _gdb_set_step(vcpu, 1);
		assert(error == 0);
	}
}

/*
 * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
 * has been suspended due to an event on a different vCPU or in response
 * to a guest-wide suspend such as Ctrl-C or the stop on attach.
 */
void
gdb_cpu_suspend(struct vcpu *vcpu)
{

	if (!gdb_active)
		return;
	pthread_mutex_lock(&gdb_lock);
	_gdb_cpu_suspend(vcpu, true);
	gdb_cpu_resume(vcpu);
	pthread_mutex_unlock(&gdb_lock);
}

static void
gdb_suspend_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	debug("suspending all CPUs\n");
	vcpus_suspended = vcpus_active;
	vm_suspend_all_cpus(ctx);
	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
}

/*
 * Invoked each time a vmexit handler needs to step a vCPU.
 * Handles MTRAP and RFLAGS.TF vmexits.
 */
static void
gdb_cpu_step(struct vcpu *vcpu)
{
	struct vcpu_state *vs;
	int vcpuid = vcpu_id(vcpu);
	int error;

	debug("$vCPU %d stepped\n", vcpuid);
	pthread_mutex_lock(&gdb_lock);
	vs = &vcpu_state[vcpuid];
	if (vs->stepping) {
		vs->stepping = false;
		vs->stepped = true;
		error = _gdb_set_step(vcpu, 0);
		assert(error == 0);

		while (vs->stepped) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting step\n", vcpuid);
				stopped_vcpu = vcpuid;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
		}
		gdb_cpu_resume(vcpu);
	}
	pthread_mutex_unlock(&gdb_lock);
}

/*
 * A general handler for single-step exceptions.
 * Handles RFLAGS.TF exits on AMD SVM.
 */
void
gdb_cpu_debug(struct vcpu *vcpu, struct vm_exit *vmexit)
{
	if (!gdb_active)
		return;

#ifdef __amd64__
	/* RFLAGS.TF exit? */
	if (vmexit->u.dbg.trace_trap) {
		gdb_cpu_step(vcpu);
	}
#else /* __aarch64__ */
	(void)vmexit;
	gdb_cpu_step(vcpu);
#endif
}

/*
 * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
 * the VT-x-specific MTRAP exit.
 */
void
gdb_cpu_mtrap(struct vcpu *vcpu)
{
	if (!gdb_active)
		return;
	gdb_cpu_step(vcpu);
}

static struct breakpoint *
find_breakpoint(uint64_t gpa)
{
	struct breakpoint *bp;

	TAILQ_FOREACH(bp, &breakpoints, link) {
		if (bp->gpa == gpa)
			return (bp);
	}
	return (NULL);
}

void
gdb_cpu_breakpoint(struct vcpu *vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error, vcpuid;

	if (!gdb_active) {
		EPRINTLN("vm_loop: unexpected VMEXIT_DEBUG");
		exit(4);
	}
	vcpuid = vcpu_id(vcpu);
	pthread_mutex_lock(&gdb_lock);
	error = guest_vaddr2paddr(vcpu, guest_pc(vmexit), &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpuid];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		vm_set_register(vcpu, GDB_PC_REGNAME, guest_pc(vmexit));
		for (;;) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n",
				    vcpuid, guest_pc(vmexit));
				stopped_vcpu = vcpuid;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpuid,
		    guest_pc(vmexit));
#ifdef __amd64__
		error = vm_set_register(vcpu, VM_REG_GUEST_ENTRY_INST_LENGTH,
		    vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
#else /* __aarch64__ */
		uint64_t esr;

		esr = (EXCP_BRK << ESR_ELx_EC_SHIFT) | vmexit->u.hyp.esr_el2;
		error = vm_inject_exception(vcpu, esr, 0);
		assert(error == 0);
#endif
	}
	pthread_mutex_unlock(&gdb_lock);
}

static bool
gdb_step_vcpu(struct vcpu *vcpu)
{
	int error, vcpuid;

	vcpuid = vcpu_id(vcpu);
	debug("$vCPU %d step\n", vcpuid);
	error = _gdb_check_step(vcpu);
	if (error < 0)
		return (false);

	discard_stop();
	vcpu_state[vcpuid].stepping = true;
	vm_resume_cpu(vcpu);
	CPU_CLR(vcpuid, &vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
	return (true);
}

static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_all_cpus(ctx);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}

static void
gdb_read_regs(void)
{
	uint64_t regvals[nitems(gdb_regset)];
	int regnums[nitems(gdb_regset)];

	for (size_t i = 0; i < nitems(gdb_regset); i++)
		regnums[i] = gdb_regset[i].id;
	if (vm_get_register_set(vcpus[cur_vcpu], nitems(gdb_regset),
	    regnums, regvals) == -1) {
		send_error(errno);
		return;
	}

	start_packet();
	for (size_t i = 0; i < nitems(gdb_regset); i++) {
#ifdef GDB_REG_FIRST_EXT
		if (gdb_regset[i].id == GDB_REG_FIRST_EXT)
			break;
#endif
		append_unsigned_native(regvals[i], gdb_regset[i].size);
	}
	finish_packet();
}

static void
gdb_read_one_reg(const uint8_t *data, size_t len)
{
	uint64_t regval;
	uintmax_t reg;

	reg = parse_integer(data, len);
	if (reg >= nitems(gdb_regset)) {
		send_error(EINVAL);
		return;
	}

	if (vm_get_register(vcpus[cur_vcpu], gdb_regset[reg].id, &regval) ==
	    -1) {
		send_error(errno);
		return;
	}

	start_packet();
	append_unsigned_native(regval, gdb_regset[reg].size);
	finish_packet();
}
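
/*
 * Example: the payload "p10" requests register 0x10 (index 16),
 * which is RIP on amd64 and X16 on aarch64; the reply is the
 * register value as sixteen little-endian hex digits.
 */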

static void
gdb_read_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	bool started;
	int error;

	assert(len >= 1);

	/* Skip 'm' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse length. */
	resid = parse_integer(data, len);

	started = false;
	while (resid > 0) {
		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
		if (error == -1) {
			if (started)
				finish_packet();
			else
				send_error(errno);
			return;
		}
		if (error == 0) {
			if (started)
				finish_packet();
			else
				send_error(EFAULT);
			return;
		}

		/* Read bytes from current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, read it a byte
			 * at a time.
			 */
			if (!started) {
				start_packet();
				started = true;
			}
			while (todo > 0) {
				append_byte(*cp);
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned reads of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1)
					bytes = 1;
				else if (gpa & 2 || todo == 2)
					bytes = 2;
				else
					bytes = 4;
				error = read_mem(vcpus[cur_vcpu], gpa, &val,
				    bytes);
				if (error == 0) {
					if (!started) {
						start_packet();
						started = true;
					}
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					while (bytes > 0) {
						append_byte(val);
						val >>= 8;
						bytes--;
					}
				} else {
					if (started)
						finish_packet();
					else
						send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	if (!started)
		start_packet();
	finish_packet();
}
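
/*
 * Example: the payload "mffffffff80000000,10" requests 0x10 bytes at
 * guest virtual address 0xffffffff80000000, answered with 32 hex
 * digits on success.  A short reply is legal when a later page turns
 * out to be unmapped, which is why the packet is finished early
 * above instead of reporting an error.
 */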

static void
gdb_write_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	int error;

	assert(len >= 1);

	/* Skip 'M' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume length. */
	cp = memchr(data, ':', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	resid = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Verify the available bytes match the length. */
	if (len != resid * 2) {
		send_error(EINVAL);
		return;
	}

	while (resid > 0) {
		error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
		if (error == -1) {
			send_error(errno);
			return;
		}
		if (error == 0) {
			send_error(EFAULT);
			return;
		}

		/* Write bytes to current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, write it a byte
			 * at a time.
			 */
			while (todo > 0) {
				assert(len >= 2);
				*cp = parse_byte(data);
				data += 2;
				len -= 2;
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned writes of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1) {
					bytes = 1;
					val = parse_byte(data);
				} else if (gpa & 2 || todo == 2) {
					bytes = 2;
					val = be16toh(parse_integer(data, 4));
				} else {
					bytes = 4;
					val = be32toh(parse_integer(data, 8));
				}
				error = write_mem(vcpus[cur_vcpu], gpa, val,
				    bytes);
				if (error == 0) {
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					data += 2 * bytes;
					len -= 2 * bytes;
				} else {
					send_error(EFAULT);
					return;
				}
			}
		}
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	assert(len == 0);
	send_ok();
}
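
/*
 * Example: the payload "M1000,4:90909090" writes four 0x90 bytes at
 * guest virtual address 0x1000 and is answered with "OK".
 */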

static bool
set_breakpoint_caps(bool enable)
{
	cpuset_t mask;
	int vcpu;

	mask = vcpus_active;
	while (!CPU_EMPTY(&mask)) {
		vcpu = CPU_FFS(&mask) - 1;
		CPU_CLR(vcpu, &mask);
		if (vm_set_capability(vcpus[vcpu], GDB_BREAKPOINT_CAP,
		    enable ? 1 : 0) < 0)
			return (false);
		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
		    enable ? "en" : "dis");
	}
	return (true);
}

static void
write_instr(uint8_t *dest, uint8_t *instr, size_t len)
{
	memcpy(dest, instr, len);
#ifdef __aarch64__
	__asm __volatile(
	    "dc cvau, %0\n"
	    "dsb ish\n"
	    "ic ialluis\n"
	    "dsb ish\n"
	    : : "r" (dest) : "memory");
#endif
}

static void
remove_all_sw_breakpoints(void)
{
	struct breakpoint *bp, *nbp;
	uint8_t *cp;

	if (TAILQ_EMPTY(&breakpoints))
		return;

	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
		debug("remove breakpoint at %#lx\n", bp->gpa);
		cp = paddr_guest2host(ctx, bp->gpa, sizeof(bp->shadow_inst));
		write_instr(cp, bp->shadow_inst, sizeof(bp->shadow_inst));
		TAILQ_REMOVE(&breakpoints, bp, link);
		free(bp);
	}
	TAILQ_INIT(&breakpoints);
	set_breakpoint_caps(false);
}

static void
update_sw_breakpoint(uint64_t gva, int kind, bool insert)
{
	struct breakpoint *bp;
	uint64_t gpa;
	uint8_t *cp;
	int error;

	if (kind != GDB_BP_SIZE) {
		send_error(EINVAL);
		return;
	}

	error = guest_vaddr2paddr(vcpus[cur_vcpu], gva, &gpa);
	if (error == -1) {
		send_error(errno);
		return;
	}
	if (error == 0) {
		send_error(EFAULT);
		return;
	}

	cp = paddr_guest2host(ctx, gpa, sizeof(bp->shadow_inst));

	/* Only permit breakpoints in guest RAM. */
	if (cp == NULL) {
		send_error(EFAULT);
		return;
	}

	/* Find any existing breakpoint. */
	bp = find_breakpoint(gpa);

	/*
	 * Silently ignore duplicate commands since the protocol
	 * requires these packets to be idempotent.
	 */
	if (insert) {
		if (bp == NULL) {
			if (TAILQ_EMPTY(&breakpoints) &&
			    !set_breakpoint_caps(true)) {
				send_empty_response();
				return;
			}
			bp = malloc(sizeof(*bp));
			bp->gpa = gpa;
			memcpy(bp->shadow_inst, cp, sizeof(bp->shadow_inst));
			write_instr(cp, GDB_BP_INSTR, sizeof(bp->shadow_inst));
			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
			debug("new breakpoint at %#lx\n", gpa);
		}
	} else {
		if (bp != NULL) {
			debug("remove breakpoint at %#lx\n", gpa);
			write_instr(cp, bp->shadow_inst,
			    sizeof(bp->shadow_inst));
			TAILQ_REMOVE(&breakpoints, bp, link);
			free(bp);
			if (TAILQ_EMPTY(&breakpoints))
				set_breakpoint_caps(false);
		}
	}
	send_ok();
}

static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	uint64_t gva;
	uint8_t *cp;
	bool insert;
	int kind, type;

	insert = data[0] == 'Z';

	/* Skip 'Z/z' */
	data += 1;
	len -= 1;

	/* Parse and consume type. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	type = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume kind. */
	cp = memchr(data, ';', len);
	if (cp == data) {
		send_error(EINVAL);
		return;
	}
	if (cp != NULL) {
		/*
		 * We do not advertise support for either the
		 * ConditionalBreakpoints or BreakpointCommands
		 * features, so we should not be getting conditions or
		 * commands from the remote end.
		 */
		send_empty_response();
		return;
	}
	kind = parse_integer(data, len);
	data += len;
	len = 0;

	switch (type) {
	case 0:
		update_sw_breakpoint(gva, kind, insert);
		break;
	default:
		send_empty_response();
		break;
	}
}
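
/*
 * Example: "Z0,ffff000000200000,4" inserts a software breakpoint
 * (type 0) at 0xffff000000200000 with kind 4 (the breakpoint length
 * on aarch64; amd64 uses kind 1), and the matching "z0,..." packet
 * removes it.  Hardware breakpoint and watchpoint types receive an
 * empty reply since they are not supported here.
 */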

static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{

	if (strlen(cmd) > len)
		return (false);
	return (memcmp(data, cmd, strlen(cmd)) == 0);
}

static void
check_features(const uint8_t *data, size_t len)
{
	char *feature, *next_feature, *str, *value;
	bool supported;

	str = malloc(len + 1);
	memcpy(str, data, len);
	str[len] = '\0';
	next_feature = str;

	while ((feature = strsep(&next_feature, ";")) != NULL) {
		/*
		 * Null features shouldn't exist, but skip if they
		 * do.
		 */
		if (strcmp(feature, "") == 0)
			continue;

		/*
		 * Look for the value or supported / not supported
		 * flag.
		 */
		value = strchr(feature, '=');
		if (value != NULL) {
			*value = '\0';
			value++;
			supported = true;
		} else {
			value = feature + strlen(feature) - 1;
			switch (*value) {
			case '+':
				supported = true;
				break;
			case '-':
				supported = false;
				break;
			default:
				/*
				 * This is really a protocol error,
				 * but we just ignore malformed
				 * features for ease of
				 * implementation.
				 */
				continue;
			}
			value = NULL;
		}

		if (strcmp(feature, "swbreak") == 0)
			swbreak_enabled = supported;
	}
	free(str);

	start_packet();

	/* This is an arbitrary limit. */
	append_string("PacketSize=4096");
	append_string(";swbreak+");
	append_string(";qXfer:features:read+");
	finish_packet();
}
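
/*
 * A typical handshake: the debugger opens with something like
 * "qSupported:multiprocess+;swbreak+;xmlRegisters=i386", and this
 * server replies "PacketSize=4096;swbreak+;qXfer:features:read+",
 * enabling swbreak stop reasons and target-description transfers.
 */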

static void
gdb_query(const uint8_t *data, size_t len)
{

	/*
	 * TODO:
	 * - qSearch
	 */
	if (command_equals(data, len, "qAttached")) {
		start_packet();
		append_char('1');
		finish_packet();
	} else if (command_equals(data, len, "qC")) {
		start_packet();
		append_string("QC");
		append_integer(cur_vcpu + 1);
		finish_packet();
	} else if (command_equals(data, len, "qfThreadInfo")) {
		cpuset_t mask;
		bool first;
		int vcpu;

		if (CPU_EMPTY(&vcpus_active)) {
			send_error(EINVAL);
			return;
		}
		mask = vcpus_active;
		start_packet();
		append_char('m');
		first = true;
		while (!CPU_EMPTY(&mask)) {
			vcpu = CPU_FFS(&mask) - 1;
			CPU_CLR(vcpu, &mask);
			if (first)
				first = false;
			else
				append_char(',');
			append_integer(vcpu + 1);
		}
		finish_packet();
	} else if (command_equals(data, len, "qsThreadInfo")) {
		start_packet();
		append_char('l');
		finish_packet();
	} else if (command_equals(data, len, "qSupported")) {
		data += strlen("qSupported");
		len -= strlen("qSupported");
		check_features(data, len);
	} else if (command_equals(data, len, "qThreadExtraInfo")) {
		char buf[16];
		int tid;

		data += strlen("qThreadExtraInfo");
		len -= strlen("qThreadExtraInfo");
		if (len == 0 || *data != ',') {
			send_error(EINVAL);
			return;
		}
		tid = parse_threadid(data + 1, len - 1);
		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
			send_error(EINVAL);
			return;
		}

		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
		start_packet();
		append_asciihex(buf);
		finish_packet();
	} else if (command_equals(data, len, "qXfer:features:read:")) {
		struct stat sb;
		const char *xml;
		const uint8_t *pathend;
		char buf[64], path[PATH_MAX];
		size_t xmllen;
		unsigned int doff, dlen;
		int fd;

		data += strlen("qXfer:features:read:");
		len -= strlen("qXfer:features:read:");

		pathend = memchr(data, ':', len);
		if (pathend == NULL ||
		    (size_t)(pathend - data) >= sizeof(path) - 1) {
			send_error(EINVAL);
			return;
		}
		memcpy(path, data, pathend - data);
		path[pathend - data] = '\0';
		len -= (pathend - data) + 1;
		data += (pathend - data) + 1;

		if (len > sizeof(buf) - 1) {
			send_error(EINVAL);
			return;
		}
		memcpy(buf, data, len);
		buf[len] = '\0';
		if (sscanf(buf, "%x,%x", &doff, &dlen) != 2) {
			send_error(EINVAL);
			return;
		}

		fd = openat(xml_dfd, path, O_RDONLY | O_RESOLVE_BENEATH);
		if (fd < 0) {
			send_error(errno);
			return;
		}
		if (fstat(fd, &sb) < 0) {
			send_error(errno);
			close(fd);
			return;
		}
		xml = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
		if (xml == MAP_FAILED) {
			send_error(errno);
			close(fd);
			return;
		}
		close(fd);
		xmllen = sb.st_size;

		start_packet();
		if (doff >= xmllen) {
			append_char('l');
		} else if (doff + dlen >= xmllen) {
			append_char('l');
			append_packet_data(xml + doff, xmllen - doff);
		} else {
			append_char('m');
			append_packet_data(xml + doff, dlen);
		}
		finish_packet();
		(void)munmap(__DECONST(void *, xml), xmllen);
	} else
		send_empty_response();
}

static void
handle_command(const uint8_t *data, size_t len)
{

	/* Reject packets with a sequence-id. */
	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
	    data[1] >= '0' && data[1] <= '9' && data[2] == ':') {
		send_empty_response();
		return;
	}

	switch (*data) {
	case 'c':
		if (len != 1) {
			send_error(EINVAL);
			break;
		}

		discard_stop();
		gdb_resume_vcpus();
		break;
	case 'D':
		send_ok();

		/* TODO: Resume any stopped CPUs. */
		break;
	case 'g':
		gdb_read_regs();
		break;
	case 'p':
		gdb_read_one_reg(data + 1, len - 1);
		break;
	case 'H': {
		int tid;

		if (len < 2 || (data[1] != 'g' && data[1] != 'c')) {
			send_error(EINVAL);
			break;
		}
		tid = parse_threadid(data + 2, len - 2);
		if (tid == -2) {
			send_error(EINVAL);
			break;
		}

		if (CPU_EMPTY(&vcpus_active)) {
			send_error(EINVAL);
			break;
		}
		if (tid == -1 || tid == 0)
			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
		else if (CPU_ISSET(tid - 1, &vcpus_active))
			cur_vcpu = tid - 1;
		else {
			send_error(EINVAL);
			break;
		}
		send_ok();
		break;
	}
	case 'm':
		gdb_read_mem(data, len);
		break;
	case 'M':
		gdb_write_mem(data, len);
		break;
	case 'T': {
		int tid;

		tid = parse_threadid(data + 1, len - 1);
		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
			send_error(EINVAL);
			return;
		}
		send_ok();
		break;
	}
	case 'q':
		gdb_query(data, len);
		break;
	case 's':
		if (len != 1) {
			send_error(EINVAL);
			break;
		}

		/* Don't send a reply until a stop occurs. */
		if (!gdb_step_vcpu(vcpus[cur_vcpu])) {
			send_error(EOPNOTSUPP);
			break;
		}
		break;
	case 'z':
	case 'Z':
		parse_breakpoint(data, len);
		break;
	case '?':
		report_stop(false);
		break;
	case 'G': /* TODO */
	case 'v':
		/* Handle 'vCont' */
		/* 'vCtrlC' */
	case 'P': /* TODO */
	case 'Q': /* TODO */
	case 't': /* TODO */
	case 'X': /* TODO */
	default:
		send_empty_response();
	}
}

/* Check for a valid packet in the command buffer. */
static void
check_command(int fd)
{
	uint8_t *head, *hash, *p, sum;
	size_t avail, plen;

	for (;;) {
		avail = cur_comm.len;
		if (avail == 0)
			return;
		head = io_buffer_head(&cur_comm);
		switch (*head) {
		case 0x03:
			debug("<- Ctrl-C\n");
			io_buffer_consume(&cur_comm, 1);

			gdb_suspend_vcpus();
			break;
		case '+':
			/* ACK of previous response. */
			debug("<- +\n");
			if (response_pending())
				io_buffer_reset(&cur_resp);
			io_buffer_consume(&cur_comm, 1);
			if (stopped_vcpu != -1 && report_next_stop) {
				report_stop(true);
				send_pending_data(fd);
			}
			break;
		case '-':
			/* NACK of previous response. */
			debug("<- -\n");
			if (response_pending()) {
				cur_resp.len += cur_resp.start;
				cur_resp.start = 0;
				if (cur_resp.data[0] == '+')
					io_buffer_advance(&cur_resp, 1);
				debug("-> %.*s\n", (int)cur_resp.len,
				    io_buffer_head(&cur_resp));
			}
			io_buffer_consume(&cur_comm, 1);
			send_pending_data(fd);
			break;
		case '$':
			/* Packet. */

			if (response_pending()) {
				warnx("New GDB command while response in "
				    "progress");
				io_buffer_reset(&cur_resp);
			}

			/* Is packet complete? */
			hash = memchr(head, '#', avail);
			if (hash == NULL)
				return;
			plen = (hash - head + 1) + 2;
			if (avail < plen)
				return;
			debug("<- %.*s\n", (int)plen, head);

			/* Verify checksum. */
			for (sum = 0, p = head + 1; p < hash; p++)
				sum += *p;
			if (sum != parse_byte(hash + 1)) {
				io_buffer_consume(&cur_comm, plen);
				debug("-> -\n");
				send_char('-');
				send_pending_data(fd);
				break;
			}
			send_char('+');

			handle_command(head + 1, hash - (head + 1));
			io_buffer_consume(&cur_comm, plen);
			if (!response_pending())
				debug("-> +\n");
			send_pending_data(fd);
			break;
		default:
			/* XXX: Possibly drop connection instead. */
			debug("-> %02x\n", *head);
			io_buffer_consume(&cur_comm, 1);
			break;
		}
	}
}

static void
gdb_readable(int fd, enum ev_type event __unused, void *arg __unused)
{
	size_t pending;
	ssize_t nread;
	int n;

	if (ioctl(fd, FIONREAD, &n) == -1) {
		warn("FIONREAD on GDB socket");
		return;
	}
	assert(n >= 0);
	pending = n;

	/*
	 * 'pending' might be zero due to EOF.  We need to call read
	 * with a non-zero length to detect EOF.
	 */
	if (pending == 0)
		pending = 1;

	/* Ensure there is room in the command buffer. */
	io_buffer_grow(&cur_comm, pending);
	assert(io_buffer_avail(&cur_comm) >= pending);

	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
	if (nread == 0) {
		close_connection();
	} else if (nread == -1) {
		if (errno == EAGAIN)
			return;

		warn("Read from GDB socket");
		close_connection();
	} else {
		cur_comm.len += nread;
		pthread_mutex_lock(&gdb_lock);
		check_command(fd);
		pthread_mutex_unlock(&gdb_lock);
	}
}

static void
gdb_writable(int fd, enum ev_type event __unused, void *arg __unused)
{

	send_pending_data(fd);
}

static void
new_connection(int fd, enum ev_type event __unused, void *arg)
{
	int optval, s;

	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
	if (s == -1) {
		if (arg != NULL)
			err(1, "Failed accepting initial GDB connection");

		/* Silently ignore errors post-startup. */
		return;
	}

	optval = 1;
	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
	    -1) {
		warn("Failed to disable SIGPIPE for GDB connection");
		close(s);
		return;
	}

	pthread_mutex_lock(&gdb_lock);
	if (cur_fd != -1) {
		close(s);
		warnx("Ignoring additional GDB connection.");
		pthread_mutex_unlock(&gdb_lock);
		return;
	}

	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
	if (read_event == NULL) {
		if (arg != NULL)
			err(1, "Failed to setup initial GDB connection");
		pthread_mutex_unlock(&gdb_lock);
		return;
	}
	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
	if (write_event == NULL) {
		if (arg != NULL)
			err(1, "Failed to setup initial GDB connection");
		mevent_delete_close(read_event);
		read_event = NULL;
	}

	cur_fd = s;
	cur_vcpu = 0;
	stopped_vcpu = -1;

	/* Break on attach. */
	first_stop = true;
	report_next_stop = false;
	gdb_suspend_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}

#ifndef WITHOUT_CAPSICUM
static void
limit_gdb_socket(int s)
{
	cap_rights_t rights;
	unsigned long ioctls[] = { FIONREAD };

	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
	    CAP_SETSOCKOPT, CAP_IOCTL);
	if (caph_rights_limit(s, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
}
#endif

void
init_gdb(struct vmctx *_ctx)
{
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
#endif
	int error, flags, optval, s;
	struct addrinfo hints;
	struct addrinfo *gdbaddr;
	const char *saddr, *value;
	char *sport;
	bool wait;

	value = get_config_value("gdb.port");
	if (value == NULL)
		return;
	sport = strdup(value);
	if (sport == NULL)
		errx(4, "Failed to allocate memory");

	wait = get_config_bool_default("gdb.wait", false);

	saddr = get_config_value("gdb.address");
	if (saddr == NULL) {
		saddr = "localhost";
	}

	debug("==> starting on %s:%s, %swaiting\n",
	    saddr, sport, wait ? "" : "not ");

	error = pthread_mutex_init(&gdb_lock, NULL);
	if (error != 0)
		errc(1, error, "gdb mutex init");
	error = pthread_cond_init(&idle_vcpus, NULL);
	if (error != 0)
		errc(1, error, "gdb cv init");

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
	if (error != 0)
		errx(1, "gdb address resolution: %s", gai_strerror(error));

	ctx = _ctx;
	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
	if (s < 0)
		err(1, "gdb socket create");

	optval = 1;
	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
		err(1, "gdb socket bind");

	if (listen(s, 1) < 0)
		err(1, "gdb socket listen");

	stopped_vcpu = -1;
	TAILQ_INIT(&breakpoints);
	vcpus = calloc(guest_ncpus, sizeof(*vcpus));
	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
	if (wait) {
		/*
		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
		 * logic in gdb_cpu_add() to suspend the first vcpu before
		 * it starts execution.  The vcpu will remain suspended
		 * until a debugger connects.
		 */
		CPU_SET(0, &vcpus_suspended);
		stopped_vcpu = 0;
	}

	flags = fcntl(s, F_GETFL);
	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
	limit_gdb_socket(s);
#endif
	mevent_add(s, EVF_READ, new_connection, NULL);
	gdb_active = true;
	freeaddrinfo(gdbaddr);
	free(sport);

	xml_dfd = open(_PATH_GDB_XML, O_DIRECTORY);
	if (xml_dfd == -1)
		err(1, "Failed to open gdb xml directory");
#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_FSTAT, CAP_LOOKUP, CAP_MMAP_R, CAP_PREAD);
	if (caph_rights_limit(xml_dfd, &rights) == -1)
		err(1, "cap_rights_init");
#endif
}
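
/*
 * Usage sketch, assuming the stock bhyve command line where -G maps
 * to gdb.port and a 'w' prefix sets gdb.wait:
 *
 *	bhyve -G w1234 ... vmname
 *	gdb -ex 'target remote localhost:1234' /path/to/guest/kernel
 *
 * With gdb.wait set, vCPU 0 starts suspended and the guest does not
 * run until a debugger attaches.
 */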