1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2019 Isilon Systems, LLC.
5 * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved.
6 * Copyright (c) 2000 Darrell Anderson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32#include "opt_ddb.h"
33#include "opt_inet.h"
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/endian.h>
38#include <sys/errno.h>
39#include <sys/eventhandler.h>
40#include <sys/kernel.h>
41#include <sys/lock.h>
42#include <sys/mutex.h>
43#include <sys/socket.h>
44#include <sys/sysctl.h>
45
46#ifdef DDB
47#include <ddb/ddb.h>
48#include <ddb/db_lex.h>
49#endif
50
51#include <net/ethernet.h>
52#include <net/if.h>
53#include <net/if_arp.h>
54#include <net/if_dl.h>
55#include <net/if_types.h>
56#include <net/if_var.h>
57#include <net/if_private.h>
58#include <net/vnet.h>
59#include <net/route.h>
60#include <net/route/nhop.h>
61
62#include <netinet/in.h>
63#include <netinet/in_fib.h>
64#include <netinet/in_systm.h>
65#include <netinet/in_var.h>
66#include <netinet/ip.h>
67#include <netinet/ip_var.h>
68#include <netinet/ip_options.h>
69#include <netinet/udp.h>
70#include <netinet/udp_var.h>
71
72#include <machine/in_cksum.h>
73#include <machine/pcb.h>
74
75#include <net/debugnet.h>
76#define	DEBUGNET_INTERNAL
77#include <net/debugnet_int.h>
78
/* Declare the "debugnet" kernel feature. */
FEATURE(debugnet, "Debugnet support");

/* Parent node for the net.debugnet.* tunables declared below. */
SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "debugnet parameters");

/* Debug message verbosity; also gates the connection summary in connect. */
unsigned debugnet_debug;
SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN,
    &debugnet_debug, 0,
    "Debug message verbosity (0: off; 1: on; 2: verbose)");

/*
 * Poll budget per transmit round in debugnet_send(); each poll is followed
 * by a 500 microsecond delay.
 */
int debugnet_npolls = 2000;
SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN,
    &debugnet_npolls, 0,
    "Number of times to poll before assuming packet loss (0.5ms per poll)");
/* Retransmit budget in debugnet_send() before it reports ETIMEDOUT. */
int debugnet_nretries = 10;
SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN,
    &debugnet_nretries, 0,
    "Number of retransmit attempts before giving up");
/* FIB passed to the route lookup performed by debugnet_connect(). */
int debugnet_fib = RT_DEFAULT_FIB;
SYSCTL_INT(_net_debugnet, OID_AUTO, fib, CTLFLAG_RWTUN,
    &debugnet_fib, 0,
    "Fib to use when sending dump");

/*
 * The one and only connection context, plus a flag recording whether it is
 * currently claimed by debugnet_connect().
 */
static bool g_debugnet_pcb_inuse;
static struct debugnet_pcb g_dnet_pcb;
104
105/*
106 * Simple accessors for opaque PCB.
107 */
108const unsigned char *
109debugnet_get_gw_mac(const struct debugnet_pcb *pcb)
110{
111	MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
112	    pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
113	return (pcb->dp_gw_mac.octet);
114}
115
116const in_addr_t *
117debugnet_get_server_addr(const struct debugnet_pcb *pcb)
118{
119	MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
120	    pcb->dp_state >= DN_STATE_GOT_HERALD_PORT);
121	return (&pcb->dp_server);
122}
123
124const uint16_t
125debugnet_get_server_port(const struct debugnet_pcb *pcb)
126{
127	MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb &&
128	    pcb->dp_state >= DN_STATE_GOT_HERALD_PORT);
129	return (pcb->dp_server_port);
130}
131
132/*
133 * Start of network primitives, beginning with output primitives.
134 */
135
136/*
137 * Handles creation of the ethernet header, then places outgoing packets into
138 * the tx buffer for the NIC
139 *
140 * Parameters:
141 *	m	The mbuf containing the packet to be sent (will be freed by
142 *		this function or the NIC driver)
143 *	ifp	The interface to send on
144 *	dst	The destination ethernet address (source address will be looked
145 *		up using ifp)
146 *	etype	The ETHERTYPE_* value for the protocol that is being sent
147 *
148 * Returns:
149 *	int	see errno.h, 0 for success
150 */
151int
152debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst,
153    u_short etype)
154{
155	struct ether_header *eh;
156
157	if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) ||
158	    (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) {
159		if_printf(ifp, "%s: interface isn't up\n", __func__);
160		m_freem(m);
161		return (ENETDOWN);
162	}
163
164	/* Fill in the ethernet header. */
165	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
166	if (m == NULL) {
167		printf("%s: out of mbufs\n", __func__);
168		return (ENOBUFS);
169	}
170	eh = mtod(m, struct ether_header *);
171	memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
172	memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN);
173	eh->ether_type = htons(etype);
174	return (ifp->if_debugnet_methods->dn_transmit(ifp, m));
175}
176
177/*
178 * Unreliable transmission of an mbuf chain to the debugnet server
179 * Note: can't handle fragmentation; fails if the packet is larger than
180 *	 ifp->if_mtu after adding the UDP/IP headers
181 *
182 * Parameters:
183 *	pcb	The debugnet context block
184 *	m	mbuf chain
185 *
186 * Returns:
187 *	int	see errno.h, 0 for success
188 */
189static int
190debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m)
191{
192	struct udphdr *udp;
193
194	MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC);
195
196	M_PREPEND(m, sizeof(*udp), M_NOWAIT);
197	if (m == NULL) {
198		printf("%s: out of mbufs\n", __func__);
199		return (ENOBUFS);
200	}
201
202	udp = mtod(m, void *);
203	udp->uh_ulen = htons(m->m_pkthdr.len);
204	/* Use this src port so that the server can connect() the socket */
205	udp->uh_sport = htons(pcb->dp_client_port);
206	udp->uh_dport = htons(pcb->dp_server_port);
207	/* Computed later (protocol-dependent). */
208	udp->uh_sum = 0;
209
210	return (debugnet_ip_output(pcb, m));
211}
212
213int
214debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */)
215{
216	struct debugnet_ack *dn_ack;
217	struct mbuf *m;
218
219	DNETDEBUG("Acking with seqno %u\n", ntohl(seqno));
220
221	m = m_gethdr(M_NOWAIT, MT_DATA);
222	if (m == NULL) {
223		printf("%s: Out of mbufs\n", __func__);
224		return (ENOBUFS);
225	}
226	m->m_len = sizeof(*dn_ack);
227	m->m_pkthdr.len = sizeof(*dn_ack);
228	MH_ALIGN(m, sizeof(*dn_ack));
229	dn_ack = mtod(m, void *);
230	dn_ack->da_seqno = seqno;
231
232	return (debugnet_udp_output(pcb, m));
233}
234
/*
 * Dummy free function for debugnet clusters.
 *
 * debugnet_send() attaches the caller-supplied data buffer to an mbuf as
 * external storage and registers this routine as its free callback.  It
 * deliberately does nothing, so the callback never touches that buffer.
 */
static void
debugnet_mbuf_free(struct mbuf *m __unused)
{
}
242
243/*
244 * Construct and reliably send a debugnet packet.  May fail from a resource
245 * shortage or extreme number of unacknowledged retransmissions.  Wait for
246 * an acknowledgement before returning.  Splits packets into chunks small
247 * enough to be sent without fragmentation (looks up the interface MTU)
248 *
249 * Parameters:
250 *	type	debugnet packet type (HERALD, FINISHED, ...)
251 *	data	data
252 *	datalen	data size (bytes)
253 *	auxdata	optional auxiliary information
254 *
255 * Returns:
256 *	int see errno.h, 0 for success
257 */
258int
259debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data,
260    uint32_t datalen, const struct debugnet_proto_aux *auxdata)
261{
262	struct debugnet_msg_hdr *dn_msg_hdr;
263	struct mbuf *m, *m2;
264	uint64_t want_acks;
265	uint32_t i, pktlen, sent_so_far;
266	int retries, polls, error;
267
268	if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
269		return (ECONNRESET);
270
271	want_acks = 0;
272	pcb->dp_rcvd_acks = 0;
273	retries = 0;
274
275retransmit:
276	/* Chunks can be too big to fit in packets. */
277	for (i = sent_so_far = 0; sent_so_far < datalen ||
278	    (i == 0 && datalen == 0); i++) {
279		pktlen = datalen - sent_so_far;
280
281		/* Bound: the interface MTU (assume no IP options). */
282		pktlen = min(pktlen, pcb->dp_ifp->if_mtu -
283		    sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr));
284
285		/*
286		 * Check if it is retransmitting and this has been ACKed
287		 * already.
288		 */
289		if ((pcb->dp_rcvd_acks & (1 << i)) != 0) {
290			sent_so_far += pktlen;
291			continue;
292		}
293
294		/*
295		 * Get and fill a header mbuf, then chain data as an extended
296		 * mbuf.
297		 */
298		m = m_gethdr(M_NOWAIT, MT_DATA);
299		if (m == NULL) {
300			printf("%s: Out of mbufs\n", __func__);
301			return (ENOBUFS);
302		}
303		m->m_len = sizeof(struct debugnet_msg_hdr);
304		m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr);
305		MH_ALIGN(m, sizeof(struct debugnet_msg_hdr));
306		dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *);
307		dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i);
308		dn_msg_hdr->mh_type = htonl(type);
309		dn_msg_hdr->mh_len = htonl(pktlen);
310
311		if (auxdata != NULL) {
312			dn_msg_hdr->mh_offset =
313			    htobe64(auxdata->dp_offset_start + sent_so_far);
314			dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2);
315		} else {
316			dn_msg_hdr->mh_offset = htobe64(sent_so_far);
317			dn_msg_hdr->mh_aux2 = 0;
318		}
319
320		if (pktlen != 0) {
321			m2 = m_get(M_NOWAIT, MT_DATA);
322			if (m2 == NULL) {
323				m_freem(m);
324				printf("%s: Out of mbufs\n", __func__);
325				return (ENOBUFS);
326			}
327			MEXTADD(m2, __DECONST(char *, data) + sent_so_far,
328			    pktlen, debugnet_mbuf_free, NULL, NULL, 0,
329			    EXT_DISPOSABLE);
330			m2->m_len = pktlen;
331
332			m_cat(m, m2);
333			m->m_pkthdr.len += pktlen;
334		}
335		error = debugnet_udp_output(pcb, m);
336		if (error != 0)
337			return (error);
338
339		/* Note that we're waiting for this packet in the bitfield. */
340		want_acks |= (1 << i);
341		sent_so_far += pktlen;
342	}
343	if (i >= DEBUGNET_MAX_IN_FLIGHT)
344		printf("Warning: Sent more than %d packets (%d). "
345		    "Acknowledgements will fail unless the size of "
346		    "rcvd_acks/want_acks is increased.\n",
347		    DEBUGNET_MAX_IN_FLIGHT, i);
348
349	/*
350	 * Wait for acks.  A *real* window would speed things up considerably.
351	 */
352	polls = 0;
353	while (pcb->dp_rcvd_acks != want_acks) {
354		if (polls++ > debugnet_npolls) {
355			if (retries++ > debugnet_nretries)
356				return (ETIMEDOUT);
357			printf(". ");
358			goto retransmit;
359		}
360		debugnet_network_poll(pcb);
361		DELAY(500);
362		if (pcb->dp_state == DN_STATE_REMOTE_CLOSED)
363			return (ECONNRESET);
364	}
365	pcb->dp_seqno += i;
366	return (0);
367}
368
369/*
370 * Network input primitives.
371 */
372
373/*
374 * Just introspect the header enough to fire off a seqno ack and validate
375 * length fits.
376 */
377static void
378debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb)
379{
380	const struct debugnet_msg_hdr *dnh;
381	struct mbuf *m;
382	uint32_t hdr_type;
383	uint32_t seqno;
384	int error;
385
386	m = *mb;
387
388	if (m->m_pkthdr.len < sizeof(*dnh)) {
389		DNETDEBUG("ignoring small debugnet_msg packet\n");
390		return;
391	}
392
393	/* Get ND header. */
394	if (m->m_len < sizeof(*dnh)) {
395		m = m_pullup(m, sizeof(*dnh));
396		*mb = m;
397		if (m == NULL) {
398			DNETDEBUG("m_pullup failed\n");
399			return;
400		}
401	}
402
403	dnh = mtod(m, const void *);
404	if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) {
405		DNETDEBUG("Dropping short packet.\n");
406		return;
407	}
408
409	hdr_type = ntohl(dnh->mh_type);
410	if (hdr_type != DEBUGNET_DATA) {
411		if (hdr_type == DEBUGNET_FINISHED) {
412			printf("Remote shut down the connection on us!\n");
413			pcb->dp_state = DN_STATE_REMOTE_CLOSED;
414			if (pcb->dp_finish_handler != NULL) {
415				pcb->dp_finish_handler();
416			}
417		} else {
418			DNETDEBUG("Got unexpected debugnet message %u\n", hdr_type);
419		}
420		return;
421	}
422
423	/*
424	 * If the issue is transient (ENOBUFS), sender should resend.  If
425	 * non-transient (like driver objecting to rx -> tx from the same
426	 * thread), not much else we can do.
427	 */
428	seqno = dnh->mh_seqno; /* net endian */
429	m_adj(m, sizeof(*dnh));
430	dnh = NULL;
431	error = pcb->dp_rx_handler(m);
432	if (error != 0) {
433		DNETDEBUG("RX handler was not able to accept message, error %d. "
434		    "Skipping ack.\n", error);
435		return;
436	}
437
438	error = debugnet_ack_output(pcb, seqno);
439	if (error != 0) {
440		DNETDEBUG("Couldn't ACK rx packet %u; %d\n", ntohl(seqno), error);
441	}
442}
443
444static void
445debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport)
446{
447	const struct debugnet_ack *dn_ack;
448	struct mbuf *m;
449	uint32_t rcv_ackno;
450
451	m = *mb;
452
453	/* Get Ack. */
454	if (m->m_len < sizeof(*dn_ack)) {
455		m = m_pullup(m, sizeof(*dn_ack));
456		*mb = m;
457		if (m == NULL) {
458			DNETDEBUG("m_pullup failed\n");
459			return;
460		}
461	}
462	dn_ack = mtod(m, const void *);
463
464	/* Debugnet processing. */
465	/*
466	 * Packet is meant for us.  Extract the ack sequence number and the
467	 * port number if necessary.
468	 */
469	rcv_ackno = ntohl(dn_ack->da_seqno);
470	if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) {
471		pcb->dp_server_port = sport;
472		pcb->dp_state = DN_STATE_GOT_HERALD_PORT;
473	}
474	if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT)
475		printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno);
476	else if (rcv_ackno >= pcb->dp_seqno) {
477		/* We're interested in this ack. Record it. */
478		pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno);
479	}
480}
481
482void
483debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb)
484{
485	const struct udphdr *udp;
486	struct mbuf *m;
487	uint16_t sport, ulen;
488
489	/* UDP processing. */
490
491	m = *mb;
492	if (m->m_pkthdr.len < sizeof(*udp)) {
493		DNETDEBUG("ignoring small UDP packet\n");
494		return;
495	}
496
497	/* Get UDP headers. */
498	if (m->m_len < sizeof(*udp)) {
499		m = m_pullup(m, sizeof(*udp));
500		*mb = m;
501		if (m == NULL) {
502			DNETDEBUG("m_pullup failed\n");
503			return;
504		}
505	}
506	udp = mtod(m, const void *);
507
508	/* We expect to receive UDP packets on the configured client port. */
509	if (ntohs(udp->uh_dport) != pcb->dp_client_port) {
510		DNETDEBUG("not on the expected port.\n");
511		return;
512	}
513
514	/* Check that ulen does not exceed actual size of data. */
515	ulen = ntohs(udp->uh_ulen);
516	if (m->m_pkthdr.len < ulen) {
517		DNETDEBUG("ignoring runt UDP packet\n");
518		return;
519	}
520
521	sport = ntohs(udp->uh_sport);
522
523	m_adj(m, sizeof(*udp));
524	ulen -= sizeof(*udp);
525
526	if (ulen == sizeof(struct debugnet_ack)) {
527		debugnet_handle_ack(pcb, mb, sport);
528		return;
529	}
530
531	if (pcb->dp_rx_handler == NULL) {
532		if (ulen < sizeof(struct debugnet_ack))
533			DNETDEBUG("ignoring small ACK packet\n");
534		else
535			DNETDEBUG("ignoring unexpected non-ACK packet on "
536			    "half-duplex connection.\n");
537		return;
538	}
539
540	debugnet_handle_rx_msg(pcb, mb);
541}
542
543/*
544 * Handler for incoming packets directly from the network adapter
545 * Identifies the packet type (IP or ARP) and passes it along to one of the
546 * helper functions debugnet_handle_ip or debugnet_handle_arp.
547 *
548 * It needs to partially replicate the behaviour of ether_input() and
549 * ether_demux().
550 *
551 * Parameters:
552 *	ifp	the interface the packet came from
553 *	m	an mbuf containing the packet received
554 */
555static void
556debugnet_input_one(struct ifnet *ifp, struct mbuf *m)
557{
558	struct ifreq ifr;
559	struct ether_header *eh;
560	u_short etype;
561
562	/* Ethernet processing. */
563	if ((m->m_flags & M_PKTHDR) == 0) {
564		DNETDEBUG_IF(ifp, "discard frame without packet header\n");
565		goto done;
566	}
567	if (m->m_len < ETHER_HDR_LEN) {
568		DNETDEBUG_IF(ifp,
569	    "discard frame without leading eth header (len %d pktlen %d)\n",
570		    m->m_len, m->m_pkthdr.len);
571		goto done;
572	}
573	if ((m->m_flags & M_HASFCS) != 0) {
574		m_adj(m, -ETHER_CRC_LEN);
575		m->m_flags &= ~M_HASFCS;
576	}
577	eh = mtod(m, struct ether_header *);
578	etype = ntohs(eh->ether_type);
579	if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) {
580		DNETDEBUG_IF(ifp, "ignoring vlan packets\n");
581		goto done;
582	}
583	if (if_gethwaddr(ifp, &ifr) != 0) {
584		DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n");
585		goto done;
586	}
587	if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost,
588	    ETHER_ADDR_LEN) != 0 &&
589	    (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) {
590		DNETDEBUG_IF(ifp,
591		    "discard frame with incorrect destination addr\n");
592		goto done;
593	}
594
595	MPASS(g_debugnet_pcb_inuse);
596
597	/* Done ethernet processing. Strip off the ethernet header. */
598	m_adj(m, ETHER_HDR_LEN);
599	switch (etype) {
600	case ETHERTYPE_ARP:
601		debugnet_handle_arp(&g_dnet_pcb, &m);
602		break;
603	case ETHERTYPE_IP:
604		debugnet_handle_ip(&g_dnet_pcb, &m);
605		break;
606	default:
607		DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype);
608		break;
609	}
610done:
611	if (m != NULL)
612		m_freem(m);
613}
614
615static void
616debugnet_input(struct ifnet *ifp, struct mbuf *m)
617{
618	struct mbuf *n;
619
620	do {
621		n = m->m_nextpkt;
622		m->m_nextpkt = NULL;
623		debugnet_input_one(ifp, m);
624		m = n;
625	} while (m != NULL);
626}
627
628/*
629 * Network polling primitive.
630 *
631 * Instead of assuming that most of the network stack is sane, we just poll the
632 * driver directly for packets.
633 */
634void
635debugnet_network_poll(struct debugnet_pcb *pcb)
636{
637	struct ifnet *ifp;
638
639	ifp = pcb->dp_ifp;
640	ifp->if_debugnet_methods->dn_poll(ifp, 1000);
641}
642
643/*
644 * Start of consumer API surface.
645 */
646void
647debugnet_free(struct debugnet_pcb *pcb)
648{
649	struct ifnet *ifp;
650
651	MPASS(pcb == &g_dnet_pcb);
652	MPASS(pcb->dp_drv_input == NULL || g_debugnet_pcb_inuse);
653
654	ifp = pcb->dp_ifp;
655	if (ifp != NULL) {
656		if (pcb->dp_drv_input != NULL)
657			ifp->if_input = pcb->dp_drv_input;
658		if (pcb->dp_event_started)
659			ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END);
660	}
661	debugnet_mbuf_finish();
662
663	g_debugnet_pcb_inuse = false;
664	memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb));
665}
666
667int
668debugnet_connect(const struct debugnet_conn_params *dcp,
669    struct debugnet_pcb **pcb_out)
670{
671	struct debugnet_proto_aux herald_auxdata;
672	struct debugnet_pcb *pcb;
673	struct ifnet *ifp;
674	int error;
675
676	if (g_debugnet_pcb_inuse) {
677		printf("%s: Only one connection at a time.\n", __func__);
678		return (EBUSY);
679	}
680
681	pcb = &g_dnet_pcb;
682	*pcb = (struct debugnet_pcb) {
683		.dp_state = DN_STATE_INIT,
684		.dp_client = dcp->dc_client,
685		.dp_server = dcp->dc_server,
686		.dp_gateway = dcp->dc_gateway,
687		.dp_server_port = dcp->dc_herald_port,	/* Initially */
688		.dp_client_port = dcp->dc_client_port,
689		.dp_seqno = 1,
690		.dp_ifp = dcp->dc_ifp,
691		.dp_rx_handler = dcp->dc_rx_handler,
692		.dp_drv_input = NULL,
693	};
694
695	/* Switch to the debugnet mbuf zones. */
696	debugnet_mbuf_start();
697
698	/* At least one needed parameter is missing; infer it. */
699	if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY ||
700	    pcb->dp_ifp == NULL) {
701		struct sockaddr_in dest_sin, *gw_sin, *local_sin;
702		struct ifnet *rt_ifp;
703		struct nhop_object *nh;
704
705		memset(&dest_sin, 0, sizeof(dest_sin));
706		dest_sin = (struct sockaddr_in) {
707			.sin_len = sizeof(dest_sin),
708			.sin_family = AF_INET,
709			.sin_addr.s_addr = pcb->dp_server,
710		};
711
712		CURVNET_SET(vnet0);
713		nh = fib4_lookup_debugnet(debugnet_fib, dest_sin.sin_addr, 0,
714		    NHR_NONE);
715		CURVNET_RESTORE();
716
717		if (nh == NULL) {
718			printf("%s: Could not get route for that server.\n",
719			    __func__);
720			error = ENOENT;
721			goto cleanup;
722		}
723
724		/* TODO support AF_INET6 */
725		if (nh->gw_sa.sa_family == AF_INET)
726			gw_sin = &nh->gw4_sa;
727		else {
728			if (nh->gw_sa.sa_family == AF_LINK)
729				DNETDEBUG("Destination address is on link.\n");
730			gw_sin = NULL;
731		}
732
733		MPASS(nh->nh_ifa->ifa_addr->sa_family == AF_INET);
734		local_sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr;
735
736		rt_ifp = nh->nh_ifp;
737
738		if (pcb->dp_client == INADDR_ANY)
739			pcb->dp_client = local_sin->sin_addr.s_addr;
740		if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL)
741			pcb->dp_gateway = gw_sin->sin_addr.s_addr;
742		if (pcb->dp_ifp == NULL)
743			pcb->dp_ifp = rt_ifp;
744	}
745
746	ifp = pcb->dp_ifp;
747
748	if (debugnet_debug > 0) {
749		char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN],
750		    gwbuf[INET_ADDRSTRLEN];
751		inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf));
752		inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf));
753		if (pcb->dp_gateway != INADDR_ANY)
754			inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf));
755		DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n",
756		    serbuf, pcb->dp_server_port,
757		    (pcb->dp_gateway == INADDR_ANY) ? "" : " via ",
758		    (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf,
759		    clibuf, pcb->dp_client_port, if_name(ifp));
760	}
761
762	/* Validate iface is online and supported. */
763	if (!DEBUGNET_SUPPORTED_NIC(ifp)) {
764		printf("%s: interface '%s' does not support debugnet\n",
765		    __func__, if_name(ifp));
766		error = ENODEV;
767		goto cleanup;
768	}
769	if ((if_getflags(ifp) & IFF_UP) == 0) {
770		printf("%s: interface '%s' link is down\n", __func__,
771		    if_name(ifp));
772		error = ENXIO;
773		goto cleanup;
774	}
775
776	ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START);
777	pcb->dp_event_started = true;
778
779	/*
780	 * We maintain the invariant that g_debugnet_pcb_inuse is always true
781	 * while the debugnet ifp's if_input is overridden with
782	 * debugnet_input().
783	 */
784	g_debugnet_pcb_inuse = true;
785
786	/* Make the card use *our* receive callback. */
787	pcb->dp_drv_input = ifp->if_input;
788	ifp->if_input = debugnet_input;
789
790	printf("%s: searching for %s MAC...\n", __func__,
791	    (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway");
792
793	error = debugnet_arp_gw(pcb);
794	if (error != 0) {
795		printf("%s: failed to locate MAC address\n", __func__);
796		goto cleanup;
797	}
798	MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC);
799
800	herald_auxdata = (struct debugnet_proto_aux) {
801		.dp_offset_start = dcp->dc_herald_offset,
802		.dp_aux2 = dcp->dc_herald_aux2,
803	};
804	error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data,
805	    dcp->dc_herald_datalen, &herald_auxdata);
806	if (error != 0) {
807		printf("%s: failed to herald debugnet server\n", __func__);
808		goto cleanup;
809	}
810
811	*pcb_out = pcb;
812	return (0);
813
814cleanup:
815	debugnet_free(pcb);
816	return (error);
817}
818
/*
 * Pre-allocated dump-time mbuf tracking.
 *
 * We track the largest value ever requested for each parameter (the
 * high-water mark) across all interfaces and size the debugnet mbuf pools
 * for that.
 */
static struct {
	int nmbuf;	/* Largest mbuf count requested so far. */
	int ncl;	/* Largest cluster count requested so far. */
	int clsize;	/* Largest cluster size requested so far. */
} dn_hwm;
/* Protects dn_hwm. */
static struct mtx dn_hwm_lk;
MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF);
832
833static void
834dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize)
835{
836	bool any;
837
838	any = false;
839	mtx_lock(&dn_hwm_lk);
840
841	if (nmbuf > dn_hwm.nmbuf) {
842		any = true;
843		dn_hwm.nmbuf = nmbuf;
844	} else
845		nmbuf = dn_hwm.nmbuf;
846
847	if (ncl > dn_hwm.ncl) {
848		any = true;
849		dn_hwm.ncl = ncl;
850	} else
851		ncl = dn_hwm.ncl;
852
853	if (clsize > dn_hwm.clsize) {
854		any = true;
855		dn_hwm.clsize = clsize;
856	} else
857		clsize = dn_hwm.clsize;
858
859	mtx_unlock(&dn_hwm_lk);
860
861	if (any)
862		debugnet_mbuf_reinit(nmbuf, ncl, clsize);
863}
864
865void
866debugnet_any_ifnet_update(struct ifnet *ifp)
867{
868	int clsize, nmbuf, ncl, nrxr;
869
870	if (!DEBUGNET_SUPPORTED_NIC(ifp))
871		return;
872
873	ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize);
874	KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr));
875
876	/*
877	 * We need two headers per message on the transmit side. Multiply by
878	 * four to give us some breathing room.
879	 */
880	nmbuf = ncl * (4 + nrxr);
881	ncl *= nrxr;
882
883	/*
884	 * Bandaid for drivers that (incorrectly) advertise LinkUp before their
885	 * dn_init method is available.
886	 */
887	if (nmbuf == 0 || ncl == 0 || clsize == 0) {
888#ifndef INVARIANTS
889		if (bootverbose)
890#endif
891		printf("%s: Bad dn_init result from %s (ifp %p), ignoring.\n",
892		    __func__, if_name(ifp), ifp);
893		return;
894	}
895	dn_maybe_reinit_mbufs(nmbuf, ncl, clsize);
896}
897
898/*
899 * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless
900 * for us because drivers tend to if_attach before invoking DEBUGNET_SET().
901 *
902 * On the other hand, hooking DEBUGNET_SET() itself may still be too early,
903 * because the driver is still in attach.  Since we cannot use down interfaces,
904 * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient?  ... Nope, at least
905 * with vtnet and dhcpclient that event just never occurs.
906 *
907 * So that's how I've landed on the lower level ifnet_link_event.
908 */
909
910static void
911dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state)
912{
913	if (link_state == LINK_STATE_UP)
914		debugnet_any_ifnet_update(ifp);
915}
916
/* Tag returned when registering dn_ifnet_event(); kept for reference. */
static eventhandler_tag dn_attach_cookie;
/*
 * Boot-time hook: subscribe dn_ifnet_event() to ifnet_link_event
 * notifications.
 */
static void
dn_evh_init(void *ctx __unused)
{
	dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event,
	    dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
}
/* Run the registration just after the SI_SUB_EVENTHANDLER stage. */
SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL);
925
926/*
927 * DDB parsing helpers for debugnet(4) consumers.
928 */
929#ifdef DDB
/*
 * Parse state for one IPv4-address option of the DDB command line.
 */
struct my_inet_opt {
	bool has_opt;		/* Set once the option was parsed successfully. */
	const char *printname;	/* Option name used in error messages. */
	in_addr_t *result;	/* Where the parsed address is stored. */
};
935
936static int
937dn_parse_optarg_ipv4(struct my_inet_opt *opt)
938{
939	in_addr_t tmp;
940	unsigned octet;
941	int t;
942
943	tmp = 0;
944	for (octet = 0; octet < 4; octet++) {
945		t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL);
946		if (t != tNUMBER) {
947			db_printf("%s:%s: octet %u expected number; found %d\n",
948			    __func__, opt->printname, octet, t);
949			return (EINVAL);
950		}
951		/*
952		 * db_lex lexes '-' distinctly from the number itself, but
953		 * let's document that invariant.
954		 */
955		MPASS(db_tok_number >= 0);
956
957		if (db_tok_number > UINT8_MAX) {
958			db_printf("%s:%s: octet %u out of range: %jd\n", __func__,
959			    opt->printname, octet, (intmax_t)db_tok_number);
960			return (EDOM);
961		}
962
963		/* Constructed host-endian and converted to network later. */
964		tmp = (tmp << 8) | db_tok_number;
965
966		if (octet < 3) {
967			t = db_read_token_flags(DRT_WSPACE);
968			if (t != tDOT) {
969				db_printf("%s:%s: octet %u expected '.'; found"
970				    " %d\n", __func__, opt->printname, octet,
971				    t);
972				return (EINVAL);
973			}
974		}
975	}
976
977	*opt->result = htonl(tmp);
978	opt->has_opt = true;
979	return (0);
980}
981
/*
 * Parse the arguments of a debugnet-based DDB command.
 *
 * Accepted options, each followed by whitespace and an argument:
 *	-s <server>	server IPv4 address (required)
 *	-g <gateway>	gateway IPv4 address
 *	-c <localip>	client IPv4 address
 *	-i <interface>	interface name
 * "-h" or any unknown option prints the usage message.
 *
 * Parameters:
 *	cmd	Command name, used only in messages
 *	result	Zeroed first, then filled in with the parsed configuration
 *
 * Returns:
 *	int	0 on success; EINVAL on a syntax error or missing server,
 *		ENOENT if the named interface does not exist, or the error
 *		from dn_parse_optarg_ipv4().  On failure the rest of the
 *		input line is skipped.
 */
int
debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result)
{
	struct ifnet *ifp;
	int t, error;
	bool want_ifp;
	char ch;

	/* One parse-state record per address option, writing into result. */
	struct my_inet_opt opt_client = {
		.printname = "client",
		.result = &result->dd_client,
	},
	opt_server = {
		.printname = "server",
		.result = &result->dd_server,
	},
	opt_gateway = {
		.printname = "gateway",
		.result = &result->dd_gateway,
	},
	*cur_inet_opt;

	ifp = NULL;
	memset(result, 0, sizeof(*result));

	/*
	 * command [space] [-] [opt] [[space] [optarg]] ...
	 *
	 * db_command has already lexed 'command' for us.
	 */
	t = db_read_token_flags(DRT_WSPACE);
	if (t == tWSPACE)
		t = db_read_token_flags(DRT_WSPACE);

	/* Each iteration consumes one "-<flag> <argument>" pair. */
	while (t != tEOL) {
		if (t != tMINUS) {
			db_printf("%s: Bad syntax; expected '-', got %d\n",
			    cmd, t);
			goto usage;
		}

		t = db_read_token_flags(DRT_WSPACE);
		if (t != tIDENT) {
			db_printf("%s: Bad syntax; expected tIDENT, got %d\n",
			    cmd, t);
			goto usage;
		}

		if (strlen(db_tok_string) > 1) {
			db_printf("%s: Bad syntax; expected single option "
			    "flag, got '%s'\n", cmd, db_tok_string);
			goto usage;
		}

		/* Decide what kind of argument this flag expects. */
		want_ifp = false;
		cur_inet_opt = NULL;
		switch ((ch = db_tok_string[0])) {
		default:
			DNETDEBUG("Unexpected: '%c'\n", ch);
			/* FALLTHROUGH */
		case 'h':
			goto usage;
		case 'c':
			cur_inet_opt = &opt_client;
			break;
		case 'g':
			cur_inet_opt = &opt_gateway;
			break;
		case 's':
			cur_inet_opt = &opt_server;
			break;
		case 'i':
			want_ifp = true;
			break;
		}

		t = db_read_token_flags(DRT_WSPACE);
		if (t != tWSPACE) {
			db_printf("%s: Bad syntax; expected space after "
			    "flag %c, got %d\n", cmd, ch, t);
			goto usage;
		}

		if (want_ifp) {
			t = db_read_token_flags(DRT_WSPACE);
			if (t != tIDENT) {
				db_printf("%s: Expected interface but got %d\n",
				    cmd, t);
				goto usage;
			}

			CURVNET_SET(vnet0);
			/*
			 * We *don't* take a ref here because the only current
			 * consumer, db_netdump_cmd, does not need it.  It
			 * (somewhat redundantly) extracts the if_name(),
			 * re-lookups the ifp, and takes its own reference.
			 */
			ifp = ifunit(db_tok_string);
			CURVNET_RESTORE();
			if (ifp == NULL) {
				db_printf("Could not locate interface %s\n",
				    db_tok_string);
				error = ENOENT;
				goto cleanup;
			}
		} else {
			MPASS(cur_inet_opt != NULL);
			/* Assume IPv4 for now. */
			error = dn_parse_optarg_ipv4(cur_inet_opt);
			if (error != 0)
				goto cleanup;
		}

		/* Skip (mandatory) whitespace after option, if not EOL. */
		t = db_read_token_flags(DRT_WSPACE);
		if (t == tEOL)
			break;
		if (t != tWSPACE) {
			db_printf("%s: Bad syntax; expected space after "
			    "flag %c option; got %d\n", cmd, ch, t);
			goto usage;
		}
		/* Fetch the token that starts the next option (or EOL). */
		t = db_read_token_flags(DRT_WSPACE);
	}

	/* The server address is the only mandatory option. */
	if (!opt_server.has_opt) {
		db_printf("%s: need a destination server address\n", cmd);
		goto usage;
	}

	result->dd_has_client = opt_client.has_opt;
	result->dd_has_gateway = opt_gateway.has_opt;
	result->dd_ifp = ifp;

	/* We parsed the full line to tEOL already, or bailed with an error. */
	return (0);

usage:
	db_printf("Usage: %s -s <server> [-g <gateway> -c <localip> "
	    "-i <interface>]\n", cmd);
	error = EINVAL;
	/* FALLTHROUGH */
cleanup:
	/* Discard whatever remains of the input line. */
	db_skip_to_eol();
	return (error);
}
1129#endif /* DDB */
1130