1/*	$NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $	*/
2
3/*-
4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk>
5 * Nottingham University 1987.
6 *
7 * This source may be freely distributed, however I would be interested
8 * in any changes that are made.
9 *
10 * This driver takes packets off the IP i/f and hands them up to a
 * user process to have its wicked way with. This driver has its
12 * roots in a similar driver written by Phil Cockcroft (formerly) at
13 * UCL. This driver is based much more on read/write/poll mode of
14 * operation though.
15 *
16 * $FreeBSD: stable/11/sys/net/if_tun.c 353157 2019-10-07 01:03:14Z kevans $
17 */
18
19#include "opt_inet.h"
20#include "opt_inet6.h"
21
22#include <sys/param.h>
23#include <sys/lock.h>
24#include <sys/priv.h>
25#include <sys/proc.h>
26#include <sys/systm.h>
27#include <sys/jail.h>
28#include <sys/mbuf.h>
29#include <sys/module.h>
30#include <sys/socket.h>
31#include <sys/fcntl.h>
32#include <sys/filio.h>
33#include <sys/sockio.h>
34#include <sys/sx.h>
35#include <sys/syslog.h>
36#include <sys/ttycom.h>
37#include <sys/poll.h>
38#include <sys/selinfo.h>
39#include <sys/signalvar.h>
40#include <sys/filedesc.h>
41#include <sys/kernel.h>
42#include <sys/sysctl.h>
43#include <sys/conf.h>
44#include <sys/uio.h>
45#include <sys/malloc.h>
46#include <sys/random.h>
47#include <sys/ctype.h>
48
49#include <net/if.h>
50#include <net/if_var.h>
51#include <net/if_clone.h>
52#include <net/if_types.h>
53#include <net/netisr.h>
54#include <net/route.h>
55#include <net/vnet.h>
56#ifdef INET
57#include <netinet/in.h>
58#endif
59#include <net/bpf.h>
60#include <net/if_tun.h>
61
62#include <sys/queue.h>
63#include <sys/condvar.h>
64
65#include <security/mac/mac_framework.h>
66
67/*
68 * tun_list is protected by global tunmtx.  Other mutable fields are
69 * protected by tun->tun_mtx, or by their owning subsystem.  tun_dev is
70 * static for the duration of a tunnel interface.
71 */
struct tun_softc {
	TAILQ_ENTRY(tun_softc)	tun_list;	/* linkage on global tunhead */
	struct cdev *tun_dev;			/* backing character device node */
	u_short	tun_flags;		/* misc flags */
#define	TUN_OPEN	0x0001		/* device is open (tunopen..tunclose) */
#define	TUN_INITED	0x0002		/* softc set up by tuncreate() */
#define	TUN_RCOLL	0x0004		/* read collision; not referenced in this file */
#define	TUN_IASET	0x0008		/* IPv4 interface address configured (tuninit) */
#define	TUN_DSTADDR	0x0010		/* IPv4 destination set; host routes on close */
#define	TUN_LMODE	0x0020		/* prepend dst sockaddr on output (TUNSLMODE) */
#define	TUN_RWAIT	0x0040		/* a reader sleeps in tunread(); wake via wakeup() */
#define	TUN_ASYNC	0x0080		/* async I/O: post SIGIO from tunstart() */
#define	TUN_IFHEAD	0x0100		/* prepend address family word (TUNSIFHEAD) */
#define	TUN_DYING	0x0200		/* being destroyed; refuse new opens */

#define TUN_READY       (TUN_OPEN | TUN_INITED)

	pid_t	tun_pid;		/* owning pid */
	struct	ifnet *tun_ifp;		/* the interface */
	struct  sigio *tun_sigio;	/* information for async I/O */
	struct	selinfo	tun_rsel;	/* read select */
	struct mtx	tun_mtx;	/* protect mutable softc fields */
	struct cv	tun_cv;		/* protect against ref'd dev destroy */
};
96#define TUN2IFP(sc)	((sc)->tun_ifp)
97
98#define TUNDEBUG	if (tundebug) if_printf
99
100/*
101 * All mutable global variables in if_tun are locked using tunmtx, with
102 * the exception of tundebug, which is used unlocked, and tunclones,
103 * which is static after setup.
104 */
static struct mtx tunmtx;	/* protects tunhead; also backs tun_unrhdr */
static eventhandler_tag tag;	/* dev_clone hook, registered in tunmodevent() */
static const char tunname[] = "tun";
static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
static int tundebug = 0;	/* non-zero enables TUNDEBUG logging */
static int tundclone = 1;	/* legacy devfs cloning enabled (sysctl below) */
static struct clonedevs *tunclones;
static TAILQ_HEAD(,tun_softc)	tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");

/* Serializes tunifioctl() against if_softc teardown in tun_destroy(). */
static struct sx tun_ioctl_sx;
SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");

SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
    "IP tunnel software network interface.");
SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
    "Enable legacy devfs interface creation.");
123
/* Interface life-cycle and network-side entry points. */
static void	tunclone(void *arg, struct ucred *cred, char *name,
		    int namelen, struct cdev **dev);
static void	tuncreate(const char *name, struct cdev *dev);
static int	tunifioctl(struct ifnet *, u_long, caddr_t);
static void	tuninit(struct ifnet *);
static int	tunmodevent(module_t, int, void *);
static int	tunoutput(struct ifnet *, struct mbuf *,
		    const struct sockaddr *, struct route *ro);
static void	tunstart(struct ifnet *);

/* if_clone callbacks and per-vnet cloner handle. */
static int	tun_clone_match(struct if_clone *ifc, const char *name);
static int	tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int	tun_clone_destroy(struct if_clone *, struct ifnet *);
static struct unrhdr	*tun_unrhdr;	/* unit-number allocator */
static VNET_DEFINE(struct if_clone *, tun_cloner);
#define V_tun_cloner VNET(tun_cloner)

/* Character-device entry points. */
static d_open_t		tunopen;
static d_close_t	tunclose;
static d_read_t		tunread;
static d_write_t	tunwrite;
static d_ioctl_t	tunioctl;
static d_poll_t		tunpoll;
static d_kqfilter_t	tunkqfilter;

static int		tunkqread(struct knote *, long);
static int		tunkqwrite(struct knote *, long);
static void		tunkqdetach(struct knote *);
152
/* EVFILT_READ ops: event fires when if_snd holds queued packets. */
static struct filterops tun_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tunkqdetach,
	.f_event =	tunkqread,
};
159
/* EVFILT_WRITE ops: tun is always writable, see tunkqwrite(). */
static struct filterops tun_write_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tunkqdetach,
	.f_event =	tunkqwrite,
};
166
/* Character device switch for /dev/tunN. */
static struct cdevsw tun_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDMINOR,
	.d_open =	tunopen,
	.d_close =	tunclose,
	.d_read =	tunread,
	.d_write =	tunwrite,
	.d_ioctl =	tunioctl,
	.d_poll =	tunpoll,
	.d_kqfilter =	tunkqfilter,
	.d_name =	tunname,
};
179
180static int
181tun_clone_match(struct if_clone *ifc, const char *name)
182{
183	if (strncmp(tunname, name, 3) == 0 &&
184	    (name[3] == '\0' || isdigit(name[3])))
185		return (1);
186
187	return (0);
188}
189
190static int
191tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
192{
193	struct cdev *dev;
194	int err, unit, i;
195
196	err = ifc_name2unit(name, &unit);
197	if (err != 0)
198		return (err);
199
200	if (unit != -1) {
201		/* If this unit number is still available that/s okay. */
202		if (alloc_unr_specific(tun_unrhdr, unit) == -1)
203			return (EEXIST);
204	} else {
205		unit = alloc_unr(tun_unrhdr);
206	}
207
208	snprintf(name, IFNAMSIZ, "%s%d", tunname, unit);
209
210	/* find any existing device, or allocate new unit number */
211	i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0);
212	if (i) {
213		/* No preexisting struct cdev *, create one */
214		dev = make_dev(&tun_cdevsw, unit,
215		    UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit);
216	}
217	tuncreate(tunname, dev);
218
219	return (0);
220}
221
/*
 * devfs clone handler: when a lookup of "tun" or "tunN" misses in
 * devfs, optionally auto-create the device node and the interface.
 * Only acts when legacy cloning (tundclone) is on and the credential
 * holds PRIV_NET_IFCREATE.
 */
static void
tunclone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	char devname[SPECNAMELEN + 1];
	int u, i, append_unit;

	/* Another clone handler already resolved this name. */
	if (*dev != NULL)
		return;

	/*
	 * If tun cloning is enabled, only the superuser can create an
	 * interface.
	 */
	if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
		return;

	if (strcmp(name, tunname) == 0) {
		u = -1;			/* bare "tun": pick next free unit */
	} else if (dev_stdclone(name, NULL, tunname, &u) != 1)
		return;	/* Don't recognise the name */
	if (u != -1 && u > IF_MAXUNIT)
		return;	/* Unit number too high */

	/* Bare "tun" needs the chosen unit appended to form "tunN". */
	if (u == -1)
		append_unit = 1;
	else
		append_unit = 0;

	CURVNET_SET(CRED_TO_VNET(cred));
	/* find any existing device, or allocate new unit number */
	i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
	if (i) {
		if (append_unit) {
			namelen = snprintf(devname, sizeof(devname), "%s%d",
			    name, u);
			name = devname;
		}
		/* No preexisting struct cdev *, create one */
		*dev = make_dev_credf(MAKEDEV_REF, &tun_cdevsw, u, cred,
		    UID_UUCP, GID_DIALER, 0600, "%s", name);
	}

	/* Create the matching network interface in the caller's vnet. */
	if_clone_create(name, namelen, NULL);
	CURVNET_RESTORE();
}
268
/*
 * Tear down one tunnel instance: mark it dying, wait for an open
 * device to be closed (tunclose broadcasts tun_cv), then detach and
 * free the ifnet, device node, and softc.  The caller must already
 * have removed 'tp' from tunhead.
 */
static void
tun_destroy(struct tun_softc *tp)
{
	struct cdev *dev;

	mtx_lock(&tp->tun_mtx);
	tp->tun_flags |= TUN_DYING;
	if ((tp->tun_flags & TUN_OPEN) != 0)
		cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
	else
		mtx_unlock(&tp->tun_mtx);

	CURVNET_SET(TUN2IFP(tp)->if_vnet);

	dev = tp->tun_dev;
	bpfdetach(TUN2IFP(tp));
	if_detach(TUN2IFP(tp));

	/* Make concurrent tunifioctl() see the softc as gone (ENXIO). */
	sx_xlock(&tun_ioctl_sx);
	TUN2IFP(tp)->if_softc = NULL;
	sx_xunlock(&tun_ioctl_sx);

	free_unr(tun_unrhdr, TUN2IFP(tp)->if_dunit);
	if_free(TUN2IFP(tp));
	destroy_dev(dev);
	seldrain(&tp->tun_rsel);
	knlist_clear(&tp->tun_rsel.si_note, 0);
	knlist_destroy(&tp->tun_rsel.si_note);
	mtx_destroy(&tp->tun_mtx);
	cv_destroy(&tp->tun_cv);
	free(tp, M_TUN);
	CURVNET_RESTORE();
}
302
303static int
304tun_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
305{
306	struct tun_softc *tp = ifp->if_softc;
307
308	mtx_lock(&tunmtx);
309	TAILQ_REMOVE(&tunhead, tp, tun_list);
310	mtx_unlock(&tunmtx);
311	tun_destroy(tp);
312
313	return (0);
314}
315
/* Per-vnet init: register the "tun" cloner in this vnet. */
static void
vnet_tun_init(const void *unused __unused)
{
	V_tun_cloner = if_clone_advanced(tunname, 0, tun_clone_match,
			tun_clone_create, tun_clone_destroy);
}
VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
		vnet_tun_init, NULL);
324
/* Per-vnet teardown: detach this vnet's cloner (destroys its ifaces). */
static void
vnet_tun_uninit(const void *unused __unused)
{
	if_clone_detach(V_tun_cloner);
}
VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
    vnet_tun_uninit, NULL);
332
/*
 * Global module teardown; as a SYSUNINIT it runs after the per-vnet
 * sysuninits above.  Stop devfs cloning, destroy any remaining
 * tunnels, then free global state.
 */
static void
tun_uninit(const void *unused __unused)
{
	struct tun_softc *tp;

	EVENTHANDLER_DEREGISTER(dev_clone, tag);
	drain_dev_clone_events();

	/* Drop tunmtx around tun_destroy(), which may sleep on tun_cv. */
	mtx_lock(&tunmtx);
	while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
		TAILQ_REMOVE(&tunhead, tp, tun_list);
		mtx_unlock(&tunmtx);
		tun_destroy(tp);
		mtx_lock(&tunmtx);
	}
	mtx_unlock(&tunmtx);
	delete_unrhdr(tun_unrhdr);
	clone_cleanup(&tunclones);
	mtx_destroy(&tunmtx);
}
SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);
354
355static int
356tunmodevent(module_t mod, int type, void *data)
357{
358
359	switch (type) {
360	case MOD_LOAD:
361		mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
362		clone_setup(&tunclones);
363		tun_unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
364		tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
365		if (tag == NULL)
366			return (ENOMEM);
367		break;
368	case MOD_UNLOAD:
369		/* See tun_uninit, so it's done after the vnet_sysuninit() */
370		break;
371	default:
372		return EOPNOTSUPP;
373	}
374	return 0;
375}
376
/* Module registration glue for kldload/kldunload. */
static moduledata_t tun_mod = {
	"if_tun",
	tunmodevent,
	0
};

DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_tun, 1);
385
/*
 * if_start handler: packets were queued on if_snd, so wake any reader
 * sleeping in tunread(), notify poll/kevent waiters, and post SIGIO
 * when async I/O was requested via FIOASYNC.
 */
static void
tunstart(struct ifnet *ifp)
{
	struct tun_softc *tp = ifp->if_softc;
	struct mbuf *m;

	TUNDEBUG(ifp,"%s starting\n", ifp->if_xname);
	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
		/* With ALTQ, only wake readers if a packet is actually queued. */
		IFQ_LOCK(&ifp->if_snd);
		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
		if (m == NULL) {
			IFQ_UNLOCK(&ifp->if_snd);
			return;
		}
		IFQ_UNLOCK(&ifp->if_snd);
	}

	mtx_lock(&tp->tun_mtx);
	if (tp->tun_flags & TUN_RWAIT) {
		tp->tun_flags &= ~TUN_RWAIT;
		wakeup(tp);
	}
	selwakeuppri(&tp->tun_rsel, PZERO + 1);
	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
		/* NOTE(review): mutex dropped before pgsigio(); presumably
		 * it must not be called with tun_mtx held — confirm. */
		mtx_unlock(&tp->tun_mtx);
		pgsigio(&tp->tun_sigio, SIGIO, 0);
	} else
		mtx_unlock(&tp->tun_mtx);
}
416
/* XXX: should return an error code so it can fail. */
/*
 * Allocate and initialize the softc and its ifnet for device 'dev',
 * link it on tunhead, and attach the interface as a point-to-point
 * IFT_PPP device with BPF (DLT_NULL) support.
 */
static void
tuncreate(const char *name, struct cdev *dev)
{
	struct tun_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
	mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
	cv_init(&sc->tun_cv, "tun_condvar");
	sc->tun_flags = TUN_INITED;
	sc->tun_dev = dev;
	mtx_lock(&tunmtx);
	TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
	mtx_unlock(&tunmtx);

	ifp = sc->tun_ifp = if_alloc(IFT_PPP);
	if (ifp == NULL)
		/* No error return available (see XXX above), so panic. */
		panic("%s%d: failed to if_alloc() interface.\n",
		    name, dev2unit(dev));
	if_initname(ifp, name, dev2unit(dev));
	ifp->if_mtu = TUNMTU;
	ifp->if_ioctl = tunifioctl;
	ifp->if_output = tunoutput;
	ifp->if_start = tunstart;
	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
	ifp->if_softc = sc;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_snd.ifq_drv_maxlen = 0;
	IFQ_SET_READY(&ifp->if_snd);
	/* knotes on tun_rsel are protected by the softc mutex. */
	knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
	ifp->if_capabilities |= IFCAP_LINKSTATE;
	ifp->if_capenable |= IFCAP_LINKSTATE;

	if_attach(ifp);
	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
	/* Publish the softc for the cdevsw handlers. */
	dev->si_drv1 = sc;
	TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
	    ifp->if_xname, dev2unit(dev));
}
457
/*
 * Open /dev/tunN: lazily create the interface for a device node made
 * outside the cloner, enforce single-open, record the controlling
 * process, and raise link state.  Returns EBUSY when already open or
 * when the instance is being destroyed.
 */
static int
tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	struct ifnet	*ifp;
	struct tun_softc *tp;

	/*
	 * XXXRW: Non-atomic test and set of dev->si_drv1 requires
	 * synchronization.
	 */
	tp = dev->si_drv1;
	if (!tp) {
		tuncreate(tunname, dev);
		tp = dev->si_drv1;
	}

	mtx_lock(&tp->tun_mtx);
	if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
		mtx_unlock(&tp->tun_mtx);
		return (EBUSY);
	}

	/* Record the opener as the controlling process (see tunclose). */
	tp->tun_pid = td->td_proc->p_pid;
	tp->tun_flags |= TUN_OPEN;
	ifp = TUN2IFP(tp);
	if_link_state_change(ifp, LINK_STATE_UP);
	TUNDEBUG(ifp, "open\n");
	mtx_unlock(&tp->tun_mtx);

	return (0);
}
489
490/*
491 * tunclose - close the device - mark i/f down & delete
492 * routing info
493 */
494static	int
495tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
496{
497	struct proc *p;
498	struct tun_softc *tp;
499	struct ifnet *ifp;
500
501	p = td->td_proc;
502	tp = dev->si_drv1;
503	ifp = TUN2IFP(tp);
504
505	mtx_lock(&tp->tun_mtx);
506
507	/*
508	 * Realistically, we can't be obstinate here.  This only means that the
509	 * tuntap device was closed out of order, and the last closer wasn't the
510	 * controller.  These are still good to know about, though, as software
511	 * should avoid multiple processes with a tuntap device open and
512	 * ill-defined transfer of control (e.g., handoff, TUNSIFPID, close in
513	 * parent).
514	 */
515	if (p->p_pid != tp->tun_pid) {
516		log(LOG_INFO,
517		    "pid %d (%s), %s: tun/tap protocol violation, non-controlling process closed last.\n",
518		    p->p_pid, p->p_comm, dev->si_name);
519	}
520
521	/*
522	 * junk all pending output
523	 */
524	CURVNET_SET(ifp->if_vnet);
525	IFQ_PURGE(&ifp->if_snd);
526
527	if (ifp->if_flags & IFF_UP) {
528		mtx_unlock(&tp->tun_mtx);
529		if_down(ifp);
530		mtx_lock(&tp->tun_mtx);
531	}
532
533	/* Delete all addresses and routes which reference this interface. */
534	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
535		struct ifaddr *ifa;
536
537		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
538		mtx_unlock(&tp->tun_mtx);
539		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
540			/* deal w/IPv4 PtP destination; unlocked read */
541			if (ifa->ifa_addr->sa_family == AF_INET) {
542				rtinit(ifa, (int)RTM_DELETE,
543				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
544			} else {
545				rtinit(ifa, (int)RTM_DELETE, 0);
546			}
547		}
548		if_purgeaddrs(ifp);
549		mtx_lock(&tp->tun_mtx);
550	}
551	if_link_state_change(ifp, LINK_STATE_DOWN);
552	CURVNET_RESTORE();
553
554	funsetown(&tp->tun_sigio);
555	selwakeuppri(&tp->tun_rsel, PZERO + 1);
556	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
557	TUNDEBUG (ifp, "closed\n");
558	tp->tun_flags &= ~TUN_OPEN;
559	tp->tun_pid = 0;
560
561	cv_broadcast(&tp->tun_cv);
562	mtx_unlock(&tp->tun_mtx);
563	return (0);
564}
565
/*
 * Bring the interface up (called for SIOCSIFADDR) and record whether
 * an IPv4 address and/or point-to-point destination is configured in
 * TUN_IASET / TUN_DSTADDR (the latter drives RTF_HOST on close).
 */
static void
tuninit(struct ifnet *ifp)
{
	struct tun_softc *tp = ifp->if_softc;
#ifdef INET
	struct ifaddr *ifa;
#endif

	TUNDEBUG(ifp, "tuninit\n");

	mtx_lock(&tp->tun_mtx);
	ifp->if_flags |= IFF_UP;
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET
	if_addr_rlock(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *si;

			si = (struct sockaddr_in *)ifa->ifa_addr;
			if (si->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			si = (struct sockaddr_in *)ifa->ifa_dstaddr;
			if (si && si->sin_addr.s_addr)
				tp->tun_flags |= TUN_DSTADDR;
		}
	}
	if_addr_runlock(ifp);
#endif
	mtx_unlock(&tp->tun_mtx);
}
600
601/*
602 * Process an ioctl request.
603 */
604static int
605tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
606{
607	struct ifreq *ifr = (struct ifreq *)data;
608	struct tun_softc *tp;
609	struct ifstat *ifs;
610	int		error = 0;
611
612	sx_xlock(&tun_ioctl_sx);
613	tp = ifp->if_softc;
614	if (tp == NULL) {
615		error = ENXIO;
616		goto bad;
617	}
618	switch(cmd) {
619	case SIOCGIFSTATUS:
620		ifs = (struct ifstat *)data;
621		mtx_lock(&tp->tun_mtx);
622		if (tp->tun_pid)
623			snprintf(ifs->ascii, sizeof(ifs->ascii),
624			    "\tOpened by PID %d\n", tp->tun_pid);
625		else
626			ifs->ascii[0] = '\0';
627		mtx_unlock(&tp->tun_mtx);
628		break;
629	case SIOCSIFADDR:
630		tuninit(ifp);
631		TUNDEBUG(ifp, "address set\n");
632		break;
633	case SIOCSIFMTU:
634		ifp->if_mtu = ifr->ifr_mtu;
635		TUNDEBUG(ifp, "mtu set\n");
636		break;
637	case SIOCSIFFLAGS:
638	case SIOCADDMULTI:
639	case SIOCDELMULTI:
640		break;
641	default:
642		error = EINVAL;
643	}
644bad:
645	sx_xunlock(&tun_ioctl_sx);
646	return (error);
647}
648
649/*
650 * tunoutput - queue packets from higher level ready to put out.
651 */
652static int
653tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
654    struct route *ro)
655{
656	struct tun_softc *tp = ifp->if_softc;
657	u_short cached_tun_flags;
658	int error;
659	u_int32_t af;
660
661	TUNDEBUG (ifp, "tunoutput\n");
662
663#ifdef MAC
664	error = mac_ifnet_check_transmit(ifp, m0);
665	if (error) {
666		m_freem(m0);
667		return (error);
668	}
669#endif
670
671	/* Could be unlocked read? */
672	mtx_lock(&tp->tun_mtx);
673	cached_tun_flags = tp->tun_flags;
674	mtx_unlock(&tp->tun_mtx);
675	if ((cached_tun_flags & TUN_READY) != TUN_READY) {
676		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
677		m_freem (m0);
678		return (EHOSTDOWN);
679	}
680
681	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
682		m_freem (m0);
683		return (EHOSTDOWN);
684	}
685
686	/* BPF writes need to be handled specially. */
687	if (dst->sa_family == AF_UNSPEC)
688		bcopy(dst->sa_data, &af, sizeof(af));
689	else
690		af = dst->sa_family;
691
692	if (bpf_peers_present(ifp->if_bpf))
693		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
694
695	/* prepend sockaddr? this may abort if the mbuf allocation fails */
696	if (cached_tun_flags & TUN_LMODE) {
697		/* allocate space for sockaddr */
698		M_PREPEND(m0, dst->sa_len, M_NOWAIT);
699
700		/* if allocation failed drop packet */
701		if (m0 == NULL) {
702			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
703			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
704			return (ENOBUFS);
705		} else {
706			bcopy(dst, m0->m_data, dst->sa_len);
707		}
708	}
709
710	if (cached_tun_flags & TUN_IFHEAD) {
711		/* Prepend the address family */
712		M_PREPEND(m0, 4, M_NOWAIT);
713
714		/* if allocation failed drop packet */
715		if (m0 == NULL) {
716			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
717			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
718			return (ENOBUFS);
719		} else
720			*(u_int32_t *)m0->m_data = htonl(af);
721	} else {
722#ifdef INET
723		if (af != AF_INET)
724#endif
725		{
726			m_freem(m0);
727			return (EAFNOSUPPORT);
728		}
729	}
730
731	error = (ifp->if_transmit)(ifp, m0);
732	if (error)
733		return (ENOBUFS);
734	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
735	return (0);
736}
737
738/*
739 * the cdevsw interface is now pretty minimal.
740 */
741static	int
742tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
743    struct thread *td)
744{
745	struct ifreq ifr, *ifrp;
746	struct tun_softc *tp = dev->si_drv1;
747	struct tuninfo *tunp;
748	int error;
749
750	switch (cmd) {
751	case TUNGIFNAME:
752		ifrp = (struct ifreq *)data;
753		strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);
754		break;
755	case TUNSIFINFO:
756		tunp = (struct tuninfo *)data;
757		if (TUN2IFP(tp)->if_type != tunp->type)
758			return (EPROTOTYPE);
759		mtx_lock(&tp->tun_mtx);
760		if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
761			strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
762			ifr.ifr_mtu = tunp->mtu;
763			CURVNET_SET(TUN2IFP(tp)->if_vnet);
764			error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
765			    (caddr_t)&ifr, td);
766			CURVNET_RESTORE();
767			if (error) {
768				mtx_unlock(&tp->tun_mtx);
769				return (error);
770			}
771		}
772		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
773		mtx_unlock(&tp->tun_mtx);
774		break;
775	case TUNGIFINFO:
776		tunp = (struct tuninfo *)data;
777		mtx_lock(&tp->tun_mtx);
778		tunp->mtu = TUN2IFP(tp)->if_mtu;
779		tunp->type = TUN2IFP(tp)->if_type;
780		tunp->baudrate = TUN2IFP(tp)->if_baudrate;
781		mtx_unlock(&tp->tun_mtx);
782		break;
783	case TUNSDEBUG:
784		tundebug = *(int *)data;
785		break;
786	case TUNGDEBUG:
787		*(int *)data = tundebug;
788		break;
789	case TUNSLMODE:
790		mtx_lock(&tp->tun_mtx);
791		if (*(int *)data) {
792			tp->tun_flags |= TUN_LMODE;
793			tp->tun_flags &= ~TUN_IFHEAD;
794		} else
795			tp->tun_flags &= ~TUN_LMODE;
796		mtx_unlock(&tp->tun_mtx);
797		break;
798	case TUNSIFHEAD:
799		mtx_lock(&tp->tun_mtx);
800		if (*(int *)data) {
801			tp->tun_flags |= TUN_IFHEAD;
802			tp->tun_flags &= ~TUN_LMODE;
803		} else
804			tp->tun_flags &= ~TUN_IFHEAD;
805		mtx_unlock(&tp->tun_mtx);
806		break;
807	case TUNGIFHEAD:
808		mtx_lock(&tp->tun_mtx);
809		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
810		mtx_unlock(&tp->tun_mtx);
811		break;
812	case TUNSIFMODE:
813		/* deny this if UP */
814		if (TUN2IFP(tp)->if_flags & IFF_UP)
815			return(EBUSY);
816
817		switch (*(int *)data & ~IFF_MULTICAST) {
818		case IFF_POINTOPOINT:
819		case IFF_BROADCAST:
820			mtx_lock(&tp->tun_mtx);
821			TUN2IFP(tp)->if_flags &=
822			    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
823			TUN2IFP(tp)->if_flags |= *(int *)data;
824			mtx_unlock(&tp->tun_mtx);
825			break;
826		default:
827			return(EINVAL);
828		}
829		break;
830	case TUNSIFPID:
831		mtx_lock(&tp->tun_mtx);
832		tp->tun_pid = curthread->td_proc->p_pid;
833		mtx_unlock(&tp->tun_mtx);
834		break;
835	case FIONBIO:
836		break;
837	case FIOASYNC:
838		mtx_lock(&tp->tun_mtx);
839		if (*(int *)data)
840			tp->tun_flags |= TUN_ASYNC;
841		else
842			tp->tun_flags &= ~TUN_ASYNC;
843		mtx_unlock(&tp->tun_mtx);
844		break;
845	case FIONREAD:
846		if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
847			struct mbuf *mb;
848			IFQ_LOCK(&TUN2IFP(tp)->if_snd);
849			IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
850			for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
851				*(int *)data += mb->m_len;
852			IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
853		} else
854			*(int *)data = 0;
855		break;
856	case FIOSETOWN:
857		return (fsetown(*(int *)data, &tp->tun_sigio));
858
859	case FIOGETOWN:
860		*(int *)data = fgetown(&tp->tun_sigio);
861		return (0);
862
863	/* This is deprecated, FIOSETOWN should be used instead. */
864	case TIOCSPGRP:
865		return (fsetown(-(*(int *)data), &tp->tun_sigio));
866
867	/* This is deprecated, FIOGETOWN should be used instead. */
868	case TIOCGPGRP:
869		*(int *)data = -fgetown(&tp->tun_sigio);
870		return (0);
871
872	default:
873		return (ENOTTY);
874	}
875	return (0);
876}
877
878/*
879 * The cdevsw read interface - reads a packet at a time, or at
880 * least as much of a packet as can be read.
881 */
882static	int
883tunread(struct cdev *dev, struct uio *uio, int flag)
884{
885	struct tun_softc *tp = dev->si_drv1;
886	struct ifnet	*ifp = TUN2IFP(tp);
887	struct mbuf	*m;
888	int		error=0, len;
889
890	TUNDEBUG (ifp, "read\n");
891	mtx_lock(&tp->tun_mtx);
892	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
893		mtx_unlock(&tp->tun_mtx);
894		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
895		return (EHOSTDOWN);
896	}
897
898	tp->tun_flags &= ~TUN_RWAIT;
899
900	do {
901		IFQ_DEQUEUE(&ifp->if_snd, m);
902		if (m == NULL) {
903			if (flag & O_NONBLOCK) {
904				mtx_unlock(&tp->tun_mtx);
905				return (EWOULDBLOCK);
906			}
907			tp->tun_flags |= TUN_RWAIT;
908			error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
909			    "tunread", 0);
910			if (error != 0) {
911				mtx_unlock(&tp->tun_mtx);
912				return (error);
913			}
914		}
915	} while (m == NULL);
916	mtx_unlock(&tp->tun_mtx);
917
918	while (m && uio->uio_resid > 0 && error == 0) {
919		len = min(uio->uio_resid, m->m_len);
920		if (len != 0)
921			error = uiomove(mtod(m, void *), len, uio);
922		m = m_free(m);
923	}
924
925	if (m) {
926		TUNDEBUG(ifp, "Dropping mbuf\n");
927		m_freem(m);
928	}
929	return (error);
930}
931
932/*
933 * the cdevsw write interface - an atomic write is a packet - or else!
934 */
935static	int
936tunwrite(struct cdev *dev, struct uio *uio, int flag)
937{
938	struct tun_softc *tp = dev->si_drv1;
939	struct ifnet	*ifp = TUN2IFP(tp);
940	struct mbuf	*m;
941	uint32_t	family, mru;
942	int 		isr;
943
944	TUNDEBUG(ifp, "tunwrite\n");
945
946	if ((ifp->if_flags & IFF_UP) != IFF_UP)
947		/* ignore silently */
948		return (0);
949
950	if (uio->uio_resid == 0)
951		return (0);
952
953	mru = TUNMRU;
954	if (tp->tun_flags & TUN_IFHEAD)
955		mru += sizeof(family);
956	if (uio->uio_resid < 0 || uio->uio_resid > mru) {
957		TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
958		return (EIO);
959	}
960
961	if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) {
962		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
963		return (ENOBUFS);
964	}
965
966	m->m_pkthdr.rcvif = ifp;
967#ifdef MAC
968	mac_ifnet_create_mbuf(ifp, m);
969#endif
970
971	/* Could be unlocked read? */
972	mtx_lock(&tp->tun_mtx);
973	if (tp->tun_flags & TUN_IFHEAD) {
974		mtx_unlock(&tp->tun_mtx);
975		if (m->m_len < sizeof(family) &&
976		    (m = m_pullup(m, sizeof(family))) == NULL)
977			return (ENOBUFS);
978		family = ntohl(*mtod(m, u_int32_t *));
979		m_adj(m, sizeof(family));
980	} else {
981		mtx_unlock(&tp->tun_mtx);
982		family = AF_INET;
983	}
984
985	BPF_MTAP2(ifp, &family, sizeof(family), m);
986
987	switch (family) {
988#ifdef INET
989	case AF_INET:
990		isr = NETISR_IP;
991		break;
992#endif
993#ifdef INET6
994	case AF_INET6:
995		isr = NETISR_IPV6;
996		break;
997#endif
998	default:
999		m_freem(m);
1000		return (EAFNOSUPPORT);
1001	}
1002	random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_TUN);
1003	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
1004	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
1005	CURVNET_SET(ifp->if_vnet);
1006	M_SETFIB(m, ifp->if_fib);
1007	netisr_dispatch(isr, m);
1008	CURVNET_RESTORE();
1009	return (0);
1010}
1011
1012/*
1013 * tunpoll - the poll interface, this is only useful on reads
1014 * really. The write detect always returns true, write never blocks
1015 * anyway, it either accepts the packet or drops it.
1016 */
1017static	int
1018tunpoll(struct cdev *dev, int events, struct thread *td)
1019{
1020	struct tun_softc *tp = dev->si_drv1;
1021	struct ifnet	*ifp = TUN2IFP(tp);
1022	int		revents = 0;
1023	struct mbuf	*m;
1024
1025	TUNDEBUG(ifp, "tunpoll\n");
1026
1027	if (events & (POLLIN | POLLRDNORM)) {
1028		IFQ_LOCK(&ifp->if_snd);
1029		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
1030		if (m != NULL) {
1031			TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
1032			revents |= events & (POLLIN | POLLRDNORM);
1033		} else {
1034			TUNDEBUG(ifp, "tunpoll waiting\n");
1035			selrecord(td, &tp->tun_rsel);
1036		}
1037		IFQ_UNLOCK(&ifp->if_snd);
1038	}
1039	if (events & (POLLOUT | POLLWRNORM))
1040		revents |= events & (POLLOUT | POLLWRNORM);
1041
1042	return (revents);
1043}
1044
1045/*
1046 * tunkqfilter - support for the kevent() system call.
1047 */
1048static int
1049tunkqfilter(struct cdev *dev, struct knote *kn)
1050{
1051	struct tun_softc	*tp = dev->si_drv1;
1052	struct ifnet	*ifp = TUN2IFP(tp);
1053
1054	switch(kn->kn_filter) {
1055	case EVFILT_READ:
1056		TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
1057		    ifp->if_xname, dev2unit(dev));
1058		kn->kn_fop = &tun_read_filterops;
1059		break;
1060
1061	case EVFILT_WRITE:
1062		TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
1063		    ifp->if_xname, dev2unit(dev));
1064		kn->kn_fop = &tun_write_filterops;
1065		break;
1066
1067	default:
1068		TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
1069		    ifp->if_xname, dev2unit(dev));
1070		return(EINVAL);
1071	}
1072
1073	kn->kn_hook = tp;
1074	knlist_add(&tp->tun_rsel.si_note, kn, 0);
1075
1076	return (0);
1077}
1078
1079/*
1080 * Return true of there is data in the interface queue.
1081 */
1082static int
1083tunkqread(struct knote *kn, long hint)
1084{
1085	int			ret;
1086	struct tun_softc	*tp = kn->kn_hook;
1087	struct cdev		*dev = tp->tun_dev;
1088	struct ifnet	*ifp = TUN2IFP(tp);
1089
1090	if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) {
1091		TUNDEBUG(ifp,
1092		    "%s have data in the queue.  Len = %d, minor = %#x\n",
1093		    ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev));
1094		ret = 1;
1095	} else {
1096		TUNDEBUG(ifp,
1097		    "%s waiting for data, minor = %#x\n", ifp->if_xname,
1098		    dev2unit(dev));
1099		ret = 0;
1100	}
1101
1102	return (ret);
1103}
1104
1105/*
1106 * Always can write, always return MTU in kn->data.
1107 */
1108static int
1109tunkqwrite(struct knote *kn, long hint)
1110{
1111	struct tun_softc	*tp = kn->kn_hook;
1112	struct ifnet	*ifp = TUN2IFP(tp);
1113
1114	kn->kn_data = ifp->if_mtu;
1115
1116	return (1);
1117}
1118
1119static void
1120tunkqdetach(struct knote *kn)
1121{
1122	struct tun_softc	*tp = kn->kn_hook;
1123
1124	knlist_remove(&tp->tun_rsel.si_note, kn, 0);
1125}
1126