1/* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ 2 3/*- 4 * Copyright (c) 1988, Julian Onions <jpo@cs.nott.ac.uk> 5 * Nottingham University 1987. 6 * 7 * This source may be freely distributed, however I would be interested 8 * in any changes that are made. 9 * 10 * This driver takes packets off the IP i/f and hands them up to a 11 * user process to have its wicked way with. This driver has it's 12 * roots in a similar driver written by Phil Cockcroft (formerly) at 13 * UCL. This driver is based much more on read/write/poll mode of 14 * operation though. 15 * 16 * $FreeBSD: stable/11/sys/net/if_tun.c 353157 2019-10-07 01:03:14Z kevans $ 17 */ 18 19#include "opt_inet.h" 20#include "opt_inet6.h" 21 22#include <sys/param.h> 23#include <sys/lock.h> 24#include <sys/priv.h> 25#include <sys/proc.h> 26#include <sys/systm.h> 27#include <sys/jail.h> 28#include <sys/mbuf.h> 29#include <sys/module.h> 30#include <sys/socket.h> 31#include <sys/fcntl.h> 32#include <sys/filio.h> 33#include <sys/sockio.h> 34#include <sys/sx.h> 35#include <sys/syslog.h> 36#include <sys/ttycom.h> 37#include <sys/poll.h> 38#include <sys/selinfo.h> 39#include <sys/signalvar.h> 40#include <sys/filedesc.h> 41#include <sys/kernel.h> 42#include <sys/sysctl.h> 43#include <sys/conf.h> 44#include <sys/uio.h> 45#include <sys/malloc.h> 46#include <sys/random.h> 47#include <sys/ctype.h> 48 49#include <net/if.h> 50#include <net/if_var.h> 51#include <net/if_clone.h> 52#include <net/if_types.h> 53#include <net/netisr.h> 54#include <net/route.h> 55#include <net/vnet.h> 56#ifdef INET 57#include <netinet/in.h> 58#endif 59#include <net/bpf.h> 60#include <net/if_tun.h> 61 62#include <sys/queue.h> 63#include <sys/condvar.h> 64 65#include <security/mac/mac_framework.h> 66 67/* 68 * tun_list is protected by global tunmtx. Other mutable fields are 69 * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is 70 * static for the duration of a tunnel interface. 
 */
struct tun_softc {
	TAILQ_ENTRY(tun_softc)	tun_list;	/* on tunhead; protected by tunmtx */
	struct cdev *tun_dev;			/* backing character device; static for lifetime */
	u_short	tun_flags;			/* misc flags */
#define	TUN_OPEN	0x0001		/* device node is held open (set in tunopen) */
#define	TUN_INITED	0x0002		/* softc fully constructed by tuncreate */
#define	TUN_RCOLL	0x0004
#define	TUN_IASET	0x0008		/* an IPv4 address is configured */
#define	TUN_DSTADDR	0x0010		/* a point-to-point destination is set */
#define	TUN_LMODE	0x0020		/* prepend sockaddr to packets read (TUNSLMODE) */
#define	TUN_RWAIT	0x0040		/* a reader sleeps in tunread(); tunstart wakes */
#define	TUN_ASYNC	0x0080		/* deliver SIGIO on traffic (FIOASYNC) */
#define	TUN_IFHEAD	0x0100		/* prepend address family word (TUNSIFHEAD) */
#define	TUN_DYING	0x0200		/* tun_destroy() in progress; refuse new opens */

#define	TUN_READY	(TUN_OPEN | TUN_INITED)

	pid_t	tun_pid;		/* owning pid */
	struct ifnet	*tun_ifp;	/* the interface */
	struct sigio	*tun_sigio;	/* information for async I/O */
	struct selinfo	tun_rsel;	/* read select */
	struct mtx	tun_mtx;	/* protect mutable softc fields */
	struct cv	tun_cv;		/* protect against ref'd dev destroy */
};
#define	TUN2IFP(sc)	((sc)->tun_ifp)

/* Debug printf gated on the debug.if_tun_debug sysctl (tundebug). */
#define	TUNDEBUG	if (tundebug) if_printf

/*
 * All mutable global variables in if_tun are locked using tunmtx, with
 * the exception of tundebug, which is used unlocked, and tunclones,
 * which is static after setup.
 */
static struct mtx tunmtx;
static eventhandler_tag tag;
static const char tunname[] = "tun";
static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface");
static int tundebug = 0;
static int tundclone = 1;
static struct clonedevs *tunclones;
static TAILQ_HEAD(,tun_softc)	tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");

/* Serializes tunifioctl() against tun_destroy() clearing if_softc. */
static struct sx tun_ioctl_sx;
SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");

SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
    "IP tunnel software network interface.");
SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0,
    "Enable legacy devfs interface creation.");

static void	tunclone(void *arg, struct ucred *cred, char *name,
		    int namelen, struct cdev **dev);
static void	tuncreate(const char *name, struct cdev *dev);
static int	tunifioctl(struct ifnet *, u_long, caddr_t);
static void	tuninit(struct ifnet *);
static int	tunmodevent(module_t, int, void *);
static int	tunoutput(struct ifnet *, struct mbuf *,
		    const struct sockaddr *, struct route *ro);
static void	tunstart(struct ifnet *);

static int	tun_clone_match(struct if_clone *ifc, const char *name);
static int	tun_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int	tun_clone_destroy(struct if_clone *, struct ifnet *);
static struct unrhdr	*tun_unrhdr;
static VNET_DEFINE(struct if_clone *, tun_cloner);
#define V_tun_cloner VNET(tun_cloner)

static d_open_t		tunopen;
static d_close_t	tunclose;
static d_read_t		tunread;
static d_write_t	tunwrite;
static d_ioctl_t	tunioctl;
static d_poll_t		tunpoll;
static d_kqfilter_t	tunkqfilter;

static int		tunkqread(struct knote *, long);
static int		tunkqwrite(struct knote *, long);
static void		tunkqdetach(struct knote *);

static struct filterops tun_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tunkqdetach,
	.f_event =	tunkqread,
};

static struct filterops tun_write_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	tunkqdetach,
	.f_event =	tunkqwrite,
};

static struct cdevsw tun_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDMINOR,
	.d_open =	tunopen,
	.d_close =	tunclose,
	.d_read =	tunread,
	.d_write =	tunwrite,
	.d_ioctl =	tunioctl,
	.d_poll =	tunpoll,
	.d_kqfilter =	tunkqfilter,
	.d_name =	tunname,
};

/*
 * Return non-zero if "name" is ours: the literal "tun" prefix followed
 * by nothing (wildcard) or a unit digit.
 */
static int
tun_clone_match(struct if_clone *ifc, const char *name)
{
	if (strncmp(tunname, name, 3) == 0 &&
	    (name[3] == '\0' || isdigit(name[3])))
		return (1);

	return (0);
}

/*
 * if_clone create handler: reserve a unit number, ensure the backing
 * cdev exists, and construct the softc/ifnet via tuncreate().
 */
static int
tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
{
	struct cdev *dev;
	int err, unit, i;

	err = ifc_name2unit(name, &unit);
	if (err != 0)
		return (err);

	if (unit != -1) {
		/* If this unit number is still available that's okay. */
		if (alloc_unr_specific(tun_unrhdr, unit) == -1)
			return (EEXIST);
	} else {
		unit = alloc_unr(tun_unrhdr);
	}

	snprintf(name, IFNAMSIZ, "%s%d", tunname, unit);

	/* find any existing device, or allocate new unit number */
	i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0);
	if (i) {
		/* No preexisting struct cdev *, create one */
		dev = make_dev(&tun_cdevsw, unit,
		    UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit);
	}
	tuncreate(tunname, dev);

	return (0);
}

/*
 * devfs clone event handler: create a /dev/tunN node (and interface)
 * on first lookup when legacy devfs cloning is enabled.
 */
static void
tunclone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{
	char devname[SPECNAMELEN + 1];
	int u, i, append_unit;

	if (*dev != NULL)
		return;

	/*
	 * If tun cloning is enabled, only the superuser can create an
	 * interface.
	 */
	if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)
		return;

	if (strcmp(name, tunname) == 0) {
		u = -1;
	} else if (dev_stdclone(name, NULL, tunname, &u) != 1)
		return;	/* Don't recognise the name */
	if (u != -1 && u > IF_MAXUNIT)
		return;	/* Unit number too high */

	if (u == -1)
		append_unit = 1;
	else
		append_unit = 0;

	CURVNET_SET(CRED_TO_VNET(cred));
	/* find any existing device, or allocate new unit number */
	i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0);
	if (i) {
		if (append_unit) {
			/* Bare "tun" was opened: name the node after the unit we got. */
			namelen = snprintf(devname, sizeof(devname), "%s%d",
			    name, u);
			name = devname;
		}
		/* No preexisting struct cdev *, create one */
		*dev = make_dev_credf(MAKEDEV_REF, &tun_cdevsw, u, cred,
		    UID_UUCP, GID_DIALER, 0600, "%s", name);
	}

	if_clone_create(name, namelen, NULL);
	CURVNET_RESTORE();
}

/*
 * Tear down one tunnel: mark it dying, wait (on tun_cv) for any open
 * device to be closed, then detach the interface and free everything.
 */
static void
tun_destroy(struct tun_softc *tp)
{
	struct cdev *dev;

	mtx_lock(&tp->tun_mtx);
	tp->tun_flags |= TUN_DYING;
	if ((tp->tun_flags & TUN_OPEN) != 0)
		/* Sleep until tunclose() clears TUN_OPEN and broadcasts. */
		cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx);
	else
		mtx_unlock(&tp->tun_mtx);

	CURVNET_SET(TUN2IFP(tp)->if_vnet);

	dev = tp->tun_dev;
	bpfdetach(TUN2IFP(tp));
	if_detach(TUN2IFP(tp));

	/* Block tunifioctl() before it can dereference a dying softc. */
	sx_xlock(&tun_ioctl_sx);
	TUN2IFP(tp)->if_softc = NULL;
	sx_xunlock(&tun_ioctl_sx);

	free_unr(tun_unrhdr, TUN2IFP(tp)->if_dunit);
	if_free(TUN2IFP(tp));
	destroy_dev(dev);
	seldrain(&tp->tun_rsel);
	knlist_clear(&tp->tun_rsel.si_note, 0);
	knlist_destroy(&tp->tun_rsel.si_note);
	mtx_destroy(&tp->tun_mtx);
	cv_destroy(&tp->tun_cv);
	free(tp, M_TUN);
	CURVNET_RESTORE();
}

/* if_clone destroy handler: unlink from the global list and destroy. */
static int
tun_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
{
	struct tun_softc *tp = ifp->if_softc;

	mtx_lock(&tunmtx);
	TAILQ_REMOVE(&tunhead, tp, tun_list);
	mtx_unlock(&tunmtx);
	tun_destroy(tp);

	return (0);
}

/* Per-vnet attach of the "tun" cloner. */
static void
vnet_tun_init(const void *unused __unused)
{
	V_tun_cloner = if_clone_advanced(tunname, 0, tun_clone_match,
	    tun_clone_create, tun_clone_destroy);
}
VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
    vnet_tun_init, NULL);

/* Per-vnet detach of the "tun" cloner. */
static void
vnet_tun_uninit(const void *unused __unused)
{
	if_clone_detach(V_tun_cloner);
}
VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
    vnet_tun_uninit, NULL);

/*
 * Module-wide teardown: deregister the devfs clone hook, destroy every
 * remaining tunnel, and release global resources.
 */
static void
tun_uninit(const void *unused __unused)
{
	struct tun_softc *tp;

	EVENTHANDLER_DEREGISTER(dev_clone, tag);
	drain_dev_clone_events();

	mtx_lock(&tunmtx);
	while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
		TAILQ_REMOVE(&tunhead, tp, tun_list);
		/* tun_destroy() sleeps; drop tunmtx around it. */
		mtx_unlock(&tunmtx);
		tun_destroy(tp);
		mtx_lock(&tunmtx);
	}
	mtx_unlock(&tunmtx);
	delete_unrhdr(tun_unrhdr);
	clone_cleanup(&tunclones);
	mtx_destroy(&tunmtx);
}
SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL);

/* Module event handler: set up globals on load; unload is in tun_uninit. */
static int
tunmodevent(module_t mod, int type, void *data)
{

	switch (type) {
	case MOD_LOAD:
		mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF);
		clone_setup(&tunclones);
		tun_unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
		tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
		if (tag == NULL)
			return (ENOMEM);
		break;
	case MOD_UNLOAD:
		/* See tun_uninit, so it's done after the vnet_sysuninit() */
		break;
	default:
		return EOPNOTSUPP;
	}
	return 0;
}

static moduledata_t tun_mod = {
	"if_tun",
	tunmodevent,
	0
};

DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_tun, 1);

/*
 * if_start handler: a packet was queued for the tunnel, so wake any
 * sleeping reader, poll/kqueue waiters, and the async-I/O owner.
 */
static void
tunstart(struct ifnet *ifp)
{
	struct tun_softc *tp = ifp->if_softc;
	struct mbuf *m;

	TUNDEBUG(ifp,"%s starting\n", ifp->if_xname);
	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
		/* With ALTQ, only notify if there really is a packet queued. */
		IFQ_LOCK(&ifp->if_snd);
		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
		if (m == NULL) {
			IFQ_UNLOCK(&ifp->if_snd);
			return;
		}
		IFQ_UNLOCK(&ifp->if_snd);
	}

	mtx_lock(&tp->tun_mtx);
	if (tp->tun_flags & TUN_RWAIT) {
		tp->tun_flags &= ~TUN_RWAIT;
		wakeup(tp);
	}
	selwakeuppri(&tp->tun_rsel, PZERO + 1);
	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
	if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) {
		mtx_unlock(&tp->tun_mtx);
		pgsigio(&tp->tun_sigio, SIGIO, 0);
	} else
		mtx_unlock(&tp->tun_mtx);
}

/*
 * Construct the softc and network interface for one tunnel and bind
 * them to "dev".
 *
 * XXX: should return an error code so it can fail.
 */
static void
tuncreate(const char *name, struct cdev *dev)
{
	struct tun_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO);
	mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF);
	cv_init(&sc->tun_cv, "tun_condvar");
	sc->tun_flags = TUN_INITED;
	sc->tun_dev = dev;
	mtx_lock(&tunmtx);
	TAILQ_INSERT_TAIL(&tunhead, sc, tun_list);
	mtx_unlock(&tunmtx);

	ifp = sc->tun_ifp = if_alloc(IFT_PPP);
	if (ifp == NULL)
		panic("%s%d: failed to if_alloc() interface.\n",
		    name, dev2unit(dev));
	if_initname(ifp, name, dev2unit(dev));
	ifp->if_mtu = TUNMTU;
	ifp->if_ioctl = tunifioctl;
	ifp->if_output = tunoutput;
	ifp->if_start = tunstart;
	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
	ifp->if_softc = sc;
	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
	ifp->if_snd.ifq_drv_maxlen = 0;
	IFQ_SET_READY(&ifp->if_snd);
	/* kqueue read notes are protected by the softc mutex. */
	knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx);
	ifp->if_capabilities |= IFCAP_LINKSTATE;
	ifp->if_capenable |= IFCAP_LINKSTATE;

	if_attach(ifp);
	bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
	dev->si_drv1 = sc;
	TUNDEBUG(ifp, "interface %s is created, minor = %#x\n",
	    ifp->if_xname, dev2unit(dev));
}

/*
 * cdevsw open: allow a single opener, record it as the controlling
 * process, and mark the link up.
 */
static int
tunopen(struct cdev *dev, int flag, int mode, struct thread *td)
{
	struct ifnet *ifp;
	struct tun_softc *tp;

	/*
	 * XXXRW: Non-atomic test and set of dev->si_drv1 requires
	 * synchronization.
	 */
	tp = dev->si_drv1;
	if (!tp) {
		tuncreate(tunname, dev);
		tp = dev->si_drv1;
	}

	mtx_lock(&tp->tun_mtx);
	/* Refuse a second opener, or an open racing tun_destroy(). */
	if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) {
		mtx_unlock(&tp->tun_mtx);
		return (EBUSY);
	}

	tp->tun_pid = td->td_proc->p_pid;
	tp->tun_flags |= TUN_OPEN;
	ifp = TUN2IFP(tp);
	if_link_state_change(ifp, LINK_STATE_UP);
	TUNDEBUG(ifp, "open\n");
	mtx_unlock(&tp->tun_mtx);

	return (0);
}

/*
 * tunclose - close the device - mark i/f down & delete
 * routing info
 */
static int
tunclose(struct cdev *dev, int foo, int bar, struct thread *td)
{
	struct proc *p;
	struct tun_softc *tp;
	struct ifnet *ifp;

	p = td->td_proc;
	tp = dev->si_drv1;
	ifp = TUN2IFP(tp);

	mtx_lock(&tp->tun_mtx);

	/*
	 * Realistically, we can't be obstinate here.  This only means that the
	 * tuntap device was closed out of order, and the last closer wasn't the
	 * controller.  These are still good to know about, though, as software
	 * should avoid multiple processes with a tuntap device open and
	 * ill-defined transfer of control (e.g., handoff, TUNSIFPID, close in
	 * parent).
	 */
	if (p->p_pid != tp->tun_pid) {
		log(LOG_INFO,
		    "pid %d (%s), %s: tun/tap protocol violation, non-controlling process closed last.\n",
		    p->p_pid, p->p_comm, dev->si_name);
	}

	/*
	 * junk all pending output
	 */
	CURVNET_SET(ifp->if_vnet);
	IFQ_PURGE(&ifp->if_snd);

	if (ifp->if_flags & IFF_UP) {
		/* if_down() may sleep/notify; drop the softc mutex around it. */
		mtx_unlock(&tp->tun_mtx);
		if_down(ifp);
		mtx_lock(&tp->tun_mtx);
	}

	/* Delete all addresses and routes which reference this interface. */
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		struct ifaddr *ifa;

		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		mtx_unlock(&tp->tun_mtx);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			/* deal w/IPv4 PtP destination; unlocked read */
			if (ifa->ifa_addr->sa_family == AF_INET) {
				rtinit(ifa, (int)RTM_DELETE,
				    tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0);
			} else {
				rtinit(ifa, (int)RTM_DELETE, 0);
			}
		}
		if_purgeaddrs(ifp);
		mtx_lock(&tp->tun_mtx);
	}
	if_link_state_change(ifp, LINK_STATE_DOWN);
	CURVNET_RESTORE();

	funsetown(&tp->tun_sigio);
	selwakeuppri(&tp->tun_rsel, PZERO + 1);
	KNOTE_LOCKED(&tp->tun_rsel.si_note, 0);
	TUNDEBUG (ifp, "closed\n");
	tp->tun_flags &= ~TUN_OPEN;
	tp->tun_pid = 0;

	/* Wake a tun_destroy() that is waiting for this close. */
	cv_broadcast(&tp->tun_cv);
	mtx_unlock(&tp->tun_mtx);
	return (0);
}

/*
 * Bring the interface up (called on SIOCSIFADDR) and record whether an
 * IPv4 source/destination address has been configured.
 */
static void
tuninit(struct ifnet *ifp)
{
	struct tun_softc *tp = ifp->if_softc;
#ifdef INET
	struct ifaddr *ifa;
#endif

	TUNDEBUG(ifp, "tuninit\n");

	mtx_lock(&tp->tun_mtx);
	ifp->if_flags |= IFF_UP;
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	getmicrotime(&ifp->if_lastchange);

#ifdef INET
	if_addr_rlock(ifp);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct sockaddr_in *si;

			si = (struct sockaddr_in *)ifa->ifa_addr;
			if (si->sin_addr.s_addr)
				tp->tun_flags |= TUN_IASET;

			si = (struct sockaddr_in *)ifa->ifa_dstaddr;
			if (si && si->sin_addr.s_addr)
				tp->tun_flags |= TUN_DSTADDR;
		}
	}
	if_addr_runlock(ifp);
#endif
	mtx_unlock(&tp->tun_mtx);
}

/*
 * Process an ioctl request.
 */
static int
tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	struct tun_softc *tp;
	struct ifstat *ifs;
	int error = 0;

	/* tun_ioctl_sx keeps if_softc stable against tun_destroy(). */
	sx_xlock(&tun_ioctl_sx);
	tp = ifp->if_softc;
	if (tp == NULL) {
		error = ENXIO;
		goto bad;
	}
	switch(cmd) {
	case SIOCGIFSTATUS:
		ifs = (struct ifstat *)data;
		mtx_lock(&tp->tun_mtx);
		if (tp->tun_pid)
			snprintf(ifs->ascii, sizeof(ifs->ascii),
			    "\tOpened by PID %d\n", tp->tun_pid);
		else
			ifs->ascii[0] = '\0';
		mtx_unlock(&tp->tun_mtx);
		break;
	case SIOCSIFADDR:
		tuninit(ifp);
		TUNDEBUG(ifp, "address set\n");
		break;
	case SIOCSIFMTU:
		ifp->if_mtu = ifr->ifr_mtu;
		TUNDEBUG(ifp, "mtu set\n");
		break;
	case SIOCSIFFLAGS:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		break;
	default:
		error = EINVAL;
	}
bad:
	sx_xunlock(&tun_ioctl_sx);
	return (error);
}

/*
 * tunoutput - queue packets from higher level ready to put out.
 */
static int
tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
    struct route *ro)
{
	struct tun_softc *tp = ifp->if_softc;
	u_short cached_tun_flags;
	int error;
	u_int32_t af;

	TUNDEBUG (ifp, "tunoutput\n");

#ifdef MAC
	error = mac_ifnet_check_transmit(ifp, m0);
	if (error) {
		m_freem(m0);
		return (error);
	}
#endif

	/* Could be unlocked read? */
	mtx_lock(&tp->tun_mtx);
	cached_tun_flags = tp->tun_flags;
	mtx_unlock(&tp->tun_mtx);
	if ((cached_tun_flags & TUN_READY) != TUN_READY) {
		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
		m_freem (m0);
		return (EHOSTDOWN);
	}

	if ((ifp->if_flags & IFF_UP) != IFF_UP) {
		m_freem (m0);
		return (EHOSTDOWN);
	}

	/* BPF writes need to be handled specially. */
	if (dst->sa_family == AF_UNSPEC)
		/* AF_UNSPEC carries the real family in sa_data. */
		bcopy(dst->sa_data, &af, sizeof(af));
	else
		af = dst->sa_family;

	if (bpf_peers_present(ifp->if_bpf))
		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);

	/* prepend sockaddr? this may abort if the mbuf allocation fails */
	if (cached_tun_flags & TUN_LMODE) {
		/* allocate space for sockaddr */
		M_PREPEND(m0, dst->sa_len, M_NOWAIT);

		/* if allocation failed drop packet */
		if (m0 == NULL) {
			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (ENOBUFS);
		} else {
			bcopy(dst, m0->m_data, dst->sa_len);
		}
	}

	if (cached_tun_flags & TUN_IFHEAD) {
		/* Prepend the address family */
		M_PREPEND(m0, 4, M_NOWAIT);

		/* if allocation failed drop packet */
		if (m0 == NULL) {
			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
			return (ENOBUFS);
		} else
			*(u_int32_t *)m0->m_data = htonl(af);
	} else {
#ifdef INET
		if (af != AF_INET)
#endif
		{
			/* Without TUN_IFHEAD only IPv4 can be demultiplexed. */
			m_freem(m0);
			return (EAFNOSUPPORT);
		}
	}

	error = (ifp->if_transmit)(ifp, m0);
	if (error)
		return (ENOBUFS);
	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
	return (0);
}

/*
 * the cdevsw interface is now pretty minimal.
 */
static int
tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
	struct ifreq ifr, *ifrp;
	struct tun_softc *tp = dev->si_drv1;
	struct tuninfo *tunp;
	int error;

	switch (cmd) {
	case TUNGIFNAME:
		/* Report the interface name backing this device node. */
		ifrp = (struct ifreq *)data;
		strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, IFNAMSIZ);
		break;
	case TUNSIFINFO:
		tunp = (struct tuninfo *)data;
		if (TUN2IFP(tp)->if_type != tunp->type)
			return (EPROTOTYPE);
		mtx_lock(&tp->tun_mtx);
		if (TUN2IFP(tp)->if_mtu != tunp->mtu) {
			/* Route the MTU change through the normal ioctl path. */
			strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ);
			ifr.ifr_mtu = tunp->mtu;
			CURVNET_SET(TUN2IFP(tp)->if_vnet);
			error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp),
			    (caddr_t)&ifr, td);
			CURVNET_RESTORE();
			if (error) {
				mtx_unlock(&tp->tun_mtx);
				return (error);
			}
		}
		TUN2IFP(tp)->if_baudrate = tunp->baudrate;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNGIFINFO:
		tunp = (struct tuninfo *)data;
		mtx_lock(&tp->tun_mtx);
		tunp->mtu = TUN2IFP(tp)->if_mtu;
		tunp->type = TUN2IFP(tp)->if_type;
		tunp->baudrate = TUN2IFP(tp)->if_baudrate;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNSDEBUG:
		tundebug = *(int *)data;
		break;
	case TUNGDEBUG:
		*(int *)data = tundebug;
		break;
	case TUNSLMODE:
		/* Link-layer mode and IFHEAD mode are mutually exclusive. */
		mtx_lock(&tp->tun_mtx);
		if (*(int *)data) {
			tp->tun_flags |= TUN_LMODE;
			tp->tun_flags &= ~TUN_IFHEAD;
		} else
			tp->tun_flags &= ~TUN_LMODE;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNSIFHEAD:
		mtx_lock(&tp->tun_mtx);
		if (*(int *)data) {
			tp->tun_flags |= TUN_IFHEAD;
			tp->tun_flags &= ~TUN_LMODE;
		} else
			tp->tun_flags &= ~TUN_IFHEAD;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNGIFHEAD:
		mtx_lock(&tp->tun_mtx);
		*(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0;
		mtx_unlock(&tp->tun_mtx);
		break;
	case TUNSIFMODE:
		/* deny this if UP */
		if (TUN2IFP(tp)->if_flags & IFF_UP)
			return(EBUSY);

		switch (*(int *)data & ~IFF_MULTICAST) {
		case IFF_POINTOPOINT:
		case IFF_BROADCAST:
			mtx_lock(&tp->tun_mtx);
			TUN2IFP(tp)->if_flags &=
			    ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST);
			TUN2IFP(tp)->if_flags |= *(int *)data;
			mtx_unlock(&tp->tun_mtx);
			break;
		default:
			return(EINVAL);
		}
		break;
	case TUNSIFPID:
		/* Take over as the controlling process. */
		mtx_lock(&tp->tun_mtx);
		tp->tun_pid = curthread->td_proc->p_pid;
		mtx_unlock(&tp->tun_mtx);
		break;
	case FIONBIO:
		break;
	case FIOASYNC:
		mtx_lock(&tp->tun_mtx);
		if (*(int *)data)
			tp->tun_flags |= TUN_ASYNC;
		else
			tp->tun_flags &= ~TUN_ASYNC;
		mtx_unlock(&tp->tun_mtx);
		break;
	case FIONREAD:
		/* Report the byte length of the next queued packet, if any. */
		if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) {
			struct mbuf *mb;
			IFQ_LOCK(&TUN2IFP(tp)->if_snd);
			IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb);
			for (*(int *)data = 0; mb != NULL; mb = mb->m_next)
				*(int *)data += mb->m_len;
			IFQ_UNLOCK(&TUN2IFP(tp)->if_snd);
		} else
			*(int *)data = 0;
		break;
	case FIOSETOWN:
		return (fsetown(*(int *)data, &tp->tun_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(&tp->tun_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		return (fsetown(-(*(int *)data), &tp->tun_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(&tp->tun_sigio);
		return (0);

	default:
		return (ENOTTY);
	}
	return (0);
}

/*
 * The cdevsw read interface - reads a packet at a time, or at
 * least as much of a packet as can be read.
 */
static int
tunread(struct cdev *dev, struct uio *uio, int flag)
{
	struct tun_softc *tp = dev->si_drv1;
	struct ifnet *ifp = TUN2IFP(tp);
	struct mbuf *m;
	int error=0, len;

	TUNDEBUG (ifp, "read\n");
	mtx_lock(&tp->tun_mtx);
	if ((tp->tun_flags & TUN_READY) != TUN_READY) {
		mtx_unlock(&tp->tun_mtx);
		TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags);
		return (EHOSTDOWN);
	}

	tp->tun_flags &= ~TUN_RWAIT;

	/* Dequeue one packet, sleeping (interruptibly) until one arrives. */
	do {
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			if (flag & O_NONBLOCK) {
				mtx_unlock(&tp->tun_mtx);
				return (EWOULDBLOCK);
			}
			/* tunstart() clears TUN_RWAIT and wakeup(tp)s us. */
			tp->tun_flags |= TUN_RWAIT;
			error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1),
			    "tunread", 0);
			if (error != 0) {
				mtx_unlock(&tp->tun_mtx);
				return (error);
			}
		}
	} while (m == NULL);
	mtx_unlock(&tp->tun_mtx);

	/* Copy the chain out; a too-small read silently truncates. */
	while (m && uio->uio_resid > 0 && error == 0) {
		len = min(uio->uio_resid, m->m_len);
		if (len != 0)
			error = uiomove(mtod(m, void *), len, uio);
		m = m_free(m);
	}

	if (m) {
		TUNDEBUG(ifp, "Dropping mbuf\n");
		m_freem(m);
	}
	return (error);
}

/*
 * the cdevsw write interface - an atomic write is a packet - or else!
 */
static int
tunwrite(struct cdev *dev, struct uio *uio, int flag)
{
	struct tun_softc *tp = dev->si_drv1;
	struct ifnet *ifp = TUN2IFP(tp);
	struct mbuf *m;
	uint32_t family, mru;
	int isr;

	TUNDEBUG(ifp, "tunwrite\n");

	if ((ifp->if_flags & IFF_UP) != IFF_UP)
		/* ignore silently */
		return (0);

	if (uio->uio_resid == 0)
		return (0);

	/* In IFHEAD mode the leading AF word is allowed on top of the MRU. */
	mru = TUNMRU;
	if (tp->tun_flags & TUN_IFHEAD)
		mru += sizeof(family);
	if (uio->uio_resid < 0 || uio->uio_resid > mru) {
		TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid);
		return (EIO);
	}

	if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) {
		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
		return (ENOBUFS);
	}

	m->m_pkthdr.rcvif = ifp;
#ifdef MAC
	mac_ifnet_create_mbuf(ifp, m);
#endif

	/* Could be unlocked read? */
	mtx_lock(&tp->tun_mtx);
	if (tp->tun_flags & TUN_IFHEAD) {
		mtx_unlock(&tp->tun_mtx);
		/* Strip the leading address-family word from the packet. */
		if (m->m_len < sizeof(family) &&
		    (m = m_pullup(m, sizeof(family))) == NULL)
			return (ENOBUFS);
		family = ntohl(*mtod(m, u_int32_t *));
		m_adj(m, sizeof(family));
	} else {
		mtx_unlock(&tp->tun_mtx);
		family = AF_INET;
	}

	BPF_MTAP2(ifp, &family, sizeof(family), m);

	switch (family) {
#ifdef INET
	case AF_INET:
		isr = NETISR_IP;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		isr = NETISR_IPV6;
		break;
#endif
	default:
		m_freem(m);
		return (EAFNOSUPPORT);
	}
	random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_TUN);
	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	CURVNET_SET(ifp->if_vnet);
	M_SETFIB(m, ifp->if_fib);
	netisr_dispatch(isr, m);
	CURVNET_RESTORE();
	return (0);
}

/*
 * tunpoll - the poll interface, this is only useful on reads
 * really.
 * The write detect always returns true, write never blocks
 * anyway, it either accepts the packet or drops it.
 */
static int
tunpoll(struct cdev *dev, int events, struct thread *td)
{
	struct tun_softc *tp = dev->si_drv1;
	struct ifnet *ifp = TUN2IFP(tp);
	int revents = 0;
	struct mbuf *m;

	TUNDEBUG(ifp, "tunpoll\n");

	if (events & (POLLIN | POLLRDNORM)) {
		IFQ_LOCK(&ifp->if_snd);
		IFQ_POLL_NOLOCK(&ifp->if_snd, m);
		if (m != NULL) {
			TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len);
			revents |= events & (POLLIN | POLLRDNORM);
		} else {
			TUNDEBUG(ifp, "tunpoll waiting\n");
			/* Nothing queued: register for wakeup from tunstart(). */
			selrecord(td, &tp->tun_rsel);
		}
		IFQ_UNLOCK(&ifp->if_snd);
	}
	if (events & (POLLOUT | POLLWRNORM))
		revents |= events & (POLLOUT | POLLWRNORM);

	return (revents);
}

/*
 * tunkqfilter - support for the kevent() system call.
 */
static int
tunkqfilter(struct cdev *dev, struct knote *kn)
{
	struct tun_softc *tp = dev->si_drv1;
	struct ifnet *ifp = TUN2IFP(tp);

	switch(kn->kn_filter) {
	case EVFILT_READ:
		TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n",
		    ifp->if_xname, dev2unit(dev));
		kn->kn_fop = &tun_read_filterops;
		break;

	case EVFILT_WRITE:
		TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n",
		    ifp->if_xname, dev2unit(dev));
		kn->kn_fop = &tun_write_filterops;
		break;

	default:
		TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n",
		    ifp->if_xname, dev2unit(dev));
		return(EINVAL);
	}

	/* Both filters hang off the single read knlist. */
	kn->kn_hook = tp;
	knlist_add(&tp->tun_rsel.si_note, kn, 0);

	return (0);
}

/*
 * Return true if there is data in the interface queue.
1081 */ 1082static int 1083tunkqread(struct knote *kn, long hint) 1084{ 1085 int ret; 1086 struct tun_softc *tp = kn->kn_hook; 1087 struct cdev *dev = tp->tun_dev; 1088 struct ifnet *ifp = TUN2IFP(tp); 1089 1090 if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { 1091 TUNDEBUG(ifp, 1092 "%s have data in the queue. Len = %d, minor = %#x\n", 1093 ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); 1094 ret = 1; 1095 } else { 1096 TUNDEBUG(ifp, 1097 "%s waiting for data, minor = %#x\n", ifp->if_xname, 1098 dev2unit(dev)); 1099 ret = 0; 1100 } 1101 1102 return (ret); 1103} 1104 1105/* 1106 * Always can write, always return MTU in kn->data. 1107 */ 1108static int 1109tunkqwrite(struct knote *kn, long hint) 1110{ 1111 struct tun_softc *tp = kn->kn_hook; 1112 struct ifnet *ifp = TUN2IFP(tp); 1113 1114 kn->kn_data = ifp->if_mtu; 1115 1116 return (1); 1117} 1118 1119static void 1120tunkqdetach(struct knote *kn) 1121{ 1122 struct tun_softc *tp = kn->kn_hook; 1123 1124 knlist_remove(&tp->tun_rsel.si_note, kn, 0); 1125} 1126