1/* 2 * Copyright (c) 1999-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 30 * support for mandatory and extensible security protections. This notice 31 * is included in support of clause 2.2 (b) of the Apple Public License, 32 * Version 2.0. 33 */ 34 35#include <sys/param.h> 36#include <sys/systm.h> 37#include <sys/kernel.h> 38#include <sys/malloc.h> 39#include <sys/mbuf.h> 40#include <sys/socket.h> 41#include <sys/domain.h> 42#include <sys/user.h> 43#include <sys/random.h> 44#include <sys/socketvar.h> 45#include <net/if_dl.h> 46#include <net/if.h> 47#include <net/route.h> 48#include <net/if_var.h> 49#include <net/dlil.h> 50#include <net/if_arp.h> 51#include <net/iptap.h> 52#include <sys/kern_event.h> 53#include <sys/kdebug.h> 54#include <sys/mcache.h> 55 56#include <kern/assert.h> 57#include <kern/task.h> 58#include <kern/thread.h> 59#include <kern/sched_prim.h> 60#include <kern/locks.h> 61#include <kern/zalloc.h> 62#include <net/kpi_protocol.h> 63 64#include <net/if_types.h> 65#include <net/if_llreach.h> 66#include <net/kpi_interfacefilter.h> 67#include <net/classq/classq.h> 68#include <net/classq/classq_sfb.h> 69 70#if INET 71#include <netinet/in_var.h> 72#include <netinet/igmp_var.h> 73#include <netinet/ip_var.h> 74#include <netinet/tcp.h> 75#include <netinet/tcp_var.h> 76#include <netinet/udp.h> 77#include <netinet/udp_var.h> 78#include <netinet/if_ether.h> 79#include <netinet/in_pcb.h> 80#endif /* INET */ 81 82#if INET6 83#include <netinet6/in6_var.h> 84#include <netinet6/nd6.h> 85#include <netinet6/mld6_var.h> 86#endif /* INET6 */ 87 88#if NETAT 89#include <netat/at_var.h> 90#endif /* NETAT */ 91 92#include <libkern/OSAtomic.h> 93 94#include <machine/machine_routines.h> 95 96#include <mach/thread_act.h> 97#include <mach/sdt.h> 98 99#if CONFIG_MACF_NET 100#include <security/mac_framework.h> 101#endif /* MAC_NET */ 102 103#if PF 104#include <net/pfvar.h> 105#endif /* PF */ 106#if PF_ALTQ 107#include <net/altq/altq.h> 108#endif /* PF_ALTQ */ 109#include <net/pktsched/pktsched.h> 110 111#define DBG_LAYER_BEG DLILDBG_CODE(DBG_DLIL_STATIC, 0) 112#define DBG_LAYER_END DLILDBG_CODE(DBG_DLIL_STATIC, 2) 113#define DBG_FNC_DLIL_INPUT DLILDBG_CODE(DBG_DLIL_STATIC, (1 << 8)) 114#define DBG_FNC_DLIL_OUTPUT DLILDBG_CODE(DBG_DLIL_STATIC, (2 << 8)) 115#define DBG_FNC_DLIL_IFOUT DLILDBG_CODE(DBG_DLIL_STATIC, (3 << 8)) 116 117 118#define MAX_FRAME_TYPE_SIZE 4 /* LONGWORDS */ 119#define MAX_LINKADDR 4 /* LONGWORDS */ 120#define M_NKE M_IFADDR 121 122#if 1 123#define DLIL_PRINTF printf 124#else 125#define DLIL_PRINTF kprintf 126#endif 127 128#define IF_DATA_REQUIRE_ALIGNED_64(f) \ 129 _CASSERT(!(offsetof(struct if_data_internal, f) % sizeof (u_int64_t))) 130 131#define IFNET_IF_DATA_REQUIRE_ALIGNED_64(f) \ 132 _CASSERT(!(offsetof(struct ifnet, if_data.f) % sizeof (u_int64_t))) 133 134enum { 135 kProtoKPI_v1 = 1, 136 kProtoKPI_v2 = 2 137}; 138 139/* 140 * List of if_proto structures in if_proto_hash[] is protected by 141 * the ifnet lock. The rest of the fields are initialized at protocol 142 * attach time and never change, thus no lock required as long as 143 * a reference to it is valid, via if_proto_ref(). 144 */ 145struct if_proto { 146 SLIST_ENTRY(if_proto) next_hash; 147 u_int32_t refcount; 148 u_int32_t detached; 149 struct ifnet *ifp; 150 protocol_family_t protocol_family; 151 int proto_kpi; 152 union { 153 struct { 154 proto_media_input input; 155 proto_media_preout pre_output; 156 proto_media_event event; 157 proto_media_ioctl ioctl; 158 proto_media_detached detached; 159 proto_media_resolve_multi resolve_multi; 160 proto_media_send_arp send_arp; 161 } v1; 162 struct { 163 proto_media_input_v2 input; 164 proto_media_preout pre_output; 165 proto_media_event event; 166 proto_media_ioctl ioctl; 167 proto_media_detached detached; 168 proto_media_resolve_multi resolve_multi; 169 proto_media_send_arp send_arp; 170 } v2; 171 } kpi; 172}; 173 174SLIST_HEAD(proto_hash_entry, if_proto); 175 176#define DLIL_SDLMAXLEN 64 177#define DLIL_SDLDATALEN \ 178 (DLIL_SDLMAXLEN - offsetof(struct sockaddr_dl, sdl_data[0])) 179 180struct dlil_ifnet { 181 struct ifnet dl_if; /* public ifnet */ 182 /* 183 * DLIL private fields, protected by dl_if_lock 184 */ 185 decl_lck_mtx_data(, dl_if_lock); 186 TAILQ_ENTRY(dlil_ifnet) dl_if_link; /* dlil_ifnet link */ 187 u_int32_t dl_if_flags; /* flags (below) */ 188 u_int32_t dl_if_refcnt; /* refcnt */ 189 void (*dl_if_trace)(struct dlil_ifnet *, int); /* ref trace callback */ 190 void *dl_if_uniqueid; /* unique interface id */ 191 size_t dl_if_uniqueid_len; /* length of the unique id */ 192 char dl_if_namestorage[IFNAMSIZ]; /* interface name storage */ 193 struct { 194 struct ifaddr ifa; /* lladdr ifa */ 195 u_int8_t asdl[DLIL_SDLMAXLEN]; /* addr storage */ 196 u_int8_t msdl[DLIL_SDLMAXLEN]; /* mask storage */ 197 } dl_if_lladdr; 198 u_int8_t dl_if_descstorage[IF_DESCSIZE]; /* desc storage */ 199 struct dlil_threading_info dl_if_inpstorage; /* input thread storage */ 200 ctrace_t dl_if_attach; /* attach PC stacktrace */ 201 ctrace_t dl_if_detach; /* detach PC stacktrace */ 202}; 203 204/* Values for dl_if_flags (private to DLIL) */ 205#define DLIF_INUSE 0x1 /* DLIL ifnet recycler, ifnet in use */ 206#define DLIF_REUSE 0x2 /* DLIL ifnet recycles, ifnet is not new */ 207#define DLIF_DEBUG 0x4 /* has debugging info */ 208 209#define IF_REF_TRACE_HIST_SIZE 8 /* size of ref trace history */ 210 211/* For gdb */ 212__private_extern__ unsigned int if_ref_trace_hist_size = IF_REF_TRACE_HIST_SIZE; 213 214struct dlil_ifnet_dbg { 215 struct dlil_ifnet dldbg_dlif; /* dlil_ifnet */ 216 u_int16_t dldbg_if_refhold_cnt; /* # ifnet references */ 217 u_int16_t dldbg_if_refrele_cnt; /* # ifnet releases */ 218 /* 219 * Circular lists of ifnet_{reference,release} callers. 220 */ 221 ctrace_t dldbg_if_refhold[IF_REF_TRACE_HIST_SIZE]; 222 ctrace_t dldbg_if_refrele[IF_REF_TRACE_HIST_SIZE]; 223}; 224 225#define DLIL_TO_IFP(s) (&s->dl_if) 226#define IFP_TO_DLIL(s) ((struct dlil_ifnet *)s) 227 228struct ifnet_filter { 229 TAILQ_ENTRY(ifnet_filter) filt_next; 230 u_int32_t filt_skip; 231 ifnet_t filt_ifp; 232 const char *filt_name; 233 void *filt_cookie; 234 protocol_family_t filt_protocol; 235 iff_input_func filt_input; 236 iff_output_func filt_output; 237 iff_event_func filt_event; 238 iff_ioctl_func filt_ioctl; 239 iff_detached_func filt_detached; 240}; 241 242struct proto_input_entry; 243 244static TAILQ_HEAD(, dlil_ifnet) dlil_ifnet_head; 245static lck_grp_t *dlil_lock_group; 246lck_grp_t *ifnet_lock_group; 247static lck_grp_t *ifnet_head_lock_group; 248static lck_grp_t *ifnet_snd_lock_group; 249static lck_grp_t *ifnet_rcv_lock_group; 250lck_attr_t *ifnet_lock_attr; 251decl_lck_rw_data(static, ifnet_head_lock); 252decl_lck_mtx_data(static, dlil_ifnet_lock); 253u_int32_t dlil_filter_count = 0; 254extern u_int32_t ipv4_ll_arp_aware; 255 256struct sfb_fc_list ifnet_fclist; 257decl_lck_mtx_data(static, ifnet_fclist_lock); 258 259static unsigned int ifnet_fcezone_size; /* size of ifnet_fce */ 260static struct zone *ifnet_fcezone; /* zone for ifnet_fce */ 261 262#define IFNET_FCEZONE_MAX 32 /* maximum elements in zone */ 263#define IFNET_FCEZONE_NAME "ifnet_fcezone" /* zone name */ 264 265static void ifnet_fc_thread_func(void *, wait_result_t); 266static void ifnet_fc_init(void); 267 268#if DEBUG 269static unsigned int ifnet_debug = 1; /* debugging (enabled) */ 270#else 271static unsigned int ifnet_debug; /* debugging (disabled) */ 272#endif /* !DEBUG */ 273static unsigned int dlif_size; /* size of dlil_ifnet to allocate */ 274static unsigned int dlif_bufsize; /* size of dlif_size + headroom */ 275static struct zone *dlif_zone; /* zone for dlil_ifnet */ 276 277#define DLIF_ZONE_MAX 64 /* maximum elements in zone */ 278#define DLIF_ZONE_NAME "ifnet" /* zone name */ 279 280static unsigned int dlif_filt_size; /* size of ifnet_filter */ 281static struct zone *dlif_filt_zone; /* zone for ifnet_filter */ 282 283#define DLIF_FILT_ZONE_MAX 8 /* maximum elements in zone */ 284#define DLIF_FILT_ZONE_NAME "ifnet_filter" /* zone name */ 285 286static unsigned int dlif_phash_size; /* size of ifnet proto hash table */ 287static struct zone *dlif_phash_zone; /* zone for ifnet proto hash table */ 288 289#define DLIF_PHASH_ZONE_MAX DLIF_ZONE_MAX /* maximum elements in zone */ 290#define DLIF_PHASH_ZONE_NAME "ifnet_proto_hash" /* zone name */ 291 292static unsigned int dlif_proto_size; /* size of if_proto */ 293static struct zone *dlif_proto_zone; /* zone for if_proto */ 294 295#define DLIF_PROTO_ZONE_MAX (DLIF_ZONE_MAX*2) /* maximum elements in zone */ 296#define DLIF_PROTO_ZONE_NAME "ifnet_proto" /* zone name */ 297 298static unsigned int dlif_tcpstat_size; /* size of tcpstat_local to allocate */ 299static unsigned int dlif_tcpstat_bufsize; /* size of dlif_tcpstat_size + headroom */ 300static struct zone *dlif_tcpstat_zone; /* zone for tcpstat_local */ 301 302#define DLIF_TCPSTAT_ZONE_MAX 1 /* maximum elements in zone */ 303#define DLIF_TCPSTAT_ZONE_NAME "ifnet_tcpstat" /* zone name */ 304 305static unsigned int dlif_udpstat_size; /* size of udpstat_local to allocate */ 306static unsigned int dlif_udpstat_bufsize; /* size of dlif_udpstat_size + headroom */ 307static struct zone *dlif_udpstat_zone; /* zone for udpstat_local */ 308 309#define DLIF_UDPSTAT_ZONE_MAX 1 /* maximum elements in zone */ 310#define DLIF_UDPSTAT_ZONE_NAME "ifnet_udpstat" /* zone name */ 311 312/* 313 * Updating this variable should be done by first acquiring the global 314 * radix node head (rnh_lock), in tandem with settting/clearing the 315 * PR_AGGDRAIN for routedomain. 316 */ 317u_int32_t ifnet_aggressive_drainers; 318static u_int32_t net_rtref; 319 320static struct dlil_main_threading_info dlil_main_input_thread_info; 321__private_extern__ struct dlil_threading_info *dlil_main_input_thread = 322 (struct dlil_threading_info *)&dlil_main_input_thread_info; 323 324static int dlil_event_internal(struct ifnet *ifp, struct kev_msg *msg); 325static int dlil_detach_filter_internal(interface_filter_t filter, int detached); 326static void dlil_if_trace(struct dlil_ifnet *, int); 327static void if_proto_ref(struct if_proto *); 328static void if_proto_free(struct if_proto *); 329static struct if_proto *find_attached_proto(struct ifnet *, u_int32_t); 330static int dlil_ifp_proto_count(struct ifnet *); 331static void if_flt_monitor_busy(struct ifnet *); 332static void if_flt_monitor_unbusy(struct ifnet *); 333static void if_flt_monitor_enter(struct ifnet *); 334static void if_flt_monitor_leave(struct ifnet *); 335static int dlil_interface_filters_input(struct ifnet *, struct mbuf **, 336 char **, protocol_family_t); 337static int dlil_interface_filters_output(struct ifnet *, struct mbuf **, 338 protocol_family_t); 339static struct ifaddr *dlil_alloc_lladdr(struct ifnet *, 340 const struct sockaddr_dl *); 341static int ifnet_lookup(struct ifnet *); 342static void if_purgeaddrs(struct ifnet *); 343 344static errno_t ifproto_media_input_v1(struct ifnet *, protocol_family_t, 345 struct mbuf *, char *); 346static errno_t ifproto_media_input_v2(struct ifnet *, protocol_family_t, 347 struct mbuf *); 348static errno_t ifproto_media_preout(struct ifnet *, protocol_family_t, 349 mbuf_t *, const struct sockaddr *, void *, char *, char *); 350static void ifproto_media_event(struct ifnet *, protocol_family_t, 351 const struct kev_msg *); 352static errno_t ifproto_media_ioctl(struct ifnet *, protocol_family_t, 353 unsigned long, void *); 354static errno_t ifproto_media_resolve_multi(ifnet_t, const struct sockaddr *, 355 struct sockaddr_dl *, size_t); 356static errno_t ifproto_media_send_arp(struct ifnet *, u_short, 357 const struct sockaddr_dl *, const struct sockaddr *, 358 const struct sockaddr_dl *, const struct sockaddr *); 359 360static errno_t ifp_if_output(struct ifnet *, struct mbuf *); 361static void ifp_if_start(struct ifnet *); 362static void ifp_if_input_poll(struct ifnet *, u_int32_t, u_int32_t, 363 struct mbuf **, struct mbuf **, u_int32_t *, u_int32_t *); 364static errno_t ifp_if_ctl(struct ifnet *, ifnet_ctl_cmd_t, u_int32_t, void *); 365static errno_t ifp_if_demux(struct ifnet *, struct mbuf *, char *, 366 protocol_family_t *); 367static errno_t ifp_if_add_proto(struct ifnet *, protocol_family_t, 368 const struct ifnet_demux_desc *, u_int32_t); 369static errno_t ifp_if_del_proto(struct ifnet *, protocol_family_t); 370static errno_t ifp_if_check_multi(struct ifnet *, const struct sockaddr *); 371static errno_t ifp_if_framer(struct ifnet *, struct mbuf **, 372 const struct sockaddr *, const char *, const char * 373#if CONFIG_EMBEDDED 374 , 375 u_int32_t *, u_int32_t * 376#endif /* CONFIG_EMBEDDED */ 377 ); 378static errno_t ifp_if_set_bpf_tap(struct ifnet *, bpf_tap_mode, bpf_packet_func); 379static void ifp_if_free(struct ifnet *); 380static void ifp_if_event(struct ifnet *, const struct kev_msg *); 381static __inline void ifp_inc_traffic_class_in(struct ifnet *, struct mbuf *); 382static __inline void ifp_inc_traffic_class_out(struct ifnet *, struct mbuf *); 383 384static void dlil_main_input_thread_func(void *, wait_result_t); 385static void dlil_input_thread_func(void *, wait_result_t); 386static void dlil_rxpoll_input_thread_func(void *, wait_result_t); 387static void dlil_rxpoll_calc_limits(struct dlil_threading_info *); 388static int dlil_create_input_thread(ifnet_t, struct dlil_threading_info *); 389static void dlil_terminate_input_thread(struct dlil_threading_info *); 390static void dlil_input_stats_add(const struct ifnet_stat_increment_param *, 391 struct dlil_threading_info *, boolean_t); 392static void dlil_input_stats_sync(struct ifnet *, struct dlil_threading_info *); 393static void dlil_input_packet_list_common(struct ifnet *, struct mbuf *, 394 u_int32_t, ifnet_model_t, boolean_t); 395static errno_t ifnet_input_common(struct ifnet *, struct mbuf *, struct mbuf *, 396 const struct ifnet_stat_increment_param *, boolean_t, boolean_t); 397 398static void ifnet_detacher_thread_func(void *, wait_result_t); 399static int ifnet_detacher_thread_cont(int); 400static void ifnet_detach_final(struct ifnet *); 401static void ifnet_detaching_enqueue(struct ifnet *); 402static struct ifnet *ifnet_detaching_dequeue(void); 403 404static void ifnet_start_thread_fn(void *, wait_result_t); 405static void ifnet_poll_thread_fn(void *, wait_result_t); 406static void ifnet_poll(struct ifnet *); 407 408static void ifp_src_route_copyout(struct ifnet *, struct route *); 409static void ifp_src_route_copyin(struct ifnet *, struct route *); 410#if INET6 411static void ifp_src_route6_copyout(struct ifnet *, struct route_in6 *); 412static void ifp_src_route6_copyin(struct ifnet *, struct route_in6 *); 413#endif /* INET6 */ 414 415static int sysctl_rxpoll SYSCTL_HANDLER_ARGS; 416static int sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS; 417static int sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS; 418 419/* The following are protected by dlil_ifnet_lock */ 420static TAILQ_HEAD(, ifnet) ifnet_detaching_head; 421static u_int32_t ifnet_detaching_cnt; 422static void *ifnet_delayed_run; /* wait channel for detaching thread */ 423 424extern void bpfdetach(struct ifnet*); 425extern void proto_input_run(void); 426 427extern uint32_t udp_count_opportunistic(unsigned int ifindex, 428 u_int32_t flags); 429extern uint32_t tcp_count_opportunistic(unsigned int ifindex, 430 u_int32_t flags); 431 432__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *); 433 434#if DEBUG 435static int dlil_verbose = 1; 436#else 437static int dlil_verbose = 0; 438#endif /* DEBUG */ 439#if IFNET_INPUT_SANITY_CHK 440/* sanity checking of input packet lists received */ 441static u_int32_t dlil_input_sanity_check = 0; 442#endif /* IFNET_INPUT_SANITY_CHK */ 443/* rate limit debug messages */ 444struct timespec dlil_dbgrate = { 1, 0 }; 445 446SYSCTL_DECL(_net_link_generic_system); 447 448SYSCTL_INT(_net_link_generic_system, OID_AUTO, dlil_verbose, 449 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_verbose, 0, "Log DLIL error messages"); 450 451#define IF_SNDQ_MINLEN 32 452u_int32_t if_sndq_maxlen = IFQ_MAXLEN; 453SYSCTL_PROC(_net_link_generic_system, OID_AUTO, sndq_maxlen, 454 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_sndq_maxlen, IFQ_MAXLEN, 455 sysctl_sndq_maxlen, "I", "Default transmit queue max length"); 456 457#define IF_RCVQ_MINLEN 32 458#define IF_RCVQ_MAXLEN 256 459u_int32_t if_rcvq_maxlen = IF_RCVQ_MAXLEN; 460SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rcvq_maxlen, 461 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rcvq_maxlen, IFQ_MAXLEN, 462 sysctl_rcvq_maxlen, "I", "Default receive queue max length"); 463 464#define IF_RXPOLL_DECAY 2 /* ilog2 of EWMA decay rate (4) */ 465static u_int32_t if_rxpoll_decay = IF_RXPOLL_DECAY; 466SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_decay, 467 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_decay, IF_RXPOLL_DECAY, 468 "ilog2 of EWMA decay rate of avg inbound packets"); 469 470#define IF_RXPOLL_MODE_HOLDTIME (1000ULL * 1000 * 1000) /* 1 sec */ 471static u_int64_t if_rxpoll_mode_holdtime = IF_RXPOLL_MODE_HOLDTIME; 472SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_freeze_time, 473 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_mode_holdtime, 474 "input poll mode freeze time"); 475 476#define IF_RXPOLL_SAMPLETIME (10ULL * 1000 * 1000) /* 10 ms */ 477static u_int64_t if_rxpoll_sample_holdtime = IF_RXPOLL_SAMPLETIME; 478SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_sample_time, 479 CTLFLAG_RD | CTLFLAG_LOCKED, &if_rxpoll_sample_holdtime, 480 "input poll sampling time"); 481 482#define IF_RXPOLL_INTERVAL_TIME (1ULL * 1000 * 1000) /* 1 ms */ 483static u_int64_t if_rxpoll_interval_time = IF_RXPOLL_INTERVAL_TIME; 484SYSCTL_QUAD(_net_link_generic_system, OID_AUTO, rxpoll_interval_time, 485 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_time, 486 "input poll interval (time)"); 487 488#define IF_RXPOLL_INTERVAL_PKTS 0 /* 0 (disabled) */ 489static u_int32_t if_rxpoll_interval_pkts = IF_RXPOLL_INTERVAL_PKTS; 490SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_interval_pkts, 491 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_interval_pkts, 492 IF_RXPOLL_INTERVAL_PKTS, "input poll interval (packets)"); 493 494#define IF_RXPOLL_WLOWAT 5 495static u_int32_t if_rxpoll_wlowat = IF_RXPOLL_WLOWAT; 496SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_lowat, 497 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_wlowat, IF_RXPOLL_WLOWAT, 498 "input poll wakeup low watermark"); 499 500#define IF_RXPOLL_WHIWAT 100 501static u_int32_t if_rxpoll_whiwat = IF_RXPOLL_WHIWAT; 502SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_wakeups_hiwat, 503 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_whiwat, IF_RXPOLL_WHIWAT, 504 "input poll wakeup high watermark"); 505 506static u_int32_t if_rxpoll_max = 0; /* 0 (automatic) */ 507SYSCTL_UINT(_net_link_generic_system, OID_AUTO, rxpoll_max, 508 CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll_max, 0, 509 "max packets per poll call"); 510 511static u_int32_t if_rxpoll = 1; 512SYSCTL_PROC(_net_link_generic_system, OID_AUTO, rxpoll, 513 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &if_rxpoll, 0, 514 sysctl_rxpoll, "I", "enable opportunistic input polling"); 515 516u_int32_t if_bw_smoothing_val = 3; 517SYSCTL_UINT(_net_link_generic_system, OID_AUTO, if_bw_smoothing_val, 518 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_smoothing_val, 0, ""); 519 520u_int32_t if_bw_measure_size = 10; 521SYSCTL_INT(_net_link_generic_system, OID_AUTO, if_bw_measure_size, 522 CTLFLAG_RW | CTLFLAG_LOCKED, &if_bw_measure_size, 0, ""); 523 524static u_int32_t cur_dlil_input_threads = 0; 525SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_threads, 526 CTLFLAG_RD | CTLFLAG_LOCKED, &cur_dlil_input_threads , 0, 527 "Current number of DLIL input threads"); 528 529#if IFNET_INPUT_SANITY_CHK 530SYSCTL_UINT(_net_link_generic_system, OID_AUTO, dlil_input_sanity_check, 531 CTLFLAG_RW | CTLFLAG_LOCKED, &dlil_input_sanity_check , 0, 532 "Turn on sanity checking in DLIL input"); 533#endif /* IFNET_INPUT_SANITY_CHK */ 534 535static u_int32_t if_flowadv = 1; 536SYSCTL_UINT(_net_link_generic_system, OID_AUTO, flow_advisory, 537 CTLFLAG_RW | CTLFLAG_LOCKED, &if_flowadv, 1, 538 "enable flow-advisory mechanism"); 539 540unsigned int net_rxpoll = 1; 541unsigned int net_affinity = 1; 542static kern_return_t dlil_affinity_set(struct thread *, u_int32_t); 543 544extern u_int32_t inject_buckets; 545 546static lck_grp_attr_t *dlil_grp_attributes = NULL; 547static lck_attr_t *dlil_lck_attributes = NULL; 548 549#define PROTO_HASH_SLOTS 0x5 550 551#define DLIL_INPUT_CHECK(m, ifp) { \ 552 struct ifnet *_rcvif = mbuf_pkthdr_rcvif(m); \ 553 if (_rcvif == NULL || (ifp != lo_ifp && _rcvif != ifp) || \ 554 !(mbuf_flags(m) & MBUF_PKTHDR)) { \ 555 panic_plain("%s: invalid mbuf %p\n", __func__, m); \ 556 /* NOTREACHED */ \ 557 } \ 558} 559 560#define DLIL_EWMA(old, new, decay) do { \ 561 u_int32_t _avg; \ 562 if ((_avg = (old)) > 0) \ 563 _avg = (((_avg << (decay)) - _avg) + (new)) >> (decay); \ 564 else \ 565 _avg = (new); \ 566 (old) = _avg; \ 567} while (0) 568 569#define MBPS (1ULL * 1000 * 1000) 570#define GBPS (MBPS * 1000) 571 572struct rxpoll_time_tbl { 573 u_int64_t speed; /* downlink speed */ 574 u_int32_t plowat; /* packets low watermark */ 575 u_int32_t phiwat; /* packets high watermark */ 576 u_int32_t blowat; /* bytes low watermark */ 577 u_int32_t bhiwat; /* bytes high watermark */ 578}; 579 580static struct rxpoll_time_tbl rxpoll_tbl[] = { 581 { 10 * MBPS, 2, 8, (1 * 1024), (6 * 1024) }, 582 { 100 * MBPS, 10, 40, (4 * 1024), (64 * 1024) }, 583 { 1 * GBPS, 10, 40, (4 * 1024), (64 * 1024) }, 584 { 10 * GBPS, 10, 40, (4 * 1024), (64 * 1024) }, 585 { 100 * GBPS, 10, 40, (4 * 1024), (64 * 1024) }, 586 { 0, 0, 0, 0, 0 } 587}; 588 589/* 590 * Internal functions. 591 */ 592 593static int 594proto_hash_value(u_int32_t protocol_family) 595{ 596 /* 597 * dlil_proto_unplumb_all() depends on the mapping between 598 * the hash bucket index and the protocol family defined 599 * here; future changes must be applied there as well. 600 */ 601 switch(protocol_family) { 602 case PF_INET: 603 return (0); 604 case PF_INET6: 605 return (1); 606 case PF_APPLETALK: 607 return (2); 608 case PF_VLAN: 609 return (3); 610 case PF_UNSPEC: 611 default: 612 return (4); 613 } 614} 615 616/* 617 * Caller must already be holding ifnet lock. 618 */ 619static struct if_proto * 620find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family) 621{ 622 struct if_proto *proto = NULL; 623 u_int32_t i = proto_hash_value(protocol_family); 624 625 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED); 626 627 if (ifp->if_proto_hash != NULL) 628 proto = SLIST_FIRST(&ifp->if_proto_hash[i]); 629 630 while (proto != NULL && proto->protocol_family != protocol_family) 631 proto = SLIST_NEXT(proto, next_hash); 632 633 if (proto != NULL) 634 if_proto_ref(proto); 635 636 return (proto); 637} 638 639static void 640if_proto_ref(struct if_proto *proto) 641{ 642 atomic_add_32(&proto->refcount, 1); 643} 644 645extern void if_rtproto_del(struct ifnet *ifp, int protocol); 646 647static void 648if_proto_free(struct if_proto *proto) 649{ 650 u_int32_t oldval; 651 struct ifnet *ifp = proto->ifp; 652 u_int32_t proto_family = proto->protocol_family; 653 struct kev_dl_proto_data ev_pr_data; 654 655 oldval = atomic_add_32_ov(&proto->refcount, -1); 656 if (oldval > 1) 657 return; 658 659 /* No more reference on this, protocol must have been detached */ 660 VERIFY(proto->detached); 661 662 if (proto->proto_kpi == kProtoKPI_v1) { 663 if (proto->kpi.v1.detached) 664 proto->kpi.v1.detached(ifp, proto->protocol_family); 665 } 666 if (proto->proto_kpi == kProtoKPI_v2) { 667 if (proto->kpi.v2.detached) 668 proto->kpi.v2.detached(ifp, proto->protocol_family); 669 } 670 671 /* 672 * Cleanup routes that may still be in the routing table for that 673 * interface/protocol pair. 674 */ 675 if_rtproto_del(ifp, proto_family); 676 677 /* 678 * The reserved field carries the number of protocol still attached 679 * (subject to change) 680 */ 681 ifnet_lock_shared(ifp); 682 ev_pr_data.proto_family = proto_family; 683 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp); 684 ifnet_lock_done(ifp); 685 686 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED, 687 (struct net_event_data *)&ev_pr_data, 688 sizeof(struct kev_dl_proto_data)); 689 690 zfree(dlif_proto_zone, proto); 691} 692 693__private_extern__ void 694ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what) 695{ 696 unsigned int type = 0; 697 int ass = 1; 698 699 switch (what) { 700 case IFNET_LCK_ASSERT_EXCLUSIVE: 701 type = LCK_RW_ASSERT_EXCLUSIVE; 702 break; 703 704 case IFNET_LCK_ASSERT_SHARED: 705 type = LCK_RW_ASSERT_SHARED; 706 break; 707 708 case IFNET_LCK_ASSERT_OWNED: 709 type = LCK_RW_ASSERT_HELD; 710 break; 711 712 case IFNET_LCK_ASSERT_NOTOWNED: 713 /* nothing to do here for RW lock; bypass assert */ 714 ass = 0; 715 break; 716 717 default: 718 panic("bad ifnet assert type: %d", what); 719 /* NOTREACHED */ 720 } 721 if (ass) 722 lck_rw_assert(&ifp->if_lock, type); 723} 724 725__private_extern__ void 726ifnet_lock_shared(struct ifnet *ifp) 727{ 728 lck_rw_lock_shared(&ifp->if_lock); 729} 730 731__private_extern__ void 732ifnet_lock_exclusive(struct ifnet *ifp) 733{ 734 lck_rw_lock_exclusive(&ifp->if_lock); 735} 736 737__private_extern__ void 738ifnet_lock_done(struct ifnet *ifp) 739{ 740 lck_rw_done(&ifp->if_lock); 741} 742 743__private_extern__ void 744ifnet_head_lock_shared(void) 745{ 746 lck_rw_lock_shared(&ifnet_head_lock); 747} 748 749__private_extern__ void 750ifnet_head_lock_exclusive(void) 751{ 752 lck_rw_lock_exclusive(&ifnet_head_lock); 753} 754 755__private_extern__ void 756ifnet_head_done(void) 757{ 758 lck_rw_done(&ifnet_head_lock); 759} 760 761/* 762 * Caller must already be holding ifnet lock. 763 */ 764static int 765dlil_ifp_proto_count(struct ifnet * ifp) 766{ 767 int i, count = 0; 768 769 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED); 770 771 if (ifp->if_proto_hash == NULL) 772 goto done; 773 774 for (i = 0; i < PROTO_HASH_SLOTS; i++) { 775 struct if_proto *proto; 776 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) { 777 count++; 778 } 779 } 780done: 781 return (count); 782} 783 784__private_extern__ void 785dlil_post_msg(struct ifnet *ifp, u_int32_t event_subclass, 786 u_int32_t event_code, struct net_event_data *event_data, 787 u_int32_t event_data_len) 788{ 789 struct net_event_data ev_data; 790 struct kev_msg ev_msg; 791 792 bzero(&ev_msg, sizeof (ev_msg)); 793 bzero(&ev_data, sizeof (ev_data)); 794 /* 795 * a net event always starts with a net_event_data structure 796 * but the caller can generate a simple net event or 797 * provide a longer event structure to post 798 */ 799 ev_msg.vendor_code = KEV_VENDOR_APPLE; 800 ev_msg.kev_class = KEV_NETWORK_CLASS; 801 ev_msg.kev_subclass = event_subclass; 802 ev_msg.event_code = event_code; 803 804 if (event_data == NULL) { 805 event_data = &ev_data; 806 event_data_len = sizeof(struct net_event_data); 807 } 808 809 strncpy(&event_data->if_name[0], ifp->if_name, IFNAMSIZ); 810 event_data->if_family = ifp->if_family; 811 event_data->if_unit = (u_int32_t) ifp->if_unit; 812 813 ev_msg.dv[0].data_length = event_data_len; 814 ev_msg.dv[0].data_ptr = event_data; 815 ev_msg.dv[1].data_length = 0; 816 817 dlil_event_internal(ifp, &ev_msg); 818} 819 820__private_extern__ int 821dlil_alloc_local_stats(struct ifnet *ifp) 822{ 823 int ret = EINVAL; 824 void *buf, *base, **pbuf; 825 826 if (ifp == NULL) 827 goto end; 828 829 if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) { 830 /* allocate tcpstat_local structure */ 831 buf = zalloc(dlif_tcpstat_zone); 832 if (buf == NULL) { 833 ret = ENOMEM; 834 goto end; 835 } 836 bzero(buf, dlif_tcpstat_bufsize); 837 838 /* Get the 64-bit aligned base address for this object */ 839 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t), 840 sizeof (u_int64_t)); 841 VERIFY(((intptr_t)base + dlif_tcpstat_size) <= 842 ((intptr_t)buf + dlif_tcpstat_bufsize)); 843 844 /* 845 * Wind back a pointer size from the aligned base and 846 * save the original address so we can free it later. 847 */ 848 pbuf = (void **)((intptr_t)base - sizeof (void *)); 849 *pbuf = buf; 850 ifp->if_tcp_stat = base; 851 852 /* allocate udpstat_local structure */ 853 buf = zalloc(dlif_udpstat_zone); 854 if (buf == NULL) { 855 ret = ENOMEM; 856 goto end; 857 } 858 bzero(buf, dlif_udpstat_bufsize); 859 860 /* Get the 64-bit aligned base address for this object */ 861 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t), 862 sizeof (u_int64_t)); 863 VERIFY(((intptr_t)base + dlif_udpstat_size) <= 864 ((intptr_t)buf + dlif_udpstat_bufsize)); 865 866 /* 867 * Wind back a pointer size from the aligned base and 868 * save the original address so we can free it later. 869 */ 870 pbuf = (void **)((intptr_t)base - sizeof (void *)); 871 *pbuf = buf; 872 ifp->if_udp_stat = base; 873 874 VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof (u_int64_t)) && 875 IS_P2ALIGNED(ifp->if_udp_stat, sizeof (u_int64_t))); 876 877 ret = 0; 878 } 879 880end: 881 if (ret != 0) { 882 if (ifp->if_tcp_stat != NULL) { 883 pbuf = (void **) 884 ((intptr_t)ifp->if_tcp_stat - sizeof (void *)); 885 zfree(dlif_tcpstat_zone, *pbuf); 886 ifp->if_tcp_stat = NULL; 887 } 888 if (ifp->if_udp_stat != NULL) { 889 pbuf = (void **) 890 ((intptr_t)ifp->if_udp_stat - sizeof (void *)); 891 zfree(dlif_udpstat_zone, *pbuf); 892 ifp->if_udp_stat = NULL; 893 } 894 } 895 896 return (ret); 897} 898 899static int 900dlil_create_input_thread(ifnet_t ifp, struct dlil_threading_info *inp) 901{ 902 thread_continue_t func; 903 u_int32_t limit; 904 int error; 905 906 /* NULL ifp indicates the main input thread, called at dlil_init time */ 907 if (ifp == NULL) { 908 func = dlil_main_input_thread_func; 909 VERIFY(inp == dlil_main_input_thread); 910 (void) strlcat(inp->input_name, 911 "main_input", DLIL_THREADNAME_LEN); 912 } else if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) { 913 func = dlil_rxpoll_input_thread_func; 914 VERIFY(inp != dlil_main_input_thread); 915 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN, 916 "%s%d_input_poll", ifp->if_name, ifp->if_unit); 917 } else { 918 func = dlil_input_thread_func; 919 VERIFY(inp != dlil_main_input_thread); 920 (void) snprintf(inp->input_name, DLIL_THREADNAME_LEN, 921 "%s%d_input", ifp->if_name, ifp->if_unit); 922 } 923 VERIFY(inp->input_thr == THREAD_NULL); 924 925 inp->lck_grp = lck_grp_alloc_init(inp->input_name, dlil_grp_attributes); 926 lck_mtx_init(&inp->input_lck, inp->lck_grp, dlil_lck_attributes); 927 928 inp->mode = IFNET_MODEL_INPUT_POLL_OFF; 929 inp->ifp = ifp; /* NULL for main input thread */ 930 931 net_timerclear(&inp->mode_holdtime); 932 net_timerclear(&inp->mode_lasttime); 933 net_timerclear(&inp->sample_holdtime); 934 net_timerclear(&inp->sample_lasttime); 935 net_timerclear(&inp->dbg_lasttime); 936 937 /* 938 * For interfaces that support opportunistic polling, set the 939 * low and high watermarks for outstanding inbound packets/bytes. 940 * Also define freeze times for transitioning between modes 941 * and updating the average. 942 */ 943 if (ifp != NULL && net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) { 944 limit = MAX(if_rcvq_maxlen, IF_RCVQ_MINLEN); 945 dlil_rxpoll_calc_limits(inp); 946 } else { 947 limit = (u_int32_t)-1; 948 } 949 950 _qinit(&inp->rcvq_pkts, Q_DROPTAIL, limit); 951 if (inp == dlil_main_input_thread) { 952 struct dlil_main_threading_info *inpm = 953 (struct dlil_main_threading_info *)inp; 954 _qinit(&inpm->lo_rcvq_pkts, Q_DROPTAIL, limit); 955 } 956 957 error = kernel_thread_start(func, inp, &inp->input_thr); 958 if (error == KERN_SUCCESS) { 959 ml_thread_policy(inp->input_thr, MACHINE_GROUP, 960 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_NETISR)); 961 /* 962 * We create an affinity set so that the matching workloop 963 * thread or the starter thread (for loopback) can be 964 * scheduled on the same processor set as the input thread. 965 */ 966 if (net_affinity) { 967 struct thread *tp = inp->input_thr; 968 u_int32_t tag; 969 /* 970 * Randomize to reduce the probability 971 * of affinity tag namespace collision. 972 */ 973 read_random(&tag, sizeof (tag)); 974 if (dlil_affinity_set(tp, tag) == KERN_SUCCESS) { 975 thread_reference(tp); 976 inp->tag = tag; 977 inp->net_affinity = TRUE; 978 } 979 } 980 } else if (inp == dlil_main_input_thread) { 981 panic_plain("%s: couldn't create main input thread", __func__); 982 /* NOTREACHED */ 983 } else { 984 panic_plain("%s: couldn't create %s%d input thread", __func__, 985 ifp->if_name, ifp->if_unit); 986 /* NOTREACHED */ 987 } 988 OSAddAtomic(1, &cur_dlil_input_threads); 989 990 return (error); 991} 992 993static void 994dlil_terminate_input_thread(struct dlil_threading_info *inp) 995{ 996 struct ifnet *ifp; 997 998 VERIFY(current_thread() == inp->input_thr); 999 VERIFY(inp != dlil_main_input_thread); 1000 1001 OSAddAtomic(-1, &cur_dlil_input_threads); 1002 1003 lck_mtx_destroy(&inp->input_lck, inp->lck_grp); 1004 lck_grp_free(inp->lck_grp); 1005 1006 inp->input_waiting = 0; 1007 inp->wtot = 0; 1008 bzero(inp->input_name, sizeof (inp->input_name)); 1009 ifp = inp->ifp; 1010 inp->ifp = NULL; 1011 VERIFY(qhead(&inp->rcvq_pkts) == NULL && qempty(&inp->rcvq_pkts)); 1012 qlimit(&inp->rcvq_pkts) = 0; 1013 bzero(&inp->stats, sizeof (inp->stats)); 1014 1015 VERIFY(!inp->net_affinity); 1016 inp->input_thr = THREAD_NULL; 1017 VERIFY(inp->wloop_thr == THREAD_NULL); 1018 VERIFY(inp->poll_thr == THREAD_NULL); 1019 VERIFY(inp->tag == 0); 1020 1021 inp->mode = IFNET_MODEL_INPUT_POLL_OFF; 1022 bzero(&inp->tstats, sizeof (inp->tstats)); 1023 bzero(&inp->pstats, sizeof (inp->pstats)); 1024 bzero(&inp->sstats, sizeof (inp->sstats)); 1025 1026 net_timerclear(&inp->mode_holdtime); 1027 net_timerclear(&inp->mode_lasttime); 1028 net_timerclear(&inp->sample_holdtime); 1029 net_timerclear(&inp->sample_lasttime); 1030 net_timerclear(&inp->dbg_lasttime); 1031 1032#if IFNET_INPUT_SANITY_CHK 1033 inp->input_mbuf_cnt = 0; 1034#endif /* IFNET_INPUT_SANITY_CHK */ 1035 1036 if (dlil_verbose) { 1037 printf("%s%d: input thread terminated\n", 1038 ifp->if_name, ifp->if_unit); 1039 } 1040 1041 /* for the extra refcnt from kernel_thread_start() */ 1042 thread_deallocate(current_thread()); 1043 1044 /* this is the end */ 1045 thread_terminate(current_thread()); 1046 /* NOTREACHED */ 1047} 1048 1049static kern_return_t 1050dlil_affinity_set(struct thread *tp, u_int32_t tag) 1051{ 1052 thread_affinity_policy_data_t policy; 1053 1054 bzero(&policy, sizeof (policy)); 1055 policy.affinity_tag = tag; 1056 return (thread_policy_set(tp, THREAD_AFFINITY_POLICY, 1057 (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT)); 1058} 1059 1060void 1061dlil_init(void) 1062{ 1063 thread_t thread = THREAD_NULL; 1064 1065 /* 1066 * The following fields must be 64-bit aligned for atomic operations. 1067 */ 1068 IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets); 1069 IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors) 1070 IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets); 1071 IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors); 1072 IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions); 1073 IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes); 1074 IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes); 1075 IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts); 1076 IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts); 1077 IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops); 1078 IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto); 1079 IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs); 1080 1081 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ipackets); 1082 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ierrors) 1083 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_opackets); 1084 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_oerrors); 1085 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_collisions); 1086 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_ibytes); 1087 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_obytes); 1088 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_imcasts); 1089 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_omcasts); 1090 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_iqdrops); 1091 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_noproto); 1092 IFNET_IF_DATA_REQUIRE_ALIGNED_64(ifi_alignerrs); 1093 1094 /* 1095 * These IF_HWASSIST_ flags must be equal to their IFNET_* counterparts. 1096 */ 1097 _CASSERT(IF_HWASSIST_CSUM_IP == IFNET_CSUM_IP); 1098 _CASSERT(IF_HWASSIST_CSUM_TCP == IFNET_CSUM_TCP); 1099 _CASSERT(IF_HWASSIST_CSUM_UDP == IFNET_CSUM_UDP); 1100 _CASSERT(IF_HWASSIST_CSUM_IP_FRAGS == IFNET_CSUM_FRAGMENT); 1101 _CASSERT(IF_HWASSIST_CSUM_FRAGMENT == IFNET_IP_FRAGMENT); 1102 _CASSERT(IF_HWASSIST_CSUM_TCP_SUM16 == IFNET_CSUM_SUM16); 1103 _CASSERT(IF_HWASSIST_VLAN_TAGGING == IFNET_VLAN_TAGGING); 1104 _CASSERT(IF_HWASSIST_VLAN_MTU == IFNET_VLAN_MTU); 1105 _CASSERT(IF_HWASSIST_TSO_V4 == IFNET_TSO_IPV4); 1106 _CASSERT(IF_HWASSIST_TSO_V6 == IFNET_TSO_IPV6); 1107 1108 /* 1109 * Make sure we have at least IF_LLREACH_MAXLEN in the llreach info. 1110 */ 1111 _CASSERT(IF_LLREACH_MAXLEN <= IF_LLREACHINFO_ADDRLEN); 1112 _CASSERT(IFNET_LLREACHINFO_ADDRLEN == IF_LLREACHINFO_ADDRLEN); 1113 1114 PE_parse_boot_argn("net_affinity", &net_affinity, 1115 sizeof (net_affinity)); 1116 1117 PE_parse_boot_argn("net_rxpoll", &net_rxpoll, sizeof (net_rxpoll)); 1118 1119 PE_parse_boot_argn("net_rtref", &net_rtref, sizeof (net_rtref)); 1120 1121 PE_parse_boot_argn("ifnet_debug", &ifnet_debug, sizeof (ifnet_debug)); 1122 1123 dlif_size = (ifnet_debug == 0) ? sizeof (struct dlil_ifnet) : 1124 sizeof (struct dlil_ifnet_dbg); 1125 /* Enforce 64-bit alignment for dlil_ifnet structure */ 1126 dlif_bufsize = dlif_size + sizeof (void *) + sizeof (u_int64_t); 1127 dlif_bufsize = P2ROUNDUP(dlif_bufsize, sizeof (u_int64_t)); 1128 dlif_zone = zinit(dlif_bufsize, DLIF_ZONE_MAX * dlif_bufsize, 1129 0, DLIF_ZONE_NAME); 1130 if (dlif_zone == NULL) { 1131 panic_plain("%s: failed allocating %s", __func__, 1132 DLIF_ZONE_NAME); 1133 /* NOTREACHED */ 1134 } 1135 zone_change(dlif_zone, Z_EXPAND, TRUE); 1136 zone_change(dlif_zone, Z_CALLERACCT, FALSE); 1137 1138 dlif_filt_size = sizeof (struct ifnet_filter); 1139 dlif_filt_zone = zinit(dlif_filt_size, 1140 DLIF_FILT_ZONE_MAX * dlif_filt_size, 0, DLIF_FILT_ZONE_NAME); 1141 if (dlif_filt_zone == NULL) { 1142 panic_plain("%s: failed allocating %s", __func__, 1143 DLIF_FILT_ZONE_NAME); 1144 /* NOTREACHED */ 1145 } 1146 zone_change(dlif_filt_zone, Z_EXPAND, TRUE); 1147 zone_change(dlif_filt_zone, Z_CALLERACCT, FALSE); 1148 1149 dlif_phash_size = sizeof (struct proto_hash_entry) * PROTO_HASH_SLOTS; 1150 dlif_phash_zone = zinit(dlif_phash_size, 1151 DLIF_PHASH_ZONE_MAX * dlif_phash_size, 0, DLIF_PHASH_ZONE_NAME); 1152 if (dlif_phash_zone == NULL) { 1153 panic_plain("%s: failed allocating %s", __func__, 1154 DLIF_PHASH_ZONE_NAME); 1155 /* NOTREACHED */ 1156 } 1157 zone_change(dlif_phash_zone, Z_EXPAND, TRUE); 1158 zone_change(dlif_phash_zone, Z_CALLERACCT, FALSE); 1159 1160 dlif_proto_size = sizeof (struct if_proto); 1161 dlif_proto_zone = zinit(dlif_proto_size, 1162 DLIF_PROTO_ZONE_MAX * dlif_proto_size, 0, DLIF_PROTO_ZONE_NAME); 1163 if (dlif_proto_zone == NULL) { 1164 panic_plain("%s: failed allocating %s", __func__, 1165 DLIF_PROTO_ZONE_NAME); 1166 /* NOTREACHED */ 1167 } 1168 zone_change(dlif_proto_zone, Z_EXPAND, TRUE); 1169 zone_change(dlif_proto_zone, Z_CALLERACCT, FALSE); 1170 1171 dlif_tcpstat_size = sizeof (struct tcpstat_local); 1172 /* Enforce 64-bit alignment for tcpstat_local structure */ 1173 dlif_tcpstat_bufsize = 1174 dlif_tcpstat_size + sizeof (void *) + sizeof (u_int64_t); 1175 dlif_tcpstat_bufsize = 1176 P2ROUNDUP(dlif_tcpstat_bufsize, sizeof (u_int64_t)); 1177 dlif_tcpstat_zone = zinit(dlif_tcpstat_bufsize, 1178 DLIF_TCPSTAT_ZONE_MAX * dlif_tcpstat_bufsize, 0, 1179 DLIF_TCPSTAT_ZONE_NAME); 1180 if (dlif_tcpstat_zone == NULL) { 1181 panic_plain("%s: failed allocating %s", __func__, 1182 DLIF_TCPSTAT_ZONE_NAME); 1183 /* NOTREACHED */ 1184 } 1185 zone_change(dlif_tcpstat_zone, Z_EXPAND, TRUE); 1186 zone_change(dlif_tcpstat_zone, Z_CALLERACCT, FALSE); 1187 1188 dlif_udpstat_size = sizeof (struct udpstat_local); 1189 /* Enforce 64-bit alignment for udpstat_local structure */ 1190 dlif_udpstat_bufsize = 1191 dlif_udpstat_size + sizeof (void *) + sizeof (u_int64_t); 1192 dlif_udpstat_bufsize = 1193 P2ROUNDUP(dlif_udpstat_bufsize, sizeof (u_int64_t)); 1194 dlif_udpstat_zone = zinit(dlif_udpstat_bufsize, 1195 DLIF_TCPSTAT_ZONE_MAX * dlif_udpstat_bufsize, 0, 1196 DLIF_UDPSTAT_ZONE_NAME); 1197 if (dlif_udpstat_zone == NULL) { 1198 panic_plain("%s: failed allocating %s", __func__, 1199 DLIF_UDPSTAT_ZONE_NAME); 1200 /* NOTREACHED */ 1201 } 1202 zone_change(dlif_udpstat_zone, Z_EXPAND, TRUE); 1203 zone_change(dlif_udpstat_zone, Z_CALLERACCT, FALSE); 1204 1205 ifnet_llreach_init(); 1206 1207 TAILQ_INIT(&dlil_ifnet_head); 1208 TAILQ_INIT(&ifnet_head); 1209 TAILQ_INIT(&ifnet_detaching_head); 1210 1211 /* Setup the lock groups we will use */ 1212 dlil_grp_attributes = lck_grp_attr_alloc_init(); 1213 1214 dlil_lock_group = lck_grp_alloc_init("DLIL internal locks", 1215 dlil_grp_attributes); 1216 ifnet_lock_group = lck_grp_alloc_init("ifnet locks", 1217 dlil_grp_attributes); 1218 ifnet_head_lock_group = lck_grp_alloc_init("ifnet head lock", 1219 dlil_grp_attributes); 1220 ifnet_rcv_lock_group = lck_grp_alloc_init("ifnet rcv locks", 1221 dlil_grp_attributes); 1222 ifnet_snd_lock_group = lck_grp_alloc_init("ifnet snd locks", 1223 dlil_grp_attributes); 1224 1225 /* Setup the lock attributes we will use */ 1226 dlil_lck_attributes = lck_attr_alloc_init(); 1227 1228 ifnet_lock_attr = lck_attr_alloc_init(); 1229 1230 lck_rw_init(&ifnet_head_lock, ifnet_head_lock_group, 1231 dlil_lck_attributes); 1232 lck_mtx_init(&dlil_ifnet_lock, dlil_lock_group, dlil_lck_attributes); 1233 1234 ifnet_fc_init(); 1235 1236 lck_attr_free(dlil_lck_attributes); 1237 dlil_lck_attributes = NULL; 1238 1239 ifa_init(); 1240 /* 1241 * Create and start up the main DLIL input thread and the interface 1242 * detacher threads once everything is initialized. 1243 */ 1244 dlil_create_input_thread(NULL, dlil_main_input_thread); 1245 1246 if (kernel_thread_start(ifnet_detacher_thread_func, 1247 NULL, &thread) != KERN_SUCCESS) { 1248 panic_plain("%s: couldn't create detacher thread", __func__); 1249 /* NOTREACHED */ 1250 } 1251 thread_deallocate(thread); 1252 1253#if PF 1254 /* Initialize the packet filter */ 1255 pfinit(); 1256#endif /* PF */ 1257 1258 /* Initialize queue algorithms */ 1259 classq_init(); 1260 1261 /* Initialize packet schedulers */ 1262 pktsched_init(); 1263} 1264 1265static void 1266if_flt_monitor_busy(struct ifnet *ifp) 1267{ 1268 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED); 1269 1270 ++ifp->if_flt_busy; 1271 VERIFY(ifp->if_flt_busy != 0); 1272} 1273 1274static void 1275if_flt_monitor_unbusy(struct ifnet *ifp) 1276{ 1277 if_flt_monitor_leave(ifp); 1278} 1279 1280static void 1281if_flt_monitor_enter(struct ifnet *ifp) 1282{ 1283 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED); 1284 1285 while (ifp->if_flt_busy) { 1286 ++ifp->if_flt_waiters; 1287 (void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock, 1288 (PZERO - 1), "if_flt_monitor", NULL); 1289 } 1290 if_flt_monitor_busy(ifp); 1291} 1292 1293static void 1294if_flt_monitor_leave(struct ifnet *ifp) 1295{ 1296 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED); 1297 1298 VERIFY(ifp->if_flt_busy != 0); 1299 --ifp->if_flt_busy; 1300 1301 if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) { 1302 ifp->if_flt_waiters = 0; 1303 wakeup(&ifp->if_flt_head); 1304 } 1305} 1306 1307__private_extern__ int 1308dlil_attach_filter(struct ifnet *ifp, const struct iff_filter *if_filter, 1309 interface_filter_t *filter_ref) 1310{ 1311 int retval = 0; 1312 struct ifnet_filter *filter = NULL; 1313 1314 ifnet_head_lock_shared(); 1315 /* Check that the interface is in the global list */ 1316 if (!ifnet_lookup(ifp)) { 1317 retval = ENXIO; 1318 goto done; 1319 } 1320 1321 filter = zalloc(dlif_filt_zone); 1322 if (filter == NULL) { 1323 retval = ENOMEM; 1324 goto done; 1325 } 1326 bzero(filter, dlif_filt_size); 1327 1328 /* refcnt held above during lookup */ 1329 filter->filt_ifp = ifp; 1330 filter->filt_cookie = if_filter->iff_cookie; 1331 filter->filt_name = if_filter->iff_name; 1332 filter->filt_protocol = if_filter->iff_protocol; 1333 filter->filt_input = if_filter->iff_input; 1334 filter->filt_output = if_filter->iff_output; 1335 filter->filt_event = if_filter->iff_event; 1336 filter->filt_ioctl = if_filter->iff_ioctl; 1337 filter->filt_detached = if_filter->iff_detached; 1338 1339 lck_mtx_lock(&ifp->if_flt_lock); 1340 if_flt_monitor_enter(ifp); 1341 1342 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED); 1343 TAILQ_INSERT_TAIL(&ifp->if_flt_head, filter, filt_next); 1344 1345 if_flt_monitor_leave(ifp); 1346 lck_mtx_unlock(&ifp->if_flt_lock); 1347 1348 *filter_ref = filter; 1349 1350 /* 1351 * Bump filter count and route_generation ID to let TCP 1352 * know it shouldn't do TSO on this connection 1353 */ 1354 OSAddAtomic(1, &dlil_filter_count); 1355 if (use_routegenid) 1356 routegenid_update(); 1357 1358 if (dlil_verbose) { 1359 printf("%s%d: %s filter attached\n", ifp->if_name, 1360 ifp->if_unit, if_filter->iff_name); 1361 } 1362done: 1363 ifnet_head_done(); 1364 if (retval != 0 && ifp != NULL) { 1365 DLIL_PRINTF("%s%d: failed to attach %s (err=%d)\n", 1366 ifp->if_name, ifp->if_unit, if_filter->iff_name, retval); 1367 } 1368 if (retval != 0 && filter != NULL) 1369 zfree(dlif_filt_zone, filter); 1370 1371 return (retval); 1372} 1373 1374static int 1375dlil_detach_filter_internal(interface_filter_t filter, int detached) 1376{ 1377 int retval = 0; 1378 1379 if (detached == 0) { 1380 ifnet_t ifp = NULL; 1381 1382 ifnet_head_lock_shared(); 1383 TAILQ_FOREACH(ifp, &ifnet_head, if_link) { 1384 interface_filter_t entry = NULL; 1385 1386 lck_mtx_lock(&ifp->if_flt_lock); 1387 TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) { 1388 if (entry != filter || entry->filt_skip) 1389 continue; 1390 /* 1391 * We've found a match; since it's possible 1392 * that the thread gets blocked in the monitor, 1393 * we do the lock dance. Interface should 1394 * not be detached since we still have a use 1395 * count held during filter attach. 1396 */ 1397 entry->filt_skip = 1; /* skip input/output */ 1398 lck_mtx_unlock(&ifp->if_flt_lock); 1399 ifnet_head_done(); 1400 1401 lck_mtx_lock(&ifp->if_flt_lock); 1402 if_flt_monitor_enter(ifp); 1403 lck_mtx_assert(&ifp->if_flt_lock, 1404 LCK_MTX_ASSERT_OWNED); 1405 1406 /* Remove the filter from the list */ 1407 TAILQ_REMOVE(&ifp->if_flt_head, filter, 1408 filt_next); 1409 1410 if_flt_monitor_leave(ifp); 1411 lck_mtx_unlock(&ifp->if_flt_lock); 1412 if (dlil_verbose) { 1413 printf("%s%d: %s filter detached\n", 1414 ifp->if_name, ifp->if_unit, 1415 filter->filt_name); 1416 } 1417 goto destroy; 1418 } 1419 lck_mtx_unlock(&ifp->if_flt_lock); 1420 } 1421 ifnet_head_done(); 1422 1423 /* filter parameter is not a valid filter ref */ 1424 retval = EINVAL; 1425 goto done; 1426 } 1427 1428 if (dlil_verbose) 1429 printf("%s filter detached\n", filter->filt_name); 1430 1431destroy: 1432 1433 /* Call the detached function if there is one */ 1434 if (filter->filt_detached) 1435 filter->filt_detached(filter->filt_cookie, filter->filt_ifp); 1436 1437 /* Free the filter */ 1438 zfree(dlif_filt_zone, filter); 1439 1440 /* 1441 * Decrease filter count and route_generation ID to let TCP 1442 * know it should reevalute doing TSO or not 1443 */ 1444 OSAddAtomic(-1, &dlil_filter_count); 1445 if (use_routegenid) 1446 routegenid_update(); 1447 1448done: 1449 if (retval != 0) { 1450 DLIL_PRINTF("failed to detach %s filter (err=%d)\n", 1451 filter->filt_name, retval); 1452 } 1453 return (retval); 1454} 1455 1456__private_extern__ void 1457dlil_detach_filter(interface_filter_t filter) 1458{ 1459 if (filter == NULL) 1460 return; 1461 dlil_detach_filter_internal(filter, 0); 1462} 1463 1464/* 1465 * Main input thread: 1466 * 1467 * a) handles all inbound packets for lo0 1468 * b) handles all inbound packets for interfaces with no dedicated 1469 * input thread (e.g. anything but Ethernet/PDP or those that support 1470 * opportunistic polling.) 1471 * c) protocol registrations 1472 * d) packet injections 1473 */ 1474static void 1475dlil_main_input_thread_func(void *v, wait_result_t w) 1476{ 1477#pragma unused(w) 1478 struct dlil_main_threading_info *inpm = v; 1479 struct dlil_threading_info *inp = v; 1480 1481 VERIFY(inp == dlil_main_input_thread); 1482 VERIFY(inp->ifp == NULL); 1483 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF); 1484 1485 while (1) { 1486 struct mbuf *m = NULL, *m_loop = NULL; 1487 u_int32_t m_cnt, m_cnt_loop; 1488 boolean_t proto_req; 1489 1490 lck_mtx_lock_spin(&inp->input_lck); 1491 1492 /* Wait until there is work to be done */ 1493 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) { 1494 inp->input_waiting &= ~DLIL_INPUT_RUNNING; 1495 (void) msleep(&inp->input_waiting, &inp->input_lck, 1496 (PZERO - 1) | PSPIN, inp->input_name, NULL); 1497 } 1498 1499 inp->input_waiting |= DLIL_INPUT_RUNNING; 1500 inp->input_waiting &= ~DLIL_INPUT_WAITING; 1501 1502 /* Main input thread cannot be terminated */ 1503 VERIFY(!(inp->input_waiting & DLIL_INPUT_TERMINATE)); 1504 1505 proto_req = (inp->input_waiting & 1506 (DLIL_PROTO_WAITING | DLIL_PROTO_REGISTER)); 1507 1508 /* Packets for non-dedicated interfaces other than lo0 */ 1509 m_cnt = qlen(&inp->rcvq_pkts); 1510 m = _getq_all(&inp->rcvq_pkts); 1511 1512 /* Packets exclusive for lo0 */ 1513 m_cnt_loop = qlen(&inpm->lo_rcvq_pkts); 1514 m_loop = _getq_all(&inpm->lo_rcvq_pkts); 1515 1516 inp->wtot = 0; 1517 1518 lck_mtx_unlock(&inp->input_lck); 1519 1520 /* 1521 * NOTE warning %%% attention !!!! 1522 * We should think about putting some thread starvation 1523 * safeguards if we deal with long chains of packets. 1524 */ 1525 if (m_loop != NULL) 1526 dlil_input_packet_list_extended(lo_ifp, m_loop, 1527 m_cnt_loop, inp->mode); 1528 1529 if (m != NULL) 1530 dlil_input_packet_list_extended(NULL, m, 1531 m_cnt, inp->mode); 1532 1533 if (proto_req) 1534 proto_input_run(); 1535 } 1536 1537 /* NOTREACHED */ 1538 VERIFY(0); /* we should never get here */ 1539} 1540 1541/* 1542 * Input thread for interfaces with legacy input model. 1543 */ 1544static void 1545dlil_input_thread_func(void *v, wait_result_t w) 1546{ 1547#pragma unused(w) 1548 struct dlil_threading_info *inp = v; 1549 struct ifnet *ifp = inp->ifp; 1550 1551 VERIFY(inp != dlil_main_input_thread); 1552 VERIFY(ifp != NULL); 1553 VERIFY(!(ifp->if_eflags & IFEF_RXPOLL) || !net_rxpoll); 1554 VERIFY(inp->mode == IFNET_MODEL_INPUT_POLL_OFF); 1555 1556 while (1) { 1557 struct mbuf *m = NULL; 1558 u_int32_t m_cnt; 1559 1560 lck_mtx_lock_spin(&inp->input_lck); 1561 1562 /* Wait until there is work to be done */ 1563 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING)) { 1564 inp->input_waiting &= ~DLIL_INPUT_RUNNING; 1565 (void) msleep(&inp->input_waiting, &inp->input_lck, 1566 (PZERO - 1) | PSPIN, inp->input_name, NULL); 1567 } 1568 1569 inp->input_waiting |= DLIL_INPUT_RUNNING; 1570 inp->input_waiting &= ~DLIL_INPUT_WAITING; 1571 1572 /* 1573 * Protocol registration and injection must always use 1574 * the main input thread; in theory the latter can utilize 1575 * the corresponding input thread where the packet arrived 1576 * on, but that requires our knowing the interface in advance 1577 * (and the benefits might not worth the trouble.) 1578 */ 1579 VERIFY(!(inp->input_waiting & 1580 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER))); 1581 1582 /* Packets for this interface */ 1583 m_cnt = qlen(&inp->rcvq_pkts); 1584 m = _getq_all(&inp->rcvq_pkts); 1585 1586 if (inp->input_waiting & DLIL_INPUT_TERMINATE) { 1587 lck_mtx_unlock(&inp->input_lck); 1588 1589 /* Free up pending packets */ 1590 if (m != NULL) 1591 mbuf_freem_list(m); 1592 1593 dlil_terminate_input_thread(inp); 1594 /* NOTREACHED */ 1595 return; 1596 } 1597 1598 inp->wtot = 0; 1599 1600 dlil_input_stats_sync(ifp, inp); 1601 1602 lck_mtx_unlock(&inp->input_lck); 1603 1604 /* 1605 * NOTE warning %%% attention !!!! 1606 * We should think about putting some thread starvation 1607 * safeguards if we deal with long chains of packets. 1608 */ 1609 if (m != NULL) 1610 dlil_input_packet_list_extended(NULL, m, 1611 m_cnt, inp->mode); 1612 } 1613 1614 /* NOTREACHED */ 1615 VERIFY(0); /* we should never get here */ 1616} 1617 1618/* 1619 * Input thread for interfaces with opportunistic polling input model. 1620 */ 1621static void 1622dlil_rxpoll_input_thread_func(void *v, wait_result_t w) 1623{ 1624#pragma unused(w) 1625 struct dlil_threading_info *inp = v; 1626 struct ifnet *ifp = inp->ifp; 1627 struct timespec ts; 1628 1629 VERIFY(inp != dlil_main_input_thread); 1630 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL)); 1631 1632 while (1) { 1633 struct mbuf *m = NULL; 1634 u_int32_t m_cnt, m_size, poll_req = 0; 1635 ifnet_model_t mode; 1636 struct timespec now, delta; 1637 1638 lck_mtx_lock_spin(&inp->input_lck); 1639 1640 /* Link parameters changed? */ 1641 if (ifp->if_poll_update != 0) { 1642 ifp->if_poll_update = 0; 1643 dlil_rxpoll_calc_limits(inp); 1644 } 1645 1646 /* Current operating mode */ 1647 mode = inp->mode; 1648 1649 /* Wait until there is work to be done */ 1650 while (!(inp->input_waiting & ~DLIL_INPUT_RUNNING) && 1651 qempty(&inp->rcvq_pkts)) { 1652 inp->input_waiting &= ~DLIL_INPUT_RUNNING; 1653 (void) msleep(&inp->input_waiting, &inp->input_lck, 1654 (PZERO - 1) | PSPIN, inp->input_name, NULL); 1655 } 1656 1657 inp->input_waiting |= DLIL_INPUT_RUNNING; 1658 inp->input_waiting &= ~DLIL_INPUT_WAITING; 1659 1660 /* 1661 * Protocol registration and injection must always use 1662 * the main input thread; in theory the latter can utilize 1663 * the corresponding input thread where the packet arrived 1664 * on, but that requires our knowing the interface in advance 1665 * (and the benefits might not worth the trouble.) 1666 */ 1667 VERIFY(!(inp->input_waiting & 1668 (DLIL_PROTO_WAITING|DLIL_PROTO_REGISTER))); 1669 1670 if (inp->input_waiting & DLIL_INPUT_TERMINATE) { 1671 /* Free up pending packets */ 1672 _flushq(&inp->rcvq_pkts); 1673 lck_mtx_unlock(&inp->input_lck); 1674 1675 dlil_terminate_input_thread(inp); 1676 /* NOTREACHED */ 1677 return; 1678 } 1679 1680 /* Total count of all packets */ 1681 m_cnt = qlen(&inp->rcvq_pkts); 1682 1683 /* Total bytes of all packets */ 1684 m_size = qsize(&inp->rcvq_pkts); 1685 1686 /* Packets for this interface */ 1687 m = _getq_all(&inp->rcvq_pkts); 1688 VERIFY(m != NULL || m_cnt == 0); 1689 1690 nanouptime(&now); 1691 if (!net_timerisset(&inp->sample_lasttime)) 1692 *(&inp->sample_lasttime) = *(&now); 1693 1694 net_timersub(&now, &inp->sample_lasttime, &delta); 1695 if (if_rxpoll && net_timerisset(&inp->sample_holdtime)) { 1696 u_int32_t ptot, btot; 1697 1698 /* Accumulate statistics for current sampling */ 1699 PKTCNTR_ADD(&inp->sstats, m_cnt, m_size); 1700 1701 if (net_timercmp(&delta, &inp->sample_holdtime, <)) 1702 goto skip; 1703 1704 *(&inp->sample_lasttime) = *(&now); 1705 1706 /* Calculate min/max of inbound bytes */ 1707 btot = (u_int32_t)inp->sstats.bytes; 1708 if (inp->rxpoll_bmin == 0 || inp->rxpoll_bmin > btot) 1709 inp->rxpoll_bmin = btot; 1710 if (btot > inp->rxpoll_bmax) 1711 inp->rxpoll_bmax = btot; 1712 1713 /* Calculate EWMA of inbound bytes */ 1714 DLIL_EWMA(inp->rxpoll_bavg, btot, if_rxpoll_decay); 1715 1716 /* Calculate min/max of inbound packets */ 1717 ptot = (u_int32_t)inp->sstats.packets; 1718 if (inp->rxpoll_pmin == 0 || inp->rxpoll_pmin > ptot) 1719 inp->rxpoll_pmin = ptot; 1720 if (ptot > inp->rxpoll_pmax) 1721 inp->rxpoll_pmax = ptot; 1722 1723 /* Calculate EWMA of inbound packets */ 1724 DLIL_EWMA(inp->rxpoll_pavg, ptot, if_rxpoll_decay); 1725 1726 /* Reset sampling statistics */ 1727 PKTCNTR_CLEAR(&inp->sstats); 1728 1729 /* Calculate EWMA of wakeup requests */ 1730 DLIL_EWMA(inp->rxpoll_wavg, inp->wtot, if_rxpoll_decay); 1731 inp->wtot = 0; 1732 1733 if (dlil_verbose) { 1734 if (!net_timerisset(&inp->dbg_lasttime)) 1735 *(&inp->dbg_lasttime) = *(&now); 1736 net_timersub(&now, &inp->dbg_lasttime, &delta); 1737 if (net_timercmp(&delta, &dlil_dbgrate, >=)) { 1738 *(&inp->dbg_lasttime) = *(&now); 1739 printf("%s%d: [%s] pkts avg %d max %d " 1740 "limits [%d/%d], wreq avg %d " 1741 "limits [%d/%d], bytes avg %d " 1742 "limits [%d/%d]\n", ifp->if_name, 1743 ifp->if_unit, (inp->mode == 1744 IFNET_MODEL_INPUT_POLL_ON) ? 1745 "ON" : "OFF", inp->rxpoll_pavg, 1746 inp->rxpoll_pmax, 1747 inp->rxpoll_plowat, 1748 inp->rxpoll_phiwat, 1749 inp->rxpoll_wavg, 1750 inp->rxpoll_wlowat, 1751 inp->rxpoll_whiwat, 1752 inp->rxpoll_bavg, 1753 inp->rxpoll_blowat, 1754 inp->rxpoll_bhiwat); 1755 } 1756 } 1757 1758 /* Perform mode transition, if necessary */ 1759 if (!net_timerisset(&inp->mode_lasttime)) 1760 *(&inp->mode_lasttime) = *(&now); 1761 1762 net_timersub(&now, &inp->mode_lasttime, &delta); 1763 if (net_timercmp(&delta, &inp->mode_holdtime, <)) 1764 goto skip; 1765 1766 if (inp->rxpoll_pavg <= inp->rxpoll_plowat && 1767 inp->rxpoll_bavg <= inp->rxpoll_blowat && 1768 inp->rxpoll_wavg <= inp->rxpoll_wlowat && 1769 inp->mode != IFNET_MODEL_INPUT_POLL_OFF) { 1770 mode = IFNET_MODEL_INPUT_POLL_OFF; 1771 } else if (inp->rxpoll_pavg >= inp->rxpoll_phiwat && 1772 (inp->rxpoll_bavg >= inp->rxpoll_bhiwat || 1773 inp->rxpoll_wavg >= inp->rxpoll_whiwat) && 1774 inp->mode != IFNET_MODEL_INPUT_POLL_ON) { 1775 mode = IFNET_MODEL_INPUT_POLL_ON; 1776 } 1777 1778 if (mode != inp->mode) { 1779 inp->mode = mode; 1780 *(&inp->mode_lasttime) = *(&now); 1781 poll_req++; 1782 } 1783 } 1784skip: 1785 dlil_input_stats_sync(ifp, inp); 1786 1787 lck_mtx_unlock(&inp->input_lck); 1788 1789 /* 1790 * If there's a mode change and interface is still attached, 1791 * perform a downcall to the driver for the new mode. Also 1792 * hold an IO refcnt on the interface to prevent it from 1793 * being detached (will be release below.) 1794 */ 1795 if (poll_req != 0 && ifnet_is_attached(ifp, 1)) { 1796 struct ifnet_model_params p = { mode, { 0 } }; 1797 errno_t err; 1798 1799 if (dlil_verbose) { 1800 printf("%s%d: polling is now %s, " 1801 "pkts avg %d max %d limits [%d/%d], " 1802 "wreq avg %d limits [%d/%d], " 1803 "bytes avg %d limits [%d/%d]\n", 1804 ifp->if_name, ifp->if_unit, 1805 (mode == IFNET_MODEL_INPUT_POLL_ON) ? 1806 "ON" : "OFF", inp->rxpoll_pavg, 1807 inp->rxpoll_pmax, inp->rxpoll_plowat, 1808 inp->rxpoll_phiwat, inp->rxpoll_wavg, 1809 inp->rxpoll_wlowat, inp->rxpoll_whiwat, 1810 inp->rxpoll_bavg, inp->rxpoll_blowat, 1811 inp->rxpoll_bhiwat); 1812 } 1813 1814 if ((err = ((*ifp->if_input_ctl)(ifp, 1815 IFNET_CTL_SET_INPUT_MODEL, sizeof (p), &p))) != 0) { 1816 printf("%s%d: error setting polling mode " 1817 "to %s (%d)\n", ifp->if_name, ifp->if_unit, 1818 (mode == IFNET_MODEL_INPUT_POLL_ON) ? 1819 "ON" : "OFF", err); 1820 } 1821 1822 switch (mode) { 1823 case IFNET_MODEL_INPUT_POLL_OFF: 1824 ifnet_set_poll_cycle(ifp, NULL); 1825 inp->rxpoll_offreq++; 1826 if (err != 0) 1827 inp->rxpoll_offerr++; 1828 break; 1829 1830 case IFNET_MODEL_INPUT_POLL_ON: 1831 net_nsectimer(&if_rxpoll_interval_time, &ts); 1832 ifnet_set_poll_cycle(ifp, &ts); 1833 ifnet_poll(ifp); 1834 inp->rxpoll_onreq++; 1835 if (err != 0) 1836 inp->rxpoll_onerr++; 1837 break; 1838 1839 default: 1840 VERIFY(0); 1841 /* NOTREACHED */ 1842 } 1843 1844 /* Release the IO refcnt */ 1845 ifnet_decr_iorefcnt(ifp); 1846 } 1847 1848 /* 1849 * NOTE warning %%% attention !!!! 1850 * We should think about putting some thread starvation 1851 * safeguards if we deal with long chains of packets. 1852 */ 1853 if (m != NULL) 1854 dlil_input_packet_list_extended(NULL, m, m_cnt, mode); 1855 } 1856 1857 /* NOTREACHED */ 1858 VERIFY(0); /* we should never get here */ 1859} 1860 1861static void 1862dlil_rxpoll_calc_limits(struct dlil_threading_info *inp) 1863{ 1864 struct ifnet *ifp = inp->ifp; 1865 u_int64_t sample_holdtime, inbw; 1866 1867 VERIFY(inp != dlil_main_input_thread); 1868 VERIFY(ifp != NULL && (ifp->if_eflags & IFEF_RXPOLL)); 1869 1870 if ((inbw = ifnet_input_linkrate(ifp)) == 0) { 1871 sample_holdtime = 0; /* polling is disabled */ 1872 inp->rxpoll_wlowat = inp->rxpoll_plowat = 1873 inp->rxpoll_blowat = 0; 1874 inp->rxpoll_whiwat = inp->rxpoll_phiwat = 1875 inp->rxpoll_bhiwat = (u_int32_t)-1; 1876 } else { 1877 unsigned int n, i; 1878 1879 n = 0; 1880 for (i = 0; rxpoll_tbl[i].speed != 0; i++) { 1881 if (inbw < rxpoll_tbl[i].speed) 1882 break; 1883 n = i; 1884 } 1885 sample_holdtime = if_rxpoll_sample_holdtime; 1886 inp->rxpoll_wlowat = if_rxpoll_wlowat; 1887 inp->rxpoll_whiwat = if_rxpoll_whiwat; 1888 inp->rxpoll_plowat = rxpoll_tbl[n].plowat; 1889 inp->rxpoll_phiwat = rxpoll_tbl[n].phiwat; 1890 inp->rxpoll_blowat = rxpoll_tbl[n].blowat; 1891 inp->rxpoll_bhiwat = rxpoll_tbl[n].bhiwat; 1892 } 1893 1894 net_nsectimer(&if_rxpoll_mode_holdtime, &inp->mode_holdtime); 1895 net_nsectimer(&sample_holdtime, &inp->sample_holdtime); 1896 1897 if (dlil_verbose) { 1898 printf("%s%d: speed %llu bps, sample per %llu nsec, " 1899 "pkt limits [%d/%d], wreq limits [%d/%d], " 1900 "bytes limits [%d/%d]\n", ifp->if_name, ifp->if_unit, 1901 inbw, sample_holdtime, inp->rxpoll_plowat, 1902 inp->rxpoll_phiwat, inp->rxpoll_wlowat, inp->rxpoll_whiwat, 1903 inp->rxpoll_blowat, inp->rxpoll_bhiwat); 1904 } 1905} 1906 1907errno_t 1908ifnet_input(struct ifnet *ifp, struct mbuf *m_head, 1909 const struct ifnet_stat_increment_param *s) 1910{ 1911 return (ifnet_input_common(ifp, m_head, NULL, s, FALSE, FALSE)); 1912} 1913 1914errno_t 1915ifnet_input_extended(struct ifnet *ifp, struct mbuf *m_head, 1916 struct mbuf *m_tail, const struct ifnet_stat_increment_param *s) 1917{ 1918 return (ifnet_input_common(ifp, m_head, m_tail, s, TRUE, FALSE)); 1919} 1920 1921static errno_t 1922ifnet_input_common(struct ifnet *ifp, struct mbuf *m_head, struct mbuf *m_tail, 1923 const struct ifnet_stat_increment_param *s, boolean_t ext, boolean_t poll) 1924{ 1925 struct thread *tp = current_thread(); 1926 struct mbuf *last; 1927 struct dlil_threading_info *inp; 1928 u_int32_t m_cnt = 0, m_size = 0; 1929 1930 /* 1931 * Drop the packet(s) if the parameters are invalid, or if the 1932 * interface is no longer attached; else hold an IO refcnt to 1933 * prevent it from being detached (will be released below.) 1934 */ 1935 if (ifp == NULL || m_head == NULL || (s == NULL && ext) || 1936 (ifp != lo_ifp && !ifnet_is_attached(ifp, 1))) { 1937 if (m_head != NULL) 1938 mbuf_freem_list(m_head); 1939 return (EINVAL); 1940 } 1941 1942 VERIFY(m_tail == NULL || ext); 1943 VERIFY(s != NULL || !ext); 1944 1945 if (m_tail == NULL) { 1946 last = m_head; 1947 while (1) { 1948#if IFNET_INPUT_SANITY_CHK 1949 if (dlil_input_sanity_check != 0) 1950 DLIL_INPUT_CHECK(last, ifp); 1951#endif /* IFNET_INPUT_SANITY_CHK */ 1952 m_cnt++; 1953 m_size += m_length(last); 1954 if (mbuf_nextpkt(last) == NULL) 1955 break; 1956 last = mbuf_nextpkt(last); 1957 } 1958 m_tail = last; 1959 } else { 1960#if IFNET_INPUT_SANITY_CHK 1961 if (dlil_input_sanity_check != 0) { 1962 last = m_head; 1963 while (1) { 1964 DLIL_INPUT_CHECK(last, ifp); 1965 m_cnt++; 1966 m_size += m_length(last); 1967 if (mbuf_nextpkt(last) == NULL) 1968 break; 1969 last = mbuf_nextpkt(last); 1970 } 1971 } else { 1972 m_cnt = s->packets_in; 1973 m_size = s->bytes_in; 1974 last = m_tail; 1975 } 1976#else 1977 m_cnt = s->packets_in; 1978 m_size = s->bytes_in; 1979 last = m_tail; 1980#endif /* IFNET_INPUT_SANITY_CHK */ 1981 } 1982 1983 if (last != m_tail) { 1984 panic_plain("%s: invalid input packet chain for %s%d, " 1985 "tail mbuf %p instead of %p\n", __func__, ifp->if_name, 1986 ifp->if_unit, m_tail, last); 1987 } 1988 1989 /* 1990 * Assert packet count only for the extended variant, for backwards 1991 * compatibility, since this came directly from the device driver. 1992 * Relax this assertion for input bytes, as the driver may have 1993 * included the link-layer headers in the computation; hence 1994 * m_size is just an approximation. 1995 */ 1996 if (ext && s->packets_in != m_cnt) { 1997 panic_plain("%s: input packet count mismatch for %s%d, " 1998 "%d instead of %d\n", __func__, ifp->if_name, 1999 ifp->if_unit, s->packets_in, m_cnt); 2000 } 2001 2002 if ((inp = ifp->if_inp) == NULL) 2003 inp = dlil_main_input_thread; 2004 2005 /* 2006 * If there is a matching DLIL input thread associated with an 2007 * affinity set, associate this thread with the same set. We 2008 * will only do this once. 2009 */ 2010 lck_mtx_lock_spin(&inp->input_lck); 2011 if (inp != dlil_main_input_thread && inp->net_affinity && 2012 ((!poll && inp->wloop_thr == THREAD_NULL) || 2013 (poll && inp->poll_thr == THREAD_NULL))) { 2014 u_int32_t tag = inp->tag; 2015 2016 if (poll) { 2017 VERIFY(inp->poll_thr == THREAD_NULL); 2018 inp->poll_thr = tp; 2019 } else { 2020 VERIFY(inp->wloop_thr == THREAD_NULL); 2021 inp->wloop_thr = tp; 2022 } 2023 lck_mtx_unlock(&inp->input_lck); 2024 2025 /* Associate the current thread with the new affinity tag */ 2026 (void) dlil_affinity_set(tp, tag); 2027 2028 /* 2029 * Take a reference on the current thread; during detach, 2030 * we will need to refer to it in order ot tear down its 2031 * affinity. 2032 */ 2033 thread_reference(tp); 2034 lck_mtx_lock_spin(&inp->input_lck); 2035 } 2036 2037 /* 2038 * Because of loopbacked multicast we cannot stuff the ifp in 2039 * the rcvif of the packet header: loopback (lo0) packets use a 2040 * dedicated list so that we can later associate them with lo_ifp 2041 * on their way up the stack. Packets for other interfaces without 2042 * dedicated input threads go to the regular list. 2043 */ 2044 if (inp == dlil_main_input_thread && ifp == lo_ifp) { 2045 struct dlil_main_threading_info *inpm = 2046 (struct dlil_main_threading_info *)inp; 2047 _addq_multi(&inpm->lo_rcvq_pkts, m_head, m_tail, m_cnt, m_size); 2048 } else { 2049 _addq_multi(&inp->rcvq_pkts, m_head, m_tail, m_cnt, m_size); 2050 } 2051 2052#if IFNET_INPUT_SANITY_CHK 2053 if (dlil_input_sanity_check != 0) { 2054 u_int32_t count; 2055 struct mbuf *m0; 2056 2057 for (m0 = m_head, count = 0; m0; m0 = mbuf_nextpkt(m0)) 2058 count++; 2059 2060 if (count != m_cnt) { 2061 panic_plain("%s%d: invalid packet count %d " 2062 "(expected %d)\n", ifp->if_name, ifp->if_unit, 2063 count, m_cnt); 2064 /* NOTREACHED */ 2065 } 2066 2067 inp->input_mbuf_cnt += m_cnt; 2068 } 2069#endif /* IFNET_INPUT_SANITY_CHK */ 2070 2071 if (s != NULL) { 2072 dlil_input_stats_add(s, inp, poll); 2073 /* 2074 * If we're using the main input thread, synchronize the 2075 * stats now since we have the interface context. All 2076 * other cases involving dedicated input threads will 2077 * have their stats synchronized there. 2078 */ 2079 if (inp == dlil_main_input_thread) 2080 dlil_input_stats_sync(ifp, inp); 2081 } 2082 2083 inp->input_waiting |= DLIL_INPUT_WAITING; 2084 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) { 2085 inp->wtot++; 2086 wakeup_one((caddr_t)&inp->input_waiting); 2087 } 2088 lck_mtx_unlock(&inp->input_lck); 2089 2090 if (ifp != lo_ifp) { 2091 /* Release the IO refcnt */ 2092 ifnet_decr_iorefcnt(ifp); 2093 } 2094 2095 return (0); 2096} 2097 2098void 2099ifnet_start(struct ifnet *ifp) 2100{ 2101 /* 2102 * If the starter thread is inactive, signal it to do work. 2103 */ 2104 lck_mtx_lock_spin(&ifp->if_start_lock); 2105 ifp->if_start_req++; 2106 if (!ifp->if_start_active && ifp->if_start_thread != THREAD_NULL) { 2107 wakeup_one((caddr_t)&ifp->if_start_thread); 2108 } 2109 lck_mtx_unlock(&ifp->if_start_lock); 2110} 2111 2112static void 2113ifnet_start_thread_fn(void *v, wait_result_t w) 2114{ 2115#pragma unused(w) 2116 struct ifnet *ifp = v; 2117 char ifname[IFNAMSIZ + 1]; 2118 struct timespec *ts = NULL; 2119 struct ifclassq *ifq = &ifp->if_snd; 2120 2121 /* 2122 * Treat the dedicated starter thread for lo0 as equivalent to 2123 * the driver workloop thread; if net_affinity is enabled for 2124 * the main input thread, associate this starter thread to it 2125 * by binding them with the same affinity tag. This is done 2126 * only once (as we only have one lo_ifp which never goes away.) 2127 */ 2128 if (ifp == lo_ifp) { 2129 struct dlil_threading_info *inp = dlil_main_input_thread; 2130 struct thread *tp = current_thread(); 2131 2132 lck_mtx_lock(&inp->input_lck); 2133 if (inp->net_affinity) { 2134 u_int32_t tag = inp->tag; 2135 2136 VERIFY(inp->wloop_thr == THREAD_NULL); 2137 VERIFY(inp->poll_thr == THREAD_NULL); 2138 inp->wloop_thr = tp; 2139 lck_mtx_unlock(&inp->input_lck); 2140 2141 /* Associate this thread with the affinity tag */ 2142 (void) dlil_affinity_set(tp, tag); 2143 } else { 2144 lck_mtx_unlock(&inp->input_lck); 2145 } 2146 } 2147 2148 snprintf(ifname, sizeof (ifname), "%s%d_starter", 2149 ifp->if_name, ifp->if_unit); 2150 2151 lck_mtx_lock_spin(&ifp->if_start_lock); 2152 2153 for (;;) { 2154 (void) msleep(&ifp->if_start_thread, &ifp->if_start_lock, 2155 (PZERO - 1) | PSPIN, ifname, ts); 2156 2157 /* interface is detached? */ 2158 if (ifp->if_start_thread == THREAD_NULL) { 2159 ifnet_set_start_cycle(ifp, NULL); 2160 lck_mtx_unlock(&ifp->if_start_lock); 2161 ifnet_purge(ifp); 2162 2163 if (dlil_verbose) { 2164 printf("%s%d: starter thread terminated\n", 2165 ifp->if_name, ifp->if_unit); 2166 } 2167 2168 /* for the extra refcnt from kernel_thread_start() */ 2169 thread_deallocate(current_thread()); 2170 /* this is the end */ 2171 thread_terminate(current_thread()); 2172 /* NOTREACHED */ 2173 return; 2174 } 2175 2176 ifp->if_start_active = 1; 2177 for (;;) { 2178 u_int32_t req = ifp->if_start_req; 2179 2180 lck_mtx_unlock(&ifp->if_start_lock); 2181 /* invoke the driver's start routine */ 2182 ((*ifp->if_start)(ifp)); 2183 lck_mtx_lock_spin(&ifp->if_start_lock); 2184 2185 /* if there's no pending request, we're done */ 2186 if (req == ifp->if_start_req) 2187 break; 2188 } 2189 ifp->if_start_req = 0; 2190 ifp->if_start_active = 0; 2191 /* 2192 * Wakeup N ns from now if rate-controlled by TBR, and if 2193 * there are still packets in the send queue which haven't 2194 * been dequeued so far; else sleep indefinitely (ts = NULL) 2195 * until ifnet_start() is called again. 2196 */ 2197 ts = ((IFCQ_TBR_IS_ENABLED(ifq) && !IFCQ_IS_EMPTY(ifq)) ? 2198 &ifp->if_start_cycle : NULL); 2199 2200 if (ts != NULL && ts->tv_sec == 0 && ts->tv_nsec == 0) 2201 ts = NULL; 2202 } 2203 2204 /* NOTREACHED */ 2205 lck_mtx_unlock(&ifp->if_start_lock); 2206 VERIFY(0); /* we should never get here */ 2207} 2208 2209void 2210ifnet_set_start_cycle(struct ifnet *ifp, struct timespec *ts) 2211{ 2212 if (ts == NULL) 2213 bzero(&ifp->if_start_cycle, sizeof (ifp->if_start_cycle)); 2214 else 2215 *(&ifp->if_start_cycle) = *ts; 2216 2217 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) 2218 printf("%s%d: restart interval set to %lu nsec\n", 2219 ifp->if_name, ifp->if_unit, ts->tv_nsec); 2220} 2221 2222static void 2223ifnet_poll(struct ifnet *ifp) 2224{ 2225 /* 2226 * If the poller thread is inactive, signal it to do work. 2227 */ 2228 lck_mtx_lock_spin(&ifp->if_poll_lock); 2229 ifp->if_poll_req++; 2230 if (!ifp->if_poll_active && ifp->if_poll_thread != THREAD_NULL) { 2231 wakeup_one((caddr_t)&ifp->if_poll_thread); 2232 } 2233 lck_mtx_unlock(&ifp->if_poll_lock); 2234} 2235 2236static void 2237ifnet_poll_thread_fn(void *v, wait_result_t w) 2238{ 2239#pragma unused(w) 2240 struct dlil_threading_info *inp; 2241 struct ifnet *ifp = v; 2242 char ifname[IFNAMSIZ + 1]; 2243 struct timespec *ts = NULL; 2244 struct ifnet_stat_increment_param s; 2245 2246 snprintf(ifname, sizeof (ifname), "%s%d_poller", 2247 ifp->if_name, ifp->if_unit); 2248 bzero(&s, sizeof (s)); 2249 2250 lck_mtx_lock_spin(&ifp->if_poll_lock); 2251 2252 inp = ifp->if_inp; 2253 VERIFY(inp != NULL); 2254 2255 for (;;) { 2256 if (ifp->if_poll_thread != THREAD_NULL) { 2257 (void) msleep(&ifp->if_poll_thread, &ifp->if_poll_lock, 2258 (PZERO - 1) | PSPIN, ifname, ts); 2259 } 2260 2261 /* interface is detached (maybe while asleep)? */ 2262 if (ifp->if_poll_thread == THREAD_NULL) { 2263 ifnet_set_poll_cycle(ifp, NULL); 2264 lck_mtx_unlock(&ifp->if_poll_lock); 2265 2266 if (dlil_verbose) { 2267 printf("%s%d: poller thread terminated\n", 2268 ifp->if_name, ifp->if_unit); 2269 } 2270 2271 /* for the extra refcnt from kernel_thread_start() */ 2272 thread_deallocate(current_thread()); 2273 /* this is the end */ 2274 thread_terminate(current_thread()); 2275 /* NOTREACHED */ 2276 return; 2277 } 2278 2279 ifp->if_poll_active = 1; 2280 for (;;) { 2281 struct mbuf *m_head, *m_tail; 2282 u_int32_t m_lim, m_cnt, m_totlen; 2283 u_int16_t req = ifp->if_poll_req; 2284 2285 lck_mtx_unlock(&ifp->if_poll_lock); 2286 2287 /* 2288 * If no longer attached, there's nothing to do; 2289 * else hold an IO refcnt to prevent the interface 2290 * from being detached (will be released below.) 2291 */ 2292 if (!ifnet_is_attached(ifp, 1)) { 2293 lck_mtx_lock_spin(&ifp->if_poll_lock); 2294 break; 2295 } 2296 2297 m_lim = (if_rxpoll_max != 0) ? if_rxpoll_max : 2298 MAX((qlimit(&inp->rcvq_pkts)), 2299 (inp->rxpoll_phiwat << 2)); 2300 2301 if (dlil_verbose > 1) { 2302 printf("%s%d: polling up to %d pkts, " 2303 "pkts avg %d max %d, wreq avg %d, " 2304 "bytes avg %d\n", 2305 ifp->if_name, ifp->if_unit, m_lim, 2306 inp->rxpoll_pavg, inp->rxpoll_pmax, 2307 inp->rxpoll_wavg, inp->rxpoll_bavg); 2308 } 2309 2310 /* invoke the driver's input poll routine */ 2311 ((*ifp->if_input_poll)(ifp, 0, m_lim, &m_head, &m_tail, 2312 &m_cnt, &m_totlen)); 2313 2314 if (m_head != NULL) { 2315 VERIFY(m_tail != NULL && m_cnt > 0); 2316 2317 if (dlil_verbose > 1) { 2318 printf("%s%d: polled %d pkts, " 2319 "pkts avg %d max %d, wreq avg %d, " 2320 "bytes avg %d\n", 2321 ifp->if_name, ifp->if_unit, m_cnt, 2322 inp->rxpoll_pavg, inp->rxpoll_pmax, 2323 inp->rxpoll_wavg, inp->rxpoll_bavg); 2324 } 2325 2326 /* stats are required for extended variant */ 2327 s.packets_in = m_cnt; 2328 s.bytes_in = m_totlen; 2329 2330 (void) ifnet_input_common(ifp, m_head, m_tail, 2331 &s, TRUE, TRUE); 2332 } else if (dlil_verbose > 1) { 2333 printf("%s%d: no packets, pkts avg %d max %d, " 2334 "wreq avg %d, bytes avg %d\n", ifp->if_name, 2335 ifp->if_unit, inp->rxpoll_pavg, 2336 inp->rxpoll_pmax, inp->rxpoll_wavg, 2337 inp->rxpoll_bavg); 2338 } 2339 2340 /* Release the io ref count */ 2341 ifnet_decr_iorefcnt(ifp); 2342 2343 lck_mtx_lock_spin(&ifp->if_poll_lock); 2344 2345 /* if there's no pending request, we're done */ 2346 if (req == ifp->if_poll_req) 2347 break; 2348 } 2349 ifp->if_poll_req = 0; 2350 ifp->if_poll_active = 0; 2351 2352 /* 2353 * Wakeup N ns from now, else sleep indefinitely (ts = NULL) 2354 * until ifnet_poll() is called again. 2355 */ 2356 ts = &ifp->if_poll_cycle; 2357 if (ts->tv_sec == 0 && ts->tv_nsec == 0) 2358 ts = NULL; 2359 } 2360 2361 /* NOTREACHED */ 2362 lck_mtx_unlock(&ifp->if_poll_lock); 2363 VERIFY(0); /* we should never get here */ 2364} 2365 2366void 2367ifnet_set_poll_cycle(struct ifnet *ifp, struct timespec *ts) 2368{ 2369 if (ts == NULL) 2370 bzero(&ifp->if_poll_cycle, sizeof (ifp->if_poll_cycle)); 2371 else 2372 *(&ifp->if_poll_cycle) = *ts; 2373 2374 if (ts != NULL && ts->tv_nsec != 0 && dlil_verbose) 2375 printf("%s%d: poll interval set to %lu nsec\n", 2376 ifp->if_name, ifp->if_unit, ts->tv_nsec); 2377} 2378 2379void 2380ifnet_purge(struct ifnet *ifp) 2381{ 2382 if (ifp != NULL && (ifp->if_eflags & IFEF_TXSTART)) 2383 if_qflush(ifp, 0); 2384} 2385 2386void 2387ifnet_update_sndq(struct ifclassq *ifq, cqev_t ev) 2388{ 2389 IFCQ_LOCK_ASSERT_HELD(ifq); 2390 2391 if (!(IFCQ_IS_READY(ifq))) 2392 return; 2393 2394 if (IFCQ_TBR_IS_ENABLED(ifq)) { 2395 struct tb_profile tb = { ifq->ifcq_tbr.tbr_rate_raw, 2396 ifq->ifcq_tbr.tbr_percent, 0 }; 2397 (void) ifclassq_tbr_set(ifq, &tb, FALSE); 2398 } 2399 2400 ifclassq_update(ifq, ev); 2401} 2402 2403void 2404ifnet_update_rcv(struct ifnet *ifp, cqev_t ev) 2405{ 2406 switch (ev) { 2407 case CLASSQ_EV_LINK_SPEED: 2408 if (net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) 2409 ifp->if_poll_update++; 2410 break; 2411 2412 default: 2413 break; 2414 } 2415} 2416 2417errno_t 2418ifnet_set_output_sched_model(struct ifnet *ifp, u_int32_t model) 2419{ 2420 struct ifclassq *ifq; 2421 u_int32_t omodel; 2422 errno_t err; 2423 2424 if (ifp == NULL || (model != IFNET_SCHED_MODEL_DRIVER_MANAGED && 2425 model != IFNET_SCHED_MODEL_NORMAL)) 2426 return (EINVAL); 2427 else if (!(ifp->if_eflags & IFEF_TXSTART)) 2428 return (ENXIO); 2429 2430 ifq = &ifp->if_snd; 2431 IFCQ_LOCK(ifq); 2432 omodel = ifp->if_output_sched_model; 2433 ifp->if_output_sched_model = model; 2434 if ((err = ifclassq_pktsched_setup(ifq)) != 0) 2435 ifp->if_output_sched_model = omodel; 2436 IFCQ_UNLOCK(ifq); 2437 2438 return (err); 2439} 2440 2441errno_t 2442ifnet_set_sndq_maxlen(struct ifnet *ifp, u_int32_t maxqlen) 2443{ 2444 if (ifp == NULL) 2445 return (EINVAL); 2446 else if (!(ifp->if_eflags & IFEF_TXSTART)) 2447 return (ENXIO); 2448 2449 ifclassq_set_maxlen(&ifp->if_snd, maxqlen); 2450 2451 return (0); 2452} 2453 2454errno_t 2455ifnet_get_sndq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen) 2456{ 2457 if (ifp == NULL || maxqlen == NULL) 2458 return (EINVAL); 2459 else if (!(ifp->if_eflags & IFEF_TXSTART)) 2460 return (ENXIO); 2461 2462 *maxqlen = ifclassq_get_maxlen(&ifp->if_snd); 2463 2464 return (0); 2465} 2466 2467errno_t 2468ifnet_get_sndq_len(struct ifnet *ifp, u_int32_t *qlen) 2469{ 2470 if (ifp == NULL || qlen == NULL) 2471 return (EINVAL); 2472 else if (!(ifp->if_eflags & IFEF_TXSTART)) 2473 return (ENXIO); 2474 2475 *qlen = ifclassq_get_len(&ifp->if_snd); 2476 2477 return (0); 2478} 2479 2480errno_t 2481ifnet_set_rcvq_maxlen(struct ifnet *ifp, u_int32_t maxqlen) 2482{ 2483 struct dlil_threading_info *inp; 2484 2485 if (ifp == NULL) 2486 return (EINVAL); 2487 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) 2488 return (ENXIO); 2489 2490 if (maxqlen == 0) 2491 maxqlen = if_rcvq_maxlen; 2492 else if (maxqlen < IF_RCVQ_MINLEN) 2493 maxqlen = IF_RCVQ_MINLEN; 2494 2495 inp = ifp->if_inp; 2496 lck_mtx_lock(&inp->input_lck); 2497 qlimit(&inp->rcvq_pkts) = maxqlen; 2498 lck_mtx_unlock(&inp->input_lck); 2499 2500 return (0); 2501} 2502 2503errno_t 2504ifnet_get_rcvq_maxlen(struct ifnet *ifp, u_int32_t *maxqlen) 2505{ 2506 struct dlil_threading_info *inp; 2507 2508 if (ifp == NULL || maxqlen == NULL) 2509 return (EINVAL); 2510 else if (!(ifp->if_eflags & IFEF_RXPOLL) || ifp->if_inp == NULL) 2511 return (ENXIO); 2512 2513 inp = ifp->if_inp; 2514 lck_mtx_lock(&inp->input_lck); 2515 *maxqlen = qlimit(&inp->rcvq_pkts); 2516 lck_mtx_unlock(&inp->input_lck); 2517 return (0); 2518} 2519 2520errno_t 2521ifnet_enqueue(struct ifnet *ifp, struct mbuf *m) 2522{ 2523 int error; 2524 2525 if (ifp == NULL || m == NULL || !(m->m_flags & M_PKTHDR) || 2526 m->m_nextpkt != NULL) { 2527 if (m != NULL) 2528 m_freem_list(m); 2529 return (EINVAL); 2530 } else if (!(ifp->if_eflags & IFEF_TXSTART) || 2531 !(ifp->if_refflags & IFRF_ATTACHED)) { 2532 /* flag tested without lock for performance */ 2533 m_freem(m); 2534 return (ENXIO); 2535 } else if (!(ifp->if_flags & IFF_UP)) { 2536 m_freem(m); 2537 return (ENETDOWN); 2538 2539 } 2540 2541 /* enqueue the packet */ 2542 error = ifclassq_enqueue(&ifp->if_snd, m); 2543 2544 /* 2545 * Tell the driver to start dequeueing; do this even when the queue 2546 * for the packet is suspended (EQSUSPENDED), as the driver could still 2547 * be dequeueing from other unsuspended queues. 2548 */ 2549 if (error == 0 || error == EQFULL || error == EQSUSPENDED) 2550 ifnet_start(ifp); 2551 2552 return (error); 2553} 2554 2555errno_t 2556ifnet_dequeue(struct ifnet *ifp, struct mbuf **mp) 2557{ 2558 if (ifp == NULL || mp == NULL) 2559 return (EINVAL); 2560 else if (!(ifp->if_eflags & IFEF_TXSTART) || 2561 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL)) 2562 return (ENXIO); 2563 2564 return (ifclassq_dequeue(&ifp->if_snd, 1, mp, NULL, NULL, NULL)); 2565} 2566 2567errno_t 2568ifnet_dequeue_service_class(struct ifnet *ifp, mbuf_svc_class_t sc, 2569 struct mbuf **mp) 2570{ 2571 if (ifp == NULL || mp == NULL || !MBUF_VALID_SC(sc)) 2572 return (EINVAL); 2573 else if (!(ifp->if_eflags & IFEF_TXSTART) || 2574 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED)) 2575 return (ENXIO); 2576 2577 return (ifclassq_dequeue_sc(&ifp->if_snd, sc, 1, mp, NULL, NULL, NULL)); 2578} 2579 2580errno_t 2581ifnet_dequeue_multi(struct ifnet *ifp, u_int32_t limit, struct mbuf **head, 2582 struct mbuf **tail, u_int32_t *cnt, u_int32_t *len) 2583{ 2584 if (ifp == NULL || head == NULL || limit < 1) 2585 return (EINVAL); 2586 else if (!(ifp->if_eflags & IFEF_TXSTART) || 2587 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_NORMAL)) 2588 return (ENXIO); 2589 2590 return (ifclassq_dequeue(&ifp->if_snd, limit, head, tail, cnt, len)); 2591} 2592 2593errno_t 2594ifnet_dequeue_service_class_multi(struct ifnet *ifp, mbuf_svc_class_t sc, 2595 u_int32_t limit, struct mbuf **head, struct mbuf **tail, u_int32_t *cnt, 2596 u_int32_t *len) 2597{ 2598 2599 if (ifp == NULL || head == NULL || limit < 1 || !MBUF_VALID_SC(sc)) 2600 return (EINVAL); 2601 else if (!(ifp->if_eflags & IFEF_TXSTART) || 2602 (ifp->if_output_sched_model != IFNET_SCHED_MODEL_DRIVER_MANAGED)) 2603 return (ENXIO); 2604 2605 return (ifclassq_dequeue_sc(&ifp->if_snd, sc, limit, head, 2606 tail, cnt, len)); 2607} 2608 2609static int 2610dlil_interface_filters_input(struct ifnet *ifp, struct mbuf **m_p, 2611 char **frame_header_p, protocol_family_t protocol_family) 2612{ 2613 struct ifnet_filter *filter; 2614 2615 /* 2616 * Pass the inbound packet to the interface filters 2617 */ 2618 lck_mtx_lock_spin(&ifp->if_flt_lock); 2619 /* prevent filter list from changing in case we drop the lock */ 2620 if_flt_monitor_busy(ifp); 2621 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { 2622 int result; 2623 2624 if (!filter->filt_skip && filter->filt_input != NULL && 2625 (filter->filt_protocol == 0 || 2626 filter->filt_protocol == protocol_family)) { 2627 lck_mtx_unlock(&ifp->if_flt_lock); 2628 2629 result = (*filter->filt_input)(filter->filt_cookie, 2630 ifp, protocol_family, m_p, frame_header_p); 2631 2632 lck_mtx_lock_spin(&ifp->if_flt_lock); 2633 if (result != 0) { 2634 /* we're done with the filter list */ 2635 if_flt_monitor_unbusy(ifp); 2636 lck_mtx_unlock(&ifp->if_flt_lock); 2637 return (result); 2638 } 2639 } 2640 } 2641 /* we're done with the filter list */ 2642 if_flt_monitor_unbusy(ifp); 2643 lck_mtx_unlock(&ifp->if_flt_lock); 2644 2645 /* 2646 * Strip away M_PROTO1 bit prior to sending packet up the stack as 2647 * it is meant to be local to a subsystem -- if_bridge for M_PROTO1 2648 */ 2649 if (*m_p != NULL) 2650 (*m_p)->m_flags &= ~M_PROTO1; 2651 2652 return (0); 2653} 2654 2655static int 2656dlil_interface_filters_output(struct ifnet *ifp, struct mbuf **m_p, 2657 protocol_family_t protocol_family) 2658{ 2659 struct ifnet_filter *filter; 2660 2661 /* 2662 * Pass the outbound packet to the interface filters 2663 */ 2664 lck_mtx_lock_spin(&ifp->if_flt_lock); 2665 /* prevent filter list from changing in case we drop the lock */ 2666 if_flt_monitor_busy(ifp); 2667 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { 2668 int result; 2669 2670 if (!filter->filt_skip && filter->filt_output != NULL && 2671 (filter->filt_protocol == 0 || 2672 filter->filt_protocol == protocol_family)) { 2673 lck_mtx_unlock(&ifp->if_flt_lock); 2674 2675 result = filter->filt_output(filter->filt_cookie, ifp, 2676 protocol_family, m_p); 2677 2678 lck_mtx_lock_spin(&ifp->if_flt_lock); 2679 if (result != 0) { 2680 /* we're done with the filter list */ 2681 if_flt_monitor_unbusy(ifp); 2682 lck_mtx_unlock(&ifp->if_flt_lock); 2683 return (result); 2684 } 2685 } 2686 } 2687 /* we're done with the filter list */ 2688 if_flt_monitor_unbusy(ifp); 2689 lck_mtx_unlock(&ifp->if_flt_lock); 2690 2691 return (0); 2692} 2693 2694static void 2695dlil_ifproto_input(struct if_proto * ifproto, mbuf_t m) 2696{ 2697 int error; 2698 2699 if (ifproto->proto_kpi == kProtoKPI_v1) { 2700 /* Version 1 protocols get one packet at a time */ 2701 while (m != NULL) { 2702 char * frame_header; 2703 mbuf_t next_packet; 2704 2705 next_packet = m->m_nextpkt; 2706 m->m_nextpkt = NULL; 2707 frame_header = m->m_pkthdr.header; 2708 m->m_pkthdr.header = NULL; 2709 error = (*ifproto->kpi.v1.input)(ifproto->ifp, 2710 ifproto->protocol_family, m, frame_header); 2711 if (error != 0 && error != EJUSTRETURN) 2712 m_freem(m); 2713 m = next_packet; 2714 } 2715 } else if (ifproto->proto_kpi == kProtoKPI_v2) { 2716 /* Version 2 protocols support packet lists */ 2717 error = (*ifproto->kpi.v2.input)(ifproto->ifp, 2718 ifproto->protocol_family, m); 2719 if (error != 0 && error != EJUSTRETURN) 2720 m_freem_list(m); 2721 } 2722 return; 2723} 2724 2725static void 2726dlil_input_stats_add(const struct ifnet_stat_increment_param *s, 2727 struct dlil_threading_info *inp, boolean_t poll) 2728{ 2729 struct ifnet_stat_increment_param *d = &inp->stats; 2730 2731 if (s->packets_in != 0) 2732 d->packets_in += s->packets_in; 2733 if (s->bytes_in != 0) 2734 d->bytes_in += s->bytes_in; 2735 if (s->errors_in != 0) 2736 d->errors_in += s->errors_in; 2737 2738 if (s->packets_out != 0) 2739 d->packets_out += s->packets_out; 2740 if (s->bytes_out != 0) 2741 d->bytes_out += s->bytes_out; 2742 if (s->errors_out != 0) 2743 d->errors_out += s->errors_out; 2744 2745 if (s->collisions != 0) 2746 d->collisions += s->collisions; 2747 if (s->dropped != 0) 2748 d->dropped += s->dropped; 2749 2750 if (poll) 2751 PKTCNTR_ADD(&inp->tstats, s->packets_in, s->bytes_in); 2752} 2753 2754static void 2755dlil_input_stats_sync(struct ifnet *ifp, struct dlil_threading_info *inp) 2756{ 2757 struct ifnet_stat_increment_param *s = &inp->stats; 2758 2759 /* 2760 * Use of atomic operations is unavoidable here because 2761 * these stats may also be incremented elsewhere via KPIs. 2762 */ 2763 if (s->packets_in != 0) { 2764 atomic_add_64(&ifp->if_data.ifi_ipackets, s->packets_in); 2765 s->packets_in = 0; 2766 } 2767 if (s->bytes_in != 0) { 2768 atomic_add_64(&ifp->if_data.ifi_ibytes, s->bytes_in); 2769 s->bytes_in = 0; 2770 } 2771 if (s->errors_in != 0) { 2772 atomic_add_64(&ifp->if_data.ifi_ierrors, s->errors_in); 2773 s->errors_in = 0; 2774 } 2775 2776 if (s->packets_out != 0) { 2777 atomic_add_64(&ifp->if_data.ifi_opackets, s->packets_out); 2778 s->packets_out = 0; 2779 } 2780 if (s->bytes_out != 0) { 2781 atomic_add_64(&ifp->if_data.ifi_obytes, s->bytes_out); 2782 s->bytes_out = 0; 2783 } 2784 if (s->errors_out != 0) { 2785 atomic_add_64(&ifp->if_data.ifi_oerrors, s->errors_out); 2786 s->errors_out = 0; 2787 } 2788 2789 if (s->collisions != 0) { 2790 atomic_add_64(&ifp->if_data.ifi_collisions, s->collisions); 2791 s->collisions = 0; 2792 } 2793 if (s->dropped != 0) { 2794 atomic_add_64(&ifp->if_data.ifi_iqdrops, s->dropped); 2795 s->dropped = 0; 2796 } 2797 2798 /* 2799 * No need for atomic operations as they are modified here 2800 * only from within the DLIL input thread context. 2801 */ 2802 if (inp->tstats.packets != 0) { 2803 inp->pstats.ifi_poll_packets += inp->tstats.packets; 2804 inp->tstats.packets = 0; 2805 } 2806 if (inp->tstats.bytes != 0) { 2807 inp->pstats.ifi_poll_bytes += inp->tstats.bytes; 2808 inp->tstats.bytes = 0; 2809 } 2810} 2811 2812__private_extern__ void 2813dlil_input_packet_list(struct ifnet *ifp, struct mbuf *m) 2814{ 2815 return (dlil_input_packet_list_common(ifp, m, 0, 2816 IFNET_MODEL_INPUT_POLL_OFF, FALSE)); 2817} 2818 2819__private_extern__ void 2820dlil_input_packet_list_extended(struct ifnet *ifp, struct mbuf *m, 2821 u_int32_t cnt, ifnet_model_t mode) 2822{ 2823 return (dlil_input_packet_list_common(ifp, m, cnt, mode, TRUE)); 2824} 2825 2826static void 2827dlil_input_packet_list_common(struct ifnet *ifp_param, struct mbuf *m, 2828 u_int32_t cnt, ifnet_model_t mode, boolean_t ext) 2829{ 2830 int error = 0; 2831 protocol_family_t protocol_family; 2832 mbuf_t next_packet; 2833 ifnet_t ifp = ifp_param; 2834 char * frame_header; 2835 struct if_proto * last_ifproto = NULL; 2836 mbuf_t pkt_first = NULL; 2837 mbuf_t * pkt_next = NULL; 2838 u_int32_t poll_thresh = 0, poll_ival = 0; 2839 2840 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_START,0,0,0,0,0); 2841 2842 if (ext && mode == IFNET_MODEL_INPUT_POLL_ON && cnt > 1 && 2843 (poll_ival = if_rxpoll_interval_pkts) > 0) 2844 poll_thresh = cnt; 2845 2846 while (m != NULL) { 2847 struct if_proto *ifproto = NULL; 2848 int iorefcnt = 0; 2849 2850 if (ifp_param == NULL) 2851 ifp = m->m_pkthdr.rcvif; 2852 2853 if ((ifp->if_eflags & IFEF_RXPOLL) && poll_thresh != 0 && 2854 poll_ival > 0 && (--poll_thresh % poll_ival) == 0) 2855 ifnet_poll(ifp); 2856 2857 /* Check if this mbuf looks valid */ 2858 MBUF_INPUT_CHECK(m, ifp); 2859 2860 next_packet = m->m_nextpkt; 2861 m->m_nextpkt = NULL; 2862 frame_header = m->m_pkthdr.header; 2863 m->m_pkthdr.header = NULL; 2864 2865 /* 2866 * Get an IO reference count if the interface is not 2867 * loopback (lo0) and it is attached; lo0 never goes 2868 * away, so optimize for that. 2869 */ 2870 if (ifp != lo_ifp) { 2871 if (!ifnet_is_attached(ifp, 1)) { 2872 m_freem(m); 2873 goto next; 2874 } 2875 iorefcnt = 1; 2876 } 2877 2878 ifp_inc_traffic_class_in(ifp, m); 2879 2880 /* find which protocol family this packet is for */ 2881 ifnet_lock_shared(ifp); 2882 error = (*ifp->if_demux)(ifp, m, frame_header, 2883 &protocol_family); 2884 ifnet_lock_done(ifp); 2885 if (error != 0) { 2886 if (error == EJUSTRETURN) 2887 goto next; 2888 protocol_family = 0; 2889 } 2890 2891#if CONFIG_EMBEDDED 2892 iptap_ipf_input(ifp, protocol_family, m, frame_header); 2893#endif /* CONFIG_EMBEDDED */ 2894 2895 if (m->m_flags & (M_BCAST|M_MCAST)) 2896 atomic_add_64(&ifp->if_imcasts, 1); 2897 2898 /* run interface filters, exclude VLAN packets PR-3586856 */ 2899 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) { 2900 error = dlil_interface_filters_input(ifp, &m, 2901 &frame_header, protocol_family); 2902 if (error != 0) { 2903 if (error != EJUSTRETURN) 2904 m_freem(m); 2905 goto next; 2906 } 2907 } 2908 if (error != 0 || ((m->m_flags & M_PROMISC) != 0) ) { 2909 m_freem(m); 2910 goto next; 2911 } 2912 2913 /* Lookup the protocol attachment to this interface */ 2914 if (protocol_family == 0) { 2915 ifproto = NULL; 2916 } else if (last_ifproto != NULL && last_ifproto->ifp == ifp && 2917 (last_ifproto->protocol_family == protocol_family)) { 2918 VERIFY(ifproto == NULL); 2919 ifproto = last_ifproto; 2920 if_proto_ref(last_ifproto); 2921 } else { 2922 VERIFY(ifproto == NULL); 2923 ifnet_lock_shared(ifp); 2924 /* callee holds a proto refcnt upon success */ 2925 ifproto = find_attached_proto(ifp, protocol_family); 2926 ifnet_lock_done(ifp); 2927 } 2928 if (ifproto == NULL) { 2929 /* no protocol for this packet, discard */ 2930 m_freem(m); 2931 goto next; 2932 } 2933 if (ifproto != last_ifproto) { 2934 if (last_ifproto != NULL) { 2935 /* pass up the list for the previous protocol */ 2936 dlil_ifproto_input(last_ifproto, pkt_first); 2937 pkt_first = NULL; 2938 if_proto_free(last_ifproto); 2939 } 2940 last_ifproto = ifproto; 2941 if_proto_ref(ifproto); 2942 } 2943 /* extend the list */ 2944 m->m_pkthdr.header = frame_header; 2945 if (pkt_first == NULL) { 2946 pkt_first = m; 2947 } else { 2948 *pkt_next = m; 2949 } 2950 pkt_next = &m->m_nextpkt; 2951 2952next: 2953 if (next_packet == NULL && last_ifproto != NULL) { 2954 /* pass up the last list of packets */ 2955 dlil_ifproto_input(last_ifproto, pkt_first); 2956 if_proto_free(last_ifproto); 2957 last_ifproto = NULL; 2958 } 2959 if (ifproto != NULL) { 2960 if_proto_free(ifproto); 2961 ifproto = NULL; 2962 } 2963 2964 m = next_packet; 2965 2966 /* update the driver's multicast filter, if needed */ 2967 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) 2968 ifp->if_updatemcasts = 0; 2969 if (iorefcnt == 1) 2970 ifnet_decr_iorefcnt(ifp); 2971 } 2972 2973 KERNEL_DEBUG(DBG_FNC_DLIL_INPUT | DBG_FUNC_END,0,0,0,0,0); 2974} 2975 2976errno_t 2977if_mcasts_update(struct ifnet *ifp) 2978{ 2979 errno_t err; 2980 2981 err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL); 2982 if (err == EAFNOSUPPORT) 2983 err = 0; 2984 printf("%s%d: %s %d suspended link-layer multicast membership(s) " 2985 "(err=%d)\n", ifp->if_name, ifp->if_unit, 2986 (err == 0 ? "successfully restored" : "failed to restore"), 2987 ifp->if_updatemcasts, err); 2988 2989 /* just return success */ 2990 return (0); 2991} 2992 2993static int 2994dlil_event_internal(struct ifnet *ifp, struct kev_msg *event) 2995{ 2996 struct ifnet_filter *filter; 2997 2998 /* Get an io ref count if the interface is attached */ 2999 if (!ifnet_is_attached(ifp, 1)) 3000 goto done; 3001 3002 /* 3003 * Pass the event to the interface filters 3004 */ 3005 lck_mtx_lock_spin(&ifp->if_flt_lock); 3006 /* prevent filter list from changing in case we drop the lock */ 3007 if_flt_monitor_busy(ifp); 3008 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { 3009 if (filter->filt_event != NULL) { 3010 lck_mtx_unlock(&ifp->if_flt_lock); 3011 3012 filter->filt_event(filter->filt_cookie, ifp, 3013 filter->filt_protocol, event); 3014 3015 lck_mtx_lock_spin(&ifp->if_flt_lock); 3016 } 3017 } 3018 /* we're done with the filter list */ 3019 if_flt_monitor_unbusy(ifp); 3020 lck_mtx_unlock(&ifp->if_flt_lock); 3021 3022 ifnet_lock_shared(ifp); 3023 if (ifp->if_proto_hash != NULL) { 3024 int i; 3025 3026 for (i = 0; i < PROTO_HASH_SLOTS; i++) { 3027 struct if_proto *proto; 3028 3029 SLIST_FOREACH(proto, &ifp->if_proto_hash[i], 3030 next_hash) { 3031 proto_media_event eventp = 3032 (proto->proto_kpi == kProtoKPI_v1 ? 3033 proto->kpi.v1.event : 3034 proto->kpi.v2.event); 3035 3036 if (eventp != NULL) { 3037 if_proto_ref(proto); 3038 ifnet_lock_done(ifp); 3039 3040 eventp(ifp, proto->protocol_family, 3041 event); 3042 3043 ifnet_lock_shared(ifp); 3044 if_proto_free(proto); 3045 } 3046 } 3047 } 3048 } 3049 ifnet_lock_done(ifp); 3050 3051 /* Pass the event to the interface */ 3052 if (ifp->if_event != NULL) 3053 ifp->if_event(ifp, event); 3054 3055 /* Release the io ref count */ 3056 ifnet_decr_iorefcnt(ifp); 3057 3058done: 3059 return (kev_post_msg(event)); 3060} 3061 3062errno_t 3063ifnet_event(ifnet_t ifp, struct kern_event_msg *event) 3064{ 3065 struct kev_msg kev_msg; 3066 int result = 0; 3067 3068 if (ifp == NULL || event == NULL) 3069 return (EINVAL); 3070 3071 bzero(&kev_msg, sizeof (kev_msg)); 3072 kev_msg.vendor_code = event->vendor_code; 3073 kev_msg.kev_class = event->kev_class; 3074 kev_msg.kev_subclass = event->kev_subclass; 3075 kev_msg.event_code = event->event_code; 3076 kev_msg.dv[0].data_ptr = &event->event_data[0]; 3077 kev_msg.dv[0].data_length = event->total_size - KEV_MSG_HEADER_SIZE; 3078 kev_msg.dv[1].data_length = 0; 3079 3080 result = dlil_event_internal(ifp, &kev_msg); 3081 3082 return (result); 3083} 3084 3085#if CONFIG_MACF_NET 3086#include <netinet/ip6.h> 3087#include <netinet/ip.h> 3088static int 3089dlil_get_socket_type(struct mbuf **mp, int family, int raw) 3090{ 3091 struct mbuf *m; 3092 struct ip *ip; 3093 struct ip6_hdr *ip6; 3094 int type = SOCK_RAW; 3095 3096 if (!raw) { 3097 switch (family) { 3098 case PF_INET: 3099 m = m_pullup(*mp, sizeof(struct ip)); 3100 if (m == NULL) 3101 break; 3102 *mp = m; 3103 ip = mtod(m, struct ip *); 3104 if (ip->ip_p == IPPROTO_TCP) 3105 type = SOCK_STREAM; 3106 else if (ip->ip_p == IPPROTO_UDP) 3107 type = SOCK_DGRAM; 3108 break; 3109 case PF_INET6: 3110 m = m_pullup(*mp, sizeof(struct ip6_hdr)); 3111 if (m == NULL) 3112 break; 3113 *mp = m; 3114 ip6 = mtod(m, struct ip6_hdr *); 3115 if (ip6->ip6_nxt == IPPROTO_TCP) 3116 type = SOCK_STREAM; 3117 else if (ip6->ip6_nxt == IPPROTO_UDP) 3118 type = SOCK_DGRAM; 3119 break; 3120 } 3121 } 3122 3123 return (type); 3124} 3125#endif 3126 3127/* 3128 * This is mostly called from the context of the DLIL input thread; 3129 * because of that there is no need for atomic operations. 3130 */ 3131static __inline void 3132ifp_inc_traffic_class_in(struct ifnet *ifp, struct mbuf *m) 3133{ 3134 if (!(m->m_flags & M_PKTHDR)) 3135 return; 3136 3137 switch (m_get_traffic_class(m)) { 3138 case MBUF_TC_BE: 3139 ifp->if_tc.ifi_ibepackets++; 3140 ifp->if_tc.ifi_ibebytes += m->m_pkthdr.len; 3141 break; 3142 case MBUF_TC_BK: 3143 ifp->if_tc.ifi_ibkpackets++; 3144 ifp->if_tc.ifi_ibkbytes += m->m_pkthdr.len; 3145 break; 3146 case MBUF_TC_VI: 3147 ifp->if_tc.ifi_ivipackets++; 3148 ifp->if_tc.ifi_ivibytes += m->m_pkthdr.len; 3149 break; 3150 case MBUF_TC_VO: 3151 ifp->if_tc.ifi_ivopackets++; 3152 ifp->if_tc.ifi_ivobytes += m->m_pkthdr.len; 3153 break; 3154 default: 3155 break; 3156 } 3157 3158 if (mbuf_is_traffic_class_privileged(m)) { 3159 ifp->if_tc.ifi_ipvpackets++; 3160 ifp->if_tc.ifi_ipvbytes += m->m_pkthdr.len; 3161 } 3162} 3163 3164/* 3165 * This is called from DLIL output, hence multiple threads could end 3166 * up modifying the statistics. We trade off acccuracy for performance 3167 * by not using atomic operations here. 3168 */ 3169static __inline void 3170ifp_inc_traffic_class_out(struct ifnet *ifp, struct mbuf *m) 3171{ 3172 if (!(m->m_flags & M_PKTHDR)) 3173 return; 3174 3175 switch (m_get_traffic_class(m)) { 3176 case MBUF_TC_BE: 3177 ifp->if_tc.ifi_obepackets++; 3178 ifp->if_tc.ifi_obebytes += m->m_pkthdr.len; 3179 break; 3180 case MBUF_TC_BK: 3181 ifp->if_tc.ifi_obkpackets++; 3182 ifp->if_tc.ifi_obkbytes += m->m_pkthdr.len; 3183 break; 3184 case MBUF_TC_VI: 3185 ifp->if_tc.ifi_ovipackets++; 3186 ifp->if_tc.ifi_ovibytes += m->m_pkthdr.len; 3187 break; 3188 case MBUF_TC_VO: 3189 ifp->if_tc.ifi_ovopackets++; 3190 ifp->if_tc.ifi_ovobytes += m->m_pkthdr.len; 3191 break; 3192 default: 3193 break; 3194 } 3195 3196 if (mbuf_is_traffic_class_privileged(m)) { 3197 ifp->if_tc.ifi_opvpackets++; 3198 ifp->if_tc.ifi_opvbytes += m->m_pkthdr.len; 3199 } 3200} 3201 3202/* 3203 * dlil_output 3204 * 3205 * Caller should have a lock on the protocol domain if the protocol 3206 * doesn't support finer grained locking. In most cases, the lock 3207 * will be held from the socket layer and won't be released until 3208 * we return back to the socket layer. 3209 * 3210 * This does mean that we must take a protocol lock before we take 3211 * an interface lock if we're going to take both. This makes sense 3212 * because a protocol is likely to interact with an ifp while it 3213 * is under the protocol lock. 3214 * 3215 * An advisory code will be returned if adv is not null. This 3216 * can be used to provide feedback about interface queues to the 3217 * application. 3218 */ 3219errno_t 3220dlil_output(ifnet_t ifp, protocol_family_t proto_family, mbuf_t packetlist, 3221 void *route, const struct sockaddr *dest, int raw, struct flowadv *adv) 3222{ 3223 char *frame_type = NULL; 3224 char *dst_linkaddr = NULL; 3225 int retval = 0; 3226 char frame_type_buffer[MAX_FRAME_TYPE_SIZE * 4]; 3227 char dst_linkaddr_buffer[MAX_LINKADDR * 4]; 3228 struct if_proto *proto = NULL; 3229 mbuf_t m; 3230 mbuf_t send_head = NULL; 3231 mbuf_t *send_tail = &send_head; 3232 int iorefcnt = 0; 3233#if CONFIG_EMBEDDED 3234 u_int32_t pre = 0, post = 0; 3235#endif /* CONFIG_EMBEDDED */ 3236 3237 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_START,0,0,0,0,0); 3238 3239 /* Get an io refcnt if the interface is attached to prevent ifnet_detach 3240 * from happening while this operation is in progress */ 3241 if (!ifnet_is_attached(ifp, 1)) { 3242 retval = ENXIO; 3243 goto cleanup; 3244 } 3245 iorefcnt = 1; 3246 3247 /* update the driver's multicast filter, if needed */ 3248 if (ifp->if_updatemcasts > 0 && if_mcasts_update(ifp) == 0) 3249 ifp->if_updatemcasts = 0; 3250 3251 frame_type = frame_type_buffer; 3252 dst_linkaddr = dst_linkaddr_buffer; 3253 3254 if (raw == 0) { 3255 ifnet_lock_shared(ifp); 3256 /* callee holds a proto refcnt upon success */ 3257 proto = find_attached_proto(ifp, proto_family); 3258 if (proto == NULL) { 3259 ifnet_lock_done(ifp); 3260 retval = ENXIO; 3261 goto cleanup; 3262 } 3263 ifnet_lock_done(ifp); 3264 } 3265 3266preout_again: 3267 if (packetlist == NULL) 3268 goto cleanup; 3269 3270 m = packetlist; 3271 packetlist = packetlist->m_nextpkt; 3272 m->m_nextpkt = NULL; 3273 3274 if (raw == 0) { 3275 proto_media_preout preoutp = (proto->proto_kpi == kProtoKPI_v1 ? 3276 proto->kpi.v1.pre_output : proto->kpi.v2.pre_output); 3277 retval = 0; 3278 if (preoutp != NULL) { 3279 retval = preoutp(ifp, proto_family, &m, dest, route, 3280 frame_type, dst_linkaddr); 3281 3282 if (retval != 0) { 3283 if (retval == EJUSTRETURN) 3284 goto preout_again; 3285 m_freem(m); 3286 goto cleanup; 3287 } 3288 } 3289 } 3290 3291#if CONFIG_MACF_NET 3292 retval = mac_ifnet_check_transmit(ifp, m, proto_family, 3293 dlil_get_socket_type(&m, proto_family, raw)); 3294 if (retval) { 3295 m_freem(m); 3296 goto cleanup; 3297 } 3298#endif 3299 3300 do { 3301#if CONFIG_DTRACE 3302 if (!raw && proto_family == PF_INET) { 3303 struct ip *ip = mtod(m, struct ip*); 3304 DTRACE_IP6(send, struct mbuf *, m, struct inpcb *, NULL, 3305 struct ip *, ip, struct ifnet *, ifp, 3306 struct ip *, ip, struct ip6_hdr *, NULL); 3307 3308 } else if (!raw && proto_family == PF_INET6) { 3309 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr*); 3310 DTRACE_IP6(send, struct mbuf*, m, struct inpcb *, NULL, 3311 struct ip6_hdr *, ip6, struct ifnet*, ifp, 3312 struct ip*, NULL, struct ip6_hdr *, ip6); 3313 } 3314#endif /* CONFIG_DTRACE */ 3315 3316 if (raw == 0 && ifp->if_framer) { 3317 int rcvif_set = 0; 3318 3319 /* 3320 * If this is a broadcast packet that needs to be 3321 * looped back into the system, set the inbound ifp 3322 * to that of the outbound ifp. This will allow 3323 * us to determine that it is a legitimate packet 3324 * for the system. Only set the ifp if it's not 3325 * already set, just to be safe. 3326 */ 3327 if ((m->m_flags & (M_BCAST | M_LOOP)) && 3328 m->m_pkthdr.rcvif == NULL) { 3329 m->m_pkthdr.rcvif = ifp; 3330 rcvif_set = 1; 3331 } 3332 3333 retval = ifp->if_framer(ifp, &m, dest, dst_linkaddr, 3334 frame_type 3335#if CONFIG_EMBEDDED 3336 , 3337 &pre, &post 3338#endif /* CONFIG_EMBEDDED */ 3339 ); 3340 if (retval) { 3341 if (retval != EJUSTRETURN) 3342 m_freem(m); 3343 goto next; 3344 } 3345 3346 /* 3347 * Clear the ifp if it was set above, and to be 3348 * safe, only if it is still the same as the 3349 * outbound ifp we have in context. If it was 3350 * looped back, then a copy of it was sent to the 3351 * loopback interface with the rcvif set, and we 3352 * are clearing the one that will go down to the 3353 * layer below. 3354 */ 3355 if (rcvif_set && m->m_pkthdr.rcvif == ifp) 3356 m->m_pkthdr.rcvif = NULL; 3357 } 3358 3359 /* 3360 * Let interface filters (if any) do their thing ... 3361 */ 3362 /* Do not pass VLAN tagged packets to filters PR-3586856 */ 3363 if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) == 0) { 3364 retval = dlil_interface_filters_output(ifp, 3365 &m, proto_family); 3366 if (retval != 0) { 3367 if (retval != EJUSTRETURN) 3368 m_freem(m); 3369 goto next; 3370 } 3371 } 3372 /* 3373 * Strip away M_PROTO1 bit prior to sending packet to the driver 3374 * as this field may be used by the driver 3375 */ 3376 m->m_flags &= ~M_PROTO1; 3377 3378 /* 3379 * If the underlying interface is not capable of handling a 3380 * packet whose data portion spans across physically disjoint 3381 * pages, we need to "normalize" the packet so that we pass 3382 * down a chain of mbufs where each mbuf points to a span that 3383 * resides in the system page boundary. If the packet does 3384 * not cross page(s), the following is a no-op. 3385 */ 3386 if (!(ifp->if_hwassist & IFNET_MULTIPAGES)) { 3387 if ((m = m_normalize(m)) == NULL) 3388 goto next; 3389 } 3390 3391 /* 3392 * If this is a TSO packet, make sure the interface still 3393 * advertise TSO capability. 3394 */ 3395 3396 if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV4) && 3397 !(ifp->if_hwassist & IFNET_TSO_IPV4)) { 3398 retval = EMSGSIZE; 3399 m_freem(m); 3400 goto cleanup; 3401 } 3402 3403 if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) && 3404 !(ifp->if_hwassist & IFNET_TSO_IPV6)) { 3405 retval = EMSGSIZE; 3406 m_freem(m); 3407 goto cleanup; 3408 } 3409 3410 /* 3411 * Finally, call the driver. 3412 */ 3413 if ((ifp->if_eflags & IFEF_SENDLIST) != 0) { 3414 *send_tail = m; 3415 send_tail = &m->m_nextpkt; 3416 } else { 3417#if CONFIG_EMBEDDED 3418 iptap_ipf_output(ifp, proto_family, (struct mbuf *)m, 3419 pre, post); 3420#endif /* CONFIG_EMBEDDED */ 3421 ifp_inc_traffic_class_out(ifp, m); 3422 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 3423 0,0,0,0,0); 3424 retval = (*ifp->if_output)(ifp, m); 3425 if (retval == EQFULL || retval == EQSUSPENDED) { 3426 if (adv != NULL && adv->code == FADV_SUCCESS) { 3427 adv->code = (retval == EQFULL ? 3428 FADV_FLOW_CONTROLLED : 3429 FADV_SUSPENDED); 3430 } 3431 retval = 0; 3432 } 3433 if (retval && dlil_verbose) { 3434 printf("%s: output error on %s%d retval = %d\n", 3435 __func__, ifp->if_name, ifp->if_unit, 3436 retval); 3437 } 3438 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 3439 0,0,0,0,0); 3440 } 3441 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); 3442 3443next: 3444 m = packetlist; 3445 if (m) { 3446 packetlist = packetlist->m_nextpkt; 3447 m->m_nextpkt = NULL; 3448 } 3449 } while (m); 3450 3451 if (send_head) { 3452#if CONFIG_EMBEDDED 3453 iptap_ipf_output(ifp, proto_family, (struct mbuf *)send_head, 3454 pre, post); 3455#endif /* CONFIG_EMBEDDED */ 3456 ifp_inc_traffic_class_out(ifp, send_head); 3457 3458 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_START, 0,0,0,0,0); 3459 retval = (*ifp->if_output)(ifp, send_head); 3460 if (retval == EQFULL || retval == EQSUSPENDED) { 3461 if (adv != NULL) { 3462 adv->code = (retval == EQFULL ? 3463 FADV_FLOW_CONTROLLED : FADV_SUSPENDED); 3464 } 3465 retval = 0; 3466 } 3467 if (retval && dlil_verbose) { 3468 printf("%s: output error on %s%d retval = %d\n", 3469 __func__, ifp->if_name, ifp->if_unit, retval); 3470 } 3471 KERNEL_DEBUG(DBG_FNC_DLIL_IFOUT | DBG_FUNC_END, 0,0,0,0,0); 3472 } 3473 3474 KERNEL_DEBUG(DBG_FNC_DLIL_OUTPUT | DBG_FUNC_END,0,0,0,0,0); 3475 3476cleanup: 3477 if (proto != NULL) 3478 if_proto_free(proto); 3479 if (packetlist) /* if any packets are left, clean up */ 3480 mbuf_freem_list(packetlist); 3481 if (retval == EJUSTRETURN) 3482 retval = 0; 3483 if (iorefcnt == 1) 3484 ifnet_decr_iorefcnt(ifp); 3485 3486 return (retval); 3487} 3488 3489errno_t 3490ifnet_ioctl(ifnet_t ifp, protocol_family_t proto_fam, u_long ioctl_code, 3491 void *ioctl_arg) 3492{ 3493 struct ifnet_filter *filter; 3494 int retval = EOPNOTSUPP; 3495 int result = 0; 3496 3497 if (ifp == NULL || ioctl_code == 0) 3498 return (EINVAL); 3499 3500 /* Get an io ref count if the interface is attached */ 3501 if (!ifnet_is_attached(ifp, 1)) 3502 return (EOPNOTSUPP); 3503 3504 /* Run the interface filters first. 3505 * We want to run all filters before calling the protocol, 3506 * interface family, or interface. 3507 */ 3508 lck_mtx_lock_spin(&ifp->if_flt_lock); 3509 /* prevent filter list from changing in case we drop the lock */ 3510 if_flt_monitor_busy(ifp); 3511 TAILQ_FOREACH(filter, &ifp->if_flt_head, filt_next) { 3512 if (filter->filt_ioctl != NULL && (filter->filt_protocol == 0 || 3513 filter->filt_protocol == proto_fam)) { 3514 lck_mtx_unlock(&ifp->if_flt_lock); 3515 3516 result = filter->filt_ioctl(filter->filt_cookie, ifp, 3517 proto_fam, ioctl_code, ioctl_arg); 3518 3519 lck_mtx_lock_spin(&ifp->if_flt_lock); 3520 3521 /* Only update retval if no one has handled the ioctl */ 3522 if (retval == EOPNOTSUPP || result == EJUSTRETURN) { 3523 if (result == ENOTSUP) 3524 result = EOPNOTSUPP; 3525 retval = result; 3526 if (retval != 0 && retval != EOPNOTSUPP) { 3527 /* we're done with the filter list */ 3528 if_flt_monitor_unbusy(ifp); 3529 lck_mtx_unlock(&ifp->if_flt_lock); 3530 goto cleanup; 3531 } 3532 } 3533 } 3534 } 3535 /* we're done with the filter list */ 3536 if_flt_monitor_unbusy(ifp); 3537 lck_mtx_unlock(&ifp->if_flt_lock); 3538 3539 /* Allow the protocol to handle the ioctl */ 3540 if (proto_fam != 0) { 3541 struct if_proto *proto; 3542 3543 /* callee holds a proto refcnt upon success */ 3544 ifnet_lock_shared(ifp); 3545 proto = find_attached_proto(ifp, proto_fam); 3546 ifnet_lock_done(ifp); 3547 if (proto != NULL) { 3548 proto_media_ioctl ioctlp = 3549 (proto->proto_kpi == kProtoKPI_v1 ? 3550 proto->kpi.v1.ioctl : proto->kpi.v2.ioctl); 3551 result = EOPNOTSUPP; 3552 if (ioctlp != NULL) 3553 result = ioctlp(ifp, proto_fam, ioctl_code, 3554 ioctl_arg); 3555 if_proto_free(proto); 3556 3557 /* Only update retval if no one has handled the ioctl */ 3558 if (retval == EOPNOTSUPP || result == EJUSTRETURN) { 3559 if (result == ENOTSUP) 3560 result = EOPNOTSUPP; 3561 retval = result; 3562 if (retval && retval != EOPNOTSUPP) 3563 goto cleanup; 3564 } 3565 } 3566 } 3567 3568 /* retval is either 0 or EOPNOTSUPP */ 3569 3570 /* 3571 * Let the interface handle this ioctl. 3572 * If it returns EOPNOTSUPP, ignore that, we may have 3573 * already handled this in the protocol or family. 3574 */ 3575 if (ifp->if_ioctl) 3576 result = (*ifp->if_ioctl)(ifp, ioctl_code, ioctl_arg); 3577 3578 /* Only update retval if no one has handled the ioctl */ 3579 if (retval == EOPNOTSUPP || result == EJUSTRETURN) { 3580 if (result == ENOTSUP) 3581 result = EOPNOTSUPP; 3582 retval = result; 3583 if (retval && retval != EOPNOTSUPP) { 3584 goto cleanup; 3585 } 3586 } 3587 3588cleanup: 3589 if (retval == EJUSTRETURN) 3590 retval = 0; 3591 3592 ifnet_decr_iorefcnt(ifp); 3593 3594 return (retval); 3595} 3596 3597__private_extern__ errno_t 3598dlil_set_bpf_tap(ifnet_t ifp, bpf_tap_mode mode, bpf_packet_func callback) 3599{ 3600 errno_t error = 0; 3601 3602 3603 if (ifp->if_set_bpf_tap) { 3604 /* Get an io reference on the interface if it is attached */ 3605 if (!ifnet_is_attached(ifp, 1)) 3606 return ENXIO; 3607 error = ifp->if_set_bpf_tap(ifp, mode, callback); 3608 ifnet_decr_iorefcnt(ifp); 3609 } 3610 return (error); 3611} 3612 3613errno_t 3614dlil_resolve_multi(struct ifnet *ifp, const struct sockaddr *proto_addr, 3615 struct sockaddr *ll_addr, size_t ll_len) 3616{ 3617 errno_t result = EOPNOTSUPP; 3618 struct if_proto *proto; 3619 const struct sockaddr *verify; 3620 proto_media_resolve_multi resolvep; 3621 3622 if (!ifnet_is_attached(ifp, 1)) 3623 return result; 3624 3625 bzero(ll_addr, ll_len); 3626 3627 /* Call the protocol first; callee holds a proto refcnt upon success */ 3628 ifnet_lock_shared(ifp); 3629 proto = find_attached_proto(ifp, proto_addr->sa_family); 3630 ifnet_lock_done(ifp); 3631 if (proto != NULL) { 3632 resolvep = (proto->proto_kpi == kProtoKPI_v1 ? 3633 proto->kpi.v1.resolve_multi : proto->kpi.v2.resolve_multi); 3634 if (resolvep != NULL) 3635 result = resolvep(ifp, proto_addr, 3636 (struct sockaddr_dl*)(void *)ll_addr, ll_len); 3637 if_proto_free(proto); 3638 } 3639 3640 /* Let the interface verify the multicast address */ 3641 if ((result == EOPNOTSUPP || result == 0) && ifp->if_check_multi) { 3642 if (result == 0) 3643 verify = ll_addr; 3644 else 3645 verify = proto_addr; 3646 result = ifp->if_check_multi(ifp, verify); 3647 } 3648 3649 ifnet_decr_iorefcnt(ifp); 3650 return (result); 3651} 3652 3653__private_extern__ errno_t 3654dlil_send_arp_internal(ifnet_t ifp, u_short arpop, 3655 const struct sockaddr_dl* sender_hw, const struct sockaddr* sender_proto, 3656 const struct sockaddr_dl* target_hw, const struct sockaddr* target_proto) 3657{ 3658 struct if_proto *proto; 3659 errno_t result = 0; 3660 3661 /* callee holds a proto refcnt upon success */ 3662 ifnet_lock_shared(ifp); 3663 proto = find_attached_proto(ifp, target_proto->sa_family); 3664 ifnet_lock_done(ifp); 3665 if (proto == NULL) { 3666 result = ENOTSUP; 3667 } else { 3668 proto_media_send_arp arpp; 3669 arpp = (proto->proto_kpi == kProtoKPI_v1 ? 3670 proto->kpi.v1.send_arp : proto->kpi.v2.send_arp); 3671 if (arpp == NULL) 3672 result = ENOTSUP; 3673 else 3674 result = arpp(ifp, arpop, sender_hw, sender_proto, 3675 target_hw, target_proto); 3676 if_proto_free(proto); 3677 } 3678 3679 return (result); 3680} 3681 3682__private_extern__ errno_t 3683net_thread_check_lock(u_int32_t flag) 3684{ 3685 struct uthread *uth = get_bsdthread_info(current_thread()); 3686 return ((uth->uu_network_lock_held & flag) == flag); 3687} 3688 3689__private_extern__ void 3690net_thread_set_lock(u_int32_t flag) 3691{ 3692 struct uthread *uth = get_bsdthread_info(current_thread()); 3693 3694 VERIFY((uth->uu_network_lock_held & flag) != flag); 3695 uth->uu_network_lock_held |= flag; 3696} 3697 3698__private_extern__ void 3699net_thread_unset_lock(u_int32_t flag) 3700{ 3701 struct uthread *uth = get_bsdthread_info(current_thread()); 3702 3703 VERIFY((uth->uu_network_lock_held & flag) == flag); 3704 uth->uu_network_lock_held &= (~flag); 3705} 3706 3707static __inline__ int 3708_is_announcement(const struct sockaddr_in * sender_sin, 3709 const struct sockaddr_in * target_sin) 3710{ 3711 if (sender_sin == NULL) { 3712 return (FALSE); 3713 } 3714 return (sender_sin->sin_addr.s_addr == target_sin->sin_addr.s_addr); 3715} 3716 3717__private_extern__ errno_t 3718dlil_send_arp(ifnet_t ifp, u_short arpop, const struct sockaddr_dl* sender_hw, 3719 const struct sockaddr* sender_proto, const struct sockaddr_dl* target_hw, 3720 const struct sockaddr* target_proto0, u_int32_t rtflags) 3721{ 3722 errno_t result = 0; 3723 const struct sockaddr_in * sender_sin; 3724 const struct sockaddr_in * target_sin; 3725 struct sockaddr_inarp target_proto_sinarp; 3726 struct sockaddr *target_proto = (void *)(uintptr_t)target_proto0; 3727 3728 if (target_proto == NULL || (sender_proto != NULL && 3729 sender_proto->sa_family != target_proto->sa_family)) 3730 return (EINVAL); 3731 3732 /* 3733 * If the target is a (default) router, provide that 3734 * information to the send_arp callback routine. 3735 */ 3736 if (rtflags & RTF_ROUTER) { 3737 bcopy(target_proto, &target_proto_sinarp, 3738 sizeof (struct sockaddr_in)); 3739 target_proto_sinarp.sin_other |= SIN_ROUTER; 3740 target_proto = (struct sockaddr *)&target_proto_sinarp; 3741 } 3742 3743 /* 3744 * If this is an ARP request and the target IP is IPv4LL, 3745 * send the request on all interfaces. The exception is 3746 * an announcement, which must only appear on the specific 3747 * interface. 3748 */ 3749 sender_sin = (struct sockaddr_in *)(void *)(uintptr_t)sender_proto; 3750 target_sin = (struct sockaddr_in *)(void *)(uintptr_t)target_proto; 3751 if (target_proto->sa_family == AF_INET && 3752 IN_LINKLOCAL(ntohl(target_sin->sin_addr.s_addr)) && 3753 ipv4_ll_arp_aware != 0 && arpop == ARPOP_REQUEST && 3754 !_is_announcement(target_sin, sender_sin)) { 3755 ifnet_t *ifp_list; 3756 u_int32_t count; 3757 u_int32_t ifp_on; 3758 3759 result = ENOTSUP; 3760 3761 if (ifnet_list_get(IFNET_FAMILY_ANY, &ifp_list, &count) == 0) { 3762 for (ifp_on = 0; ifp_on < count; ifp_on++) { 3763 errno_t new_result; 3764 ifaddr_t source_hw = NULL; 3765 ifaddr_t source_ip = NULL; 3766 struct sockaddr_in source_ip_copy; 3767 struct ifnet *cur_ifp = ifp_list[ifp_on]; 3768 3769 /* 3770 * Only arp on interfaces marked for IPv4LL 3771 * ARPing. This may mean that we don't ARP on 3772 * the interface the subnet route points to. 3773 */ 3774 if (!(cur_ifp->if_eflags & IFEF_ARPLL)) 3775 continue; 3776 3777 /* Find the source IP address */ 3778 ifnet_lock_shared(cur_ifp); 3779 source_hw = cur_ifp->if_lladdr; 3780 TAILQ_FOREACH(source_ip, &cur_ifp->if_addrhead, 3781 ifa_link) { 3782 IFA_LOCK(source_ip); 3783 if (source_ip->ifa_addr != NULL && 3784 source_ip->ifa_addr->sa_family == 3785 AF_INET) { 3786 /* Copy the source IP address */ 3787 source_ip_copy = 3788 *(struct sockaddr_in *) 3789 (void *)source_ip->ifa_addr; 3790 IFA_UNLOCK(source_ip); 3791 break; 3792 } 3793 IFA_UNLOCK(source_ip); 3794 } 3795 3796 /* No IP Source, don't arp */ 3797 if (source_ip == NULL) { 3798 ifnet_lock_done(cur_ifp); 3799 continue; 3800 } 3801 3802 IFA_ADDREF(source_hw); 3803 ifnet_lock_done(cur_ifp); 3804 3805 /* Send the ARP */ 3806 new_result = dlil_send_arp_internal(cur_ifp, 3807 arpop, (struct sockaddr_dl *)(void *) 3808 source_hw->ifa_addr, 3809 (struct sockaddr *)&source_ip_copy, NULL, 3810 target_proto); 3811 3812 IFA_REMREF(source_hw); 3813 if (result == ENOTSUP) { 3814 result = new_result; 3815 } 3816 } 3817 ifnet_list_free(ifp_list); 3818 } 3819 } else { 3820 result = dlil_send_arp_internal(ifp, arpop, sender_hw, 3821 sender_proto, target_hw, target_proto); 3822 } 3823 3824 return (result); 3825} 3826 3827/* 3828 * Caller must hold ifnet head lock. 3829 */ 3830static int 3831ifnet_lookup(struct ifnet *ifp) 3832{ 3833 struct ifnet *_ifp; 3834 3835 lck_rw_assert(&ifnet_head_lock, LCK_RW_ASSERT_HELD); 3836 TAILQ_FOREACH(_ifp, &ifnet_head, if_link) { 3837 if (_ifp == ifp) 3838 break; 3839 } 3840 return (_ifp != NULL); 3841} 3842/* 3843 * Caller has to pass a non-zero refio argument to get a 3844 * IO reference count. This will prevent ifnet_detach from 3845 * being called when there are outstanding io reference counts. 3846 */ 3847int 3848ifnet_is_attached(struct ifnet *ifp, int refio) 3849{ 3850 int ret; 3851 3852 lck_mtx_lock_spin(&ifp->if_ref_lock); 3853 if ((ret = ((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) == 3854 IFRF_ATTACHED))) { 3855 if (refio > 0) 3856 ifp->if_refio++; 3857 } 3858 lck_mtx_unlock(&ifp->if_ref_lock); 3859 3860 return (ret); 3861} 3862 3863void 3864ifnet_decr_iorefcnt(struct ifnet *ifp) 3865{ 3866 lck_mtx_lock_spin(&ifp->if_ref_lock); 3867 VERIFY(ifp->if_refio > 0); 3868 VERIFY((ifp->if_refflags & (IFRF_ATTACHED | IFRF_DETACHING)) != 0); 3869 ifp->if_refio--; 3870 3871 /* if there are no more outstanding io references, wakeup the 3872 * ifnet_detach thread if detaching flag is set. 3873 */ 3874 if (ifp->if_refio == 0 && 3875 (ifp->if_refflags & IFRF_DETACHING) != 0) { 3876 wakeup(&(ifp->if_refio)); 3877 } 3878 lck_mtx_unlock(&ifp->if_ref_lock); 3879} 3880 3881static void 3882dlil_if_trace(struct dlil_ifnet *dl_if, int refhold) 3883{ 3884 struct dlil_ifnet_dbg *dl_if_dbg = (struct dlil_ifnet_dbg *)dl_if; 3885 ctrace_t *tr; 3886 u_int32_t idx; 3887 u_int16_t *cnt; 3888 3889 if (!(dl_if->dl_if_flags & DLIF_DEBUG)) { 3890 panic("%s: dl_if %p has no debug structure", __func__, dl_if); 3891 /* NOTREACHED */ 3892 } 3893 3894 if (refhold) { 3895 cnt = &dl_if_dbg->dldbg_if_refhold_cnt; 3896 tr = dl_if_dbg->dldbg_if_refhold; 3897 } else { 3898 cnt = &dl_if_dbg->dldbg_if_refrele_cnt; 3899 tr = dl_if_dbg->dldbg_if_refrele; 3900 } 3901 3902 idx = atomic_add_16_ov(cnt, 1) % IF_REF_TRACE_HIST_SIZE; 3903 ctrace_record(&tr[idx]); 3904} 3905 3906errno_t 3907dlil_if_ref(struct ifnet *ifp) 3908{ 3909 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp; 3910 3911 if (dl_if == NULL) 3912 return (EINVAL); 3913 3914 lck_mtx_lock_spin(&dl_if->dl_if_lock); 3915 ++dl_if->dl_if_refcnt; 3916 if (dl_if->dl_if_refcnt == 0) { 3917 panic("%s: wraparound refcnt for ifp=%p", __func__, ifp); 3918 /* NOTREACHED */ 3919 } 3920 if (dl_if->dl_if_trace != NULL) 3921 (*dl_if->dl_if_trace)(dl_if, TRUE); 3922 lck_mtx_unlock(&dl_if->dl_if_lock); 3923 3924 return (0); 3925} 3926 3927errno_t 3928dlil_if_free(struct ifnet *ifp) 3929{ 3930 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp; 3931 3932 if (dl_if == NULL) 3933 return (EINVAL); 3934 3935 lck_mtx_lock_spin(&dl_if->dl_if_lock); 3936 if (dl_if->dl_if_refcnt == 0) { 3937 panic("%s: negative refcnt for ifp=%p", __func__, ifp); 3938 /* NOTREACHED */ 3939 } 3940 --dl_if->dl_if_refcnt; 3941 if (dl_if->dl_if_trace != NULL) 3942 (*dl_if->dl_if_trace)(dl_if, FALSE); 3943 lck_mtx_unlock(&dl_if->dl_if_lock); 3944 3945 return (0); 3946} 3947 3948static errno_t 3949dlil_attach_protocol_internal(struct if_proto *proto, 3950 const struct ifnet_demux_desc *demux_list, u_int32_t demux_count) 3951{ 3952 struct kev_dl_proto_data ev_pr_data; 3953 struct ifnet *ifp = proto->ifp; 3954 int retval = 0; 3955 u_int32_t hash_value = proto_hash_value(proto->protocol_family); 3956 struct if_proto *prev_proto; 3957 struct if_proto *_proto; 3958 3959 /* callee holds a proto refcnt upon success */ 3960 ifnet_lock_exclusive(ifp); 3961 _proto = find_attached_proto(ifp, proto->protocol_family); 3962 if (_proto != NULL) { 3963 ifnet_lock_done(ifp); 3964 if_proto_free(_proto); 3965 return (EEXIST); 3966 } 3967 3968 /* 3969 * Call family module add_proto routine so it can refine the 3970 * demux descriptors as it wishes. 3971 */ 3972 retval = ifp->if_add_proto(ifp, proto->protocol_family, demux_list, 3973 demux_count); 3974 if (retval) { 3975 ifnet_lock_done(ifp); 3976 return (retval); 3977 } 3978 3979 /* 3980 * Insert the protocol in the hash 3981 */ 3982 prev_proto = SLIST_FIRST(&ifp->if_proto_hash[hash_value]); 3983 while (prev_proto != NULL && SLIST_NEXT(prev_proto, next_hash) != NULL) 3984 prev_proto = SLIST_NEXT(prev_proto, next_hash); 3985 if (prev_proto) 3986 SLIST_INSERT_AFTER(prev_proto, proto, next_hash); 3987 else 3988 SLIST_INSERT_HEAD(&ifp->if_proto_hash[hash_value], 3989 proto, next_hash); 3990 3991 /* hold a proto refcnt for attach */ 3992 if_proto_ref(proto); 3993 3994 /* 3995 * The reserved field carries the number of protocol still attached 3996 * (subject to change) 3997 */ 3998 ev_pr_data.proto_family = proto->protocol_family; 3999 ev_pr_data.proto_remaining_count = dlil_ifp_proto_count(ifp); 4000 ifnet_lock_done(ifp); 4001 4002 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_ATTACHED, 4003 (struct net_event_data *)&ev_pr_data, 4004 sizeof (struct kev_dl_proto_data)); 4005 return (retval); 4006} 4007 4008errno_t 4009ifnet_attach_protocol(ifnet_t ifp, protocol_family_t protocol, 4010 const struct ifnet_attach_proto_param *proto_details) 4011{ 4012 int retval = 0; 4013 struct if_proto *ifproto = NULL; 4014 4015 ifnet_head_lock_shared(); 4016 if (ifp == NULL || protocol == 0 || proto_details == NULL) { 4017 retval = EINVAL; 4018 goto end; 4019 } 4020 /* Check that the interface is in the global list */ 4021 if (!ifnet_lookup(ifp)) { 4022 retval = ENXIO; 4023 goto end; 4024 } 4025 4026 ifproto = zalloc(dlif_proto_zone); 4027 if (ifproto == NULL) { 4028 retval = ENOMEM; 4029 goto end; 4030 } 4031 bzero(ifproto, dlif_proto_size); 4032 4033 /* refcnt held above during lookup */ 4034 ifproto->ifp = ifp; 4035 ifproto->protocol_family = protocol; 4036 ifproto->proto_kpi = kProtoKPI_v1; 4037 ifproto->kpi.v1.input = proto_details->input; 4038 ifproto->kpi.v1.pre_output = proto_details->pre_output; 4039 ifproto->kpi.v1.event = proto_details->event; 4040 ifproto->kpi.v1.ioctl = proto_details->ioctl; 4041 ifproto->kpi.v1.detached = proto_details->detached; 4042 ifproto->kpi.v1.resolve_multi = proto_details->resolve; 4043 ifproto->kpi.v1.send_arp = proto_details->send_arp; 4044 4045 retval = dlil_attach_protocol_internal(ifproto, 4046 proto_details->demux_list, proto_details->demux_count); 4047 4048 if (dlil_verbose) { 4049 printf("%s%d: attached v1 protocol %d\n", ifp->if_name, 4050 ifp->if_unit, protocol); 4051 } 4052 4053end: 4054 if (retval != 0 && retval != EEXIST && ifp != NULL) { 4055 DLIL_PRINTF("%s%d: failed to attach v1 protocol %d (err=%d)\n", 4056 ifp->if_name, ifp->if_unit, protocol, retval); 4057 } 4058 ifnet_head_done(); 4059 if (retval != 0 && ifproto != NULL) 4060 zfree(dlif_proto_zone, ifproto); 4061 return (retval); 4062} 4063 4064errno_t 4065ifnet_attach_protocol_v2(ifnet_t ifp, protocol_family_t protocol, 4066 const struct ifnet_attach_proto_param_v2 *proto_details) 4067{ 4068 int retval = 0; 4069 struct if_proto *ifproto = NULL; 4070 4071 ifnet_head_lock_shared(); 4072 if (ifp == NULL || protocol == 0 || proto_details == NULL) { 4073 retval = EINVAL; 4074 goto end; 4075 } 4076 /* Check that the interface is in the global list */ 4077 if (!ifnet_lookup(ifp)) { 4078 retval = ENXIO; 4079 goto end; 4080 } 4081 4082 ifproto = zalloc(dlif_proto_zone); 4083 if (ifproto == NULL) { 4084 retval = ENOMEM; 4085 goto end; 4086 } 4087 bzero(ifproto, sizeof(*ifproto)); 4088 4089 /* refcnt held above during lookup */ 4090 ifproto->ifp = ifp; 4091 ifproto->protocol_family = protocol; 4092 ifproto->proto_kpi = kProtoKPI_v2; 4093 ifproto->kpi.v2.input = proto_details->input; 4094 ifproto->kpi.v2.pre_output = proto_details->pre_output; 4095 ifproto->kpi.v2.event = proto_details->event; 4096 ifproto->kpi.v2.ioctl = proto_details->ioctl; 4097 ifproto->kpi.v2.detached = proto_details->detached; 4098 ifproto->kpi.v2.resolve_multi = proto_details->resolve; 4099 ifproto->kpi.v2.send_arp = proto_details->send_arp; 4100 4101 retval = dlil_attach_protocol_internal(ifproto, 4102 proto_details->demux_list, proto_details->demux_count); 4103 4104 if (dlil_verbose) { 4105 printf("%s%d: attached v2 protocol %d\n", ifp->if_name, 4106 ifp->if_unit, protocol); 4107 } 4108 4109end: 4110 if (retval != 0 && retval != EEXIST && ifp != NULL) { 4111 DLIL_PRINTF("%s%d: failed to attach v2 protocol %d (err=%d)\n", 4112 ifp->if_name, ifp->if_unit, protocol, retval); 4113 } 4114 ifnet_head_done(); 4115 if (retval != 0 && ifproto != NULL) 4116 zfree(dlif_proto_zone, ifproto); 4117 return (retval); 4118} 4119 4120errno_t 4121ifnet_detach_protocol(ifnet_t ifp, protocol_family_t proto_family) 4122{ 4123 struct if_proto *proto = NULL; 4124 int retval = 0; 4125 4126 if (ifp == NULL || proto_family == 0) { 4127 retval = EINVAL; 4128 goto end; 4129 } 4130 4131 ifnet_lock_exclusive(ifp); 4132 /* callee holds a proto refcnt upon success */ 4133 proto = find_attached_proto(ifp, proto_family); 4134 if (proto == NULL) { 4135 retval = ENXIO; 4136 ifnet_lock_done(ifp); 4137 goto end; 4138 } 4139 4140 /* call family module del_proto */ 4141 if (ifp->if_del_proto) 4142 ifp->if_del_proto(ifp, proto->protocol_family); 4143 4144 SLIST_REMOVE(&ifp->if_proto_hash[proto_hash_value(proto_family)], 4145 proto, if_proto, next_hash); 4146 4147 if (proto->proto_kpi == kProtoKPI_v1) { 4148 proto->kpi.v1.input = ifproto_media_input_v1; 4149 proto->kpi.v1.pre_output= ifproto_media_preout; 4150 proto->kpi.v1.event = ifproto_media_event; 4151 proto->kpi.v1.ioctl = ifproto_media_ioctl; 4152 proto->kpi.v1.resolve_multi = ifproto_media_resolve_multi; 4153 proto->kpi.v1.send_arp = ifproto_media_send_arp; 4154 } else { 4155 proto->kpi.v2.input = ifproto_media_input_v2; 4156 proto->kpi.v2.pre_output = ifproto_media_preout; 4157 proto->kpi.v2.event = ifproto_media_event; 4158 proto->kpi.v2.ioctl = ifproto_media_ioctl; 4159 proto->kpi.v2.resolve_multi = ifproto_media_resolve_multi; 4160 proto->kpi.v2.send_arp = ifproto_media_send_arp; 4161 } 4162 proto->detached = 1; 4163 ifnet_lock_done(ifp); 4164 4165 if (dlil_verbose) { 4166 printf("%s%d: detached %s protocol %d\n", ifp->if_name, 4167 ifp->if_unit, (proto->proto_kpi == kProtoKPI_v1) ? 4168 "v1" : "v2", proto_family); 4169 } 4170 4171 /* release proto refcnt held during protocol attach */ 4172 if_proto_free(proto); 4173 4174 /* 4175 * Release proto refcnt held during lookup; the rest of 4176 * protocol detach steps will happen when the last proto 4177 * reference is released. 4178 */ 4179 if_proto_free(proto); 4180 4181end: 4182 return (retval); 4183} 4184 4185 4186static errno_t 4187ifproto_media_input_v1(struct ifnet *ifp, protocol_family_t protocol, 4188 struct mbuf *packet, char *header) 4189{ 4190#pragma unused(ifp, protocol, packet, header) 4191 return (ENXIO); 4192} 4193 4194static errno_t 4195ifproto_media_input_v2(struct ifnet *ifp, protocol_family_t protocol, 4196 struct mbuf *packet) 4197{ 4198#pragma unused(ifp, protocol, packet) 4199 return (ENXIO); 4200 4201} 4202 4203static errno_t 4204ifproto_media_preout(struct ifnet *ifp, protocol_family_t protocol, 4205 mbuf_t *packet, const struct sockaddr *dest, void *route, char *frame_type, 4206 char *link_layer_dest) 4207{ 4208#pragma unused(ifp, protocol, packet, dest, route, frame_type, link_layer_dest) 4209 return (ENXIO); 4210 4211} 4212 4213static void 4214ifproto_media_event(struct ifnet *ifp, protocol_family_t protocol, 4215 const struct kev_msg *event) 4216{ 4217#pragma unused(ifp, protocol, event) 4218} 4219 4220static errno_t 4221ifproto_media_ioctl(struct ifnet *ifp, protocol_family_t protocol, 4222 unsigned long command, void *argument) 4223{ 4224#pragma unused(ifp, protocol, command, argument) 4225 return (ENXIO); 4226} 4227 4228static errno_t 4229ifproto_media_resolve_multi(ifnet_t ifp, const struct sockaddr *proto_addr, 4230 struct sockaddr_dl *out_ll, size_t ll_len) 4231{ 4232#pragma unused(ifp, proto_addr, out_ll, ll_len) 4233 return (ENXIO); 4234} 4235 4236static errno_t 4237ifproto_media_send_arp(struct ifnet *ifp, u_short arpop, 4238 const struct sockaddr_dl *sender_hw, const struct sockaddr *sender_proto, 4239 const struct sockaddr_dl *target_hw, const struct sockaddr *target_proto) 4240{ 4241#pragma unused(ifp, arpop, sender_hw, sender_proto, target_hw, target_proto) 4242 return (ENXIO); 4243} 4244 4245extern int if_next_index(void); 4246 4247errno_t 4248ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) 4249{ 4250 struct ifnet *tmp_if; 4251 struct ifaddr *ifa; 4252 struct if_data_internal if_data_saved; 4253 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp; 4254 struct dlil_threading_info *dl_inp; 4255 u_int32_t sflags = 0; 4256 int err; 4257 4258 if (ifp == NULL) 4259 return (EINVAL); 4260 4261 /* 4262 * Serialize ifnet attach using dlil_ifnet_lock, in order to 4263 * prevent the interface from being configured while it is 4264 * embryonic, as ifnet_head_lock is dropped and reacquired 4265 * below prior to marking the ifnet with IFRF_ATTACHED. 4266 */ 4267 dlil_if_lock(); 4268 ifnet_head_lock_exclusive(); 4269 /* Verify we aren't already on the list */ 4270 TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) { 4271 if (tmp_if == ifp) { 4272 ifnet_head_done(); 4273 dlil_if_unlock(); 4274 return (EEXIST); 4275 } 4276 } 4277 4278 lck_mtx_lock_spin(&ifp->if_ref_lock); 4279 if (ifp->if_refflags & IFRF_ATTACHED) { 4280 panic_plain("%s: flags mismatch (attached set) ifp=%p", 4281 __func__, ifp); 4282 /* NOTREACHED */ 4283 } 4284 lck_mtx_unlock(&ifp->if_ref_lock); 4285 4286 ifnet_lock_exclusive(ifp); 4287 4288 /* Sanity check */ 4289 VERIFY(ifp->if_detaching_link.tqe_next == NULL); 4290 VERIFY(ifp->if_detaching_link.tqe_prev == NULL); 4291 4292 if (ll_addr != NULL) { 4293 if (ifp->if_addrlen == 0) { 4294 ifp->if_addrlen = ll_addr->sdl_alen; 4295 } else if (ll_addr->sdl_alen != ifp->if_addrlen) { 4296 ifnet_lock_done(ifp); 4297 ifnet_head_done(); 4298 dlil_if_unlock(); 4299 return (EINVAL); 4300 } 4301 } 4302 4303 /* 4304 * Allow interfaces without protocol families to attach 4305 * only if they have the necessary fields filled out. 4306 */ 4307 if (ifp->if_add_proto == NULL || ifp->if_del_proto == NULL) { 4308 DLIL_PRINTF("%s: Attempt to attach interface without " 4309 "family module - %d\n", __func__, ifp->if_family); 4310 ifnet_lock_done(ifp); 4311 ifnet_head_done(); 4312 dlil_if_unlock(); 4313 return (ENODEV); 4314 } 4315 4316 /* Allocate protocol hash table */ 4317 VERIFY(ifp->if_proto_hash == NULL); 4318 ifp->if_proto_hash = zalloc(dlif_phash_zone); 4319 if (ifp->if_proto_hash == NULL) { 4320 ifnet_lock_done(ifp); 4321 ifnet_head_done(); 4322 dlil_if_unlock(); 4323 return (ENOBUFS); 4324 } 4325 bzero(ifp->if_proto_hash, dlif_phash_size); 4326 4327 lck_mtx_lock_spin(&ifp->if_flt_lock); 4328 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head)); 4329 TAILQ_INIT(&ifp->if_flt_head); 4330 VERIFY(ifp->if_flt_busy == 0); 4331 VERIFY(ifp->if_flt_waiters == 0); 4332 lck_mtx_unlock(&ifp->if_flt_lock); 4333 4334 VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead)); 4335 TAILQ_INIT(&ifp->if_prefixhead); 4336 4337 if (!(dl_if->dl_if_flags & DLIF_REUSE)) { 4338 VERIFY(LIST_EMPTY(&ifp->if_multiaddrs)); 4339 LIST_INIT(&ifp->if_multiaddrs); 4340 } 4341 4342 VERIFY(ifp->if_allhostsinm == NULL); 4343 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead)); 4344 TAILQ_INIT(&ifp->if_addrhead); 4345 4346 if (ifp->if_index == 0) { 4347 int idx = if_next_index(); 4348 4349 if (idx == -1) { 4350 ifp->if_index = 0; 4351 ifnet_lock_done(ifp); 4352 ifnet_head_done(); 4353 dlil_if_unlock(); 4354 return (ENOBUFS); 4355 } 4356 ifp->if_index = idx; 4357 } 4358 /* There should not be anything occupying this slot */ 4359 VERIFY(ifindex2ifnet[ifp->if_index] == NULL); 4360 4361 /* allocate (if needed) and initialize a link address */ 4362 VERIFY(!(dl_if->dl_if_flags & DLIF_REUSE) || ifp->if_lladdr != NULL); 4363 ifa = dlil_alloc_lladdr(ifp, ll_addr); 4364 if (ifa == NULL) { 4365 ifnet_lock_done(ifp); 4366 ifnet_head_done(); 4367 dlil_if_unlock(); 4368 return (ENOBUFS); 4369 } 4370 4371 VERIFY(ifnet_addrs[ifp->if_index - 1] == NULL); 4372 ifnet_addrs[ifp->if_index - 1] = ifa; 4373 4374 /* make this address the first on the list */ 4375 IFA_LOCK(ifa); 4376 /* hold a reference for ifnet_addrs[] */ 4377 IFA_ADDREF_LOCKED(ifa); 4378 /* if_attach_link_ifa() holds a reference for ifa_link */ 4379 if_attach_link_ifa(ifp, ifa); 4380 IFA_UNLOCK(ifa); 4381 4382#if CONFIG_MACF_NET 4383 mac_ifnet_label_associate(ifp); 4384#endif 4385 4386 TAILQ_INSERT_TAIL(&ifnet_head, ifp, if_link); 4387 ifindex2ifnet[ifp->if_index] = ifp; 4388 4389 /* Hold a reference to the underlying dlil_ifnet */ 4390 ifnet_reference(ifp); 4391 4392 /* Clear stats (save and restore other fields that we care) */ 4393 if_data_saved = ifp->if_data; 4394 bzero(&ifp->if_data, sizeof (ifp->if_data)); 4395 ifp->if_data.ifi_type = if_data_saved.ifi_type; 4396 ifp->if_data.ifi_typelen = if_data_saved.ifi_typelen; 4397 ifp->if_data.ifi_physical = if_data_saved.ifi_physical; 4398 ifp->if_data.ifi_addrlen = if_data_saved.ifi_addrlen; 4399 ifp->if_data.ifi_hdrlen = if_data_saved.ifi_hdrlen; 4400 ifp->if_data.ifi_mtu = if_data_saved.ifi_mtu; 4401 ifp->if_data.ifi_baudrate = if_data_saved.ifi_baudrate; 4402 ifp->if_data.ifi_hwassist = if_data_saved.ifi_hwassist; 4403 ifp->if_data.ifi_tso_v4_mtu = if_data_saved.ifi_tso_v4_mtu; 4404 ifp->if_data.ifi_tso_v6_mtu = if_data_saved.ifi_tso_v6_mtu; 4405 ifnet_touch_lastchange(ifp); 4406 4407 VERIFY(ifp->if_output_sched_model == IFNET_SCHED_MODEL_NORMAL || 4408 ifp->if_output_sched_model == IFNET_SCHED_MODEL_DRIVER_MANAGED); 4409 4410 /* By default, use SFB and enable flow advisory */ 4411 sflags = PKTSCHEDF_QALG_SFB; 4412 if (if_flowadv) 4413 sflags |= PKTSCHEDF_QALG_FLOWCTL; 4414 4415 /* Initialize transmit queue(s) */ 4416 err = ifclassq_setup(ifp, sflags, (dl_if->dl_if_flags & DLIF_REUSE)); 4417 if (err != 0) { 4418 panic_plain("%s: ifp=%p couldn't initialize transmit queue; " 4419 "err=%d", __func__, ifp, err); 4420 /* NOTREACHED */ 4421 } 4422 4423 /* Sanity checks on the input thread storage */ 4424 dl_inp = &dl_if->dl_if_inpstorage; 4425 bzero(&dl_inp->stats, sizeof (dl_inp->stats)); 4426 VERIFY(dl_inp->input_waiting == 0); 4427 VERIFY(dl_inp->wtot == 0); 4428 VERIFY(dl_inp->ifp == NULL); 4429 VERIFY(qhead(&dl_inp->rcvq_pkts) == NULL && qempty(&dl_inp->rcvq_pkts)); 4430 VERIFY(qlimit(&dl_inp->rcvq_pkts) == 0); 4431 VERIFY(!dl_inp->net_affinity); 4432 VERIFY(ifp->if_inp == NULL); 4433 VERIFY(dl_inp->input_thr == THREAD_NULL); 4434 VERIFY(dl_inp->wloop_thr == THREAD_NULL); 4435 VERIFY(dl_inp->poll_thr == THREAD_NULL); 4436 VERIFY(dl_inp->tag == 0); 4437 VERIFY(dl_inp->mode == IFNET_MODEL_INPUT_POLL_OFF); 4438 bzero(&dl_inp->tstats, sizeof (dl_inp->tstats)); 4439 bzero(&dl_inp->pstats, sizeof (dl_inp->pstats)); 4440 bzero(&dl_inp->sstats, sizeof (dl_inp->sstats)); 4441#if IFNET_INPUT_SANITY_CHK 4442 VERIFY(dl_inp->input_mbuf_cnt == 0); 4443#endif /* IFNET_INPUT_SANITY_CHK */ 4444 4445 /* 4446 * A specific DLIL input thread is created per Ethernet/cellular 4447 * interface or for an interface which supports opportunistic 4448 * input polling. Pseudo interfaces or other types of interfaces 4449 * use the main input thread instead. 4450 */ 4451 if ((net_rxpoll && (ifp->if_eflags & IFEF_RXPOLL)) || 4452 ifp->if_type == IFT_ETHER || ifp->if_type == IFT_CELLULAR) { 4453 ifp->if_inp = dl_inp; 4454 err = dlil_create_input_thread(ifp, ifp->if_inp); 4455 if (err != 0) { 4456 panic_plain("%s: ifp=%p couldn't get an input thread; " 4457 "err=%d", __func__, ifp, err); 4458 /* NOTREACHED */ 4459 } 4460 } 4461 4462 /* 4463 * If the driver supports the new transmit model, create a workloop 4464 * starter thread to invoke the if_start callback where the packets 4465 * may be dequeued and transmitted. 4466 */ 4467 if (ifp->if_eflags & IFEF_TXSTART) { 4468 VERIFY(ifp->if_start != NULL); 4469 VERIFY(ifp->if_start_thread == THREAD_NULL); 4470 4471 ifnet_set_start_cycle(ifp, NULL); 4472 ifp->if_start_active = 0; 4473 ifp->if_start_req = 0; 4474 if ((err = kernel_thread_start(ifnet_start_thread_fn, ifp, 4475 &ifp->if_start_thread)) != KERN_SUCCESS) { 4476 panic_plain("%s: ifp=%p couldn't get a start thread; " 4477 "err=%d", __func__, ifp, err); 4478 /* NOTREACHED */ 4479 } 4480 ml_thread_policy(ifp->if_start_thread, MACHINE_GROUP, 4481 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP)); 4482 } 4483 4484 /* 4485 * If the driver supports the new receive model, create a poller 4486 * thread to invoke if_input_poll callback where the packets may 4487 * be dequeued from the driver and processed for reception. 4488 */ 4489 if (ifp->if_eflags & IFEF_RXPOLL) { 4490 VERIFY(ifp->if_input_poll != NULL); 4491 VERIFY(ifp->if_input_ctl != NULL); 4492 VERIFY(ifp->if_poll_thread == THREAD_NULL); 4493 4494 ifnet_set_poll_cycle(ifp, NULL); 4495 ifp->if_poll_update = 0; 4496 ifp->if_poll_active = 0; 4497 ifp->if_poll_req = 0; 4498 if ((err = kernel_thread_start(ifnet_poll_thread_fn, ifp, 4499 &ifp->if_poll_thread)) != KERN_SUCCESS) { 4500 panic_plain("%s: ifp=%p couldn't get a poll thread; " 4501 "err=%d", __func__, ifp, err); 4502 /* NOTREACHED */ 4503 } 4504 ml_thread_policy(ifp->if_poll_thread, MACHINE_GROUP, 4505 (MACHINE_NETWORK_GROUP|MACHINE_NETWORK_WORKLOOP)); 4506 } 4507 4508 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE); 4509 VERIFY(ifp->if_desc.ifd_len == 0); 4510 VERIFY(ifp->if_desc.ifd_desc != NULL); 4511 4512 /* Record attach PC stacktrace */ 4513 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_attach); 4514 4515 ifp->if_updatemcasts = 0; 4516 if (!LIST_EMPTY(&ifp->if_multiaddrs)) { 4517 struct ifmultiaddr *ifma; 4518 LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 4519 IFMA_LOCK(ifma); 4520 if (ifma->ifma_addr->sa_family == AF_LINK || 4521 ifma->ifma_addr->sa_family == AF_UNSPEC) 4522 ifp->if_updatemcasts++; 4523 IFMA_UNLOCK(ifma); 4524 } 4525 4526 printf("%s%d: attached with %d suspended link-layer multicast " 4527 "membership(s)\n", ifp->if_name, ifp->if_unit, 4528 ifp->if_updatemcasts); 4529 } 4530 4531 ifnet_lock_done(ifp); 4532 ifnet_head_done(); 4533 4534 lck_mtx_lock(&ifp->if_cached_route_lock); 4535 /* Enable forwarding cached route */ 4536 ifp->if_fwd_cacheok = 1; 4537 /* Clean up any existing cached routes */ 4538 if (ifp->if_fwd_route.ro_rt != NULL) 4539 rtfree(ifp->if_fwd_route.ro_rt); 4540 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route)); 4541 if (ifp->if_src_route.ro_rt != NULL) 4542 rtfree(ifp->if_src_route.ro_rt); 4543 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route)); 4544 if (ifp->if_src_route6.ro_rt != NULL) 4545 rtfree(ifp->if_src_route6.ro_rt); 4546 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6)); 4547 lck_mtx_unlock(&ifp->if_cached_route_lock); 4548 4549 ifnet_llreach_ifattach(ifp, (dl_if->dl_if_flags & DLIF_REUSE)); 4550 4551 /* 4552 * Allocate and attach IGMPv3/MLDv2 interface specific variables 4553 * and trees; do this before the ifnet is marked as attached. 4554 * The ifnet keeps the reference to the info structures even after 4555 * the ifnet is detached, since the network-layer records still 4556 * refer to the info structures even after that. This also 4557 * makes it possible for them to still function after the ifnet 4558 * is recycled or reattached. 4559 */ 4560#if INET 4561 if (IGMP_IFINFO(ifp) == NULL) { 4562 IGMP_IFINFO(ifp) = igmp_domifattach(ifp, M_WAITOK); 4563 VERIFY(IGMP_IFINFO(ifp) != NULL); 4564 } else { 4565 VERIFY(IGMP_IFINFO(ifp)->igi_ifp == ifp); 4566 igmp_domifreattach(IGMP_IFINFO(ifp)); 4567 } 4568#endif /* INET */ 4569#if INET6 4570 if (MLD_IFINFO(ifp) == NULL) { 4571 MLD_IFINFO(ifp) = mld_domifattach(ifp, M_WAITOK); 4572 VERIFY(MLD_IFINFO(ifp) != NULL); 4573 } else { 4574 VERIFY(MLD_IFINFO(ifp)->mli_ifp == ifp); 4575 mld_domifreattach(MLD_IFINFO(ifp)); 4576 } 4577#endif /* INET6 */ 4578 4579 /* 4580 * Finally, mark this ifnet as attached. 4581 */ 4582 lck_mtx_lock(rnh_lock); 4583 ifnet_lock_exclusive(ifp); 4584 /* Initialize Link Quality Metric (loopback [lo0] is always good) */ 4585 ifp->if_lqm = (ifp == lo_ifp) ? IFNET_LQM_THRESH_GOOD : 4586 IFNET_LQM_THRESH_UNKNOWN; 4587 lck_mtx_lock_spin(&ifp->if_ref_lock); 4588 ifp->if_refflags = IFRF_ATTACHED; 4589 lck_mtx_unlock(&ifp->if_ref_lock); 4590 if (net_rtref) { 4591 /* boot-args override; enable idle notification */ 4592 (void) ifnet_set_idle_flags_locked(ifp, IFRF_IDLE_NOTIFY, 4593 IFRF_IDLE_NOTIFY); 4594 } else { 4595 /* apply previous request(s) to set the idle flags, if any */ 4596 (void) ifnet_set_idle_flags_locked(ifp, ifp->if_idle_new_flags, 4597 ifp->if_idle_new_flags_mask); 4598 4599 } 4600 ifnet_lock_done(ifp); 4601 lck_mtx_unlock(rnh_lock); 4602 dlil_if_unlock(); 4603 4604#if PF 4605 /* 4606 * Attach packet filter to this interface, if enabled. 4607 */ 4608 pf_ifnet_hook(ifp, 1); 4609#endif /* PF */ 4610 4611 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_ATTACHED, NULL, 0); 4612 4613 if (dlil_verbose) { 4614 printf("%s%d: attached%s\n", ifp->if_name, ifp->if_unit, 4615 (dl_if->dl_if_flags & DLIF_REUSE) ? " (recycled)" : ""); 4616 } 4617 4618 return (0); 4619} 4620 4621/* 4622 * Prepare the storage for the first/permanent link address, which must 4623 * must have the same lifetime as the ifnet itself. Although the link 4624 * address gets removed from if_addrhead and ifnet_addrs[] at detach time, 4625 * its location in memory must never change as it may still be referred 4626 * to by some parts of the system afterwards (unfortunate implementation 4627 * artifacts inherited from BSD.) 4628 * 4629 * Caller must hold ifnet lock as writer. 4630 */ 4631static struct ifaddr * 4632dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr) 4633{ 4634 struct ifaddr *ifa, *oifa; 4635 struct sockaddr_dl *asdl, *msdl; 4636 char workbuf[IFNAMSIZ*2]; 4637 int namelen, masklen, socksize; 4638 struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp; 4639 4640 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE); 4641 VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen); 4642 4643 namelen = snprintf(workbuf, sizeof (workbuf), "%s%d", 4644 ifp->if_name, ifp->if_unit); 4645 masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + namelen; 4646 socksize = masklen + ifp->if_addrlen; 4647#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1))) 4648 if ((u_int32_t)socksize < sizeof (struct sockaddr_dl)) 4649 socksize = sizeof(struct sockaddr_dl); 4650 socksize = ROUNDUP(socksize); 4651#undef ROUNDUP 4652 4653 ifa = ifp->if_lladdr; 4654 if (socksize > DLIL_SDLMAXLEN || 4655 (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) { 4656 /* 4657 * Rare, but in the event that the link address requires 4658 * more storage space than DLIL_SDLMAXLEN, allocate the 4659 * largest possible storages for address and mask, such 4660 * that we can reuse the same space when if_addrlen grows. 4661 * This same space will be used when if_addrlen shrinks. 4662 */ 4663 if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) { 4664 int ifasize = sizeof (*ifa) + 2 * SOCK_MAXADDRLEN; 4665 ifa = _MALLOC(ifasize, M_IFADDR, M_WAITOK | M_ZERO); 4666 if (ifa == NULL) 4667 return (NULL); 4668 ifa_lock_init(ifa); 4669 /* Don't set IFD_ALLOC, as this is permanent */ 4670 ifa->ifa_debug = IFD_LINK; 4671 } 4672 IFA_LOCK(ifa); 4673 /* address and mask sockaddr_dl locations */ 4674 asdl = (struct sockaddr_dl *)(ifa + 1); 4675 bzero(asdl, SOCK_MAXADDRLEN); 4676 msdl = (struct sockaddr_dl *)(void *) 4677 ((char *)asdl + SOCK_MAXADDRLEN); 4678 bzero(msdl, SOCK_MAXADDRLEN); 4679 } else { 4680 VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa); 4681 /* 4682 * Use the storage areas for address and mask within the 4683 * dlil_ifnet structure. This is the most common case. 4684 */ 4685 if (ifa == NULL) { 4686 ifa = &dl_if->dl_if_lladdr.ifa; 4687 ifa_lock_init(ifa); 4688 /* Don't set IFD_ALLOC, as this is permanent */ 4689 ifa->ifa_debug = IFD_LINK; 4690 } 4691 IFA_LOCK(ifa); 4692 /* address and mask sockaddr_dl locations */ 4693 asdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.asdl; 4694 bzero(asdl, sizeof (dl_if->dl_if_lladdr.asdl)); 4695 msdl = (struct sockaddr_dl *)(void *)&dl_if->dl_if_lladdr.msdl; 4696 bzero(msdl, sizeof (dl_if->dl_if_lladdr.msdl)); 4697 } 4698 4699 /* hold a permanent reference for the ifnet itself */ 4700 IFA_ADDREF_LOCKED(ifa); 4701 oifa = ifp->if_lladdr; 4702 ifp->if_lladdr = ifa; 4703 4704 VERIFY(ifa->ifa_debug == IFD_LINK); 4705 ifa->ifa_ifp = ifp; 4706 ifa->ifa_rtrequest = link_rtrequest; 4707 ifa->ifa_addr = (struct sockaddr *)asdl; 4708 asdl->sdl_len = socksize; 4709 asdl->sdl_family = AF_LINK; 4710 bcopy(workbuf, asdl->sdl_data, namelen); 4711 asdl->sdl_nlen = namelen; 4712 asdl->sdl_index = ifp->if_index; 4713 asdl->sdl_type = ifp->if_type; 4714 if (ll_addr != NULL) { 4715 asdl->sdl_alen = ll_addr->sdl_alen; 4716 bcopy(CONST_LLADDR(ll_addr), LLADDR(asdl), asdl->sdl_alen); 4717 } else { 4718 asdl->sdl_alen = 0; 4719 } 4720 ifa->ifa_netmask = (struct sockaddr*)msdl; 4721 msdl->sdl_len = masklen; 4722 while (namelen != 0) 4723 msdl->sdl_data[--namelen] = 0xff; 4724 IFA_UNLOCK(ifa); 4725 4726 if (oifa != NULL) 4727 IFA_REMREF(oifa); 4728 4729 return (ifa); 4730} 4731 4732static void 4733if_purgeaddrs(struct ifnet *ifp) 4734{ 4735#if INET 4736 in_purgeaddrs(ifp); 4737#endif /* INET */ 4738#if INET6 4739 in6_purgeaddrs(ifp); 4740#endif /* INET6 */ 4741#if NETAT 4742 at_purgeaddrs(ifp); 4743#endif 4744} 4745 4746errno_t 4747ifnet_detach(ifnet_t ifp) 4748{ 4749 if (ifp == NULL) 4750 return (EINVAL); 4751 4752 lck_mtx_lock(rnh_lock); 4753 ifnet_head_lock_exclusive(); 4754 ifnet_lock_exclusive(ifp); 4755 4756 /* 4757 * Check to see if this interface has previously triggered 4758 * aggressive protocol draining; if so, decrement the global 4759 * refcnt and clear PR_AGGDRAIN on the route domain if 4760 * there are no more of such an interface around. 4761 */ 4762 (void) ifnet_set_idle_flags_locked(ifp, 0, ~0); 4763 4764 lck_mtx_lock_spin(&ifp->if_ref_lock); 4765 if (!(ifp->if_refflags & IFRF_ATTACHED)) { 4766 lck_mtx_unlock(&ifp->if_ref_lock); 4767 ifnet_lock_done(ifp); 4768 ifnet_head_done(); 4769 lck_mtx_unlock(rnh_lock); 4770 return (EINVAL); 4771 } else if (ifp->if_refflags & IFRF_DETACHING) { 4772 /* Interface has already been detached */ 4773 lck_mtx_unlock(&ifp->if_ref_lock); 4774 ifnet_lock_done(ifp); 4775 ifnet_head_done(); 4776 lck_mtx_unlock(rnh_lock); 4777 return (ENXIO); 4778 } 4779 /* Indicate this interface is being detached */ 4780 ifp->if_refflags &= ~IFRF_ATTACHED; 4781 ifp->if_refflags |= IFRF_DETACHING; 4782 lck_mtx_unlock(&ifp->if_ref_lock); 4783 4784 if (dlil_verbose) 4785 printf("%s%d: detaching\n", ifp->if_name, ifp->if_unit); 4786 4787 /* 4788 * Remove ifnet from the ifnet_head, ifindex2ifnet[]; it will 4789 * no longer be visible during lookups from this point. 4790 */ 4791 VERIFY(ifindex2ifnet[ifp->if_index] == ifp); 4792 TAILQ_REMOVE(&ifnet_head, ifp, if_link); 4793 ifp->if_link.tqe_next = NULL; 4794 ifp->if_link.tqe_prev = NULL; 4795 ifindex2ifnet[ifp->if_index] = NULL; 4796 4797 /* Record detach PC stacktrace */ 4798 ctrace_record(&((struct dlil_ifnet *)ifp)->dl_if_detach); 4799 4800 ifnet_lock_done(ifp); 4801 ifnet_head_done(); 4802 lck_mtx_unlock(rnh_lock); 4803 4804 /* Reset Link Quality Metric (unless loopback [lo0]) */ 4805 if (ifp != lo_ifp) 4806 if_lqm_update(ifp, IFNET_LQM_THRESH_OFF); 4807 4808 /* Reset TCP local statistics */ 4809 if (ifp->if_tcp_stat != NULL) 4810 bzero(ifp->if_tcp_stat, sizeof(*ifp->if_tcp_stat)); 4811 4812 /* Reset UDP local statistics */ 4813 if (ifp->if_udp_stat != NULL) 4814 bzero(ifp->if_udp_stat, sizeof(*ifp->if_udp_stat)); 4815 4816 /* Let BPF know we're detaching */ 4817 bpfdetach(ifp); 4818 4819 /* Mark the interface as DOWN */ 4820 if_down(ifp); 4821 4822 /* Drain send queue */ 4823 ifclassq_teardown(ifp); 4824 4825 /* Disable forwarding cached route */ 4826 lck_mtx_lock(&ifp->if_cached_route_lock); 4827 ifp->if_fwd_cacheok = 0; 4828 lck_mtx_unlock(&ifp->if_cached_route_lock); 4829 4830 /* 4831 * Drain any deferred IGMPv3/MLDv2 query responses, but keep the 4832 * references to the info structures and leave them attached to 4833 * this ifnet. 4834 */ 4835#if INET 4836 igmp_domifdetach(ifp); 4837#endif /* INET */ 4838#if INET6 4839 mld_domifdetach(ifp); 4840#endif /* INET6 */ 4841 4842 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0); 4843 4844 /* Let worker thread take care of the rest, to avoid reentrancy */ 4845 dlil_if_lock(); 4846 ifnet_detaching_enqueue(ifp); 4847 dlil_if_unlock(); 4848 4849 return (0); 4850} 4851 4852static void 4853ifnet_detaching_enqueue(struct ifnet *ifp) 4854{ 4855 dlil_if_lock_assert(); 4856 4857 ++ifnet_detaching_cnt; 4858 VERIFY(ifnet_detaching_cnt != 0); 4859 TAILQ_INSERT_TAIL(&ifnet_detaching_head, ifp, if_detaching_link); 4860 wakeup((caddr_t)&ifnet_delayed_run); 4861} 4862 4863static struct ifnet * 4864ifnet_detaching_dequeue(void) 4865{ 4866 struct ifnet *ifp; 4867 4868 dlil_if_lock_assert(); 4869 4870 ifp = TAILQ_FIRST(&ifnet_detaching_head); 4871 VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL); 4872 if (ifp != NULL) { 4873 VERIFY(ifnet_detaching_cnt != 0); 4874 --ifnet_detaching_cnt; 4875 TAILQ_REMOVE(&ifnet_detaching_head, ifp, if_detaching_link); 4876 ifp->if_detaching_link.tqe_next = NULL; 4877 ifp->if_detaching_link.tqe_prev = NULL; 4878 } 4879 return (ifp); 4880} 4881 4882static int 4883ifnet_detacher_thread_cont(int err) 4884{ 4885#pragma unused(err) 4886 struct ifnet *ifp; 4887 4888 for (;;) { 4889 dlil_if_lock_assert(); 4890 while (ifnet_detaching_cnt == 0) { 4891 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock, 4892 (PZERO - 1), "ifnet_detacher_cont", 0, 4893 ifnet_detacher_thread_cont); 4894 /* NOTREACHED */ 4895 } 4896 4897 VERIFY(TAILQ_FIRST(&ifnet_detaching_head) != NULL); 4898 4899 /* Take care of detaching ifnet */ 4900 ifp = ifnet_detaching_dequeue(); 4901 if (ifp != NULL) { 4902 dlil_if_unlock(); 4903 ifnet_detach_final(ifp); 4904 dlil_if_lock(); 4905 } 4906 } 4907 /* NOTREACHED */ 4908 return (0); 4909} 4910 4911static void 4912ifnet_detacher_thread_func(void *v, wait_result_t w) 4913{ 4914#pragma unused(v, w) 4915 dlil_if_lock(); 4916 (void) msleep0(&ifnet_delayed_run, &dlil_ifnet_lock, 4917 (PZERO - 1), "ifnet_detacher", 0, ifnet_detacher_thread_cont); 4918 /* 4919 * msleep0() shouldn't have returned as PCATCH was not set; 4920 * therefore assert in this case. 4921 */ 4922 dlil_if_unlock(); 4923 VERIFY(0); 4924} 4925 4926static void 4927ifnet_detach_final(struct ifnet *ifp) 4928{ 4929 struct ifnet_filter *filter, *filter_next; 4930 struct ifnet_filter_head fhead; 4931 struct dlil_threading_info *inp; 4932 struct ifaddr *ifa; 4933 ifnet_detached_func if_free; 4934 int i; 4935 4936 lck_mtx_lock(&ifp->if_ref_lock); 4937 if (!(ifp->if_refflags & IFRF_DETACHING)) { 4938 panic("%s: flags mismatch (detaching not set) ifp=%p", 4939 __func__, ifp); 4940 /* NOTREACHED */ 4941 } 4942 4943 /* 4944 * Wait until the existing IO references get released 4945 * before we proceed with ifnet_detach. This is not a 4946 * common case, so block without using a continuation. 4947 */ 4948 while (ifp->if_refio > 0) { 4949 printf("%s: Waiting for IO references on %s%d interface " 4950 "to be released\n", __func__, ifp->if_name, ifp->if_unit); 4951 (void) msleep(&(ifp->if_refio), &ifp->if_ref_lock, 4952 (PZERO - 1), "ifnet_ioref_wait", NULL); 4953 } 4954 lck_mtx_unlock(&ifp->if_ref_lock); 4955 4956 /* Detach interface filters */ 4957 lck_mtx_lock(&ifp->if_flt_lock); 4958 if_flt_monitor_enter(ifp); 4959 4960 lck_mtx_assert(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED); 4961 fhead = ifp->if_flt_head; 4962 TAILQ_INIT(&ifp->if_flt_head); 4963 4964 for (filter = TAILQ_FIRST(&fhead); filter; filter = filter_next) { 4965 filter_next = TAILQ_NEXT(filter, filt_next); 4966 lck_mtx_unlock(&ifp->if_flt_lock); 4967 4968 dlil_detach_filter_internal(filter, 1); 4969 lck_mtx_lock(&ifp->if_flt_lock); 4970 } 4971 if_flt_monitor_leave(ifp); 4972 lck_mtx_unlock(&ifp->if_flt_lock); 4973 4974 /* Tell upper layers to drop their network addresses */ 4975 if_purgeaddrs(ifp); 4976 4977 ifnet_lock_exclusive(ifp); 4978 4979 /* Uplumb all protocols */ 4980 for (i = 0; i < PROTO_HASH_SLOTS; i++) { 4981 struct if_proto *proto; 4982 4983 proto = SLIST_FIRST(&ifp->if_proto_hash[i]); 4984 while (proto != NULL) { 4985 protocol_family_t family = proto->protocol_family; 4986 ifnet_lock_done(ifp); 4987 proto_unplumb(family, ifp); 4988 ifnet_lock_exclusive(ifp); 4989 proto = SLIST_FIRST(&ifp->if_proto_hash[i]); 4990 } 4991 /* There should not be any protocols left */ 4992 VERIFY(SLIST_EMPTY(&ifp->if_proto_hash[i])); 4993 } 4994 zfree(dlif_phash_zone, ifp->if_proto_hash); 4995 ifp->if_proto_hash = NULL; 4996 4997 /* Detach (permanent) link address from if_addrhead */ 4998 ifa = TAILQ_FIRST(&ifp->if_addrhead); 4999 VERIFY(ifnet_addrs[ifp->if_index - 1] == ifa); 5000 IFA_LOCK(ifa); 5001 if_detach_link_ifa(ifp, ifa); 5002 IFA_UNLOCK(ifa); 5003 5004 /* Remove (permanent) link address from ifnet_addrs[] */ 5005 IFA_REMREF(ifa); 5006 ifnet_addrs[ifp->if_index - 1] = NULL; 5007 5008 /* This interface should not be on {ifnet_head,detaching} */ 5009 VERIFY(ifp->if_link.tqe_next == NULL); 5010 VERIFY(ifp->if_link.tqe_prev == NULL); 5011 VERIFY(ifp->if_detaching_link.tqe_next == NULL); 5012 VERIFY(ifp->if_detaching_link.tqe_prev == NULL); 5013 5014 /* Prefix list should be empty by now */ 5015 VERIFY(TAILQ_EMPTY(&ifp->if_prefixhead)); 5016 5017 /* The slot should have been emptied */ 5018 VERIFY(ifindex2ifnet[ifp->if_index] == NULL); 5019 5020 /* There should not be any addresses left */ 5021 VERIFY(TAILQ_EMPTY(&ifp->if_addrhead)); 5022 5023 /* 5024 * Signal the starter thread to terminate itself. 5025 */ 5026 if (ifp->if_start_thread != THREAD_NULL) { 5027 lck_mtx_lock_spin(&ifp->if_start_lock); 5028 ifp->if_start_thread = THREAD_NULL; 5029 wakeup_one((caddr_t)&ifp->if_start_thread); 5030 lck_mtx_unlock(&ifp->if_start_lock); 5031 } 5032 5033 /* 5034 * Signal the poller thread to terminate itself. 5035 */ 5036 if (ifp->if_poll_thread != THREAD_NULL) { 5037 lck_mtx_lock_spin(&ifp->if_poll_lock); 5038 ifp->if_poll_thread = THREAD_NULL; 5039 wakeup_one((caddr_t)&ifp->if_poll_thread); 5040 lck_mtx_unlock(&ifp->if_poll_lock); 5041 } 5042 5043 /* 5044 * If thread affinity was set for the workloop thread, we will need 5045 * to tear down the affinity and release the extra reference count 5046 * taken at attach time. Does not apply to lo0 or other interfaces 5047 * without dedicated input threads. 5048 */ 5049 if ((inp = ifp->if_inp) != NULL) { 5050 VERIFY(inp != dlil_main_input_thread); 5051 5052 if (inp->net_affinity) { 5053 struct thread *tp, *wtp, *ptp; 5054 5055 lck_mtx_lock_spin(&inp->input_lck); 5056 wtp = inp->wloop_thr; 5057 inp->wloop_thr = THREAD_NULL; 5058 ptp = inp->poll_thr; 5059 inp->poll_thr = THREAD_NULL; 5060 tp = inp->input_thr; /* don't nullify now */ 5061 inp->tag = 0; 5062 inp->net_affinity = FALSE; 5063 lck_mtx_unlock(&inp->input_lck); 5064 5065 /* Tear down poll thread affinity */ 5066 if (ptp != NULL) { 5067 VERIFY(ifp->if_eflags & IFEF_RXPOLL); 5068 (void) dlil_affinity_set(ptp, 5069 THREAD_AFFINITY_TAG_NULL); 5070 thread_deallocate(ptp); 5071 } 5072 5073 /* Tear down workloop thread affinity */ 5074 if (wtp != NULL) { 5075 (void) dlil_affinity_set(wtp, 5076 THREAD_AFFINITY_TAG_NULL); 5077 thread_deallocate(wtp); 5078 } 5079 5080 /* Tear down DLIL input thread affinity */ 5081 (void) dlil_affinity_set(tp, THREAD_AFFINITY_TAG_NULL); 5082 thread_deallocate(tp); 5083 } 5084 5085 /* disassociate ifp DLIL input thread */ 5086 ifp->if_inp = NULL; 5087 5088 lck_mtx_lock_spin(&inp->input_lck); 5089 inp->input_waiting |= DLIL_INPUT_TERMINATE; 5090 if (!(inp->input_waiting & DLIL_INPUT_RUNNING)) { 5091 wakeup_one((caddr_t)&inp->input_waiting); 5092 } 5093 lck_mtx_unlock(&inp->input_lck); 5094 } 5095 5096 /* The driver might unload, so point these to ourselves */ 5097 if_free = ifp->if_free; 5098 ifp->if_output = ifp_if_output; 5099 ifp->if_pre_enqueue = ifp_if_output; 5100 ifp->if_start = ifp_if_start; 5101 ifp->if_output_ctl = ifp_if_ctl; 5102 ifp->if_input_poll = ifp_if_input_poll; 5103 ifp->if_input_ctl = ifp_if_ctl; 5104 ifp->if_ioctl = ifp_if_ioctl; 5105 ifp->if_set_bpf_tap = ifp_if_set_bpf_tap; 5106 ifp->if_free = ifp_if_free; 5107 ifp->if_demux = ifp_if_demux; 5108 ifp->if_event = ifp_if_event; 5109 ifp->if_framer = ifp_if_framer; 5110 ifp->if_add_proto = ifp_if_add_proto; 5111 ifp->if_del_proto = ifp_if_del_proto; 5112 ifp->if_check_multi = ifp_if_check_multi; 5113 5114 /* wipe out interface description */ 5115 VERIFY(ifp->if_desc.ifd_maxlen == IF_DESCSIZE); 5116 ifp->if_desc.ifd_len = 0; 5117 VERIFY(ifp->if_desc.ifd_desc != NULL); 5118 bzero(ifp->if_desc.ifd_desc, IF_DESCSIZE); 5119 5120 ifnet_lock_done(ifp); 5121 5122#if PF 5123 /* 5124 * Detach this interface from packet filter, if enabled. 5125 */ 5126 pf_ifnet_hook(ifp, 0); 5127#endif /* PF */ 5128 5129 /* Filter list should be empty */ 5130 lck_mtx_lock_spin(&ifp->if_flt_lock); 5131 VERIFY(TAILQ_EMPTY(&ifp->if_flt_head)); 5132 VERIFY(ifp->if_flt_busy == 0); 5133 VERIFY(ifp->if_flt_waiters == 0); 5134 lck_mtx_unlock(&ifp->if_flt_lock); 5135 5136 /* Last chance to drain send queue */ 5137 if_qflush(ifp, 0); 5138 5139 /* Last chance to cleanup any cached route */ 5140 lck_mtx_lock(&ifp->if_cached_route_lock); 5141 VERIFY(!ifp->if_fwd_cacheok); 5142 if (ifp->if_fwd_route.ro_rt != NULL) 5143 rtfree(ifp->if_fwd_route.ro_rt); 5144 bzero(&ifp->if_fwd_route, sizeof (ifp->if_fwd_route)); 5145 if (ifp->if_src_route.ro_rt != NULL) 5146 rtfree(ifp->if_src_route.ro_rt); 5147 bzero(&ifp->if_src_route, sizeof (ifp->if_src_route)); 5148 if (ifp->if_src_route6.ro_rt != NULL) 5149 rtfree(ifp->if_src_route6.ro_rt); 5150 bzero(&ifp->if_src_route6, sizeof (ifp->if_src_route6)); 5151 lck_mtx_unlock(&ifp->if_cached_route_lock); 5152 5153 ifnet_llreach_ifdetach(ifp); 5154 5155 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHED, NULL, 0); 5156 5157 if (if_free != NULL) 5158 if_free(ifp); 5159 5160 /* 5161 * Finally, mark this ifnet as detached. 5162 */ 5163 lck_mtx_lock_spin(&ifp->if_ref_lock); 5164 if (!(ifp->if_refflags & IFRF_DETACHING)) { 5165 panic("%s: flags mismatch (detaching not set) ifp=%p", 5166 __func__, ifp); 5167 /* NOTREACHED */ 5168 } 5169 ifp->if_refflags &= ~IFRF_DETACHING; 5170 lck_mtx_unlock(&ifp->if_ref_lock); 5171 5172 if (dlil_verbose) 5173 printf("%s%d: detached\n", ifp->if_name, ifp->if_unit); 5174 5175 /* Release reference held during ifnet attach */ 5176 ifnet_release(ifp); 5177} 5178 5179static errno_t 5180ifp_if_output(struct ifnet *ifp, struct mbuf *m) 5181{ 5182#pragma unused(ifp) 5183 m_freem(m); 5184 return (0); 5185} 5186 5187static void 5188ifp_if_start(struct ifnet *ifp) 5189{ 5190 ifnet_purge(ifp); 5191} 5192 5193static void 5194ifp_if_input_poll(struct ifnet *ifp, u_int32_t flags, u_int32_t max_cnt, 5195 struct mbuf **m_head, struct mbuf **m_tail, u_int32_t *cnt, u_int32_t *len) 5196{ 5197#pragma unused(ifp, flags, max_cnt) 5198 if (m_head != NULL) 5199 *m_head = NULL; 5200 if (m_tail != NULL) 5201 *m_tail = NULL; 5202 if (cnt != NULL) 5203 *cnt = 0; 5204 if (len != NULL) 5205 *len = 0; 5206} 5207 5208static errno_t 5209ifp_if_ctl(struct ifnet *ifp, ifnet_ctl_cmd_t cmd, u_int32_t arglen, void *arg) 5210{ 5211#pragma unused(ifp, cmd, arglen, arg) 5212 return (EOPNOTSUPP); 5213} 5214 5215static errno_t 5216ifp_if_demux(struct ifnet *ifp, struct mbuf *m, char *fh, protocol_family_t *pf) 5217{ 5218#pragma unused(ifp, fh, pf) 5219 m_freem(m); 5220 return (EJUSTRETURN); 5221} 5222 5223static errno_t 5224ifp_if_add_proto(struct ifnet *ifp, protocol_family_t pf, 5225 const struct ifnet_demux_desc *da, u_int32_t dc) 5226{ 5227#pragma unused(ifp, pf, da, dc) 5228 return (EINVAL); 5229} 5230 5231static errno_t 5232ifp_if_del_proto(struct ifnet *ifp, protocol_family_t pf) 5233{ 5234#pragma unused(ifp, pf) 5235 return (EINVAL); 5236} 5237 5238static errno_t 5239ifp_if_check_multi(struct ifnet *ifp, const struct sockaddr *sa) 5240{ 5241#pragma unused(ifp, sa) 5242 return (EOPNOTSUPP); 5243} 5244 5245static errno_t ifp_if_framer(struct ifnet *ifp, struct mbuf **m, 5246const struct sockaddr *sa, const char *ll, const char *t 5247#if CONFIG_EMBEDDED 5248 , 5249 u_int32_t *pre, u_int32_t *post 5250#endif /* CONFIG_EMBEDDED */ 5251 ) 5252{ 5253#pragma unused(ifp, m, sa, ll, t) 5254 m_freem(*m); 5255 *m = NULL; 5256#if CONFIG_EMBEDDED 5257 *pre = 0; 5258 *post = 0; 5259#endif /* CONFIG_EMBEDDED */ 5260 return (EJUSTRETURN); 5261} 5262 5263errno_t 5264ifp_if_ioctl(struct ifnet *ifp, unsigned long cmd, void *arg) 5265{ 5266#pragma unused(ifp, cmd, arg) 5267 return (EOPNOTSUPP); 5268} 5269 5270static errno_t 5271ifp_if_set_bpf_tap(struct ifnet *ifp, bpf_tap_mode tm, bpf_packet_func f) 5272{ 5273#pragma unused(ifp, tm, f) 5274 /* XXX not sure what to do here */ 5275 return (0); 5276} 5277 5278static void 5279ifp_if_free(struct ifnet *ifp) 5280{ 5281#pragma unused(ifp) 5282} 5283 5284static void 5285ifp_if_event(struct ifnet *ifp, const struct kev_msg *e) 5286{ 5287#pragma unused(ifp, e) 5288} 5289 5290__private_extern__ 5291int dlil_if_acquire(u_int32_t family, const void *uniqueid, 5292 size_t uniqueid_len, struct ifnet **ifp) 5293{ 5294 struct ifnet *ifp1 = NULL; 5295 struct dlil_ifnet *dlifp1 = NULL; 5296 void *buf, *base, **pbuf; 5297 int ret = 0; 5298 5299 dlil_if_lock(); 5300 TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) { 5301 ifp1 = (struct ifnet *)dlifp1; 5302 5303 if (ifp1->if_family != family) 5304 continue; 5305 5306 lck_mtx_lock(&dlifp1->dl_if_lock); 5307 /* same uniqueid and same len or no unique id specified */ 5308 if ((uniqueid_len == dlifp1->dl_if_uniqueid_len) && 5309 !bcmp(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len)) { 5310 /* check for matching interface in use */ 5311 if (dlifp1->dl_if_flags & DLIF_INUSE) { 5312 if (uniqueid_len) { 5313 ret = EBUSY; 5314 lck_mtx_unlock(&dlifp1->dl_if_lock); 5315 goto end; 5316 } 5317 } else { 5318 dlifp1->dl_if_flags |= (DLIF_INUSE|DLIF_REUSE); 5319 lck_mtx_unlock(&dlifp1->dl_if_lock); 5320 *ifp = ifp1; 5321 goto end; 5322 } 5323 } 5324 lck_mtx_unlock(&dlifp1->dl_if_lock); 5325 } 5326 5327 /* no interface found, allocate a new one */ 5328 buf = zalloc(dlif_zone); 5329 if (buf == NULL) { 5330 ret = ENOMEM; 5331 goto end; 5332 } 5333 bzero(buf, dlif_bufsize); 5334 5335 /* Get the 64-bit aligned base address for this object */ 5336 base = (void *)P2ROUNDUP((intptr_t)buf + sizeof (u_int64_t), 5337 sizeof (u_int64_t)); 5338 VERIFY(((intptr_t)base + dlif_size) <= ((intptr_t)buf + dlif_bufsize)); 5339 5340 /* 5341 * Wind back a pointer size from the aligned base and 5342 * save the original address so we can free it later. 5343 */ 5344 pbuf = (void **)((intptr_t)base - sizeof (void *)); 5345 *pbuf = buf; 5346 dlifp1 = base; 5347 5348 if (uniqueid_len) { 5349 MALLOC(dlifp1->dl_if_uniqueid, void *, uniqueid_len, 5350 M_NKE, M_WAITOK); 5351 if (dlifp1->dl_if_uniqueid == NULL) { 5352 zfree(dlif_zone, dlifp1); 5353 ret = ENOMEM; 5354 goto end; 5355 } 5356 bcopy(uniqueid, dlifp1->dl_if_uniqueid, uniqueid_len); 5357 dlifp1->dl_if_uniqueid_len = uniqueid_len; 5358 } 5359 5360 ifp1 = (struct ifnet *)dlifp1; 5361 dlifp1->dl_if_flags = DLIF_INUSE; 5362 if (ifnet_debug) { 5363 dlifp1->dl_if_flags |= DLIF_DEBUG; 5364 dlifp1->dl_if_trace = dlil_if_trace; 5365 } 5366 ifp1->if_name = dlifp1->dl_if_namestorage; 5367 5368 /* initialize interface description */ 5369 ifp1->if_desc.ifd_maxlen = IF_DESCSIZE; 5370 ifp1->if_desc.ifd_len = 0; 5371 ifp1->if_desc.ifd_desc = dlifp1->dl_if_descstorage; 5372 5373#if CONFIG_MACF_NET 5374 mac_ifnet_label_init(ifp1); 5375#endif 5376 5377 if ((ret = dlil_alloc_local_stats(ifp1)) != 0) { 5378 DLIL_PRINTF("%s: failed to allocate if local stats, " 5379 "error: %d\n", __func__, ret); 5380 /* This probably shouldn't be fatal */ 5381 ret = 0; 5382 } 5383 5384 lck_mtx_init(&dlifp1->dl_if_lock, ifnet_lock_group, ifnet_lock_attr); 5385 lck_rw_init(&ifp1->if_lock, ifnet_lock_group, ifnet_lock_attr); 5386 lck_mtx_init(&ifp1->if_ref_lock, ifnet_lock_group, ifnet_lock_attr); 5387 lck_mtx_init(&ifp1->if_flt_lock, ifnet_lock_group, ifnet_lock_attr); 5388 lck_mtx_init(&ifp1->if_addrconfig_lock, ifnet_lock_group, 5389 ifnet_lock_attr); 5390 lck_rw_init(&ifp1->if_llreach_lock, ifnet_lock_group, ifnet_lock_attr); 5391 5392 /* for send data paths */ 5393 lck_mtx_init(&ifp1->if_start_lock, ifnet_snd_lock_group, 5394 ifnet_lock_attr); 5395 lck_mtx_init(&ifp1->if_cached_route_lock, ifnet_snd_lock_group, 5396 ifnet_lock_attr); 5397 lck_mtx_init(&ifp1->if_snd.ifcq_lock, ifnet_snd_lock_group, 5398 ifnet_lock_attr); 5399 5400 /* for receive data paths */ 5401 lck_mtx_init(&ifp1->if_poll_lock, ifnet_rcv_lock_group, 5402 ifnet_lock_attr); 5403 5404 TAILQ_INSERT_TAIL(&dlil_ifnet_head, dlifp1, dl_if_link); 5405 5406 *ifp = ifp1; 5407 5408end: 5409 dlil_if_unlock(); 5410 5411 VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) && 5412 IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t)))); 5413 5414 return (ret); 5415} 5416 5417__private_extern__ void 5418dlil_if_release(ifnet_t ifp) 5419{ 5420 struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp; 5421 5422 ifnet_lock_exclusive(ifp); 5423 lck_mtx_lock(&dlifp->dl_if_lock); 5424 dlifp->dl_if_flags &= ~DLIF_INUSE; 5425 strncpy(dlifp->dl_if_namestorage, ifp->if_name, IFNAMSIZ); 5426 ifp->if_name = dlifp->dl_if_namestorage; 5427 lck_mtx_unlock(&dlifp->dl_if_lock); 5428#if CONFIG_MACF_NET 5429 /* 5430 * We can either recycle the MAC label here or in dlil_if_acquire(). 5431 * It seems logical to do it here but this means that anything that 5432 * still has a handle on ifp will now see it as unlabeled. 5433 * Since the interface is "dead" that may be OK. Revisit later. 5434 */ 5435 mac_ifnet_label_recycle(ifp); 5436#endif 5437 ifnet_lock_done(ifp); 5438} 5439 5440__private_extern__ void 5441dlil_if_lock(void) 5442{ 5443 lck_mtx_lock(&dlil_ifnet_lock); 5444} 5445 5446__private_extern__ void 5447dlil_if_unlock(void) 5448{ 5449 lck_mtx_unlock(&dlil_ifnet_lock); 5450} 5451 5452__private_extern__ void 5453dlil_if_lock_assert(void) 5454{ 5455 lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED); 5456} 5457 5458__private_extern__ void 5459dlil_proto_unplumb_all(struct ifnet *ifp) 5460{ 5461 /* 5462 * if_proto_hash[0-3] are for PF_INET, PF_INET6, PF_APPLETALK 5463 * and PF_VLAN, where each bucket contains exactly one entry; 5464 * PF_VLAN does not need an explicit unplumb. 5465 * 5466 * if_proto_hash[4] is for other protocols; we expect anything 5467 * in this bucket to respond to the DETACHING event (which would 5468 * have happened by now) and do the unplumb then. 5469 */ 5470 (void) proto_unplumb(PF_INET, ifp); 5471#if INET6 5472 (void) proto_unplumb(PF_INET6, ifp); 5473#endif /* INET6 */ 5474#if NETAT 5475 (void) proto_unplumb(PF_APPLETALK, ifp); 5476#endif /* NETAT */ 5477} 5478 5479static void 5480ifp_src_route_copyout(struct ifnet *ifp, struct route *dst) 5481{ 5482 lck_mtx_lock_spin(&ifp->if_cached_route_lock); 5483 lck_mtx_convert_spin(&ifp->if_cached_route_lock); 5484 5485 route_copyout(dst, &ifp->if_src_route, sizeof (*dst)); 5486 5487 lck_mtx_unlock(&ifp->if_cached_route_lock); 5488} 5489 5490static void 5491ifp_src_route_copyin(struct ifnet *ifp, struct route *src) 5492{ 5493 lck_mtx_lock_spin(&ifp->if_cached_route_lock); 5494 lck_mtx_convert_spin(&ifp->if_cached_route_lock); 5495 5496 if (ifp->if_fwd_cacheok) { 5497 route_copyin(src, &ifp->if_src_route, sizeof (*src)); 5498 } else { 5499 rtfree(src->ro_rt); 5500 src->ro_rt = NULL; 5501 } 5502 lck_mtx_unlock(&ifp->if_cached_route_lock); 5503} 5504 5505#if INET6 5506static void 5507ifp_src_route6_copyout(struct ifnet *ifp, struct route_in6 *dst) 5508{ 5509 lck_mtx_lock_spin(&ifp->if_cached_route_lock); 5510 lck_mtx_convert_spin(&ifp->if_cached_route_lock); 5511 5512 route_copyout((struct route *)dst, (struct route *)&ifp->if_src_route6, 5513 sizeof (*dst)); 5514 5515 lck_mtx_unlock(&ifp->if_cached_route_lock); 5516} 5517 5518static void 5519ifp_src_route6_copyin(struct ifnet *ifp, struct route_in6 *src) 5520{ 5521 lck_mtx_lock_spin(&ifp->if_cached_route_lock); 5522 lck_mtx_convert_spin(&ifp->if_cached_route_lock); 5523 5524 if (ifp->if_fwd_cacheok) { 5525 route_copyin((struct route *)src, 5526 (struct route *)&ifp->if_src_route6, sizeof (*src)); 5527 } else { 5528 rtfree(src->ro_rt); 5529 src->ro_rt = NULL; 5530 } 5531 lck_mtx_unlock(&ifp->if_cached_route_lock); 5532} 5533#endif /* INET6 */ 5534 5535struct rtentry * 5536ifnet_cached_rtlookup_inet(struct ifnet *ifp, struct in_addr src_ip) 5537{ 5538 struct route src_rt; 5539 struct sockaddr_in *dst; 5540 5541 dst = (struct sockaddr_in *)(void *)(&src_rt.ro_dst); 5542 5543 ifp_src_route_copyout(ifp, &src_rt); 5544 5545 if (src_rt.ro_rt == NULL || !(src_rt.ro_rt->rt_flags & RTF_UP) || 5546 src_ip.s_addr != dst->sin_addr.s_addr || 5547 src_rt.ro_rt->generation_id != route_generation) { 5548 if (src_rt.ro_rt != NULL) { 5549 rtfree(src_rt.ro_rt); 5550 src_rt.ro_rt = NULL; 5551 } else if (dst->sin_family != AF_INET) { 5552 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst)); 5553 dst->sin_len = sizeof (src_rt.ro_dst); 5554 dst->sin_family = AF_INET; 5555 } 5556 dst->sin_addr = src_ip; 5557 5558 if (src_rt.ro_rt == NULL) { 5559 src_rt.ro_rt = rtalloc1_scoped((struct sockaddr *)dst, 5560 0, 0, ifp->if_index); 5561 5562 if (src_rt.ro_rt != NULL) { 5563 /* retain a ref, copyin consumes one */ 5564 struct rtentry *rte = src_rt.ro_rt; 5565 RT_ADDREF(rte); 5566 ifp_src_route_copyin(ifp, &src_rt); 5567 src_rt.ro_rt = rte; 5568 } 5569 } 5570 } 5571 5572 return (src_rt.ro_rt); 5573} 5574 5575#if INET6 5576struct rtentry* 5577ifnet_cached_rtlookup_inet6(struct ifnet *ifp, struct in6_addr *src_ip6) 5578{ 5579 struct route_in6 src_rt; 5580 5581 ifp_src_route6_copyout(ifp, &src_rt); 5582 5583 if (src_rt.ro_rt == NULL || !(src_rt.ro_rt->rt_flags & RTF_UP) || 5584 !IN6_ARE_ADDR_EQUAL(src_ip6, &src_rt.ro_dst.sin6_addr) || 5585 src_rt.ro_rt->generation_id != route_generation) { 5586 if (src_rt.ro_rt != NULL) { 5587 rtfree(src_rt.ro_rt); 5588 src_rt.ro_rt = NULL; 5589 } else if (src_rt.ro_dst.sin6_family != AF_INET6) { 5590 bzero(&src_rt.ro_dst, sizeof (src_rt.ro_dst)); 5591 src_rt.ro_dst.sin6_len = sizeof (src_rt.ro_dst); 5592 src_rt.ro_dst.sin6_family = AF_INET6; 5593 } 5594 src_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(ifp, src_ip6); 5595 bcopy(src_ip6, &src_rt.ro_dst.sin6_addr, 5596 sizeof (src_rt.ro_dst.sin6_addr)); 5597 5598 if (src_rt.ro_rt == NULL) { 5599 src_rt.ro_rt = rtalloc1_scoped( 5600 (struct sockaddr *)&src_rt.ro_dst, 0, 0, 5601 ifp->if_index); 5602 5603 if (src_rt.ro_rt != NULL) { 5604 /* retain a ref, copyin consumes one */ 5605 struct rtentry *rte = src_rt.ro_rt; 5606 RT_ADDREF(rte); 5607 ifp_src_route6_copyin(ifp, &src_rt); 5608 src_rt.ro_rt = rte; 5609 } 5610 } 5611 } 5612 5613 return (src_rt.ro_rt); 5614} 5615#endif /* INET6 */ 5616 5617void 5618if_lqm_update(struct ifnet *ifp, int lqm) 5619{ 5620 struct kev_dl_link_quality_metric_data ev_lqm_data; 5621 5622 VERIFY(lqm >= IFNET_LQM_MIN && lqm <= IFNET_LQM_MAX); 5623 5624 /* Normalize to edge */ 5625 if (lqm > IFNET_LQM_THRESH_UNKNOWN && lqm <= IFNET_LQM_THRESH_POOR) 5626 lqm = IFNET_LQM_THRESH_POOR; 5627 else if (lqm > IFNET_LQM_THRESH_POOR && lqm <= IFNET_LQM_THRESH_GOOD) 5628 lqm = IFNET_LQM_THRESH_GOOD; 5629 5630 ifnet_lock_exclusive(ifp); 5631 if (lqm == ifp->if_lqm) { 5632 ifnet_lock_done(ifp); 5633 return; /* nothing to update */ 5634 } 5635 ifp->if_lqm = lqm; 5636 ifnet_lock_done(ifp); 5637 5638 bzero(&ev_lqm_data, sizeof (ev_lqm_data)); 5639 ev_lqm_data.link_quality_metric = lqm; 5640 5641 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_LINK_QUALITY_METRIC_CHANGED, 5642 (struct net_event_data *)&ev_lqm_data, sizeof (ev_lqm_data)); 5643} 5644 5645/* for uuid.c */ 5646int 5647uuid_get_ethernet(u_int8_t *node) 5648{ 5649 struct ifnet *ifp; 5650 struct sockaddr_dl *sdl; 5651 5652 ifnet_head_lock_shared(); 5653 TAILQ_FOREACH(ifp, &ifnet_head, if_link) { 5654 ifnet_lock_shared(ifp); 5655 IFA_LOCK_SPIN(ifp->if_lladdr); 5656 sdl = (struct sockaddr_dl *)(void *)ifp->if_lladdr->ifa_addr; 5657 if (sdl->sdl_type == IFT_ETHER) { 5658 memcpy(node, LLADDR(sdl), ETHER_ADDR_LEN); 5659 IFA_UNLOCK(ifp->if_lladdr); 5660 ifnet_lock_done(ifp); 5661 ifnet_head_done(); 5662 return (0); 5663 } 5664 IFA_UNLOCK(ifp->if_lladdr); 5665 ifnet_lock_done(ifp); 5666 } 5667 ifnet_head_done(); 5668 5669 return (-1); 5670} 5671 5672static int 5673sysctl_rxpoll SYSCTL_HANDLER_ARGS 5674{ 5675#pragma unused(arg1, arg2) 5676 int i, err; 5677 5678 i = if_rxpoll; 5679 5680 err = sysctl_handle_int(oidp, &i, 0, req); 5681 if (err != 0 || req->newptr == USER_ADDR_NULL) 5682 return (err); 5683 5684 if (net_rxpoll == 0) 5685 return (ENXIO); 5686 5687 if_rxpoll = i; 5688 return (err); 5689} 5690 5691static int 5692sysctl_sndq_maxlen SYSCTL_HANDLER_ARGS 5693{ 5694#pragma unused(arg1, arg2) 5695 int i, err; 5696 5697 i = if_sndq_maxlen; 5698 5699 err = sysctl_handle_int(oidp, &i, 0, req); 5700 if (err != 0 || req->newptr == USER_ADDR_NULL) 5701 return (err); 5702 5703 if (i < IF_SNDQ_MINLEN) 5704 i = IF_SNDQ_MINLEN; 5705 5706 if_sndq_maxlen = i; 5707 return (err); 5708} 5709 5710static int 5711sysctl_rcvq_maxlen SYSCTL_HANDLER_ARGS 5712{ 5713#pragma unused(arg1, arg2) 5714 int i, err; 5715 5716 i = if_rcvq_maxlen; 5717 5718 err = sysctl_handle_int(oidp, &i, 0, req); 5719 if (err != 0 || req->newptr == USER_ADDR_NULL) 5720 return (err); 5721 5722 if (i < IF_RCVQ_MINLEN) 5723 i = IF_RCVQ_MINLEN; 5724 5725 if_rcvq_maxlen = i; 5726 return (err); 5727} 5728 5729void 5730ifnet_fclist_append(struct sfb *sp, struct sfb_fc_list *fcl) 5731{ 5732 struct sfb_bin_fcentry *fce, *tfce; 5733 5734 lck_mtx_lock_spin(&ifnet_fclist_lock); 5735 5736 SLIST_FOREACH_SAFE(fce, fcl, fce_link, tfce) { 5737 SLIST_REMOVE(fcl, fce, sfb_bin_fcentry, fce_link); 5738 SLIST_INSERT_HEAD(&ifnet_fclist, fce, fce_link); 5739 sp->sfb_stats.flow_feedback++; 5740 } 5741 VERIFY(SLIST_EMPTY(fcl) && !SLIST_EMPTY(&ifnet_fclist)); 5742 5743 wakeup(&ifnet_fclist); 5744 5745 lck_mtx_unlock(&ifnet_fclist_lock); 5746} 5747 5748struct sfb_bin_fcentry * 5749ifnet_fce_alloc(int how) 5750{ 5751 struct sfb_bin_fcentry *fce; 5752 5753 fce = (how == M_WAITOK) ? zalloc(ifnet_fcezone) : 5754 zalloc_noblock(ifnet_fcezone); 5755 if (fce != NULL) 5756 bzero(fce, ifnet_fcezone_size); 5757 5758 return (fce); 5759} 5760 5761void 5762ifnet_fce_free(struct sfb_bin_fcentry *fce) 5763{ 5764 zfree(ifnet_fcezone, fce); 5765} 5766 5767static void 5768ifnet_fc_init(void) 5769{ 5770 thread_t thread = THREAD_NULL; 5771 5772 SLIST_INIT(&ifnet_fclist); 5773 lck_mtx_init(&ifnet_fclist_lock, ifnet_snd_lock_group, NULL); 5774 5775 ifnet_fcezone_size = P2ROUNDUP(sizeof (struct sfb_bin_fcentry), 5776 sizeof (u_int64_t)); 5777 ifnet_fcezone = zinit(ifnet_fcezone_size, 5778 IFNET_FCEZONE_MAX * ifnet_fcezone_size, 0, IFNET_FCEZONE_NAME); 5779 if (ifnet_fcezone == NULL) { 5780 panic("%s: failed allocating %s", __func__, IFNET_FCEZONE_NAME); 5781 /* NOTREACHED */ 5782 } 5783 zone_change(ifnet_fcezone, Z_EXPAND, TRUE); 5784 zone_change(ifnet_fcezone, Z_CALLERACCT, FALSE); 5785 5786 if (kernel_thread_start(ifnet_fc_thread_func, 5787 NULL, &thread) != KERN_SUCCESS) { 5788 panic("%s: couldn't create flow event advisory thread", 5789 __func__); 5790 /* NOTREACHED */ 5791 } 5792 thread_deallocate(thread); 5793} 5794 5795static int 5796ifnet_fc_thread_cont(int err) 5797{ 5798#pragma unused(err) 5799 struct sfb_bin_fcentry *fce; 5800 struct inp_fc_entry *infc; 5801 5802 for (;;) { 5803 lck_mtx_assert(&ifnet_fclist_lock, LCK_MTX_ASSERT_OWNED); 5804 while (SLIST_EMPTY(&ifnet_fclist)) { 5805 (void) msleep0(&ifnet_fclist, &ifnet_fclist_lock, 5806 (PSOCK | PSPIN), "ifnet_fc_cont", 0, 5807 ifnet_fc_thread_cont); 5808 /* NOTREACHED */ 5809 } 5810 5811 fce = SLIST_FIRST(&ifnet_fclist); 5812 SLIST_REMOVE(&ifnet_fclist, fce, sfb_bin_fcentry, fce_link); 5813 SLIST_NEXT(fce, fce_link) = NULL; 5814 lck_mtx_unlock(&ifnet_fclist_lock); 5815 5816 infc = inp_fc_getinp(fce->fce_flowhash); 5817 if (infc == NULL) { 5818 ifnet_fce_free(fce); 5819 lck_mtx_lock_spin(&ifnet_fclist_lock); 5820 continue; 5821 } 5822 VERIFY(infc->infc_inp != NULL); 5823 5824 inp_fc_feedback(infc->infc_inp); 5825 5826 inp_fc_entry_free(infc); 5827 ifnet_fce_free(fce); 5828 lck_mtx_lock_spin(&ifnet_fclist_lock); 5829 } 5830} 5831 5832static void 5833ifnet_fc_thread_func(void *v, wait_result_t w) 5834{ 5835#pragma unused(v, w) 5836 lck_mtx_lock(&ifnet_fclist_lock); 5837 (void) msleep0(&ifnet_fclist, &ifnet_fclist_lock, 5838 (PSOCK | PSPIN), "ifnet_fc", 0, ifnet_fc_thread_cont); 5839 /* 5840 * msleep0() shouldn't have returned as PCATCH was not set; 5841 * therefore assert in this case. 5842 */ 5843 lck_mtx_unlock(&ifnet_fclist_lock); 5844 VERIFY(0); 5845} 5846 5847void 5848dlil_node_present(struct ifnet *ifp, struct sockaddr *sa, 5849 int32_t rssi, int lqm, int npm, u_int8_t srvinfo[48]) 5850{ 5851 struct kev_dl_node_presence kev; 5852 struct sockaddr_dl *sdl; 5853 struct sockaddr_in6 *sin6; 5854 5855 VERIFY(ifp); 5856 VERIFY(sa); 5857 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6); 5858 5859 bzero(&kev, sizeof (kev)); 5860 sin6 = &kev.sin6_node_address; 5861 sdl = &kev.sdl_node_address; 5862 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6); 5863 kev.rssi = rssi; 5864 kev.link_quality_metric = lqm; 5865 kev.node_proximity_metric = npm; 5866 bcopy(srvinfo, kev.node_service_info, sizeof (kev.node_service_info)); 5867 5868 nd6_alt_node_present(ifp, sin6, sdl, rssi, lqm, npm); 5869 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_PRESENCE, 5870 &kev.link_data, sizeof (kev)); 5871} 5872 5873void 5874dlil_node_absent(struct ifnet *ifp, struct sockaddr *sa) 5875{ 5876 struct kev_dl_node_absence kev; 5877 struct sockaddr_in6 *sin6; 5878 struct sockaddr_dl *sdl; 5879 5880 VERIFY(ifp); 5881 VERIFY(sa); 5882 VERIFY(sa->sa_family == AF_LINK || sa->sa_family == AF_INET6); 5883 5884 bzero(&kev, sizeof (kev)); 5885 sin6 = &kev.sin6_node_address; 5886 sdl = &kev.sdl_node_address; 5887 nd6_alt_node_addr_decompose(ifp, sa, sdl, sin6); 5888 5889 nd6_alt_node_absent(ifp, sin6); 5890 dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_NODE_ABSENCE, 5891 &kev.link_data, sizeof (kev)); 5892} 5893 5894errno_t 5895ifnet_getset_opportunistic(ifnet_t ifp, u_long cmd, struct ifreq *ifr, 5896 struct proc *p) 5897{ 5898 u_int32_t level = IFNET_THROTTLE_OFF; 5899 errno_t result = 0; 5900 5901 VERIFY(cmd == SIOCSIFOPPORTUNISTIC || cmd == SIOCGIFOPPORTUNISTIC); 5902 5903 if (cmd == SIOCSIFOPPORTUNISTIC) { 5904 /* 5905 * XXX: Use priv_check_cred() instead of root check? 5906 */ 5907 if ((result = proc_suser(p)) != 0) 5908 return (result); 5909 5910 if (ifr->ifr_opportunistic.ifo_flags == 5911 IFRIFOF_BLOCK_OPPORTUNISTIC) 5912 level = IFNET_THROTTLE_OPPORTUNISTIC; 5913 else if (ifr->ifr_opportunistic.ifo_flags == 0) 5914 level = IFNET_THROTTLE_OFF; 5915 else 5916 result = EINVAL; 5917 5918 if (result == 0) 5919 result = ifnet_set_throttle(ifp, level); 5920 } else if ((result = ifnet_get_throttle(ifp, &level)) == 0) { 5921 ifr->ifr_opportunistic.ifo_flags = 0; 5922 if (level == IFNET_THROTTLE_OPPORTUNISTIC) { 5923 ifr->ifr_opportunistic.ifo_flags |= 5924 IFRIFOF_BLOCK_OPPORTUNISTIC; 5925 } 5926 } 5927 5928 /* 5929 * Return the count of current opportunistic connections 5930 * over the interface. 5931 */ 5932 if (result == 0) { 5933 uint32_t flags = 0; 5934 flags |= (cmd == SIOCSIFOPPORTUNISTIC) ? 5935 INPCB_OPPORTUNISTIC_SETCMD : 0; 5936 flags |= (level == IFNET_THROTTLE_OPPORTUNISTIC) ? 5937 INPCB_OPPORTUNISTIC_THROTTLEON : 0; 5938 ifr->ifr_opportunistic.ifo_inuse = 5939 udp_count_opportunistic(ifp->if_index, flags) + 5940 tcp_count_opportunistic(ifp->if_index, flags); 5941 } 5942 5943 if (result == EALREADY) 5944 result = 0; 5945 5946 return (result); 5947} 5948 5949int 5950ifnet_get_throttle(struct ifnet *ifp, u_int32_t *level) 5951{ 5952 struct ifclassq *ifq; 5953 int err = 0; 5954 5955 if (!(ifp->if_eflags & IFEF_TXSTART)) 5956 return (ENXIO); 5957 5958 *level = IFNET_THROTTLE_OFF; 5959 5960 ifq = &ifp->if_snd; 5961 IFCQ_LOCK(ifq); 5962 /* Throttling works only for IFCQ, not ALTQ instances */ 5963 if (IFCQ_IS_ENABLED(ifq)) 5964 IFCQ_GET_THROTTLE(ifq, *level, err); 5965 IFCQ_UNLOCK(ifq); 5966 5967 return (err); 5968} 5969 5970int 5971ifnet_set_throttle(struct ifnet *ifp, u_int32_t level) 5972{ 5973 struct ifclassq *ifq; 5974 int err = 0; 5975 5976 if (!(ifp->if_eflags & IFEF_TXSTART)) 5977 return (ENXIO); 5978 5979 switch (level) { 5980 case IFNET_THROTTLE_OFF: 5981 case IFNET_THROTTLE_OPPORTUNISTIC: 5982#if PF_ALTQ 5983 /* Throttling works only for IFCQ, not ALTQ instances */ 5984 if (ALTQ_IS_ENABLED(IFCQ_ALTQ(ifq))) 5985 return (ENXIO); 5986#endif /* PF_ALTQ */ 5987 break; 5988 default: 5989 return (EINVAL); 5990 } 5991 5992 ifq = &ifp->if_snd; 5993 IFCQ_LOCK(ifq); 5994 if (IFCQ_IS_ENABLED(ifq)) 5995 IFCQ_SET_THROTTLE(ifq, level, err); 5996 IFCQ_UNLOCK(ifq); 5997 5998 if (err == 0) { 5999 printf("%s%d: throttling level set to %d\n", ifp->if_name, 6000 ifp->if_unit, level); 6001 if (level == IFNET_THROTTLE_OFF) 6002 ifnet_start(ifp); 6003 } 6004 6005 return (err); 6006} 6007