1262028Sglebius/*- 2262030Sglebius * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org> 3262028Sglebius * Copyright (c) 2008-2010, BitGravity Inc. 4262028Sglebius * All rights reserved. 5262028Sglebius * 6262028Sglebius * Redistribution and use in source and binary forms, with or without 7262028Sglebius * modification, are permitted provided that the following conditions are met: 8262028Sglebius * 9262028Sglebius * 1. Redistributions of source code must retain the above copyright notice, 10262028Sglebius * this list of conditions and the following disclaimer. 11262028Sglebius * 12262028Sglebius * 2. Neither the name of the BitGravity Corporation nor the names of its 13262028Sglebius * contributors may be used to endorse or promote products derived from 14262028Sglebius * this software without specific prior written permission. 15262028Sglebius * 16262028Sglebius * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17262028Sglebius * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18262028Sglebius * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19262028Sglebius * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20262028Sglebius * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21262028Sglebius * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22262028Sglebius * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23262028Sglebius * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24262028Sglebius * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25262028Sglebius * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26262028Sglebius * POSSIBILITY OF SUCH DAMAGE. 27262028Sglebius */ 28191255Skmacy 29191255Skmacy#include "opt_route.h" 30191255Skmacy#include "opt_mpath.h" 31196368Skmacy#include "opt_ddb.h" 32205066Skmacy#include "opt_inet.h" 33205066Skmacy#include "opt_inet6.h" 34191255Skmacy 35191255Skmacy#include <sys/cdefs.h> 36191255Skmacy__FBSDID("$FreeBSD$"); 37191255Skmacy 38261582Sglebius#include <sys/param.h> 39191255Skmacy#include <sys/types.h> 40191255Skmacy#include <sys/bitstring.h> 41196368Skmacy#include <sys/condvar.h> 42191255Skmacy#include <sys/callout.h> 43240086Sglebius#include <sys/hash.h> 44261582Sglebius#include <sys/kernel.h> 45191255Skmacy#include <sys/kthread.h> 46191255Skmacy#include <sys/limits.h> 47191255Skmacy#include <sys/malloc.h> 48191255Skmacy#include <sys/mbuf.h> 49261823Sglebius#include <sys/pcpu.h> 50191255Skmacy#include <sys/proc.h> 51261823Sglebius#include <sys/queue.h> 52205066Skmacy#include <sys/sbuf.h> 53191255Skmacy#include <sys/sched.h> 54191255Skmacy#include <sys/smp.h> 55191255Skmacy#include <sys/socket.h> 56191255Skmacy#include <sys/syslog.h> 57191255Skmacy#include <sys/sysctl.h> 58261823Sglebius#include <vm/uma.h> 59191255Skmacy 60191255Skmacy#include <net/if.h> 61191255Skmacy#include <net/if_llatbl.h> 62191255Skmacy#include <net/if_var.h> 63261582Sglebius#include <net/route.h> 64191255Skmacy#include <net/flowtable.h> 65195837Srwatson#include <net/vnet.h> 66191255Skmacy 67191255Skmacy#include <netinet/in.h> 68191255Skmacy#include <netinet/in_systm.h> 69191255Skmacy#include <netinet/in_var.h> 70191255Skmacy#include <netinet/if_ether.h> 71191255Skmacy#include <netinet/ip.h> 72205066Skmacy#ifdef INET6 73205066Skmacy#include <netinet/ip6.h> 74205066Skmacy#endif 75262027Sglebius#ifdef FLOWTABLE_HASH_ALL 76191255Skmacy#include <netinet/tcp.h> 77191255Skmacy#include <netinet/udp.h> 78191255Skmacy#include <netinet/sctp.h> 79262027Sglebius#endif 80191255Skmacy 81196368Skmacy#include <ddb/ddb.h> 82191255Skmacy 83262027Sglebius#ifdef FLOWTABLE_HASH_ALL 84262027Sglebius#define KEY_PORTS (sizeof(uint16_t) * 2) 85262027Sglebius#define KEY_ADDRS 2 86262027Sglebius#else 87262027Sglebius#define KEY_PORTS 0 88262027Sglebius#define KEY_ADDRS 1 89262027Sglebius#endif 90191255Skmacy 91262027Sglebius#ifdef INET6 92262027Sglebius#define KEY_ADDR_LEN sizeof(struct in6_addr) 93262027Sglebius#else 94262027Sglebius#define KEY_ADDR_LEN sizeof(struct in_addr) 95261823Sglebius#endif 96191255Skmacy 97262027Sglebius#define KEYLEN ((KEY_ADDR_LEN * KEY_ADDRS + KEY_PORTS) / sizeof(uint32_t)) 98191255Skmacy 99191255Skmacystruct flentry { 100262027Sglebius uint32_t f_hash; /* hash flowing forward */ 101262027Sglebius uint32_t f_key[KEYLEN]; /* address(es and ports) */ 102262027Sglebius uint32_t f_uptime; /* uptime at last access */ 103262027Sglebius uint16_t f_fibnum; /* fib index */ 104262027Sglebius#ifdef FLOWTABLE_HASH_ALL 105191255Skmacy uint8_t f_proto; /* protocol */ 106262027Sglebius uint8_t f_flags; /* stale? */ 107262027Sglebius#define FL_STALE 1 108262027Sglebius#endif 109261823Sglebius SLIST_ENTRY(flentry) f_next; /* pointer to collision entry */ 110261823Sglebius struct rtentry *f_rt; /* rtentry for flow */ 111261823Sglebius struct llentry *f_lle; /* llentry for flow */ 112191255Skmacy}; 113262027Sglebius#undef KEYLEN 114191255Skmacy 115261823SglebiusSLIST_HEAD(flist, flentry); 116261823Sglebius/* Make sure we can use pcpu_zone_ptr for struct flist. */ 117261823SglebiusCTASSERT(sizeof(struct flist) == sizeof(void *)); 118191255Skmacy 119191255Skmacystruct flowtable { 120261601Sglebius counter_u64_t *ft_stat; 121191255Skmacy int ft_size; 122205488Skmacy /* 123261823Sglebius * ft_table is a malloc(9)ed array of pointers. Pointers point to 124261823Sglebius * memory from UMA_ZONE_PCPU zone. 125261823Sglebius * ft_masks is per-cpu pointer itself. Each instance points 126261823Sglebius * to a malloc(9)ed bitset, that is private to corresponding CPU. 127261582Sglebius */ 128261823Sglebius struct flist **ft_table; 129261823Sglebius bitstr_t **ft_masks; 130191324Skmacy bitstr_t *ft_tmpmask; 131261823Sglebius}; 132205488Skmacy 133261601Sglebius#define FLOWSTAT_ADD(ft, name, v) \ 134261601Sglebius counter_u64_add((ft)->ft_stat[offsetof(struct flowtable_stat, name) / sizeof(uint64_t)], (v)) 135261601Sglebius#define FLOWSTAT_INC(ft, name) FLOWSTAT_ADD(ft, name, 1) 136261601Sglebius 137191255Skmacystatic struct proc *flowcleanerproc; 138261601Sglebiusstatic uint32_t flow_hashjitter; 139191255Skmacy 140216855Sbzstatic struct cv flowclean_f_cv; 141216855Sbzstatic struct cv flowclean_c_cv; 142196368Skmacystatic struct mtx flowclean_lock; 143196368Skmacystatic uint32_t flowclean_cycles; 144196368Skmacy 145191255Skmacy/* 146191255Skmacy * TODO: 147261582Sglebius * - add sysctls to resize && flush flow tables 148191255Skmacy * - Add per flowtable sysctls for statistics and configuring timeouts 149191255Skmacy * - add saturation counter to rtentry to support per-packet load-balancing 150191255Skmacy * add flag to indicate round-robin flow, add list lookup from head 151191255Skmacy for flows 152191255Skmacy * - add sysctl / device node / syscall to support exporting and importing 153191255Skmacy * of flows with flag to indicate that a flow was imported so should 154191255Skmacy * not be considered for auto-cleaning 155191255Skmacy * - support explicit connection state (currently only ad-hoc for DSR) 156194660Szec * - idetach() cleanup for options VIMAGE builds. 157191255Skmacy */ 158261601Sglebius#ifdef INET 159261601Sglebiusstatic VNET_DEFINE(struct flowtable, ip4_ft); 160261823Sglebius#define V_ip4_ft VNET(ip4_ft) 161261601Sglebius#endif 162261601Sglebius#ifdef INET6 163261601Sglebiusstatic VNET_DEFINE(struct flowtable, ip6_ft); 164261601Sglebius#define V_ip6_ft VNET(ip6_ft) 165261601Sglebius#endif 166261601Sglebius 167261823Sglebiusstatic uma_zone_t flow_zone; 168261823Sglebius 169261601Sglebiusstatic VNET_DEFINE(int, flowtable_enable) = 1; 170195727Srwatson#define V_flowtable_enable VNET(flowtable_enable) 171195699Srwatson 172261601Sglebiusstatic SYSCTL_NODE(_net, OID_AUTO, flowtable, CTLFLAG_RD, NULL, 173227309Sed "flowtable"); 174274225SglebiusSYSCTL_INT(_net_flowtable, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW, 175195699Srwatson &VNET_NAME(flowtable_enable), 0, "enable flowtable caching."); 176261823SglebiusSYSCTL_UMA_MAX(_net_flowtable, OID_AUTO, maxflows, CTLFLAG_RW, 177261823Sglebius &flow_zone, "Maximum number of flows allowed"); 178191255Skmacy 179261823Sglebiusstatic MALLOC_DEFINE(M_FTABLE, "flowtable", "flowtable hashes and bitstrings"); 180191255Skmacy 181262027Sglebiusstatic struct flentry * 182262027Sglebiusflowtable_lookup_common(struct flowtable *, uint32_t *, int, uint32_t); 183205066Skmacy 184205066Skmacy#ifdef INET 185262027Sglebiusstatic struct flentry * 186262027Sglebiusflowtable_lookup_ipv4(struct mbuf *m, struct route *ro) 187205066Skmacy{ 188262027Sglebius struct flentry *fle; 189262027Sglebius struct sockaddr_in *sin; 190205066Skmacy struct ip *ip; 191262027Sglebius uint32_t fibnum; 192262027Sglebius#ifdef FLOWTABLE_HASH_ALL 193262027Sglebius uint32_t key[3]; 194191255Skmacy int iphlen; 195205066Skmacy uint16_t sport, dport; 196262027Sglebius uint8_t proto; 197262027Sglebius#endif 198191255Skmacy 199205066Skmacy ip = mtod(m, struct ip *); 200191255Skmacy 201262027Sglebius if (ip->ip_src.s_addr == ip->ip_dst.s_addr || 202262027Sglebius (ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 203262027Sglebius (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) 204262027Sglebius return (NULL); 205262027Sglebius 206262027Sglebius fibnum = M_GETFIB(m); 207262027Sglebius 208262027Sglebius#ifdef FLOWTABLE_HASH_ALL 209262027Sglebius iphlen = ip->ip_hl << 2; 210205066Skmacy proto = ip->ip_p; 211191255Skmacy 212262027Sglebius switch (proto) { 213262027Sglebius case IPPROTO_TCP: { 214262027Sglebius struct tcphdr *th; 215205066Skmacy 216262027Sglebius th = (struct tcphdr *)((char *)ip + iphlen); 217205066Skmacy sport = th->th_sport; 218205066Skmacy dport = th->th_dport; 219262027Sglebius if (th->th_flags & (TH_RST|TH_FIN)) 220262027Sglebius fibnum |= (FL_STALE << 24); 221261582Sglebius break; 222262027Sglebius } 223262027Sglebius case IPPROTO_UDP: { 224262027Sglebius struct udphdr *uh; 225262027Sglebius 226262027Sglebius uh = (struct udphdr *)((char *)ip + iphlen); 227191255Skmacy sport = uh->uh_sport; 228191255Skmacy dport = uh->uh_dport; 229261582Sglebius break; 230262027Sglebius } 231262027Sglebius case IPPROTO_SCTP: { 232262027Sglebius struct sctphdr *sh; 233262027Sglebius 234262027Sglebius sh = (struct sctphdr *)((char *)ip + iphlen); 235191255Skmacy sport = sh->src_port; 236191255Skmacy dport = sh->dest_port; 237262027Sglebius /* XXXGL: handle stale? */ 238261582Sglebius break; 239262027Sglebius } 240191255Skmacy default: 241262027Sglebius sport = dport = 0; 242201758Smbr break; 243191255Skmacy } 244191255Skmacy 245262027Sglebius key[0] = ip->ip_dst.s_addr; 246262027Sglebius key[1] = ip->ip_src.s_addr; 247262027Sglebius key[2] = (dport << 16) | sport; 248262027Sglebius fibnum |= proto << 16; 249191255Skmacy 250262027Sglebius fle = flowtable_lookup_common(&V_ip4_ft, key, 3 * sizeof(uint32_t), 251262027Sglebius fibnum); 252191255Skmacy 253262027Sglebius#else /* !FLOWTABLE_HASH_ALL */ 254191255Skmacy 255262027Sglebius fle = flowtable_lookup_common(&V_ip4_ft, (uint32_t *)&ip->ip_dst, 256262027Sglebius sizeof(struct in_addr), fibnum); 257205066Skmacy 258262027Sglebius#endif /* FLOWTABLE_HASH_ALL */ 259205066Skmacy 260262027Sglebius if (fle == NULL) 261205066Skmacy return (NULL); 262205066Skmacy 263205066Skmacy sin = (struct sockaddr_in *)&ro->ro_dst; 264205066Skmacy sin->sin_family = AF_INET; 265205066Skmacy sin->sin_len = sizeof(*sin); 266262027Sglebius sin->sin_addr = ip->ip_dst; 267262027Sglebius 268262027Sglebius return (fle); 269205066Skmacy} 270205066Skmacy#endif /* INET */ 271205066Skmacy 272205066Skmacy#ifdef INET6 273205066Skmacy/* 274205066Skmacy * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, 275205066Skmacy * then it sets p to point at the offset "len" in the mbuf. WARNING: the 276205066Skmacy * pointer might become stale after other pullups (but we never use it 277205066Skmacy * this way). 278205066Skmacy */ 279205066Skmacy#define PULLUP_TO(_len, p, T) \ 280205066Skmacydo { \ 281205066Skmacy int x = (_len) + sizeof(T); \ 282262027Sglebius if ((m)->m_len < x) \ 283262027Sglebius return (NULL); \ 284205066Skmacy p = (mtod(m, char *) + (_len)); \ 285205066Skmacy} while (0) 286205066Skmacy 287205066Skmacy#define TCP(p) ((struct tcphdr *)(p)) 288205066Skmacy#define SCTP(p) ((struct sctphdr *)(p)) 289205066Skmacy#define UDP(p) ((struct udphdr *)(p)) 290205066Skmacy 291262027Sglebiusstatic struct flentry * 292262027Sglebiusflowtable_lookup_ipv6(struct mbuf *m, struct route *ro) 293205066Skmacy{ 294262027Sglebius struct flentry *fle; 295262027Sglebius struct sockaddr_in6 *sin6; 296205066Skmacy struct ip6_hdr *ip6; 297262027Sglebius uint32_t fibnum; 298262027Sglebius#ifdef FLOWTABLE_HASH_ALL 299262027Sglebius uint32_t key[9]; 300262027Sglebius void *ulp; 301205066Skmacy int hlen; 302262027Sglebius uint16_t sport, dport; 303205066Skmacy u_short offset; 304262027Sglebius uint8_t proto; 305262027Sglebius#else 306262027Sglebius uint32_t key[4]; 307262027Sglebius#endif 308205066Skmacy 309205066Skmacy ip6 = mtod(m, struct ip6_hdr *); 310262027Sglebius if (in6_localaddr(&ip6->ip6_dst)) 311262027Sglebius return (NULL); 312262027Sglebius 313262027Sglebius fibnum = M_GETFIB(m); 314262027Sglebius 315262027Sglebius#ifdef FLOWTABLE_HASH_ALL 316205066Skmacy hlen = sizeof(struct ip6_hdr); 317205066Skmacy proto = ip6->ip6_nxt; 318262027Sglebius offset = sport = dport = 0; 319262027Sglebius ulp = NULL; 320205066Skmacy while (ulp == NULL) { 321205066Skmacy switch (proto) { 322205066Skmacy case IPPROTO_ICMPV6: 323205066Skmacy case IPPROTO_OSPFIGP: 324205066Skmacy case IPPROTO_PIM: 325205066Skmacy case IPPROTO_CARP: 326205066Skmacy case IPPROTO_ESP: 327205066Skmacy case IPPROTO_NONE: 328205066Skmacy ulp = ip6; 329205066Skmacy break; 330205066Skmacy case IPPROTO_TCP: 331205066Skmacy PULLUP_TO(hlen, ulp, struct tcphdr); 332262027Sglebius dport = TCP(ulp)->th_dport; 333262027Sglebius sport = TCP(ulp)->th_sport; 334262027Sglebius if (TCP(ulp)->th_flags & (TH_RST|TH_FIN)) 335262027Sglebius fibnum |= (FL_STALE << 24); 336205066Skmacy break; 337205066Skmacy case IPPROTO_SCTP: 338205066Skmacy PULLUP_TO(hlen, ulp, struct sctphdr); 339262027Sglebius dport = SCTP(ulp)->src_port; 340262027Sglebius sport = SCTP(ulp)->dest_port; 341262027Sglebius /* XXXGL: handle stale? */ 342205066Skmacy break; 343205066Skmacy case IPPROTO_UDP: 344205066Skmacy PULLUP_TO(hlen, ulp, struct udphdr); 345262027Sglebius dport = UDP(ulp)->uh_dport; 346262027Sglebius sport = UDP(ulp)->uh_sport; 347205066Skmacy break; 348205066Skmacy case IPPROTO_HOPOPTS: /* RFC 2460 */ 349205066Skmacy PULLUP_TO(hlen, ulp, struct ip6_hbh); 350205066Skmacy hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; 351205066Skmacy proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; 352205066Skmacy ulp = NULL; 353205066Skmacy break; 354205066Skmacy case IPPROTO_ROUTING: /* RFC 2460 */ 355261640Sglebius PULLUP_TO(hlen, ulp, struct ip6_rthdr); 356205066Skmacy hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3; 357205066Skmacy proto = ((struct ip6_rthdr *)ulp)->ip6r_nxt; 358205066Skmacy ulp = NULL; 359205066Skmacy break; 360205066Skmacy case IPPROTO_FRAGMENT: /* RFC 2460 */ 361205066Skmacy PULLUP_TO(hlen, ulp, struct ip6_frag); 362205066Skmacy hlen += sizeof (struct ip6_frag); 363205066Skmacy proto = ((struct ip6_frag *)ulp)->ip6f_nxt; 364205066Skmacy offset = ((struct ip6_frag *)ulp)->ip6f_offlg & 365205066Skmacy IP6F_OFF_MASK; 366205066Skmacy ulp = NULL; 367205066Skmacy break; 368205066Skmacy case IPPROTO_DSTOPTS: /* RFC 2460 */ 369205066Skmacy PULLUP_TO(hlen, ulp, struct ip6_hbh); 370205066Skmacy hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; 371205066Skmacy proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; 372205066Skmacy ulp = NULL; 373205066Skmacy break; 374205066Skmacy case IPPROTO_AH: /* RFC 2402 */ 375205066Skmacy PULLUP_TO(hlen, ulp, struct ip6_ext); 376205066Skmacy hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2; 377205066Skmacy proto = ((struct ip6_ext *)ulp)->ip6e_nxt; 378205066Skmacy ulp = NULL; 379205066Skmacy break; 380205066Skmacy default: 381205066Skmacy PULLUP_TO(hlen, ulp, struct ip6_ext); 382205066Skmacy break; 383205066Skmacy } 384205066Skmacy } 385205066Skmacy 386262027Sglebius bcopy(&ip6->ip6_dst, &key[0], sizeof(struct in6_addr)); 387262027Sglebius bcopy(&ip6->ip6_src, &key[4], sizeof(struct in6_addr)); 388262027Sglebius key[8] = (dport << 16) | sport; 389262027Sglebius fibnum |= proto << 16; 390205066Skmacy 391262027Sglebius fle = flowtable_lookup_common(&V_ip6_ft, key, 9 * sizeof(uint32_t), 392262027Sglebius fibnum); 393262027Sglebius#else /* !FLOWTABLE_HASH_ALL */ 394262027Sglebius bcopy(&ip6->ip6_dst, &key[0], sizeof(struct in6_addr)); 395262027Sglebius fle = flowtable_lookup_common(&V_ip6_ft, key, sizeof(struct in6_addr), 396262027Sglebius fibnum); 397262027Sglebius#endif /* FLOWTABLE_HASH_ALL */ 398205066Skmacy 399262027Sglebius if (fle == NULL) 400205066Skmacy return (NULL); 401205066Skmacy 402205066Skmacy sin6 = (struct sockaddr_in6 *)&ro->ro_dst; 403205066Skmacy sin6->sin6_family = AF_INET6; 404205066Skmacy sin6->sin6_len = sizeof(*sin6); 405262027Sglebius bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(struct in6_addr)); 406262027Sglebius 407262027Sglebius return (fle); 408205066Skmacy} 409205066Skmacy#endif /* INET6 */ 410205066Skmacy 411191255Skmacystatic bitstr_t * 412191255Skmacyflowtable_mask(struct flowtable *ft) 413191255Skmacy{ 414196368Skmacy 415262029Sglebius /* 416261823Sglebius * flowtable_free_stale() calls w/o critical section, but 417261823Sglebius * with sched_bind(). Since pointer is stable throughout 418261823Sglebius * ft lifetime, it is safe, otherwise... 419261823Sglebius * 420261823Sglebius * CRITICAL_ASSERT(curthread); 421261823Sglebius */ 422191255Skmacy 423261823Sglebius return (*(bitstr_t **)zpcpu_get(ft->ft_masks)); 424191255Skmacy} 425191255Skmacy 426261823Sglebiusstatic struct flist * 427261823Sglebiusflowtable_list(struct flowtable *ft, uint32_t hash) 428191255Skmacy{ 429191255Skmacy 430261823Sglebius CRITICAL_ASSERT(curthread); 431261823Sglebius return (zpcpu_get(ft->ft_table[hash % ft->ft_size])); 432191255Skmacy} 433191255Skmacy 434191255Skmacystatic int 435262027Sglebiusflow_stale(struct flowtable *ft, struct flentry *fle, int maxidle) 436191255Skmacy{ 437191255Skmacy 438290276Srrs if (((fle->f_rt->rt_flags & RTF_UP) == 0) || 439262027Sglebius (fle->f_rt->rt_ifp == NULL) || 440262027Sglebius !RT_LINK_IS_UP(fle->f_rt->rt_ifp) || 441262027Sglebius (fle->f_lle->la_flags & LLE_VALID) == 0) 442191255Skmacy return (1); 443191255Skmacy 444262027Sglebius if (time_uptime - fle->f_uptime > maxidle) 445262027Sglebius return (1); 446191255Skmacy 447262027Sglebius#ifdef FLOWTABLE_HASH_ALL 448262027Sglebius if (fle->f_flags & FL_STALE) 449191255Skmacy return (1); 450262027Sglebius#endif 451191255Skmacy 452191255Skmacy return (0); 453191255Skmacy} 454191255Skmacy 455191255Skmacystatic int 456262027Sglebiusflow_full(void) 457205488Skmacy{ 458261601Sglebius int count, max; 459261640Sglebius 460261823Sglebius count = uma_zone_get_cur(flow_zone); 461261823Sglebius max = uma_zone_get_max(flow_zone); 462205488Skmacy 463262027Sglebius return (count > (max - (max >> 3))); 464205488Skmacy} 465205488Skmacy 466205488Skmacystatic int 467262027Sglebiusflow_matches(struct flentry *fle, uint32_t *key, int keylen, uint32_t fibnum) 468261883Sglebius{ 469262027Sglebius#ifdef FLOWTABLE_HASH_ALL 470262027Sglebius uint8_t proto; 471261883Sglebius 472262027Sglebius proto = (fibnum >> 16) & 0xff; 473262027Sglebius fibnum &= 0xffff; 474262027Sglebius#endif 475262027Sglebius 476262027Sglebius CRITICAL_ASSERT(curthread); 477262027Sglebius 478262027Sglebius /* Microoptimization for IPv4: don't use bcmp(). */ 479290276Srrs if (((keylen == sizeof(uint32_t) && (fle->f_key[0] == key[0])) || 480262027Sglebius (bcmp(fle->f_key, key, keylen) == 0)) && 481262027Sglebius fibnum == fle->f_fibnum && 482262027Sglebius#ifdef FLOWTABLE_HASH_ALL 483262027Sglebius proto == fle->f_proto && 484262027Sglebius#endif 485261883Sglebius (fle->f_rt->rt_flags & RTF_UP) && 486261883Sglebius fle->f_rt->rt_ifp != NULL && 487261883Sglebius (fle->f_lle->la_flags & LLE_VALID)) 488261883Sglebius return (1); 489261883Sglebius 490261883Sglebius return (0); 491261883Sglebius} 492261883Sglebius 493261883Sglebiusstatic struct flentry * 494191255Skmacyflowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key, 495262027Sglebius int keylen, uint32_t fibnum0) 496191255Skmacy{ 497262027Sglebius#ifdef INET6 498262029Sglebius struct route_in6 sro6; 499262027Sglebius#endif 500262027Sglebius#ifdef INET 501262029Sglebius struct route sro; 502262027Sglebius#endif 503262027Sglebius struct route *ro = NULL; 504262027Sglebius struct rtentry *rt; 505262027Sglebius struct lltable *lt = NULL; 506262027Sglebius struct llentry *lle; 507262027Sglebius struct sockaddr_storage *l3addr; 508262027Sglebius struct ifnet *ifp; 509261823Sglebius struct flist *flist; 510261823Sglebius struct flentry *fle, *iter; 511261883Sglebius bitstr_t *mask; 512262027Sglebius uint16_t fibnum = fibnum0; 513262027Sglebius#ifdef FLOWTABLE_HASH_ALL 514261883Sglebius uint8_t proto; 515191255Skmacy 516262027Sglebius proto = (fibnum0 >> 16) & 0xff; 517262027Sglebius fibnum = fibnum0 & 0xffff; 518262027Sglebius#endif 519262027Sglebius 520262027Sglebius /* 521262027Sglebius * This bit of code ends up locking the 522262027Sglebius * same route 3 times (just like ip_output + ether_output) 523262027Sglebius * - at lookup 524262027Sglebius * - in rt_check when called by arpresolve 525262027Sglebius * - dropping the refcount for the rtentry 526262027Sglebius * 527262027Sglebius * This could be consolidated to one if we wrote a variant 528262027Sglebius * of arpresolve with an rt_check variant that expected to 529262027Sglebius * receive the route locked 530262027Sglebius */ 531262027Sglebius#ifdef INET 532262027Sglebius if (ft == &V_ip4_ft) { 533262027Sglebius struct sockaddr_in *sin; 534262027Sglebius 535262027Sglebius ro = &sro; 536262027Sglebius bzero(&sro.ro_dst, sizeof(sro.ro_dst)); 537262027Sglebius 538262027Sglebius sin = (struct sockaddr_in *)&sro.ro_dst; 539262027Sglebius sin->sin_family = AF_INET; 540262027Sglebius sin->sin_len = sizeof(*sin); 541262027Sglebius sin->sin_addr.s_addr = key[0]; 542262027Sglebius } 543262027Sglebius#endif 544262027Sglebius#ifdef INET6 545262027Sglebius if (ft == &V_ip6_ft) { 546262027Sglebius struct sockaddr_in6 *sin6; 547262027Sglebius 548262027Sglebius ro = (struct route *)&sro6; 549262027Sglebius sin6 = &sro6.ro_dst; 550262027Sglebius 551262027Sglebius bzero(sin6, sizeof(*sin6)); 552262027Sglebius sin6->sin6_family = AF_INET6; 553262027Sglebius sin6->sin6_len = sizeof(*sin6); 554262027Sglebius bcopy(key, &sin6->sin6_addr, sizeof(struct in6_addr)); 555262027Sglebius } 556262027Sglebius#endif 557262027Sglebius 558262027Sglebius ro->ro_rt = NULL; 559262027Sglebius#ifdef RADIX_MPATH 560262027Sglebius rtalloc_mpath_fib(ro, hash, fibnum); 561262027Sglebius#else 562262027Sglebius rtalloc_ign_fib(ro, 0, fibnum); 563262027Sglebius#endif 564262027Sglebius if (ro->ro_rt == NULL) 565262027Sglebius return (NULL); 566262027Sglebius 567262027Sglebius rt = ro->ro_rt; 568262027Sglebius ifp = rt->rt_ifp; 569262027Sglebius 570262027Sglebius if (ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) { 571262027Sglebius RTFREE(rt); 572262027Sglebius return (NULL); 573262027Sglebius } 574262027Sglebius 575262027Sglebius#ifdef INET 576262027Sglebius if (ft == &V_ip4_ft) 577262027Sglebius lt = LLTABLE(ifp); 578262027Sglebius#endif 579262027Sglebius#ifdef INET6 580262027Sglebius if (ft == &V_ip6_ft) 581262027Sglebius lt = LLTABLE6(ifp); 582262027Sglebius#endif 583262027Sglebius 584262027Sglebius if (rt->rt_flags & RTF_GATEWAY) 585262027Sglebius l3addr = (struct sockaddr_storage *)rt->rt_gateway; 586262027Sglebius else 587262027Sglebius l3addr = (struct sockaddr_storage *)&ro->ro_dst; 588262027Sglebius lle = llentry_alloc(ifp, lt, l3addr); 589262027Sglebius 590262027Sglebius if (lle == NULL) { 591262027Sglebius RTFREE(rt); 592262027Sglebius return (NULL); 593262027Sglebius } 594262027Sglebius 595262027Sglebius /* Don't insert the entry if the ARP hasn't yet finished resolving. */ 596262027Sglebius if ((lle->la_flags & LLE_VALID) == 0) { 597262027Sglebius RTFREE(rt); 598262027Sglebius LLE_FREE(lle); 599262027Sglebius FLOWSTAT_INC(ft, ft_fail_lle_invalid); 600262027Sglebius return (NULL); 601262027Sglebius } 602262027Sglebius 603261823Sglebius fle = uma_zalloc(flow_zone, M_NOWAIT | M_ZERO); 604262027Sglebius if (fle == NULL) { 605262027Sglebius RTFREE(rt); 606262027Sglebius LLE_FREE(lle); 607261883Sglebius return (NULL); 608262027Sglebius } 609191255Skmacy 610262027Sglebius fle->f_hash = hash; 611262027Sglebius bcopy(key, &fle->f_key, keylen); 612262027Sglebius fle->f_rt = rt; 613262027Sglebius fle->f_lle = lle; 614261823Sglebius fle->f_fibnum = fibnum; 615261823Sglebius fle->f_uptime = time_uptime; 616262027Sglebius#ifdef FLOWTABLE_HASH_ALL 617262027Sglebius fle->f_proto = proto; 618262027Sglebius fle->f_flags = fibnum0 >> 24; 619262027Sglebius#endif 620205066Skmacy 621261823Sglebius critical_enter(); 622191255Skmacy mask = flowtable_mask(ft); 623261823Sglebius flist = flowtable_list(ft, hash); 624191255Skmacy 625261823Sglebius if (SLIST_EMPTY(flist)) { 626261823Sglebius bit_set(mask, (hash % ft->ft_size)); 627261823Sglebius SLIST_INSERT_HEAD(flist, fle, f_next); 628191255Skmacy goto skip; 629261582Sglebius } 630261640Sglebius 631191255Skmacy /* 632191255Skmacy * find end of list and make sure that we were not 633191255Skmacy * preempted by another thread handling this flow 634191255Skmacy */ 635261823Sglebius SLIST_FOREACH(iter, flist, f_next) { 636262162Sglebius KASSERT(iter->f_hash % ft->ft_size == hash % ft->ft_size, 637262162Sglebius ("%s: wrong hash", __func__)); 638262027Sglebius if (flow_matches(iter, key, keylen, fibnum)) { 639191255Skmacy /* 640261883Sglebius * We probably migrated to an other CPU after 641261883Sglebius * lookup in flowtable_lookup_common() failed. 642261883Sglebius * It appeared that this CPU already has flow 643261883Sglebius * entry. 644191255Skmacy */ 645261883Sglebius iter->f_uptime = time_uptime; 646262027Sglebius#ifdef FLOWTABLE_HASH_ALL 647262027Sglebius iter->f_flags |= fibnum >> 24; 648262027Sglebius#endif 649261823Sglebius critical_exit(); 650261883Sglebius FLOWSTAT_INC(ft, ft_collisions); 651261823Sglebius uma_zfree(flow_zone, fle); 652261883Sglebius return (iter); 653191255Skmacy } 654261582Sglebius } 655191255Skmacy 656261823Sglebius SLIST_INSERT_HEAD(flist, fle, f_next); 657191255Skmacyskip: 658261823Sglebius critical_exit(); 659261883Sglebius FLOWSTAT_INC(ft, ft_inserts); 660191255Skmacy 661261883Sglebius return (fle); 662191255Skmacy} 663191255Skmacy 664262027Sglebiusint 665262027Sglebiusflowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro) 666191255Skmacy{ 667262027Sglebius struct flentry *fle; 668292978Smelifaro struct llentry *lle; 669205066Skmacy 670262027Sglebius if (V_flowtable_enable == 0) 671262027Sglebius return (ENXIO); 672262027Sglebius 673261601Sglebius switch (sa) { 674205066Skmacy#ifdef INET 675261601Sglebius case AF_INET: 676262027Sglebius fle = flowtable_lookup_ipv4(m, ro); 677262027Sglebius break; 678205066Skmacy#endif 679205066Skmacy#ifdef INET6 680261601Sglebius case AF_INET6: 681262027Sglebius fle = flowtable_lookup_ipv6(m, ro); 682262027Sglebius break; 683261601Sglebius#endif 684261601Sglebius default: 685261601Sglebius panic("%s: sa %d", __func__, sa); 686205066Skmacy } 687262027Sglebius 688262027Sglebius if (fle == NULL) 689262027Sglebius return (EHOSTUNREACH); 690262027Sglebius 691275358Shselasky if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE) { 692301538Ssephe M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE_HASH); 693262027Sglebius m->m_pkthdr.flowid = fle->f_hash; 694262027Sglebius } 695262027Sglebius 696262027Sglebius ro->ro_rt = fle->f_rt; 697262027Sglebius ro->ro_flags |= RT_NORTREF; 698292978Smelifaro lle = fle->f_lle; 699301217Sgnn if (lle != NULL && (lle->la_flags & LLE_VALID)) 700301217Sgnn ro->ro_lle = lle; /* share ref with fle->f_lle */ 701262027Sglebius 702262027Sglebius return (0); 703205066Skmacy} 704261601Sglebius 705261601Sglebiusstatic struct flentry * 706262027Sglebiusflowtable_lookup_common(struct flowtable *ft, uint32_t *key, int keylen, 707262027Sglebius uint32_t fibnum) 708205066Skmacy{ 709261823Sglebius struct flist *flist; 710191255Skmacy struct flentry *fle; 711262027Sglebius uint32_t hash; 712191255Skmacy 713262027Sglebius FLOWSTAT_INC(ft, ft_lookups); 714261601Sglebius 715262027Sglebius hash = jenkins_hash32(key, keylen / sizeof(uint32_t), flow_hashjitter); 716261601Sglebius 717261823Sglebius critical_enter(); 718261823Sglebius flist = flowtable_list(ft, hash); 719262027Sglebius SLIST_FOREACH(fle, flist, f_next) { 720262162Sglebius KASSERT(fle->f_hash % ft->ft_size == hash % ft->ft_size, 721262162Sglebius ("%s: wrong hash", __func__)); 722262027Sglebius if (flow_matches(fle, key, keylen, fibnum)) { 723261823Sglebius fle->f_uptime = time_uptime; 724262027Sglebius#ifdef FLOWTABLE_HASH_ALL 725262027Sglebius fle->f_flags |= fibnum >> 24; 726262027Sglebius#endif 727261823Sglebius critical_exit(); 728261823Sglebius FLOWSTAT_INC(ft, ft_hits); 729262027Sglebius return (fle); 730261823Sglebius } 731262027Sglebius } 732261823Sglebius critical_exit(); 733261823Sglebius 734261601Sglebius FLOWSTAT_INC(ft, ft_misses); 735191255Skmacy 736262027Sglebius return (flowtable_insert(ft, hash, key, keylen, fibnum)); 737191255Skmacy} 738191255Skmacy 739261601Sglebiusstatic void 740261601Sglebiusflowtable_alloc(struct flowtable *ft) 741191255Skmacy{ 742302378Snwhitehorn int i; 743191255Skmacy 744261823Sglebius ft->ft_table = malloc(ft->ft_size * sizeof(struct flist), 745261823Sglebius M_FTABLE, M_WAITOK); 746261823Sglebius for (int i = 0; i < ft->ft_size; i++) 747261823Sglebius ft->ft_table[i] = uma_zalloc(pcpu_zone_ptr, M_WAITOK | M_ZERO); 748191255Skmacy 749261823Sglebius ft->ft_masks = uma_zalloc(pcpu_zone_ptr, M_WAITOK); 750302372Snwhitehorn CPU_FOREACH(i) { 751261823Sglebius bitstr_t **b; 752261640Sglebius 753261823Sglebius b = zpcpu_get_cpu(ft->ft_masks, i); 754299090Sasomers *b = bit_alloc(ft->ft_size, M_FTABLE, M_WAITOK); 755191255Skmacy } 756299090Sasomers ft->ft_tmpmask = bit_alloc(ft->ft_size, M_FTABLE, M_WAITOK); 757191255Skmacy} 758191255Skmacy 759191255Skmacystatic void 760262027Sglebiusflowtable_free_stale(struct flowtable *ft, struct rtentry *rt, int maxidle) 761191255Skmacy{ 762261823Sglebius struct flist *flist, freelist; 763261823Sglebius struct flentry *fle, *fle1, *fleprev; 764191324Skmacy bitstr_t *mask, *tmpmask; 765261823Sglebius int curbit, tmpsize; 766205066Skmacy 767261823Sglebius SLIST_INIT(&freelist); 768191255Skmacy mask = flowtable_mask(ft); 769191324Skmacy tmpmask = ft->ft_tmpmask; 770256563Semax tmpsize = ft->ft_size; 771191324Skmacy memcpy(tmpmask, mask, ft->ft_size/8); 772261823Sglebius curbit = 0; 773262770Sglebius fleprev = NULL; /* pacify gcc */ 774191324Skmacy /* 775191324Skmacy * XXX Note to self, bit_ffs operates at the byte level 776191324Skmacy * and thus adds gratuitous overhead 777191324Skmacy */ 778191324Skmacy bit_ffs(tmpmask, ft->ft_size, &curbit); 779191324Skmacy while (curbit != -1) { 780191257Skmacy if (curbit >= ft->ft_size || curbit < -1) { 781191257Skmacy log(LOG_ALERT, 782191257Skmacy "warning: bad curbit value %d \n", 783191255Skmacy curbit); 784191257Skmacy break; 785191255Skmacy } 786205066Skmacy 787261823Sglebius FLOWSTAT_INC(ft, ft_free_checks); 788191255Skmacy 789261823Sglebius critical_enter(); 790261823Sglebius flist = flowtable_list(ft, curbit); 791191257Skmacy#ifdef DIAGNOSTIC 792261823Sglebius if (SLIST_EMPTY(flist) && curbit > 0) { 793191257Skmacy log(LOG_ALERT, 794191257Skmacy "warning bit=%d set, but no fle found\n", 795191257Skmacy curbit); 796191255Skmacy } 797261640Sglebius#endif 798261823Sglebius SLIST_FOREACH_SAFE(fle, flist, f_next, fle1) { 799261823Sglebius if (rt != NULL && fle->f_rt != rt) { 800191255Skmacy fleprev = fle; 801191255Skmacy continue; 802191255Skmacy } 803262027Sglebius if (!flow_stale(ft, fle, maxidle)) { 804261823Sglebius fleprev = fle; 805261823Sglebius continue; 806191255Skmacy } 807205066Skmacy 808261823Sglebius if (fle == SLIST_FIRST(flist)) 809261823Sglebius SLIST_REMOVE_HEAD(flist, f_next); 810261823Sglebius else 811261823Sglebius SLIST_REMOVE_AFTER(fleprev, f_next); 812261823Sglebius SLIST_INSERT_HEAD(&freelist, fle, f_next); 813191255Skmacy } 814261823Sglebius if (SLIST_EMPTY(flist)) 815191255Skmacy bit_clear(mask, curbit); 816261823Sglebius critical_exit(); 817261823Sglebius 818191324Skmacy bit_clear(tmpmask, curbit); 819256563Semax bit_ffs(tmpmask, tmpsize, &curbit); 820191255Skmacy } 821261823Sglebius 822261823Sglebius SLIST_FOREACH_SAFE(fle, &freelist, f_next, fle1) { 823261601Sglebius FLOWSTAT_INC(ft, ft_frees); 824261823Sglebius if (fle->f_rt != NULL) 825261823Sglebius RTFREE(fle->f_rt); 826261823Sglebius if (fle->f_lle != NULL) 827261823Sglebius LLE_FREE(fle->f_lle); 828261823Sglebius uma_zfree(flow_zone, fle); 829191255Skmacy } 830191255Skmacy} 831191255Skmacy 832261823Sglebiusstatic void 833262027Sglebiusflowtable_clean_vnet(struct flowtable *ft, struct rtentry *rt, int maxidle) 834261823Sglebius{ 835261823Sglebius int i; 836261823Sglebius 837261823Sglebius CPU_FOREACH(i) { 838261823Sglebius if (smp_started == 1) { 839261823Sglebius thread_lock(curthread); 840261823Sglebius sched_bind(curthread, i); 841261823Sglebius thread_unlock(curthread); 842261823Sglebius } 843261823Sglebius 844262027Sglebius flowtable_free_stale(ft, rt, maxidle); 845261823Sglebius 846261823Sglebius if (smp_started == 1) { 847261823Sglebius thread_lock(curthread); 848261823Sglebius sched_unbind(curthread); 849261823Sglebius thread_unlock(curthread); 850261823Sglebius } 851261823Sglebius } 852261823Sglebius} 853261823Sglebius 854197687Sqinglivoid 855261601Sglebiusflowtable_route_flush(sa_family_t sa, struct rtentry *rt) 856197687Sqingli{ 857261601Sglebius struct flowtable *ft; 858205066Skmacy 859261601Sglebius switch (sa) { 860261601Sglebius#ifdef INET 861261601Sglebius case AF_INET: 862261601Sglebius ft = &V_ip4_ft; 863261601Sglebius break; 864261601Sglebius#endif 865261601Sglebius#ifdef INET6 866261601Sglebius case AF_INET6: 867261601Sglebius ft = &V_ip6_ft; 868261601Sglebius break; 869261601Sglebius#endif 870261601Sglebius default: 871261601Sglebius panic("%s: sa %d", __func__, sa); 872261601Sglebius } 873261601Sglebius 874262027Sglebius flowtable_clean_vnet(ft, rt, 0); 875197687Sqingli} 876197687Sqingli 877191255Skmacystatic void 878194660Szecflowtable_cleaner(void) 879194660Szec{ 880194660Szec VNET_ITERATOR_DECL(vnet_iter); 881217076Sjhb struct thread *td; 882194660Szec 883194660Szec if (bootverbose) 884194660Szec log(LOG_INFO, "flowtable cleaner started\n"); 885217076Sjhb td = curthread; 886194660Szec while (1) { 887262027Sglebius uint32_t flowclean_freq, maxidle; 888262027Sglebius 889262027Sglebius /* 890262027Sglebius * The maximum idle time, as well as frequency are arbitrary. 891262027Sglebius */ 892262027Sglebius if (flow_full()) 893262027Sglebius maxidle = 5; 894262027Sglebius else 895262027Sglebius maxidle = 30; 896262027Sglebius 897194660Szec VNET_LIST_RLOCK(); 898194660Szec VNET_FOREACH(vnet_iter) { 899194660Szec CURVNET_SET(vnet_iter); 900261601Sglebius#ifdef INET 901262027Sglebius flowtable_clean_vnet(&V_ip4_ft, NULL, maxidle); 902261601Sglebius#endif 903261601Sglebius#ifdef INET6 904262027Sglebius flowtable_clean_vnet(&V_ip6_ft, NULL, maxidle); 905261601Sglebius#endif 906194660Szec CURVNET_RESTORE(); 907194660Szec } 908194660Szec VNET_LIST_RUNLOCK(); 909194660Szec 910262027Sglebius if (flow_full()) 911262027Sglebius flowclean_freq = 4*hz; 912262027Sglebius else 913262027Sglebius flowclean_freq = 20*hz; 914196368Skmacy mtx_lock(&flowclean_lock); 915217076Sjhb thread_lock(td); 916217076Sjhb sched_prio(td, PPAUSE); 917217076Sjhb thread_unlock(td); 918216855Sbz flowclean_cycles++; 919216855Sbz cv_broadcast(&flowclean_f_cv); 920216855Sbz cv_timedwait(&flowclean_c_cv, &flowclean_lock, flowclean_freq); 921196368Skmacy mtx_unlock(&flowclean_lock); 922191255Skmacy } 923191255Skmacy} 924191255Skmacy 925196368Skmacystatic void 926196368Skmacyflowtable_flush(void *unused __unused) 927196368Skmacy{ 928196368Skmacy uint64_t start; 929205066Skmacy 930196368Skmacy mtx_lock(&flowclean_lock); 931196368Skmacy start = flowclean_cycles; 932196368Skmacy while (start == flowclean_cycles) { 933216855Sbz cv_broadcast(&flowclean_c_cv); 934216855Sbz cv_wait(&flowclean_f_cv, &flowclean_lock); 935196368Skmacy } 936196368Skmacy mtx_unlock(&flowclean_lock); 937196368Skmacy} 938196368Skmacy 939191255Skmacystatic struct kproc_desc flow_kp = { 940191255Skmacy "flowcleaner", 941191255Skmacy flowtable_cleaner, 942191255Skmacy &flowcleanerproc 943191255Skmacy}; 944191255SkmacySYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp); 945193863Skmacy 946261601Sglebiusstatic int 947261601Sglebiusflowtable_get_size(char *name) 948196368Skmacy{ 949261601Sglebius int size; 950196368Skmacy 951261601Sglebius if (TUNABLE_INT_FETCH(name, &size)) { 952261601Sglebius if (size < 256) 953261601Sglebius size = 256; 954261601Sglebius if (!powerof2(size)) { 955261601Sglebius printf("%s must be power of 2\n", name); 956261601Sglebius size = 2048; 957261601Sglebius } 958261601Sglebius } else { 959261601Sglebius /* 960261601Sglebius * round up to the next power of 2 961261601Sglebius */ 962261601Sglebius size = 1 << fls((1024 + maxusers * 64) - 1); 963261601Sglebius } 964261601Sglebius 965261601Sglebius return (size); 966196368Skmacy} 967196368Skmacy 968196368Skmacystatic void 969196368Skmacyflowtable_init(const void *unused __unused) 970196368Skmacy{ 971196368Skmacy 972261601Sglebius flow_hashjitter = arc4random(); 973261601Sglebius 974261823Sglebius flow_zone = uma_zcreate("flows", sizeof(struct flentry), 975262027Sglebius NULL, NULL, NULL, NULL, (64-1), UMA_ZONE_MAXBUCKET); 976261823Sglebius uma_zone_set_max(flow_zone, 1024 + maxusers * 64 * mp_ncpus); 977261601Sglebius 978216855Sbz cv_init(&flowclean_c_cv, "c_flowcleanwait"); 979216855Sbz cv_init(&flowclean_f_cv, "f_flowcleanwait"); 980196368Skmacy mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF); 981196368Skmacy EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL, 982196368Skmacy EVENTHANDLER_PRI_ANY); 983196368Skmacy} 984261601SglebiusSYSINIT(flowtable_init, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, 985196368Skmacy flowtable_init, NULL); 986196368Skmacy 987261601Sglebius#ifdef INET 988261601Sglebiusstatic SYSCTL_NODE(_net_flowtable, OID_AUTO, ip4, CTLFLAG_RD, NULL, 989261601Sglebius "Flowtable for IPv4"); 990196368Skmacy 991261601Sglebiusstatic VNET_PCPUSTAT_DEFINE(struct flowtable_stat, ip4_ftstat); 992261601SglebiusVNET_PCPUSTAT_SYSINIT(ip4_ftstat); 993261601SglebiusVNET_PCPUSTAT_SYSUNINIT(ip4_ftstat); 994261601SglebiusSYSCTL_VNET_PCPUSTAT(_net_flowtable_ip4, OID_AUTO, stat, struct flowtable_stat, 995261601Sglebius ip4_ftstat, "Flowtable statistics for IPv4 " 996261601Sglebius "(struct flowtable_stat, net/flowtable.h)"); 997261601Sglebius 998196368Skmacystatic void 999261601Sglebiusflowtable_init_vnet_v4(const void *unused __unused) 1000196368Skmacy{ 1001196368Skmacy 1002261601Sglebius V_ip4_ft.ft_size = flowtable_get_size("net.flowtable.ip4.size"); 1003261601Sglebius V_ip4_ft.ft_stat = VNET(ip4_ftstat); 1004261601Sglebius flowtable_alloc(&V_ip4_ft); 1005196368Skmacy} 1006261601SglebiusVNET_SYSINIT(ft_vnet_v4, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 1007261601Sglebius flowtable_init_vnet_v4, NULL); 1008261601Sglebius#endif /* INET */ 1009196368Skmacy 1010261601Sglebius#ifdef INET6 1011261601Sglebiusstatic SYSCTL_NODE(_net_flowtable, OID_AUTO, ip6, CTLFLAG_RD, NULL, 1012261601Sglebius "Flowtable for IPv6"); 1013196368Skmacy 1014261601Sglebiusstatic VNET_PCPUSTAT_DEFINE(struct flowtable_stat, ip6_ftstat); 1015261601SglebiusVNET_PCPUSTAT_SYSINIT(ip6_ftstat); 1016261601SglebiusVNET_PCPUSTAT_SYSUNINIT(ip6_ftstat); 1017261601SglebiusSYSCTL_VNET_PCPUSTAT(_net_flowtable_ip6, OID_AUTO, stat, struct flowtable_stat, 1018261601Sglebius ip6_ftstat, "Flowtable statistics for IPv6 " 1019261601Sglebius "(struct flowtable_stat, net/flowtable.h)"); 1020261601Sglebius 1021261601Sglebiusstatic void 1022261601Sglebiusflowtable_init_vnet_v6(const void *unused __unused) 1023261601Sglebius{ 1024261601Sglebius 1025261601Sglebius V_ip6_ft.ft_size = flowtable_get_size("net.flowtable.ip6.size"); 1026261601Sglebius V_ip6_ft.ft_stat = VNET(ip6_ftstat); 1027261601Sglebius flowtable_alloc(&V_ip6_ft); 1028261601Sglebius} 1029261601SglebiusVNET_SYSINIT(flowtable_init_vnet_v6, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, 1030261601Sglebius flowtable_init_vnet_v6, NULL); 1031261601Sglebius#endif /* INET6 */ 1032261601Sglebius 1033196368Skmacy#ifdef DDB 1034196368Skmacystatic bitstr_t * 1035196368Skmacyflowtable_mask_pcpu(struct flowtable *ft, int cpuid) 1036196368Skmacy{ 1037196368Skmacy 1038261823Sglebius return (zpcpu_get_cpu(*ft->ft_masks, cpuid)); 1039196368Skmacy} 1040196368Skmacy 1041261823Sglebiusstatic struct flist * 1042261823Sglebiusflowtable_list_pcpu(struct flowtable *ft, uint32_t hash, int cpuid) 1043196368Skmacy{ 1044196368Skmacy 1045261823Sglebius return (zpcpu_get_cpu(&ft->ft_table[hash % ft->ft_size], cpuid)); 1046196368Skmacy} 1047196368Skmacy 1048196368Skmacystatic void 1049196368Skmacyflow_show(struct flowtable *ft, struct flentry *fle) 1050196368Skmacy{ 1051196368Skmacy int idle_time; 1052205066Skmacy int rt_valid, ifp_valid; 1053205066Skmacy volatile struct rtentry *rt; 1054205066Skmacy struct ifnet *ifp = NULL; 1055262027Sglebius uint32_t *hashkey = fle->f_key; 1056196368Skmacy 1057196368Skmacy idle_time = (int)(time_uptime - fle->f_uptime); 1058205066Skmacy rt = fle->f_rt; 1059205066Skmacy rt_valid = rt != NULL; 1060261582Sglebius if (rt_valid) 1061205066Skmacy ifp = rt->rt_ifp; 1062205066Skmacy ifp_valid = ifp != NULL; 1063205066Skmacy 1064262027Sglebius#ifdef INET 1065262027Sglebius if (ft == &V_ip4_ft) { 1066262027Sglebius char daddr[4*sizeof "123"]; 1067262027Sglebius#ifdef FLOWTABLE_HASH_ALL 1068262027Sglebius char saddr[4*sizeof "123"]; 1069262027Sglebius uint16_t sport, dport; 1070262027Sglebius#endif 1071262027Sglebius 1072262027Sglebius inet_ntoa_r(*(struct in_addr *) &hashkey[0], daddr); 1073262027Sglebius#ifdef FLOWTABLE_HASH_ALL 1074261640Sglebius inet_ntoa_r(*(struct in_addr *) &hashkey[1], saddr); 1075262027Sglebius dport = ntohs((uint16_t)(hashkey[2] >> 16)); 1076262027Sglebius sport = ntohs((uint16_t)(hashkey[2] & 0xffff)); 1077262027Sglebius db_printf("%s:%d->%s:%d", saddr, sport, daddr, dport); 1078262027Sglebius#else 1079205066Skmacy db_printf("%s ", daddr); 1080262027Sglebius#endif 1081262027Sglebius } 1082262027Sglebius#endif /* INET */ 1083262027Sglebius#ifdef INET6 1084262027Sglebius if (ft == &V_ip6_ft) { 1085262027Sglebius#ifdef FLOWTABLE_HASH_ALL 1086262027Sglebius db_printf("\n\tkey=%08x:%08x:%08x%08x:%08x:%08x%08x:%08x:%08x", 1087262027Sglebius hashkey[0], hashkey[1], hashkey[2], 1088262027Sglebius hashkey[3], hashkey[4], hashkey[5], 1089262027Sglebius hashkey[6], hashkey[7], hashkey[8]); 1090262027Sglebius#else 1091262027Sglebius db_printf("\n\tkey=%08x:%08x:%08x ", 1092262027Sglebius hashkey[0], hashkey[1], hashkey[2]); 1093262027Sglebius#endif 1094262027Sglebius } 1095262027Sglebius#endif /* INET6 */ 1096261582Sglebius 1097262027Sglebius db_printf("hash=%08x idle_time=%03d" 1098262027Sglebius "\n\tfibnum=%02d rt=%p", 1099262027Sglebius fle->f_hash, idle_time, fle->f_fibnum, fle->f_rt); 1100262027Sglebius 1101262027Sglebius#ifdef FLOWTABLE_HASH_ALL 1102196368Skmacy if (fle->f_flags & FL_STALE) 1103196368Skmacy db_printf(" FL_STALE "); 1104262027Sglebius#endif 1105205066Skmacy if (rt_valid) { 1106205066Skmacy if (rt->rt_flags & RTF_UP) 1107205066Skmacy db_printf(" RTF_UP "); 1108205066Skmacy } 1109205066Skmacy if (ifp_valid) { 1110205066Skmacy if (ifp->if_flags & IFF_LOOPBACK) 1111205066Skmacy db_printf(" IFF_LOOPBACK "); 1112205066Skmacy if (ifp->if_flags & IFF_UP) 1113261640Sglebius db_printf(" IFF_UP "); 1114205066Skmacy if (ifp->if_flags & IFF_POINTOPOINT) 1115261640Sglebius db_printf(" IFF_POINTOPOINT "); 1116205066Skmacy } 1117196368Skmacy db_printf("\n"); 1118196368Skmacy} 1119196368Skmacy 1120196368Skmacystatic void 1121196368Skmacyflowtable_show(struct flowtable *ft, int cpuid) 1122196368Skmacy{ 1123196368Skmacy int curbit = 0; 1124196368Skmacy bitstr_t *mask, *tmpmask; 1125196368Skmacy 1126205066Skmacy if (cpuid != -1) 1127205066Skmacy db_printf("cpu: %d\n", cpuid); 1128196368Skmacy mask = flowtable_mask_pcpu(ft, cpuid); 1129196368Skmacy tmpmask = ft->ft_tmpmask; 1130196368Skmacy memcpy(tmpmask, mask, ft->ft_size/8); 1131196368Skmacy /* 1132196368Skmacy * XXX Note to self, bit_ffs operates at the byte level 1133196368Skmacy * and thus adds gratuitous overhead 1134196368Skmacy */ 1135196368Skmacy bit_ffs(tmpmask, ft->ft_size, &curbit); 1136196368Skmacy while (curbit != -1) { 1137261823Sglebius struct flist *flist; 1138261823Sglebius struct flentry *fle; 1139261823Sglebius 1140196368Skmacy if (curbit >= ft->ft_size || curbit < -1) { 1141196368Skmacy db_printf("warning: bad curbit value %d \n", 1142196368Skmacy curbit); 1143196368Skmacy break; 1144196368Skmacy } 1145196368Skmacy 1146261823Sglebius flist = flowtable_list_pcpu(ft, curbit, cpuid); 1147196368Skmacy 1148261823Sglebius SLIST_FOREACH(fle, flist, f_next) 1149196368Skmacy flow_show(ft, fle); 1150196368Skmacy bit_clear(tmpmask, curbit); 1151196368Skmacy bit_ffs(tmpmask, ft->ft_size, &curbit); 1152196368Skmacy } 1153196368Skmacy} 1154196368Skmacy 1155196368Skmacystatic void 1156261601Sglebiusflowtable_show_vnet(struct flowtable *ft) 1157196368Skmacy{ 1158196368Skmacy 1159261823Sglebius int i; 1160261601Sglebius 1161261823Sglebius CPU_FOREACH(i) 1162261823Sglebius flowtable_show(ft, i); 1163196368Skmacy} 1164196368Skmacy 1165196368SkmacyDB_SHOW_COMMAND(flowtables, db_show_flowtables) 1166196368Skmacy{ 1167196368Skmacy VNET_ITERATOR_DECL(vnet_iter); 1168196368Skmacy 1169196368Skmacy VNET_FOREACH(vnet_iter) { 1170196368Skmacy CURVNET_SET(vnet_iter); 1171216856Sbz#ifdef VIMAGE 1172216856Sbz db_printf("vnet %p\n", vnet_iter); 1173216856Sbz#endif 1174261601Sglebius#ifdef INET 1175261601Sglebius printf("IPv4:\n"); 1176261601Sglebius flowtable_show_vnet(&V_ip4_ft); 1177261601Sglebius#endif 1178261601Sglebius#ifdef INET6 1179261601Sglebius printf("IPv6:\n"); 1180261601Sglebius flowtable_show_vnet(&V_ip6_ft); 1181261601Sglebius#endif 1182196368Skmacy CURVNET_RESTORE(); 1183196368Skmacy } 1184196368Skmacy} 1185196368Skmacy#endif 1186