1135332Sglebius/*-
2219182Sglebius * Copyright (c) 2010-2011 Alexander V. Chernikov <melifaro@ipfw.ru>
3143923Sglebius * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org>
4135332Sglebius * Copyright (c) 2001-2003 Roman V. Palagin <romanp@unshadow.net>
5135332Sglebius * All rights reserved.
6135332Sglebius *
7135332Sglebius * Redistribution and use in source and binary forms, with or without
8135332Sglebius * modification, are permitted provided that the following conditions
9135332Sglebius * are met:
10135332Sglebius * 1. Redistributions of source code must retain the above copyright
11135332Sglebius *    notice, this list of conditions and the following disclaimer.
12135332Sglebius * 2. Redistributions in binary form must reproduce the above copyright
13135332Sglebius *    notice, this list of conditions and the following disclaimer in the
14135332Sglebius *    documentation and/or other materials provided with the distribution.
15135332Sglebius *
16135332Sglebius * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17135332Sglebius * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18135332Sglebius * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19135332Sglebius * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20135332Sglebius * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21135332Sglebius * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22135332Sglebius * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23135332Sglebius * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24135332Sglebius * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25135332Sglebius * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26135332Sglebius * SUCH DAMAGE.
27135332Sglebius *
28135332Sglebius * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $
29135332Sglebius */
30135332Sglebius
31135332Sglebiusstatic const char rcs_id[] =
32135332Sglebius    "@(#) $FreeBSD$";
33135332Sglebius
34219182Sglebius#include "opt_inet6.h"
35219182Sglebius#include "opt_route.h"
36135332Sglebius#include <sys/param.h>
37135332Sglebius#include <sys/kernel.h>
38135332Sglebius#include <sys/limits.h>
39135332Sglebius#include <sys/mbuf.h>
40140511Sglebius#include <sys/syslog.h>
41135332Sglebius#include <sys/systm.h>
42135332Sglebius#include <sys/socket.h>
43219182Sglebius#include <sys/endian.h>
44135332Sglebius
45146092Sglebius#include <machine/atomic.h>
46219182Sglebius#include <machine/stdarg.h>
47146092Sglebius
48135332Sglebius#include <net/if.h>
49135332Sglebius#include <net/route.h>
50219182Sglebius#include <net/ethernet.h>
51135332Sglebius#include <netinet/in.h>
52135332Sglebius#include <netinet/in_systm.h>
53135332Sglebius#include <netinet/ip.h>
54219182Sglebius#include <netinet/ip6.h>
55135332Sglebius#include <netinet/tcp.h>
56135332Sglebius#include <netinet/udp.h>
57135332Sglebius
58135332Sglebius#include <netgraph/ng_message.h>
59135332Sglebius#include <netgraph/netgraph.h>
60135332Sglebius
61135332Sglebius#include <netgraph/netflow/netflow.h>
62219182Sglebius#include <netgraph/netflow/netflow_v9.h>
63135332Sglebius#include <netgraph/netflow/ng_netflow.h>
64135332Sglebius
#define	NBUCKETS	(65536)		/* must be power of 2 */

/*
 * This hash is for TCP or UDP packets.
 * NB: both hash macros evaluate their arguments more than once;
 * pass plain lvalues, never expressions with side effects.
 */
#define FULL_HASH(addr1, addr2, port1, port2)	\
	(((addr1 ^ (addr1 >> 16) ^ 		\
	htons(addr2 ^ (addr2 >> 16))) ^ 	\
	port1 ^ htons(port2)) &			\
	(NBUCKETS - 1))

/* This hash is for all other IP packets. */
#define ADDR_HASH(addr1, addr2)			\
	((addr1 ^ (addr1 >> 16) ^ 		\
	htons(addr2 ^ (addr2 >> 16))) &		\
	(NBUCKETS - 1))

/* Macros to shorten logical constructions */
/* XXX: priv must exist in namespace */
/* Flow has been idle longer than the configured inactive timeout. */
#define	INACTIVE(fle)	(time_uptime - fle->f.last > priv->info.nfinfo_inact_t)
/* Flow has existed longer than the configured active timeout. */
#define	AGED(fle)	(time_uptime - fle->f.first > priv->info.nfinfo_act_t)
/* Entry carries no accounted packets, i.e. is not in use. */
#define	ISFREE(fle)	(fle->f.packets == 0)

/*
 * 4 is a magical number: statistically number of 4-packet flows is
 * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP
 * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case
 * of reachable host and 4-packet otherwise.
 */
#define	SMALL(fle)	(fle->f.packets <= 4)

/* malloc(9) type for the flow hash table buckets. */
MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash");
95135332Sglebius
96146092Sglebiusstatic int export_add(item_p, struct flow_entry *);
97219182Sglebiusstatic int export_send(priv_p, fib_export_p, item_p, int);
98135332Sglebius
99248724Sglebiusstatic int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *,
100248724Sglebius    int, uint8_t, uint8_t);
101219229Sbz#ifdef INET6
102248724Sglebiusstatic int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *,
103248724Sglebius    int, uint8_t, uint8_t);
104219229Sbz#endif
105219182Sglebius
106248724Sglebiusstatic void expire_flow(priv_p, fib_export_p, struct flow_entry *, int);
107219182Sglebius
108219182Sglebius/*
109219182Sglebius * Generate hash for a given flow record.
110219182Sglebius *
111219182Sglebius * FIB is not used here, because:
112219182Sglebius * most VRFS will carry public IPv4 addresses which are unique even
113219182Sglebius * without FIB private addresses can overlap, but this is worked out
114219182Sglebius * via flow_rec bcmp() containing fib id. In IPv6 world addresses are
115219182Sglebius * all globally unique (it's not fully true, there is FC00::/7 for example,
116219182Sglebius * but chances of address overlap are MUCH smaller)
117219182Sglebius */
118248724Sglebiusstatic inline uint32_t
119135332Sglebiusip_hash(struct flow_rec *r)
120135332Sglebius{
121248724Sglebius
122135332Sglebius	switch (r->r_ip_p) {
123135332Sglebius	case IPPROTO_TCP:
124135332Sglebius	case IPPROTO_UDP:
125135332Sglebius		return FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr,
126135332Sglebius		    r->r_sport, r->r_dport);
127135332Sglebius	default:
128135332Sglebius		return ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr);
129135332Sglebius	}
130135332Sglebius}
131135332Sglebius
#ifdef INET6
/* Generate hash for a given flow6 record. Use lower 4 octets from v6 addresses */
static inline uint32_t
ip6_hash(struct flow6_rec *r)
{
	uint32_t src_w, dst_w;

	/* Only the low 32 bits of each IPv6 address feed the hash. */
	src_w = r->src.r_src6.__u6_addr.__u6_addr32[3];
	dst_w = r->dst.r_dst6.__u6_addr.__u6_addr32[3];

	if (r->r_ip_p == IPPROTO_TCP || r->r_ip_p == IPPROTO_UDP)
		return FULL_HASH(src_w, dst_w, r->r_sport, r->r_dport);

	return ADDR_HASH(src_w, dst_w);
}
#endif
150219182Sglebius
151146092Sglebius/* This is callback from uma(9), called on alloc. */
152146092Sglebiusstatic int
153146092Sglebiusuma_ctor_flow(void *mem, int size, void *arg, int how)
154135332Sglebius{
155146092Sglebius	priv_p priv = (priv_p )arg;
156135332Sglebius
157146092Sglebius	if (atomic_load_acq_32(&priv->info.nfinfo_used) >= CACHESIZE)
158146092Sglebius		return (ENOMEM);
159135332Sglebius
160146092Sglebius	atomic_add_32(&priv->info.nfinfo_used, 1);
161146092Sglebius
162146092Sglebius	return (0);
163135332Sglebius}
164135332Sglebius
165146092Sglebius/* This is callback from uma(9), called on free. */
166146092Sglebiusstatic void
167146092Sglebiusuma_dtor_flow(void *mem, int size, void *arg)
168135332Sglebius{
169146092Sglebius	priv_p priv = (priv_p )arg;
170135332Sglebius
171146092Sglebius	atomic_subtract_32(&priv->info.nfinfo_used, 1);
172146092Sglebius}
173135332Sglebius
#ifdef INET6
/* uma(9) constructor callback: account one more cached IPv6 flow. */
static int
uma_ctor_flow6(void *mem, int size, void *arg, int how)
{
	priv_p priv = (priv_p)arg;

	/* Refuse the allocation once the IPv6 cache is at capacity. */
	if (atomic_load_acq_32(&priv->info.nfinfo_used6) >= CACHESIZE)
		return (ENOMEM);

	atomic_add_32(&priv->info.nfinfo_used6, 1);
	return (0);
}

/* uma(9) destructor callback: one IPv6 flow entry less in the cache. */
static void
uma_dtor_flow6(void *mem, int size, void *arg)
{
	priv_p priv = (priv_p)arg;

	atomic_subtract_32(&priv->info.nfinfo_used6, 1);
}
#endif
198219182Sglebius
/*
 * Detach export datagram from priv, if there is any.
 * If there is no, allocate a new one.
 *
 * Returns a netgraph item holding a NetFlow v5 datagram mbuf, or
 * NULL on allocation failure.
 */
static item_p
get_export_dgram(priv_p priv, fib_export_p fe)
{
	item_p	item = NULL;

	/* First try to take over the half-filled datagram parked on fe. */
	mtx_lock(&fe->export_mtx);
	if (fe->exp.item != NULL) {
		item = fe->exp.item;
		fe->exp.item = NULL;
	}
	mtx_unlock(&fe->export_mtx);

	if (item == NULL) {
		/* Nothing parked: build a fresh, empty v5 datagram. */
		struct netflow_v5_export_dgram *dgram;
		struct mbuf *m;

		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (m == NULL)
			return (NULL);
		item = ng_package_data(m, NG_NOFLAGS);
		if (item == NULL)	/* NOTE(review): m presumably consumed by ng_package_data() on failure — confirm */
			return (NULL);
		dgram = mtod(m, struct netflow_v5_export_dgram *);
		dgram->header.count = 0;	/* no flow records yet */
		dgram->header.version = htons(NETFLOW_V5);
		dgram->header.pad = 0;
	}

	return (item);
}
233135332Sglebius
234146092Sglebius/*
235146092Sglebius * Re-attach incomplete datagram back to priv.
236146092Sglebius * If there is already another one, then send incomplete. */
237146092Sglebiusstatic void
238219182Sglebiusreturn_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags)
239135332Sglebius{
240248724Sglebius
241146092Sglebius	/*
242146092Sglebius	 * It may happen on SMP, that some thread has already
243146092Sglebius	 * put its item there, in this case we bail out and
244146092Sglebius	 * send what we have to collector.
245146092Sglebius	 */
246219182Sglebius	mtx_lock(&fe->export_mtx);
247219182Sglebius	if (fe->exp.item == NULL) {
248219182Sglebius		fe->exp.item = item;
249219182Sglebius		mtx_unlock(&fe->export_mtx);
250146092Sglebius	} else {
251219182Sglebius		mtx_unlock(&fe->export_mtx);
252219182Sglebius		export_send(priv, fe, item, flags);
253146092Sglebius	}
254135332Sglebius}
255135332Sglebius
/*
 * The flow is over. Call export_add() and free it. If datagram is
 * full, then call export_send().
 *
 * Consumes (frees) fle on every path.  'flags' is passed through to
 * the netgraph send path (e.g. NG_QUEUE).
 */
static void
expire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags)
{
	struct netflow_export_item exp;
	uint16_t version = fle->f.version;

	/* NetFlow v5 export path: IPv4 flows only. */
	if ((priv->export != NULL) && (version == IPVERSION)) {
		exp.item = get_export_dgram(priv, fe);
		if (exp.item == NULL) {
			atomic_add_32(&priv->info.nfinfo_export_failed, 1);
			/* v9 export is skipped too, so count that failure as well. */
			if (priv->export9 != NULL)
				atomic_add_32(&priv->info.nfinfo_export9_failed, 1);
			/* fle definitely contains IPv4 flow. */
			uma_zfree_arg(priv->zone, fle, priv);
			return;
		}

		/* Non-zero from export_add() means the datagram is full. */
		if (export_add(exp.item, fle) > 0)
			export_send(priv, fe, exp.item, flags);
		else
			return_export_dgram(priv, fe, exp.item, NG_QUEUE);
	}

	/* NetFlow v9 export path: handles both IPv4 and IPv6 flows. */
	if (priv->export9 != NULL) {
		exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt);
		if (exp.item9 == NULL) {
			atomic_add_32(&priv->info.nfinfo_export9_failed, 1);
			/* Free fle to the zone matching its IP version. */
			if (version == IPVERSION)
				uma_zfree_arg(priv->zone, fle, priv);
#ifdef INET6
			else if (version == IP6VERSION)
				uma_zfree_arg(priv->zone6, fle, priv);
#endif
			else
				panic("ng_netflow: Unknown IP proto: %d",
				    version);
			return;
		}

		if (export9_add(exp.item9, exp.item9_opt, fle) > 0)
			export9_send(priv, fe, exp.item9, exp.item9_opt, flags);
		else
			return_export9_dgram(priv, fe, exp.item9,
			    exp.item9_opt, NG_QUEUE);
	}

	/* All export paths are done with fle; release it to its zone. */
	if (version == IPVERSION)
		uma_zfree_arg(priv->zone, fle, priv);
#ifdef INET6
	else if (version == IP6VERSION)
		uma_zfree_arg(priv->zone6, fle, priv);
#endif
}
313135332Sglebius
314135332Sglebius/* Get a snapshot of node statistics */
315135332Sglebiusvoid
316135332Sglebiusng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i)
317135332Sglebius{
318248724Sglebius
319146092Sglebius	/* XXX: atomic */
320135332Sglebius	memcpy((void *)i, (void *)&priv->info, sizeof(priv->info));
321135332Sglebius}
322135332Sglebius
/*
 * Insert a record into defined slot.
 *
 * First we get for us a free flow entry, then fill in all
 * possible fields in it.
 *
 * Returns 0 on success, ENOMEM if a flow entry could not be
 * allocated.  Must be called with hsh->mtx held.
 *
 * TODO: consider dropping hash mutex while filling in datagram,
 * as this was done in previous version. Need to test & profile
 * to be sure.
 */
static int
hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r,
	int plen, uint8_t flags, uint8_t tcp_flags)
{
	struct flow_entry *fle;
	struct sockaddr_in sin;
	struct rtentry *rt;

	mtx_assert(&hsh->mtx, MA_OWNED);

	fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT);
	if (fle == NULL) {
		atomic_add_32(&priv->info.nfinfo_alloc_failed, 1);
		return (ENOMEM);
	}

	/*
	 * Now fle is totally ours. It is detached from all lists,
	 * we can safely edit it.
	 */
	fle->f.version = IPVERSION;
	bcopy(r, &fle->f.r, sizeof(struct flow_rec));
	fle->f.bytes = plen;
	fle->f.packets = 1;
	fle->f.tcp_flags = tcp_flags;

	fle->f.first = fle->f.last = time_uptime;

	/*
	 * First we do route table lookup on destination address. So we can
	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
	 */
	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
		bzero(&sin, sizeof(sin));
		sin.sin_len = sizeof(struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr = fle->f.r.r_dst;
		rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib);
		if (rt != NULL) {
			fle->f.fle_o_ifx = rt->rt_ifp->if_index;

			/* Record next hop only for IPv4 gateway routes. */
			if (rt->rt_flags & RTF_GATEWAY &&
			    rt->rt_gateway->sa_family == AF_INET)
				fle->f.next_hop =
				    ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr;

			/* Prefix length == number of set bits in the netmask. */
			if (rt_mask(rt))
				fle->f.dst_mask =
				    bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr);
			else if (rt->rt_flags & RTF_HOST)
				/* Give up. We can't determine mask :( */
				fle->f.dst_mask = 32;

			RTFREE_LOCKED(rt);
		}
	}

	/* Do route lookup on source address, to fill in src_mask. */
	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
		bzero(&sin, sizeof(sin));
		sin.sin_len = sizeof(struct sockaddr_in);
		sin.sin_family = AF_INET;
		sin.sin_addr = fle->f.r.r_src;
		rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib);
		if (rt != NULL) {
			if (rt_mask(rt))
				fle->f.src_mask =
				    bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr);
			else if (rt->rt_flags & RTF_HOST)
				/* Give up. We can't determine mask :( */
				fle->f.src_mask = 32;

			RTFREE_LOCKED(rt);
		}
	}

	/* Push new flow at the end of hash. */
	TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);

	return (0);
}
414135332Sglebius
#ifdef INET6
/* XXX: make normal function, instead of.. */
#define ipv6_masklen(x)		bitcount32((x).__u6_addr.__u6_addr32[0]) + \
				bitcount32((x).__u6_addr.__u6_addr32[1]) + \
				bitcount32((x).__u6_addr.__u6_addr32[2]) + \
				bitcount32((x).__u6_addr.__u6_addr32[3])
#define RT_MASK6(x)	(ipv6_masklen(((struct sockaddr_in6 *)rt_mask(x))->sin6_addr))
/*
 * IPv6 counterpart of hash_insert(): allocate a flow6_entry, fill it
 * from the parsed record, and perform destination/source route lookups
 * to learn the output interface index, next hop and prefix masks.
 *
 * Returns 0 on success, ENOMEM if a flow entry could not be
 * allocated.  Must be called with hsh6->mtx held.
 */
static int
hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r,
	int plen, uint8_t flags, uint8_t tcp_flags)
{
	struct flow6_entry *fle6;
	struct sockaddr_in6 *src, *dst;
	struct rtentry *rt;
	struct route_in6 rin6;

	mtx_assert(&hsh6->mtx, MA_OWNED);

	fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT);
	if (fle6 == NULL) {
		atomic_add_32(&priv->info.nfinfo_alloc_failed, 1);
		return (ENOMEM);
	}

	/*
	 * Now fle is totally ours. It is detached from all lists,
	 * we can safely edit it.
	 */

	fle6->f.version = IP6VERSION;
	bcopy(r, &fle6->f.r, sizeof(struct flow6_rec));
	fle6->f.bytes = plen;
	fle6->f.packets = 1;
	fle6->f.tcp_flags = tcp_flags;

	fle6->f.first = fle6->f.last = time_uptime;

	/*
	 * First we do route table lookup on destination address. So we can
	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
	 */
	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
		bzero(&rin6, sizeof(struct route_in6));
		dst = (struct sockaddr_in6 *)&rin6.ro_dst;
		dst->sin6_len = sizeof(struct sockaddr_in6);
		dst->sin6_family = AF_INET6;
		dst->sin6_addr = r->dst.r_dst6;

		rin6.ro_rt = rtalloc1_fib((struct sockaddr *)dst, 0, 0, r->fib);

		if (rin6.ro_rt != NULL) {
			rt = rin6.ro_rt;
			fle6->f.fle_o_ifx = rt->rt_ifp->if_index;

			/* Record next hop only for IPv6 gateway routes. */
			if (rt->rt_flags & RTF_GATEWAY &&
			    rt->rt_gateway->sa_family == AF_INET6)
				fle6->f.n.next_hop6 =
				    ((struct sockaddr_in6 *)(rt->rt_gateway))->sin6_addr;

			if (rt_mask(rt))
				fle6->f.dst_mask = RT_MASK6(rt);
			else
				fle6->f.dst_mask = 128;

			RTFREE_LOCKED(rt);
		}
	}

	/*
	 * Fixed: this used to test NG_NETFLOW_CONF_NODSTLOOKUP again,
	 * which made the NOSRCLOOKUP flag a no-op for IPv6 flows (compare
	 * the IPv4 path in hash_insert(), which tests NOSRCLOOKUP here).
	 */
	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
		/* Do route lookup on source address, to fill in src_mask. */
		bzero(&rin6, sizeof(struct route_in6));
		src = (struct sockaddr_in6 *)&rin6.ro_dst;
		src->sin6_len = sizeof(struct sockaddr_in6);
		src->sin6_family = AF_INET6;
		src->sin6_addr = r->src.r_src6;

		rin6.ro_rt = rtalloc1_fib((struct sockaddr *)src, 0, 0, r->fib);

		if (rin6.ro_rt != NULL) {
			rt = rin6.ro_rt;

			if (rt_mask(rt))
				fle6->f.src_mask = RT_MASK6(rt);
			else
				fle6->f.src_mask = 128;

			RTFREE_LOCKED(rt);
		}
	}

	/* Push new flow at the end of hash. */
	TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash);

	return (0);
}
#undef ipv6_masklen
#undef RT_MASK6
#endif
513219182Sglebius
514219182Sglebius
515135332Sglebius/*
516135332Sglebius * Non-static functions called from ng_netflow.c
517135332Sglebius */
518135332Sglebius
/* Allocate memory and set up flow cache */
void
ng_netflow_cache_init(priv_p priv)
{
	struct flow_hash_entry *hsh;
	int i;

	/* Initialize cache UMA zone. */
	priv->zone = uma_zcreate("NetFlow IPv4 cache",
	    sizeof(struct flow_entry), uma_ctor_flow, uma_dtor_flow, NULL,
	    NULL, UMA_ALIGN_CACHE, 0);
	uma_zone_set_max(priv->zone, CACHESIZE);
#ifdef INET6
	priv->zone6 = uma_zcreate("NetFlow IPv6 cache",
	    sizeof(struct flow6_entry), uma_ctor_flow6, uma_dtor_flow6, NULL,
	    NULL, UMA_ALIGN_CACHE, 0);
	uma_zone_set_max(priv->zone6, CACHESIZE);
#endif

	/* Allocate hash (M_WAITOK: never fails). */
	priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);

	/* Initialize hash: per-bucket mutex and flow list. */
	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) {
		mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
		TAILQ_INIT(&hsh->head);
	}

#ifdef INET6
	/* Allocate hash. */
	priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);

	/* Initialize hash. */
	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) {
		mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
		TAILQ_INIT(&hsh->head);
	}
#endif

	/* Set up NetFlow v9 templates/state as well. */
	ng_netflow_v9_cache_init(priv);
	CTR0(KTR_NET, "ng_netflow startup()");
}
563135332Sglebius
/*
 * Initialize new FIB table for v5 and v9.
 *
 * Returns 0 on success (including when the FIB is already set up),
 * ENOMEM on allocation failure.  Safe against concurrent callers:
 * the loser of the atomic_cmpset race frees its own allocation.
 */
int
ng_netflow_fib_init(priv_p priv, int fib)
{
	fib_export_p	fe = priv_to_fib(priv, fib);

	CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib);

	/* Already initialized by an earlier caller. */
	if (fe != NULL)
		return (0);

	if ((fe = malloc(sizeof(struct fib_export), M_NETGRAPH,
	    M_NOWAIT | M_ZERO)) == NULL)
		return (ENOMEM);

	mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF);
	mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF);
	fe->fib = fib;
	fe->domain_id = fib;

	/* Publish fe only if the slot is still empty. */
	if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib],
	    (uintptr_t)NULL, (uintptr_t)fe) == 0) {
		/* FIB already set up by other ISR */
		CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p",
		    fib, fe, priv_to_fib(priv, fib));
		mtx_destroy(&fe->export_mtx);
		mtx_destroy(&fe->export9_mtx);
		free(fe, M_NETGRAPH);
	} else {
		/* Increase counter for statistics */
		CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)",
		    fib, fe, priv_to_fib(priv, fib));
		atomic_fetchadd_32(&priv->info.nfinfo_alloc_fibs, 1);
	}

	return (0);
}
601219182Sglebius
/* Free all flow cache memory. Called from node close method. */
void
ng_netflow_cache_flush(priv_p priv)
{
	struct flow_entry	*fle, *fle1;
	struct flow_hash_entry	*hsh;
	struct netflow_export_item exp;
	fib_export_p fe;
	int i;

	bzero(&exp, sizeof(exp));

	/*
	 * We are going to free probably billable data.
	 * Expire everything before freeing it.
	 * No locking is required since callout is already drained.
	 */
	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++)
		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
			fe = priv_to_fib(priv, fle->f.r.fib);
			expire_flow(priv, fe, fle, NG_QUEUE);
		}
#ifdef INET6
	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++)
		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
			fe = priv_to_fib(priv, fle->f.r.fib);
			expire_flow(priv, fe, fle, NG_QUEUE);
		}
#endif

	uma_zdestroy(priv->zone);
	/* Destroy hash mutexes. */
	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++)
		mtx_destroy(&hsh->mtx);

	/* Free hash memory (NULL check is defensive; init uses M_WAITOK). */
	if (priv->hash != NULL)
		free(priv->hash, M_NETFLOW_HASH);
#ifdef INET6
	uma_zdestroy(priv->zone6);
	/* Destroy hash mutexes. */
	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++)
		mtx_destroy(&hsh->mtx);

	/* Free hash memory. */
	if (priv->hash6 != NULL)
		free(priv->hash6, M_NETFLOW_HASH);
#endif

	/* Flush any half-filled export datagrams and free per-FIB state. */
	for (i = 0; i < priv->maxfibs; i++) {
		if ((fe = priv_to_fib(priv, i)) == NULL)
			continue;

		if (fe->exp.item != NULL)
			export_send(priv, fe, fe->exp.item, NG_QUEUE);

		if (fe->exp.item9 != NULL)
			export9_send(priv, fe, fe->exp.item9,
			    fe->exp.item9_opt, NG_QUEUE);

		mtx_destroy(&fe->export_mtx);
		mtx_destroy(&fe->export9_mtx);
		free(fe, M_NETGRAPH);
	}

	ng_netflow_v9_cache_flush(priv);
}
671135332Sglebius
672146092Sglebius/* Insert packet from into flow cache. */
673135332Sglebiusint
674248724Sglebiusng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip,
675248724Sglebius    caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
676248724Sglebius    unsigned int src_if_index)
677135332Sglebius{
678248724Sglebius	struct flow_entry	*fle, *fle1;
679219182Sglebius	struct flow_hash_entry	*hsh;
680135332Sglebius	struct flow_rec		r;
681143923Sglebius	int			hlen, plen;
682146092Sglebius	int			error = 0;
683248724Sglebius	uint16_t		eproto;
684135332Sglebius	uint8_t			tcp_flags = 0;
685135332Sglebius
686135332Sglebius	bzero(&r, sizeof(r));
687248724Sglebius
688143923Sglebius	if (ip->ip_v != IPVERSION)
689143923Sglebius		return (EINVAL);
690135332Sglebius
691143923Sglebius	hlen = ip->ip_hl << 2;
692143923Sglebius	if (hlen < sizeof(struct ip))
693143923Sglebius		return (EINVAL);
694143923Sglebius
695219182Sglebius	eproto = ETHERTYPE_IP;
696219182Sglebius	/* Assume L4 template by default */
697219182Sglebius	r.flow_type = NETFLOW_V9_FLOW_V4_L4;
698219182Sglebius
699143923Sglebius	r.r_src = ip->ip_src;
700143923Sglebius	r.r_dst = ip->ip_dst;
701219182Sglebius	r.fib = fe->fib;
702143923Sglebius
703143923Sglebius	plen = ntohs(ip->ip_len);
704143923Sglebius
705143923Sglebius	r.r_ip_p = ip->ip_p;
706143923Sglebius	r.r_tos = ip->ip_tos;
707143923Sglebius
708183693Smav	r.r_i_ifx = src_if_index;
709143923Sglebius
710143923Sglebius	/*
711143923Sglebius	 * XXX NOTE: only first fragment of fragmented TCP, UDP and
712143923Sglebius	 * ICMP packet will be recorded with proper s_port and d_port.
713143923Sglebius	 * Following fragments will be recorded simply as IP packet with
714143923Sglebius	 * ip_proto = ip->ip_p and s_port, d_port set to zero.
715143923Sglebius	 * I know, it looks like bug. But I don't want to re-implement
 * ip packet assembling here. Anyway, (in)famous trafd works this way -
717143923Sglebius	 * and nobody complains yet :)
718143923Sglebius	 */
719144901Sglebius	if ((ip->ip_off & htons(IP_OFFMASK)) == 0)
720144901Sglebius		switch(r.r_ip_p) {
721144901Sglebius		case IPPROTO_TCP:
722248724Sglebius		    {
723248724Sglebius			struct tcphdr *tcp;
724143923Sglebius
725144901Sglebius			tcp = (struct tcphdr *)((caddr_t )ip + hlen);
726144901Sglebius			r.r_sport = tcp->th_sport;
727144901Sglebius			r.r_dport = tcp->th_dport;
728144901Sglebius			tcp_flags = tcp->th_flags;
729144901Sglebius			break;
730248724Sglebius		    }
731248724Sglebius		case IPPROTO_UDP:
732144901Sglebius			r.r_ports = *(uint32_t *)((caddr_t )ip + hlen);
733144901Sglebius			break;
734144901Sglebius		}
735143923Sglebius
736219182Sglebius	atomic_fetchadd_32(&priv->info.nfinfo_packets, 1);
737219182Sglebius	/* XXX: atomic */
738139374Sglebius	priv->info.nfinfo_bytes += plen;
739139374Sglebius
740146092Sglebius	/* Find hash slot. */
741146092Sglebius	hsh = &priv->hash[ip_hash(&r)];
742135332Sglebius
743146092Sglebius	mtx_lock(&hsh->mtx);
744135332Sglebius
745146092Sglebius	/*
746146092Sglebius	 * Go through hash and find our entry. If we encounter an
747146092Sglebius	 * entry, that should be expired, purge it. We do a reverse
748146092Sglebius	 * search since most active entries are first, and most
749146092Sglebius	 * searches are done on most active entries.
750146092Sglebius	 */
751146092Sglebius	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
752146092Sglebius		if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0)
753146092Sglebius			break;
754146092Sglebius		if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) {
755146092Sglebius			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
756248724Sglebius			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
757248724Sglebius			    fle, NG_QUEUE);
758146092Sglebius			atomic_add_32(&priv->info.nfinfo_act_exp, 1);
759146092Sglebius		}
760146092Sglebius	}
761135332Sglebius
762146092Sglebius	if (fle) {			/* An existent entry. */
763146092Sglebius
764135332Sglebius		fle->f.bytes += plen;
765135332Sglebius		fle->f.packets ++;
766135332Sglebius		fle->f.tcp_flags |= tcp_flags;
767135332Sglebius		fle->f.last = time_uptime;
768135332Sglebius
769135332Sglebius		/*
770135332Sglebius		 * We have the following reasons to expire flow in active way:
771135332Sglebius		 * - it hit active timeout
772135332Sglebius		 * - a TCP connection closed
773135332Sglebius		 * - it is going to overflow counter
774135332Sglebius		 */
775135332Sglebius		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) ||
776219182Sglebius		    (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
777146092Sglebius			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
778248724Sglebius			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
779248724Sglebius			    fle, NG_QUEUE);
780154277Sglebius			atomic_add_32(&priv->info.nfinfo_act_exp, 1);
781146092Sglebius		} else {
782146092Sglebius			/*
783146092Sglebius			 * It is the newest, move it to the tail,
784146092Sglebius			 * if it isn't there already. Next search will
785146092Sglebius			 * locate it quicker.
786146092Sglebius			 */
787146092Sglebius			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
788146092Sglebius				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
789146092Sglebius				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
790146092Sglebius			}
791146092Sglebius		}
792146092Sglebius	} else				/* A new flow entry. */
793237227Smelifaro		error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags);
794135332Sglebius
795146092Sglebius	mtx_unlock(&hsh->mtx);
796135332Sglebius
797219182Sglebius	return (error);
798219182Sglebius}
799135332Sglebius
#ifdef INET6
/*
 * Insert IPv6 packet into flow cache.
 *
 * priv         - node private data
 * fe           - per-FIB export state; fe->fib tags the flow record
 * ip6          - IPv6 header of the packet
 * upper_ptr    - pointer to the upper-layer (L4) header, already located
 *                by the caller (past any extension headers)
 * upper_proto  - upper-layer protocol number
 * flags        - NG_NETFLOW_* flags (e.g. NG_NETFLOW_IS_FRAG)
 * src_if_index - input interface index recorded in the flow
 *
 * Returns 0 on success or an errno value (EINVAL on malformed header,
 * or hash6_insert()'s error for a new flow).
 */
int
ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6,
    caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
    unsigned int src_if_index)
{
	struct flow_entry	*fle = NULL, *fle1;
	struct flow6_entry	*fle6;
	struct flow_hash_entry	*hsh;
	struct flow6_rec	r;
	int			plen;
	int			error = 0;
	uint8_t			tcp_flags = 0;

	/* Sanity check: must really be an IPv6 header. */
	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
		return (EINVAL);

	/* Build the flow key. r is bcmp()'d below, so zero padding too. */
	bzero(&r, sizeof(r));

	r.src.r_src6 = ip6->ip6_src;
	r.dst.r_dst6 = ip6->ip6_dst;
	r.fib = fe->fib;

	/* Assume L4 template by default */
	r.flow_type = NETFLOW_V9_FLOW_V6_L4;

	/* ip6_plen excludes the fixed header; account for it here. */
	plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);

#if 0
	/* XXX: set DSCP/CoS value */
	r.r_tos = ip->ip_tos;
#endif
	/* Ports are only valid on unfragmented packets (or 1st fragment). */
	if ((flags & NG_NETFLOW_IS_FRAG) == 0) {
		switch(upper_proto) {
		case IPPROTO_TCP:
		    {
			struct tcphdr *tcp;

			tcp = (struct tcphdr *)upper_ptr;
			/* First 32 bits of the TCP header = sport + dport. */
			r.r_ports = *(uint32_t *)upper_ptr;
			tcp_flags = tcp->th_flags;
			break;
		    }
		case IPPROTO_UDP:
		case IPPROTO_SCTP:
			/* Likewise: first 32 bits hold both port numbers. */
			r.r_ports = *(uint32_t *)upper_ptr;
			break;
		}
	}

	r.r_ip_p = upper_proto;
	r.r_i_ifx = src_if_index;

	atomic_fetchadd_32(&priv->info.nfinfo_packets6, 1);
	/* XXX: atomic */
	priv->info.nfinfo_bytes6 += plen;

	/* Find hash slot. */
	hsh = &priv->hash6[ip6_hash(&r)];

	mtx_lock(&hsh->mtx);

	/*
	 * Go through hash and find our entry. If we encounter an
	 * entry, that should be expired, purge it. We do a reverse
	 * search since most active entries are first, and most
	 * searches are done on most active entries.
	 */
	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
		if (fle->f.version != IP6VERSION)
			continue;
		fle6 = (struct flow6_entry *)fle;
		if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0)
			break;
		if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) {
			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
			    NG_QUEUE);
			atomic_add_32(&priv->info.nfinfo_act_exp, 1);
		}
	}

	if (fle != NULL) {			/* An existent entry. */
		fle6 = (struct flow6_entry *)fle;

		fle6->f.bytes += plen;
		fle6->f.packets ++;
		fle6->f.tcp_flags |= tcp_flags;
		fle6->f.last = time_uptime;

		/*
		 * We have the following reasons to expire flow in active way:
		 * - it hit active timeout
		 * - a TCP connection closed
		 * - it is going to overflow counter
		 */
		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) ||
		    (fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
			    NG_QUEUE);
			atomic_add_32(&priv->info.nfinfo_act_exp, 1);
		} else {
			/*
			 * It is the newest, move it to the tail,
			 * if it isn't there already. Next search will
			 * locate it quicker.
			 */
			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
			}
		}
	} else				/* A new flow entry. */
		error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags);

	mtx_unlock(&hsh->mtx);

	return (error);
}
#endif
923135332Sglebius
/*
 * Return records from cache to userland.
 *
 * The transfer is incremental: the caller passes a (hash_id, list_id)
 * resume position in 'req', and we fill 'resp' (whose trailing storage
 * holds the record array) until NREC_AT_ONCE/NREC6_AT_ONCE entries are
 * copied, then hand the next resume position back in 'resp'.  A final
 * (0, 0) position signals the end of the cache.
 *
 * TODO: matching particular IP should be done in kernel, here.
 */
int
ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req,
struct ngnf_show_header *resp)
{
	struct flow_hash_entry	*hsh;
	struct flow_entry	*fle;
	/* Record arrays live immediately after the response header. */
	struct flow_entry_data	*data = (struct flow_entry_data *)(resp + 1);
#ifdef INET6
	struct flow6_entry_data	*data6 = (struct flow6_entry_data *)(resp + 1);
#endif
	int	i, max;

	i = req->hash_id;
	if (i > NBUCKETS-1)
		return (EINVAL);

	/* Select v4 or v6 hash table and per-message record limit. */
#ifdef INET6
	if (req->version == 6) {
		resp->version = 6;
		hsh = priv->hash6 + i;
		max = NREC6_AT_ONCE;
	} else
#endif
	if (req->version == 4) {
		resp->version = 4;
		hsh = priv->hash + i;
		max = NREC_AT_ONCE;
	} else
		return (EINVAL);

	/*
	 * We will transfer not more than NREC_AT_ONCE. More data
	 * will come in next message.
	 * We send current hash index and current record number in list
	 * to userland, and userland should return it back to us.
	 * Then, we will restart with new entry.
	 *
	 * The resulting cache snapshot can be inaccurate if flow expiration
	 * is taking place on hash item between userland data requests for
	 * this hash item id.
	 */
	resp->nentries = 0;
	for (; i < NBUCKETS; hsh++, i++) {
		int list_id;

		if (mtx_trylock(&hsh->mtx) == 0) {
			/*
			 * Requested hash index is not available,
			 * relay decision to skip or re-request data
			 * to userland.
			 */
			resp->hash_id = i;
			resp->list_id = 0;
			return (0);
		}

		list_id = 0;
		TAILQ_FOREACH(fle, &hsh->head, fle_hash) {
			/* Yield the bucket if the datapath is waiting on it. */
			if (hsh->mtx.mtx_lock & MTX_CONTESTED) {
				resp->hash_id = i;
				resp->list_id = list_id;
				mtx_unlock(&hsh->mtx);
				return (0);
			}

			list_id++;
			/* Search for particular record in list. */
			if (req->list_id > 0) {
				if (list_id < req->list_id)
					continue;

				/* Requested list position found. */
				req->list_id = 0;
			}
#ifdef INET6
			if (req->version == 6) {
				struct flow6_entry *fle6;

				fle6 = (struct flow6_entry *)fle;
				bcopy(&fle6->f, data6 + resp->nentries,
				    sizeof(fle6->f));
			} else
#endif
				bcopy(&fle->f, data + resp->nentries,
				    sizeof(fle->f));
			resp->nentries++;
			if (resp->nentries == max) {
				resp->hash_id = i;
				/*
				 * If it was the last item in list
				 * we simply skip to next hash_id.
				 */
				resp->list_id = list_id + 1;
				mtx_unlock(&hsh->mtx);
				return (0);
			}
		}
		mtx_unlock(&hsh->mtx);
	}

	/* Whole cache walked: tell userland there is nothing more. */
	resp->hash_id = resp->list_id = 0;

	return (0);
}
1033135332Sglebius
/*
 * We have full datagram in privdata. Send it to export hook.
 *
 * Finalizes the NetFlow v5 header (uptime, wallclock, engine id,
 * sequence number) and forwards the item to the export hook, or frees
 * it if no export hook is connected.  Consumes 'item' in either case.
 *
 * NOTE: header->count stays in host byte order until the very last
 * assignment — it is used to size the mbuf and to advance fe->flow_seq
 * first, so the final htons() must not be moved earlier.
 */
static int
export_send(priv_p priv, fib_export_p fe, item_p item, int flags)
{
	struct mbuf *m = NGI_M(item);
	struct netflow_v5_export_dgram *dgram = mtod(m,
					struct netflow_v5_export_dgram *);
	struct netflow_v5_header *header = &dgram->header;
	struct timespec ts;
	int error = 0;

	/* Fill mbuf header. */
	m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v5_record) *
	   header->count + sizeof(struct netflow_v5_header);

	/* Fill export header. */
	header->sys_uptime = htonl(MILLIUPTIME(time_uptime));
	getnanotime(&ts);
	header->unix_secs  = htonl(ts.tv_sec);
	header->unix_nsecs = htonl(ts.tv_nsec);
	header->engine_type = 0;
	header->engine_id = fe->domain_id;
	header->pad = 0;
	/* Sequence number counts exported flows, so bump by record count. */
	header->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq,
	    header->count));
	header->count = htons(header->count);

	if (priv->export != NULL)
		NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags);
	else
		NG_FREE_ITEM(item);

	return (error);
}
1068135332Sglebius
1069135332Sglebius
1070146092Sglebius/* Add export record to dgram. */
1071135332Sglebiusstatic int
1072146092Sglebiusexport_add(item_p item, struct flow_entry *fle)
1073135332Sglebius{
1074146092Sglebius	struct netflow_v5_export_dgram *dgram = mtod(NGI_M(item),
1075146092Sglebius					struct netflow_v5_export_dgram *);
1076146092Sglebius	struct netflow_v5_header *header = &dgram->header;
1077135332Sglebius	struct netflow_v5_record *rec;
1078135332Sglebius
1079175717Smav	rec = &dgram->r[header->count];
1080175717Smav	header->count ++;
1081135332Sglebius
1082146092Sglebius	KASSERT(header->count <= NETFLOW_V5_MAX_RECORDS,
1083146092Sglebius	    ("ng_netflow: export too big"));
1084146092Sglebius
1085146092Sglebius	/* Fill in export record. */
1086135332Sglebius	rec->src_addr = fle->f.r.r_src.s_addr;
1087135332Sglebius	rec->dst_addr = fle->f.r.r_dst.s_addr;
1088135332Sglebius	rec->next_hop = fle->f.next_hop.s_addr;
1089135332Sglebius	rec->i_ifx    = htons(fle->f.fle_i_ifx);
1090135332Sglebius	rec->o_ifx    = htons(fle->f.fle_o_ifx);
1091135332Sglebius	rec->packets  = htonl(fle->f.packets);
1092135332Sglebius	rec->octets   = htonl(fle->f.bytes);
1093143103Sglebius	rec->first    = htonl(MILLIUPTIME(fle->f.first));
1094143103Sglebius	rec->last     = htonl(MILLIUPTIME(fle->f.last));
1095135332Sglebius	rec->s_port   = fle->f.r.r_sport;
1096135332Sglebius	rec->d_port   = fle->f.r.r_dport;
1097135332Sglebius	rec->flags    = fle->f.tcp_flags;
1098135332Sglebius	rec->prot     = fle->f.r.r_ip_p;
1099135332Sglebius	rec->tos      = fle->f.r.r_tos;
1100135332Sglebius	rec->dst_mask = fle->f.dst_mask;
1101135332Sglebius	rec->src_mask = fle->f.src_mask;
1102210500Sglebius	rec->pad1     = 0;
1103210500Sglebius	rec->pad2     = 0;
1104135332Sglebius
1105146092Sglebius	/* Not supported fields. */
1106146092Sglebius	rec->src_as = rec->dst_as = 0;
1107135332Sglebius
1108146092Sglebius	if (header->count == NETFLOW_V5_MAX_RECORDS)
1109146092Sglebius		return (1); /* end of datagram */
1110146092Sglebius	else
1111146092Sglebius		return (0);
1112135332Sglebius}
1113135332Sglebius
/*
 * Periodic flow expiry run.
 *
 * Callout handler: sweeps both the IPv4 and (if compiled in) IPv6 hash
 * tables, expiring inactive/small/aged flows, then re-arms itself to
 * fire again in one second.  Buckets whose mutex is busy are simply
 * skipped — the datapath has priority — and a sweep of a bucket is
 * abandoned as soon as its lock becomes contested.
 */
void
ng_netflow_expire(void *arg)
{
	struct flow_entry	*fle, *fle1;
	struct flow_hash_entry	*hsh;
	priv_p			priv = (priv_p )arg;
	uint32_t		used;
	int			i;

	/*
	 * Going through all the cache.
	 */
	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) {
		/*
		 * Skip entries, that are already being worked on.
		 */
		if (mtx_trylock(&hsh->mtx) == 0)
			continue;

		used = atomic_load_acq_32(&priv->info.nfinfo_used);
		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
			/*
			 * Interrupt thread wants this entry!
			 * Quick! Quick! Bail out!
			 */
			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
				break;

			/*
			 * Don't expire aggressively while hash collision
			 * ratio is predicted small.
			 */
			if (used <= (NBUCKETS*2) && !INACTIVE(fle))
				break;

			if ((INACTIVE(fle) && (SMALL(fle) ||
			    (used > (NBUCKETS*2)))) || AGED(fle)) {
				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
				expire_flow(priv, priv_to_fib(priv,
				    fle->f.r.fib), fle, NG_NOFLAGS);
				used--;
				atomic_add_32(&priv->info.nfinfo_inact_exp, 1);
			}
		}
		mtx_unlock(&hsh->mtx);
	}

#ifdef INET6
	/* Same sweep for the IPv6 cache. */
	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) {
		struct flow6_entry	*fle6;

		/*
		 * Skip entries, that are already being worked on.
		 */
		if (mtx_trylock(&hsh->mtx) == 0)
			continue;

		used = atomic_load_acq_32(&priv->info.nfinfo_used6);
		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
			fle6 = (struct flow6_entry *)fle;
			/*
			 * Interrupt thread wants this entry!
			 * Quick! Quick! Bail out!
			 */
			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
				break;

			/*
			 * Don't expire aggressively while hash collision
			 * ratio is predicted small.
			 */
			if (used <= (NBUCKETS*2) && !INACTIVE(fle6))
				break;

			if ((INACTIVE(fle6) && (SMALL(fle6) ||
			    (used > (NBUCKETS*2)))) || AGED(fle6)) {
				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
				expire_flow(priv, priv_to_fib(priv,
				    fle->f.r.fib), fle, NG_NOFLAGS);
				used--;
				atomic_add_32(&priv->info.nfinfo_inact_exp, 1);
			}
		}
		mtx_unlock(&hsh->mtx);
	}
#endif

	/* Schedule next expire. */
	callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire,
	    (void *)priv);
}
1206