tcp_hostcache.c revision 193731
11556Srgrimes/*-
21556Srgrimes * Copyright (c) 2002 Andre Oppermann, Internet Business Solutions AG
31556Srgrimes * All rights reserved.
41556Srgrimes *
51556Srgrimes * Redistribution and use in source and binary forms, with or without
61556Srgrimes * modification, are permitted provided that the following conditions
71556Srgrimes * are met:
81556Srgrimes * 1. Redistributions of source code must retain the above copyright
91556Srgrimes *    notice, this list of conditions and the following disclaimer.
101556Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111556Srgrimes *    notice, this list of conditions and the following disclaimer in the
121556Srgrimes *    documentation and/or other materials provided with the distribution.
131556Srgrimes * 3. The name of the author may not be used to endorse or promote
141556Srgrimes *    products derived from this software without specific prior written
151556Srgrimes *    permission.
161556Srgrimes *
171556Srgrimes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
181556Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191556Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201556Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
211556Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221556Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231556Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241556Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251556Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261556Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271556Srgrimes * SUCH DAMAGE.
281556Srgrimes */
291556Srgrimes
301556Srgrimes/*
311556Srgrimes * The tcp_hostcache moves the tcp-specific cached metrics from the routing
321556Srgrimes * table to a dedicated structure indexed by the remote IP address.  It keeps
331556Srgrimes * information on the measured TCP parameters of past TCP sessions to allow
341556Srgrimes * better initial start values to be used with later connections to/from the
351556Srgrimes * same source.  Depending on the network parameters (delay, bandwidth, max
361556Srgrimes * MTU, congestion window) between local and remote sites, this can lead to
371556Srgrimes * significant speed-ups for new TCP connections after the first one.
381556Srgrimes *
 * Due to the tcp_hostcache, all TCP-specific metrics information in the
 * routing table has been removed.  The inpcb no longer keeps a pointer to
 * the routing entry, and protocol-initiated route cloning has been removed
 * as well.  With these changes, the routing table has gone back to being
 * more lightweight and only carries information related to packet forwarding.
441556Srgrimes *
 * tcp_hostcache is designed for multiple concurrent access in SMP
 * environments and high contention.  All bucket rows have their own lock and
 * thus multiple lookups and updates can be done at the same time as long as
 * they are in different bucket rows.  If a request for insertion of a new
 * record can't be satisfied, it simply returns an empty structure.  Nobody
 * and nothing outside of tcp_hostcache.c will ever point directly to any
 * entry in the tcp_hostcache.  All communication is done in an
 * object-oriented way and only functions of tcp_hostcache will manipulate
 * hostcache entries.  Otherwise, we are unable to achieve good behaviour in
 * concurrent access situations.  Since tcp_hostcache is only caching
 * information, there are no fatal consequences if we either can't satisfy
 * any particular request or have to drop/overwrite an existing entry because
 * of bucket limit memory constraints.
581556Srgrimes */
591556Srgrimes
6019720Sphk/*
611556Srgrimes * Many thanks to jlemon for basic structure of tcp_syncache which is being
621556Srgrimes * followed here.
631556Srgrimes */
641556Srgrimes
651556Srgrimes#include <sys/cdefs.h>
661556Srgrimes__FBSDID("$FreeBSD: head/sys/netinet/tcp_hostcache.c 193731 2009-06-08 17:15:40Z zec $");
671556Srgrimes
681556Srgrimes#include "opt_inet6.h"
691556Srgrimes
701556Srgrimes#include <sys/param.h>
711556Srgrimes#include <sys/systm.h>
721556Srgrimes#include <sys/kernel.h>
731556Srgrimes#include <sys/lock.h>
741556Srgrimes#include <sys/mutex.h>
751556Srgrimes#include <sys/malloc.h>
761556Srgrimes#include <sys/socket.h>
7748051Sgreen#include <sys/socketvar.h>
7848051Sgreen#include <sys/sysctl.h>
791556Srgrimes#include <sys/vimage.h>
8048051Sgreen
8148051Sgreen#include <net/if.h>
821556Srgrimes
831556Srgrimes#include <netinet/in.h>
841556Srgrimes#include <netinet/in_systm.h>
851556Srgrimes#include <netinet/ip.h>
861556Srgrimes#include <netinet/in_var.h>
871556Srgrimes#include <netinet/in_pcb.h>
881556Srgrimes#include <netinet/ip_var.h>
8919720Sphk#ifdef INET6
901556Srgrimes#include <netinet/ip6.h>
911556Srgrimes#include <netinet6/ip6_var.h>
921556Srgrimes#endif
931556Srgrimes#include <netinet/tcp.h>
941556Srgrimes#include <netinet/tcp_var.h>
951556Srgrimes#include <netinet/tcp_hostcache.h>
961556Srgrimes#include <netinet/vinet.h>
971556Srgrimes#ifdef INET6
981556Srgrimes#include <netinet6/tcp6_var.h>
991556Srgrimes#endif
1001556Srgrimes
1011556Srgrimes#include <vm/uma.h>
1021556Srgrimes
/*
 * Default sizing and timing parameters; the values are arbitrary.  The time
 * constants are in seconds and are parenthesized so that they expand
 * correctly when used inside a larger expression (division, modulo, ...).
 */
#define TCP_HOSTCACHE_HASHSIZE		512
#define TCP_HOSTCACHE_BUCKETLIMIT	30
#define TCP_HOSTCACHE_EXPIRE		(60*60)	/* one hour */
#define TCP_HOSTCACHE_PRUNE		(5*60)	/* every 5 minutes */
1081556Srgrimes
#ifdef VIMAGE_GLOBALS
/*
 * Hostcache state and the callout driving the periodic purge.  These plain
 * globals are only defined when VIMAGE_GLOBALS is set; the code below
 * reaches them through the V_tcp_hostcache / V_tcp_hc_callout names.
 */
static struct tcp_hostcache tcp_hostcache;
static struct callout tcp_hc_callout;
#endif

/*
 * Internal helpers.  tcp_hc_lookup() and tcp_hc_insert() return a non-NULL
 * entry with its bucket row still locked; the caller must unlock it.
 */
static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *);
static struct hc_metrics *tcp_hc_insert(struct in_conninfo *);
/* Handler behind the "list" sysctl node. */
static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS);
/* Callout handler that ages and frees entries. */
static void tcp_hc_purge(void *);
1181556Srgrimes
1191556SrgrimesSYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0,
1201556Srgrimes    "TCP Host cache");
1211556Srgrimes
1221556SrgrimesSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, cachelimit,
1231556Srgrimes    CTLFLAG_RDTUN, tcp_hostcache.cache_limit, 0,
1241556Srgrimes    "Overall entry limit for hostcache");
1251556Srgrimes
1261556SrgrimesSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, hashsize,
1271556Srgrimes    CTLFLAG_RDTUN, tcp_hostcache.hashsize, 0,
1281556Srgrimes    "Size of TCP hostcache hashtable");
1291556Srgrimes
1301556SrgrimesSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, bucketlimit,
1311556Srgrimes    CTLFLAG_RDTUN, tcp_hostcache.bucket_limit, 0,
1321556Srgrimes    "Per-bucket hash limit for hostcache");
1331556Srgrimes
1341556SrgrimesSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, count,
1351556Srgrimes    CTLFLAG_RD, tcp_hostcache.cache_count, 0,
1361556Srgrimes    "Current number of entries in hostcache");
1371556Srgrimes
13848026SgreenSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, expire,
1391556Srgrimes    CTLFLAG_RW, tcp_hostcache.expire, 0,
1401556Srgrimes    "Expire time of TCP hostcache entries");
1411556Srgrimes
14248026SgreenSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, prune,
1431556Srgrimes     CTLFLAG_RW, tcp_hostcache.prune, 0, "Time between purge runs");
1441556Srgrimes
1451556SrgrimesSYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, purge,
1461556Srgrimes    CTLFLAG_RW, tcp_hostcache.purgeall, 0,
1471556Srgrimes    "Expire all entires on next purge run");
1481556Srgrimes
1491556SrgrimesSYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
1501556Srgrimes    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
1511556Srgrimes    sysctl_tcp_hc_list, "A", "List of all hostcache entries");
1521556Srgrimes
1531556Srgrimes
/* Malloc type used for the hash table allocated in tcp_hc_init(). */
static MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache");

/*
 * Bucket index for an IPv4 address: XOR the address with two right-shifted
 * copies of itself and mask the result with the table's hashmask.  The
 * argument is evaluated several times and must be free of side effects.
 */
#define HOSTCACHE_HASH(ip) \
	(((ip)->s_addr ^ ((ip)->s_addr >> 7) ^ ((ip)->s_addr >> 17)) &	\
	  V_tcp_hostcache.hashmask)

/* XXX: What is the recommended hash to get good entropy for IPv6 addresses? */
/*
 * Bucket index for an IPv6 address: XOR the four 32-bit words of the
 * address together and mask the result with the table's hashmask.
 */
#define HOSTCACHE_HASH6(ip6)				\
	(((ip6)->s6_addr32[0] ^				\
	  (ip6)->s6_addr32[1] ^				\
	  (ip6)->s6_addr32[2] ^				\
	  (ip6)->s6_addr32[3]) &			\
	 V_tcp_hostcache.hashmask)

/* Lock and unlock a bucket row mutex. */
#define THC_LOCK(lp)		mtx_lock(lp)
#define THC_UNLOCK(lp)		mtx_unlock(lp)
1701556Srgrimes
1711556Srgrimesvoid
17220420Sstevetcp_hc_init(void)
17348026Sgreen{
1741556Srgrimes	INIT_VNET_INET(curvnet);
1751556Srgrimes	int i;
1761556Srgrimes
1771556Srgrimes	/*
1781556Srgrimes	 * Initialize hostcache structures.
1791556Srgrimes	 */
18046073Simp	V_tcp_hostcache.cache_count = 0;
18146073Simp	V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
1821556Srgrimes	V_tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT;
1835702Sache	V_tcp_hostcache.cache_limit =
1841556Srgrimes	    V_tcp_hostcache.hashsize * V_tcp_hostcache.bucket_limit;
1851556Srgrimes	V_tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
1861556Srgrimes	V_tcp_hostcache.prune = TCP_HOSTCACHE_PRUNE;
1875702Sache
1881556Srgrimes	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize",
1891556Srgrimes	    &V_tcp_hostcache.hashsize);
1901556Srgrimes	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.cachelimit",
19146073Simp	    &V_tcp_hostcache.cache_limit);
1921556Srgrimes	TUNABLE_INT_FETCH("net.inet.tcp.hostcache.bucketlimit",
1935702Sache	    &V_tcp_hostcache.bucket_limit);
1941556Srgrimes	if (!powerof2(V_tcp_hostcache.hashsize)) {
1951556Srgrimes		printf("WARNING: hostcache hash size is not a power of 2.\n");
1961556Srgrimes		V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; /* default */
1975702Sache	}
1981556Srgrimes	V_tcp_hostcache.hashmask = V_tcp_hostcache.hashsize - 1;
1991556Srgrimes
2001556Srgrimes	/*
20146073Simp	 * Allocate the hash table.
2021556Srgrimes	 */
2035701Sache	V_tcp_hostcache.hashbase = (struct hc_head *)
2045702Sache	    malloc(V_tcp_hostcache.hashsize * sizeof(struct hc_head),
2055701Sache		   M_HOSTCACHE, M_WAITOK | M_ZERO);
2065701Sache
2075702Sache	/*
2085702Sache	 * Initialize the hash buckets.
2095701Sache	 */
2105702Sache	for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
2115701Sache		TAILQ_INIT(&V_tcp_hostcache.hashbase[i].hch_bucket);
2125701Sache		V_tcp_hostcache.hashbase[i].hch_length = 0;
2135702Sache		mtx_init(&V_tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry",
2145702Sache			  NULL, MTX_DEF);
2155701Sache	}
2165701Sache
21746073Simp	/*
21819720Sphk	 * Allocate the hostcache entries.
21919720Sphk	 */
2201556Srgrimes	V_tcp_hostcache.zone =
2211556Srgrimes	    uma_zcreate("hostcache", sizeof(struct hc_metrics),
2221556Srgrimes	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2231556Srgrimes	uma_zone_set_max(V_tcp_hostcache.zone, V_tcp_hostcache.cache_limit);
2241556Srgrimes
2251556Srgrimes	/*
2261556Srgrimes	 * Set up periodic cache cleanup.
2271556Srgrimes	 */
2281556Srgrimes	callout_init(&V_tcp_hc_callout, CALLOUT_MPSAFE);
2291556Srgrimes	callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz,
2301556Srgrimes	    tcp_hc_purge, curvnet);
2311556Srgrimes}
2321556Srgrimes
#ifdef VIMAGE
/*
 * Tear down the hostcache of the current vnet: stop the purge callout,
 * free every cached entry, then release the row mutexes, the UMA zone and
 * the hash table that tcp_hc_init() set up.
 */
void
tcp_hc_destroy(void)
{
	INIT_VNET_INET(curvnet);
	struct hc_head *hc_head;
	struct hc_metrics *hc_entry;
	int i;

	/*
	 * Drain first so that tcp_hc_purge() can neither run concurrently
	 * with the teardown nor re-arm itself afterwards.
	 */
	callout_drain(&V_tcp_hc_callout);

	/*
	 * Walk the hash table and free all entries.
	 */
	for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
		hc_head = &V_tcp_hostcache.hashbase[i];
		THC_LOCK(&hc_head->hch_mtx);
		while ((hc_entry = TAILQ_FIRST(&hc_head->hch_bucket)) != NULL) {
			TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
			uma_zfree(V_tcp_hostcache.zone, hc_entry);
			hc_head->hch_length--;
			V_tcp_hostcache.cache_count--;
		}
		THC_UNLOCK(&hc_head->hch_mtx);
		mtx_destroy(&hc_head->hch_mtx);
	}

	/*
	 * Release the zone and the table itself.
	 */
	uma_zdestroy(V_tcp_hostcache.zone);
	free(V_tcp_hostcache.hashbase, M_HOSTCACHE);
}
#endif
2441556Srgrimes
2451556Srgrimes/*
2461556Srgrimes * Internal function: look up an entry in the hostcache or return NULL.
24728430Sjlemon *
2481556Srgrimes * If an entry has been returned, the caller becomes responsible for
2491556Srgrimes * unlocking the bucket row after he is done reading/modifying the entry.
25048026Sgreen */
25128430Sjlemonstatic struct hc_metrics *
2521556Srgrimestcp_hc_lookup(struct in_conninfo *inc)
2531556Srgrimes{
2541556Srgrimes	INIT_VNET_INET(curvnet);
25548026Sgreen	int hash;
2561556Srgrimes	struct hc_head *hc_head;
2571556Srgrimes	struct hc_metrics *hc_entry;
2581556Srgrimes
2591556Srgrimes	KASSERT(inc != NULL, ("tcp_hc_lookup with NULL in_conninfo pointer"));
2601556Srgrimes
2611556Srgrimes	/*
2621556Srgrimes	 * Hash the foreign ip address.
2631556Srgrimes	 */
26448026Sgreen	if (inc->inc_flags & INC_ISIPV6)
2651556Srgrimes		hash = HOSTCACHE_HASH6(&inc->inc6_faddr);
2661556Srgrimes	else
2671556Srgrimes		hash = HOSTCACHE_HASH(&inc->inc_faddr);
2681556Srgrimes
26928430Sjlemon	hc_head = &V_tcp_hostcache.hashbase[hash];
2701556Srgrimes
2711556Srgrimes	/*
2721556Srgrimes	 * Acquire lock for this bucket row; we release the lock if we don't
2731556Srgrimes	 * find an entry, otherwise the caller has to unlock after he is
2741556Srgrimes	 * done.
2751556Srgrimes	 */
2761556Srgrimes	THC_LOCK(&hc_head->hch_mtx);
2771556Srgrimes
2781556Srgrimes	/*
2791556Srgrimes	 * Iterate through entries in bucket row looking for a match.
2801556Srgrimes	 */
2811556Srgrimes	TAILQ_FOREACH(hc_entry, &hc_head->hch_bucket, rmx_q) {
2821556Srgrimes		if (inc->inc_flags & INC_ISIPV6) {
2831556Srgrimes			if (memcmp(&inc->inc6_faddr, &hc_entry->ip6,
2841556Srgrimes			    sizeof(inc->inc6_faddr)) == 0)
2851556Srgrimes				return hc_entry;
2861556Srgrimes		} else {
2871556Srgrimes			if (memcmp(&inc->inc_faddr, &hc_entry->ip4,
2881556Srgrimes			    sizeof(inc->inc_faddr)) == 0)
2891556Srgrimes				return hc_entry;
2901556Srgrimes		}
2911556Srgrimes	}
2921556Srgrimes
2931556Srgrimes	/*
2941556Srgrimes	 * We were unsuccessful and didn't find anything.
2951556Srgrimes	 */
2961556Srgrimes	THC_UNLOCK(&hc_head->hch_mtx);
2971556Srgrimes	return NULL;
2981556Srgrimes}
2991556Srgrimes
3001556Srgrimes/*
3011556Srgrimes * Internal function: insert an entry into the hostcache or return NULL if
3021556Srgrimes * unable to allocate a new one.
3031556Srgrimes *
3041556Srgrimes * If an entry has been returned, the caller becomes responsible for
3051556Srgrimes * unlocking the bucket row after he is done reading/modifying the entry.
3061556Srgrimes */
3071556Srgrimesstatic struct hc_metrics *
3081556Srgrimestcp_hc_insert(struct in_conninfo *inc)
3091556Srgrimes{
3101556Srgrimes	INIT_VNET_INET(curvnet);
3111556Srgrimes	int hash;
3121556Srgrimes	struct hc_head *hc_head;
3131556Srgrimes	struct hc_metrics *hc_entry;
3141556Srgrimes
3151556Srgrimes	KASSERT(inc != NULL, ("tcp_hc_insert with NULL in_conninfo pointer"));
3161556Srgrimes
3171556Srgrimes	/*
3181556Srgrimes	 * Hash the foreign ip address.
3191556Srgrimes	 */
32032324Sjoerg	if (inc->inc_flags & INC_ISIPV6)
3211556Srgrimes		hash = HOSTCACHE_HASH6(&inc->inc6_faddr);
3221556Srgrimes	else
3231556Srgrimes		hash = HOSTCACHE_HASH(&inc->inc_faddr);
3241556Srgrimes
3251556Srgrimes	hc_head = &V_tcp_hostcache.hashbase[hash];
3261556Srgrimes
3271556Srgrimes	/*
3281556Srgrimes	 * Acquire lock for this bucket row; we release the lock if we don't
3291556Srgrimes	 * find an entry, otherwise the caller has to unlock after he is
3301556Srgrimes	 * done.
3311556Srgrimes	 */
3321556Srgrimes	THC_LOCK(&hc_head->hch_mtx);
3331556Srgrimes
3341556Srgrimes	/*
3351556Srgrimes	 * If the bucket limit is reached, reuse the least-used element.
3361556Srgrimes	 */
3371556Srgrimes	if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit ||
3381556Srgrimes	    V_tcp_hostcache.cache_count >= V_tcp_hostcache.cache_limit) {
3391556Srgrimes		hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
3401556Srgrimes		/*
3411556Srgrimes		 * At first we were dropping the last element, just to
3421556Srgrimes		 * reacquire it in the next two lines again, which isn't very
3431556Srgrimes		 * efficient.  Instead just reuse the least used element.
3441556Srgrimes		 * We may drop something that is still "in-use" but we can be
34528430Sjlemon		 * "lossy".
34628430Sjlemon		 * Just give up if this bucket row is empty and we don't have
34728430Sjlemon		 * anything to replace.
34828430Sjlemon		 */
34928430Sjlemon		if (hc_entry == NULL) {
3501556Srgrimes			THC_UNLOCK(&hc_head->hch_mtx);
3511556Srgrimes			return NULL;
35230312Sjoerg		}
3531556Srgrimes		TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
3541556Srgrimes		V_tcp_hostcache.hashbase[hash].hch_length--;
3551556Srgrimes		V_tcp_hostcache.cache_count--;
3561556Srgrimes		TCPSTAT_INC(tcps_hc_bucketoverflow);
3571556Srgrimes#if 0
3581556Srgrimes		uma_zfree(V_tcp_hostcache.zone, hc_entry);
3591556Srgrimes#endif
3601556Srgrimes	} else {
36148051Sgreen		/*
36248026Sgreen		 * Allocate a new entry, or balk if not possible.
3631556Srgrimes		 */
3641556Srgrimes		hc_entry = uma_zalloc(V_tcp_hostcache.zone, M_NOWAIT);
3651556Srgrimes		if (hc_entry == NULL) {
3661556Srgrimes			THC_UNLOCK(&hc_head->hch_mtx);
3671556Srgrimes			return NULL;
3681556Srgrimes		}
3691556Srgrimes	}
3701556Srgrimes
3711556Srgrimes	/*
3721556Srgrimes	 * Initialize basic information of hostcache entry.
3731556Srgrimes	 */
3741556Srgrimes	bzero(hc_entry, sizeof(*hc_entry));
3751556Srgrimes	if (inc->inc_flags & INC_ISIPV6)
3761556Srgrimes		bcopy(&inc->inc6_faddr, &hc_entry->ip6, sizeof(hc_entry->ip6));
3771556Srgrimes	else
3781556Srgrimes		hc_entry->ip4 = inc->inc_faddr;
3791556Srgrimes	hc_entry->rmx_head = hc_head;
3801556Srgrimes	hc_entry->rmx_expire = V_tcp_hostcache.expire;
3811556Srgrimes
3821556Srgrimes	/*
3831556Srgrimes	 * Put it upfront.
38430312Sjoerg	 */
38530312Sjoerg	TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
38630312Sjoerg	V_tcp_hostcache.hashbase[hash].hch_length++;
38730312Sjoerg	V_tcp_hostcache.cache_count++;
38830312Sjoerg	TCPSTAT_INC(tcps_hc_added);
38930312Sjoerg
39030312Sjoerg	return hc_entry;
39130312Sjoerg}
39230312Sjoerg
39330312Sjoerg/*
39430312Sjoerg * External function: look up an entry in the hostcache and fill out the
39530312Sjoerg * supplied TCP metrics structure.  Fills in NULL when no entry was found or
39630312Sjoerg * a value is not set.
39730312Sjoerg */
39830312Sjoergvoid
39930312Sjoergtcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite)
40048051Sgreen{
40148051Sgreen	INIT_VNET_INET(curvnet);
40248051Sgreen	struct hc_metrics *hc_entry;
40348051Sgreen
40430312Sjoerg	/*
40530312Sjoerg	 * Find the right bucket.
40630312Sjoerg	 */
40730312Sjoerg	hc_entry = tcp_hc_lookup(inc);
40830312Sjoerg
40930312Sjoerg	/*
41030312Sjoerg	 * If we don't have an existing object.
41130312Sjoerg	 */
41230312Sjoerg	if (hc_entry == NULL) {
41330312Sjoerg		bzero(hc_metrics_lite, sizeof(*hc_metrics_lite));
4141556Srgrimes		return;
4151556Srgrimes	}
4161556Srgrimes	hc_entry->rmx_hits++;
4171556Srgrimes	hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
4181556Srgrimes
4191556Srgrimes	hc_metrics_lite->rmx_mtu = hc_entry->rmx_mtu;
4201556Srgrimes	hc_metrics_lite->rmx_ssthresh = hc_entry->rmx_ssthresh;
4211556Srgrimes	hc_metrics_lite->rmx_rtt = hc_entry->rmx_rtt;
4221556Srgrimes	hc_metrics_lite->rmx_rttvar = hc_entry->rmx_rttvar;
4231556Srgrimes	hc_metrics_lite->rmx_bandwidth = hc_entry->rmx_bandwidth;
4241556Srgrimes	hc_metrics_lite->rmx_cwnd = hc_entry->rmx_cwnd;
4251556Srgrimes	hc_metrics_lite->rmx_sendpipe = hc_entry->rmx_sendpipe;
4261556Srgrimes	hc_metrics_lite->rmx_recvpipe = hc_entry->rmx_recvpipe;
4271556Srgrimes
4281556Srgrimes	/*
4291556Srgrimes	 * Unlock bucket row.
4301556Srgrimes	 */
4311556Srgrimes	THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
4321556Srgrimes}
4331556Srgrimes
4341556Srgrimes/*
4351556Srgrimes * External function: look up an entry in the hostcache and return the
43648051Sgreen * discovered path MTU.  Returns NULL if no entry is found or value is not
4371556Srgrimes * set.
4381556Srgrimes */
43948051Sgreenu_long
4401556Srgrimestcp_hc_getmtu(struct in_conninfo *inc)
4411556Srgrimes{
4421556Srgrimes	INIT_VNET_INET(curvnet);
4431556Srgrimes	struct hc_metrics *hc_entry;
4441556Srgrimes	u_long mtu;
4451556Srgrimes
4461556Srgrimes	hc_entry = tcp_hc_lookup(inc);
4471556Srgrimes	if (hc_entry == NULL) {
4481556Srgrimes		return 0;
4491556Srgrimes	}
450	hc_entry->rmx_hits++;
451	hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
452
453	mtu = hc_entry->rmx_mtu;
454	THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
455	return mtu;
456}
457
458/*
459 * External function: update the MTU value of an entry in the hostcache.
460 * Creates a new entry if none was found.
461 */
462void
463tcp_hc_updatemtu(struct in_conninfo *inc, u_long mtu)
464{
465	INIT_VNET_INET(curvnet);
466	struct hc_metrics *hc_entry;
467
468	/*
469	 * Find the right bucket.
470	 */
471	hc_entry = tcp_hc_lookup(inc);
472
473	/*
474	 * If we don't have an existing object, try to insert a new one.
475	 */
476	if (hc_entry == NULL) {
477		hc_entry = tcp_hc_insert(inc);
478		if (hc_entry == NULL)
479			return;
480	}
481	hc_entry->rmx_updates++;
482	hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
483
484	hc_entry->rmx_mtu = mtu;
485
486	/*
487	 * Put it upfront so we find it faster next time.
488	 */
489	TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
490	TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
491
492	/*
493	 * Unlock bucket row.
494	 */
495	THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
496}
497
498/*
499 * External function: update the TCP metrics of an entry in the hostcache.
500 * Creates a new entry if none was found.
501 */
502void
503tcp_hc_update(struct in_conninfo *inc, struct hc_metrics_lite *hcml)
504{
505	INIT_VNET_INET(curvnet);
506	struct hc_metrics *hc_entry;
507
508	hc_entry = tcp_hc_lookup(inc);
509	if (hc_entry == NULL) {
510		hc_entry = tcp_hc_insert(inc);
511		if (hc_entry == NULL)
512			return;
513	}
514	hc_entry->rmx_updates++;
515	hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
516
517	if (hcml->rmx_rtt != 0) {
518		if (hc_entry->rmx_rtt == 0)
519			hc_entry->rmx_rtt = hcml->rmx_rtt;
520		else
521			hc_entry->rmx_rtt =
522			    (hc_entry->rmx_rtt + hcml->rmx_rtt) / 2;
523		TCPSTAT_INC(tcps_cachedrtt);
524	}
525	if (hcml->rmx_rttvar != 0) {
526	        if (hc_entry->rmx_rttvar == 0)
527			hc_entry->rmx_rttvar = hcml->rmx_rttvar;
528		else
529			hc_entry->rmx_rttvar =
530			    (hc_entry->rmx_rttvar + hcml->rmx_rttvar) / 2;
531		TCPSTAT_INC(tcps_cachedrttvar);
532	}
533	if (hcml->rmx_ssthresh != 0) {
534		if (hc_entry->rmx_ssthresh == 0)
535			hc_entry->rmx_ssthresh = hcml->rmx_ssthresh;
536		else
537			hc_entry->rmx_ssthresh =
538			    (hc_entry->rmx_ssthresh + hcml->rmx_ssthresh) / 2;
539		TCPSTAT_INC(tcps_cachedssthresh);
540	}
541	if (hcml->rmx_bandwidth != 0) {
542		if (hc_entry->rmx_bandwidth == 0)
543			hc_entry->rmx_bandwidth = hcml->rmx_bandwidth;
544		else
545			hc_entry->rmx_bandwidth =
546			    (hc_entry->rmx_bandwidth + hcml->rmx_bandwidth) / 2;
547		/* TCPSTAT_INC(tcps_cachedbandwidth); */
548	}
549	if (hcml->rmx_cwnd != 0) {
550		if (hc_entry->rmx_cwnd == 0)
551			hc_entry->rmx_cwnd = hcml->rmx_cwnd;
552		else
553			hc_entry->rmx_cwnd =
554			    (hc_entry->rmx_cwnd + hcml->rmx_cwnd) / 2;
555		/* TCPSTAT_INC(tcps_cachedcwnd); */
556	}
557	if (hcml->rmx_sendpipe != 0) {
558		if (hc_entry->rmx_sendpipe == 0)
559			hc_entry->rmx_sendpipe = hcml->rmx_sendpipe;
560		else
561			hc_entry->rmx_sendpipe =
562			    (hc_entry->rmx_sendpipe + hcml->rmx_sendpipe) /2;
563		/* TCPSTAT_INC(tcps_cachedsendpipe); */
564	}
565	if (hcml->rmx_recvpipe != 0) {
566		if (hc_entry->rmx_recvpipe == 0)
567			hc_entry->rmx_recvpipe = hcml->rmx_recvpipe;
568		else
569			hc_entry->rmx_recvpipe =
570			    (hc_entry->rmx_recvpipe + hcml->rmx_recvpipe) /2;
571		/* TCPSTAT_INC(tcps_cachedrecvpipe); */
572	}
573
574	TAILQ_REMOVE(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
575	TAILQ_INSERT_HEAD(&hc_entry->rmx_head->hch_bucket, hc_entry, rmx_q);
576	THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
577}
578
579/*
580 * Sysctl function: prints the list and values of all hostcache entries in
581 * unsorted order.
582 */
583static int
584sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
585{
586	INIT_VNET_INET(curvnet);
587	int bufsize;
588	int linesize = 128;
589	char *p, *buf;
590	int len, i, error;
591	struct hc_metrics *hc_entry;
592#ifdef INET6
593	char ip6buf[INET6_ADDRSTRLEN];
594#endif
595
596	bufsize = linesize * (V_tcp_hostcache.cache_count + 1);
597
598	p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
599
600	len = snprintf(p, linesize,
601		"\nIP address        MTU  SSTRESH      RTT   RTTVAR BANDWIDTH "
602		"    CWND SENDPIPE RECVPIPE HITS  UPD  EXP\n");
603	p += len;
604
605#define msec(u) (((u) + 500) / 1000)
606	for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
607		THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
608		TAILQ_FOREACH(hc_entry, &V_tcp_hostcache.hashbase[i].hch_bucket,
609			      rmx_q) {
610			len = snprintf(p, linesize,
611			    "%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu "
612			    "%4lu %4lu %4i\n",
613			    hc_entry->ip4.s_addr ? inet_ntoa(hc_entry->ip4) :
614#ifdef INET6
615				ip6_sprintf(ip6buf, &hc_entry->ip6),
616#else
617				"IPv6?",
618#endif
619			    hc_entry->rmx_mtu,
620			    hc_entry->rmx_ssthresh,
621			    msec(hc_entry->rmx_rtt *
622				(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
623			    msec(hc_entry->rmx_rttvar *
624				(RTM_RTTUNIT / (hz * TCP_RTT_SCALE))),
625			    hc_entry->rmx_bandwidth * 8,
626			    hc_entry->rmx_cwnd,
627			    hc_entry->rmx_sendpipe,
628			    hc_entry->rmx_recvpipe,
629			    hc_entry->rmx_hits,
630			    hc_entry->rmx_updates,
631			    hc_entry->rmx_expire);
632			p += len;
633		}
634		THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
635	}
636#undef msec
637	error = SYSCTL_OUT(req, buf, p - buf);
638	free(buf, M_TEMP);
639	return(error);
640}
641
642/*
643 * Expire and purge (old|all) entries in the tcp_hostcache.  Runs
644 * periodically from the callout.
645 */
646static void
647tcp_hc_purge(void *arg)
648{
649	CURVNET_SET((struct vnet *) arg);
650	INIT_VNET_INET(curvnet);
651	struct hc_metrics *hc_entry, *hc_next;
652	int all = 0;
653	int i;
654
655	if (V_tcp_hostcache.purgeall) {
656		all = 1;
657		V_tcp_hostcache.purgeall = 0;
658	}
659
660	for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
661		THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
662		TAILQ_FOREACH_SAFE(hc_entry,
663		    &V_tcp_hostcache.hashbase[i].hch_bucket, rmx_q, hc_next) {
664			if (all || hc_entry->rmx_expire <= 0) {
665				TAILQ_REMOVE(&V_tcp_hostcache.hashbase[i].hch_bucket,
666					      hc_entry, rmx_q);
667				uma_zfree(V_tcp_hostcache.zone, hc_entry);
668				V_tcp_hostcache.hashbase[i].hch_length--;
669				V_tcp_hostcache.cache_count--;
670			} else
671				hc_entry->rmx_expire -= V_tcp_hostcache.prune;
672		}
673		THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
674	}
675
676	callout_reset(&V_tcp_hc_callout, V_tcp_hostcache.prune * hz,
677	    tcp_hc_purge, arg);
678	CURVNET_RESTORE();
679}
680