tcp_timer.c revision 309108
1193267Sjkim/*-
2193267Sjkim * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3193267Sjkim *	The Regents of the University of California.  All rights reserved.
4193267Sjkim *
5193267Sjkim * Redistribution and use in source and binary forms, with or without
6193267Sjkim * modification, are permitted provided that the following conditions
7217365Sjkim * are met:
8306536Sjkim * 1. Redistributions of source code must retain the above copyright
9193267Sjkim *    notice, this list of conditions and the following disclaimer.
10193267Sjkim * 2. Redistributions in binary form must reproduce the above copyright
11217365Sjkim *    notice, this list of conditions and the following disclaimer in the
12217365Sjkim *    documentation and/or other materials provided with the distribution.
13217365Sjkim * 4. Neither the name of the University nor the names of its contributors
14217365Sjkim *    may be used to endorse or promote products derived from this software
15217365Sjkim *    without specific prior written permission.
16217365Sjkim *
17217365Sjkim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18217365Sjkim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19217365Sjkim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20217365Sjkim * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21217365Sjkim * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22217365Sjkim * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23217365Sjkim * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24217365Sjkim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25193267Sjkim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26217365Sjkim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27217365Sjkim * SUCH DAMAGE.
28217365Sjkim *
29193267Sjkim *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
30217365Sjkim */
31217365Sjkim
32217365Sjkim#include <sys/cdefs.h>
33217365Sjkim__FBSDID("$FreeBSD: stable/10/sys/netinet/tcp_timer.c 309108 2016-11-24 14:48:46Z jch $");
34217365Sjkim
35217365Sjkim#include "opt_inet.h"
36217365Sjkim#include "opt_inet6.h"
37217365Sjkim#include "opt_tcpdebug.h"
38217365Sjkim
39217365Sjkim#include <sys/param.h>
40217365Sjkim#include <sys/kernel.h>
41217365Sjkim#include <sys/lock.h>
42217365Sjkim#include <sys/mbuf.h>
43193267Sjkim#include <sys/mutex.h>
44281075Sdim#include <sys/protosw.h>
45281075Sdim#include <sys/smp.h>
46193341Sjkim#include <sys/socket.h>
47193341Sjkim#include <sys/socketvar.h>
48193341Sjkim#include <sys/sysctl.h>
49193267Sjkim#include <sys/systm.h>
50193267Sjkim
51193267Sjkim#include <net/if.h>
52193267Sjkim#include <net/route.h>
53193267Sjkim#include <net/vnet.h>
54193267Sjkim
55193267Sjkim#include <netinet/cc.h>
56193267Sjkim#include <netinet/in.h>
57193267Sjkim#include <netinet/in_pcb.h>
58193267Sjkim#include <netinet/in_systm.h>
59193267Sjkim#ifdef INET6
60193267Sjkim#include <netinet6/in6_pcb.h>
61193267Sjkim#endif
62193267Sjkim#include <netinet/ip_var.h>
63193267Sjkim#include <netinet/tcp_fsm.h>
64193267Sjkim#include <netinet/tcp_timer.h>
65193267Sjkim#include <netinet/tcp_var.h>
66193267Sjkim#ifdef INET6
67193267Sjkim#include <netinet6/tcp6_var.h>
68193267Sjkim#endif
69193267Sjkim#include <netinet/tcpip.h>
70193267Sjkim#ifdef TCPDEBUG
71193267Sjkim#include <netinet/tcp_debug.h>
72193267Sjkim#endif
73193267Sjkim
74193267Sjkimint    tcp_persmin;
75193267SjkimSYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT|CTLFLAG_RW,
76193267Sjkim    &tcp_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval");
77193267Sjkim
78193267Sjkimint    tcp_persmax;
79193267SjkimSYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT|CTLFLAG_RW,
80193267Sjkim    &tcp_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval");
81193267Sjkim
82193267Sjkimint	tcp_keepinit;
83193267SjkimSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
84193267Sjkim    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
85193267Sjkim
86193267Sjkimint	tcp_keepidle;
87193267SjkimSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
88193267Sjkim    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
89197104Sjkim
90197104Sjkimint	tcp_keepintvl;
91197104SjkimSYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
92197104Sjkim    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
93197104Sjkim
94197104Sjkimint	tcp_delacktime;
95281075SdimSYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
96281075Sdim    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
97281075Sdim    "Time before a delayed ACK is sent");
98281075Sdim
99281075Sdimint	tcp_msl;
100281075SdimSYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
101197104Sjkim    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
102197104Sjkim
103281075Sdimint	tcp_rexmit_min;
104197104SjkimSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
105197104Sjkim    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
106197104Sjkim    "Minimum Retransmission Timeout");
107197104Sjkim
108193267Sjkimint	tcp_rexmit_slop;
109197104SjkimSYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
110197104Sjkim    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
111197104Sjkim    "Retransmission Timer Slop");
112193267Sjkim
113193267Sjkimstatic int	always_keepalive = 1;
114193267SjkimSYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
115193267Sjkim    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
116193267Sjkim
117193267Sjkimint    tcp_fast_finwait2_recycle = 0;
118193267SjkimSYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
119193267Sjkim    &tcp_fast_finwait2_recycle, 0,
120193267Sjkim    "Recycle closed FIN_WAIT_2 connections faster");
121193267Sjkim
122193267Sjkimint    tcp_finwait2_timeout;
123193267SjkimSYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
124193267Sjkim    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
125193267Sjkim
126193267Sjkimint	tcp_keepcnt = TCPTV_KEEPCNT;
127193267SjkimSYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
128193267Sjkim    "Number of keepalive probes to send");
129197104Sjkim
130197104Sjkim	/* max idle probes */
131197104Sjkimint	tcp_maxpersistidle;
132197104Sjkim
133197104Sjkimstatic int	tcp_rexmit_drop_options = 0;
134197104SjkimSYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
135193267Sjkim    &tcp_rexmit_drop_options, 0,
136193267Sjkim    "Drop TCP options from 3rd and later retransmitted SYN");
137193267Sjkim
138193267Sjkimstatic VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
139197104Sjkim#define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
140193267SjkimSYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
141193267Sjkim    CTLFLAG_RW,
142254745Sjkim    &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
143254745Sjkim    "Path MTU Discovery Black Hole Detection Enabled");
144193267Sjkim
145193267Sjkimstatic VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
146193267Sjkim#define	V_tcp_pmtud_blackhole_activated \
147193267Sjkim    VNET(tcp_pmtud_blackhole_activated)
148193267SjkimSYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
149193267Sjkim    CTLFLAG_RD,
150193267Sjkim    &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
151193267Sjkim    "Path MTU Discovery Black Hole Detection, Activation Count");
152197104Sjkim
153193267Sjkimstatic VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
154193267Sjkim#define	V_tcp_pmtud_blackhole_activated_min_mss \
155193267Sjkim    VNET(tcp_pmtud_blackhole_activated_min_mss)
156193267SjkimSYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
157197104Sjkim    CTLFLAG_RD,
158193267Sjkim    &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
159197104Sjkim    "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
160197104Sjkim
161193267Sjkimstatic VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
162197104Sjkim#define	V_tcp_pmtud_blackhole_failed	VNET(tcp_pmtud_blackhole_failed)
163193267SjkimSYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
164193267Sjkim    CTLFLAG_RD,
165193267Sjkim    &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
166254745Sjkim    "Path MTU Discovery Black Hole Detection, Failure Count");
167193267Sjkim
168193267Sjkim#ifdef INET
169197104Sjkimstatic VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
170193267Sjkim#define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
171197104SjkimSYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
172306536Sjkim    CTLFLAG_RW,
173197104Sjkim    &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
174197104Sjkim    "Path MTU Discovery Black Hole Detection lowered MSS");
175197104Sjkim#endif
176197104Sjkim
177197104Sjkim#ifdef INET6
178197104Sjkimstatic VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
179197104Sjkim#define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
180254745SjkimSYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
181254745Sjkim    CTLFLAG_RW,
182254745Sjkim    &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
183231844Sjkim    "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
184231844Sjkim#endif
185231844Sjkim
186231844Sjkimstatic int	per_cpu_timers = 0;
187231844SjkimSYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
188231844Sjkim    &per_cpu_timers , 0, "run tcp timers on all cpus");
189197104Sjkim
190306536Sjkim#define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
191197104Sjkim		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
192197104Sjkim
193197104Sjkim/*
194197104Sjkim * Tcp protocol timeout routine called every 500 ms.
195193267Sjkim * Updates timestamps used for TCP
196197104Sjkim * causes finite state machine actions if timers expire.
197197104Sjkim */
198197104Sjkimvoid
199193267Sjkimtcp_slowtimo(void)
200197104Sjkim{
201306536Sjkim	VNET_ITERATOR_DECL(vnet_iter);
202197104Sjkim
203197104Sjkim	VNET_LIST_RLOCK_NOSLEEP();
204197104Sjkim	VNET_FOREACH(vnet_iter) {
205197104Sjkim		CURVNET_SET(vnet_iter);
206197104Sjkim		(void) tcp_tw_2msl_scan(0);
207254745Sjkim		CURVNET_RESTORE();
208254745Sjkim	}
209254745Sjkim	VNET_LIST_RUNLOCK_NOSLEEP();
210254745Sjkim}
211193267Sjkim
212193267Sjkimint	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
213193267Sjkim    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
214197104Sjkim
215197104Sjkimint	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
216197104Sjkim    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
217193267Sjkim
218193267Sjkimstatic int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
219254745Sjkim
220193267Sjkim/*
221193267Sjkim * TCP timer processing.
222193267Sjkim */
223193267Sjkim
224193267Sjkimvoid
225193267Sjkimtcp_timer_delack(void *xtp)
226193267Sjkim{
227193267Sjkim	struct tcpcb *tp = xtp;
228193267Sjkim	struct inpcb *inp;
229197104Sjkim	CURVNET_SET(tp->t_vnet);
230193267Sjkim
231193267Sjkim	inp = tp->t_inpcb;
232193267Sjkim	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
233193267Sjkim	INP_WLOCK(inp);
234193267Sjkim	if (callout_pending(&tp->t_timers->tt_delack) ||
235193267Sjkim	    !callout_active(&tp->t_timers->tt_delack)) {
236193267Sjkim		INP_WUNLOCK(inp);
237193267Sjkim		CURVNET_RESTORE();
238193267Sjkim		return;
239193267Sjkim	}
240197104Sjkim	callout_deactivate(&tp->t_timers->tt_delack);
241193267Sjkim	if ((inp->inp_flags & INP_DROPPED) != 0) {
242193267Sjkim		INP_WUNLOCK(inp);
243193267Sjkim		CURVNET_RESTORE();
244193267Sjkim		return;
245193267Sjkim	}
246193267Sjkim	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
247193267Sjkim		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
248193267Sjkim	KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0,
249193267Sjkim		("%s: tp %p delack callout should be running", __func__, tp));
250193267Sjkim
251197104Sjkim	tp->t_flags |= TF_ACKNOW;
252193267Sjkim	TCPSTAT_INC(tcps_delack);
253197104Sjkim	(void) tcp_output(tp);
254197104Sjkim	INP_WUNLOCK(inp);
255193267Sjkim	CURVNET_RESTORE();
256197104Sjkim}
257193267Sjkim
258193267Sjkimvoid
259193267Sjkimtcp_timer_2msl(void *xtp)
260197104Sjkim{
261197104Sjkim	struct tcpcb *tp = xtp;
262193267Sjkim	struct inpcb *inp;
263197104Sjkim	CURVNET_SET(tp->t_vnet);
264193267Sjkim#ifdef TCPDEBUG
265197104Sjkim	int ostate;
266306536Sjkim
267197104Sjkim	ostate = tp->t_state;
268197104Sjkim#endif
269197104Sjkim	INP_INFO_RLOCK(&V_tcbinfo);
270197104Sjkim	inp = tp->t_inpcb;
271197104Sjkim	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
272197104Sjkim	INP_WLOCK(inp);
273197104Sjkim	tcp_free_sackholes(tp);
274231844Sjkim	if (callout_pending(&tp->t_timers->tt_2msl) ||
275231844Sjkim	    !callout_active(&tp->t_timers->tt_2msl)) {
276231844Sjkim		INP_WUNLOCK(tp->t_inpcb);
277231844Sjkim		INP_INFO_RUNLOCK(&V_tcbinfo);
278231844Sjkim		CURVNET_RESTORE();
279231844Sjkim		return;
280197104Sjkim	}
281306536Sjkim	callout_deactivate(&tp->t_timers->tt_2msl);
282197104Sjkim	if ((inp->inp_flags & INP_DROPPED) != 0) {
283197104Sjkim		INP_WUNLOCK(inp);
284197104Sjkim		INP_INFO_RUNLOCK(&V_tcbinfo);
285197104Sjkim		CURVNET_RESTORE();
286193267Sjkim		return;
287197104Sjkim	}
288197104Sjkim	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
289197104Sjkim		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
290306536Sjkim	KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0,
291197104Sjkim		("%s: tp %p 2msl callout should be running", __func__, tp));
292197104Sjkim	/*
293197104Sjkim	 * 2 MSL timeout in shutdown went off.  If we're closed but
294197104Sjkim	 * still waiting for peer to close and connection has been idle
295197104Sjkim	 * too long delete connection control block.  Otherwise, check
296193267Sjkim	 * again in a bit.
297193267Sjkim	 *
298193267Sjkim	 * If in TIME_WAIT state just ignore as this timeout is handled in
299197104Sjkim	 * tcp_tw_2msl_scan().
300197104Sjkim	 *
301197104Sjkim	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed,
302193267Sjkim	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it.
303193267Sjkim	 * Ignore fact that there were recent incoming segments.
304193267Sjkim	 */
305193267Sjkim	if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
306193267Sjkim		INP_WUNLOCK(inp);
307193267Sjkim		INP_INFO_RUNLOCK(&V_tcbinfo);
308193267Sjkim		CURVNET_RESTORE();
309193267Sjkim		return;
310231844Sjkim	}
311193267Sjkim	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
312193267Sjkim	    tp->t_inpcb && tp->t_inpcb->inp_socket &&
313193267Sjkim	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
314193267Sjkim		TCPSTAT_INC(tcps_finwait2_drops);
315193267Sjkim		tp = tcp_close(tp);
316193267Sjkim	} else {
317193267Sjkim		if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
318193267Sjkim			if (!callout_reset(&tp->t_timers->tt_2msl,
319193267Sjkim			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
320193267Sjkim				tp->t_timers->tt_flags &= ~TT_2MSL_RST;
321193267Sjkim			}
322193267Sjkim		} else
323193267Sjkim		       tp = tcp_close(tp);
324193267Sjkim       }
325193267Sjkim
326193267Sjkim#ifdef TCPDEBUG
327193267Sjkim	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
328193267Sjkim		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
329193267Sjkim			  PRU_SLOWTIMO);
330193267Sjkim#endif
331193267Sjkim	if (tp != NULL)
332193267Sjkim		INP_WUNLOCK(inp);
333193267Sjkim	INP_INFO_RUNLOCK(&V_tcbinfo);
334193267Sjkim	CURVNET_RESTORE();
335193267Sjkim}
336193267Sjkim
337193267Sjkimvoid
338193267Sjkimtcp_timer_keep(void *xtp)
339193267Sjkim{
340193267Sjkim	struct tcpcb *tp = xtp;
341193267Sjkim	struct tcptemp *t_template;
342193267Sjkim	struct inpcb *inp;
343193267Sjkim	CURVNET_SET(tp->t_vnet);
344193267Sjkim#ifdef TCPDEBUG
345193267Sjkim	int ostate;
346193267Sjkim
347193267Sjkim	ostate = tp->t_state;
348193267Sjkim#endif
349193267Sjkim	INP_INFO_RLOCK(&V_tcbinfo);
350193267Sjkim	inp = tp->t_inpcb;
351193267Sjkim	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
352193267Sjkim	INP_WLOCK(inp);
353193267Sjkim	if (callout_pending(&tp->t_timers->tt_keep) ||
354193267Sjkim	    !callout_active(&tp->t_timers->tt_keep)) {
355193267Sjkim		INP_WUNLOCK(inp);
356193267Sjkim		INP_INFO_RUNLOCK(&V_tcbinfo);
357193267Sjkim		CURVNET_RESTORE();
358193267Sjkim		return;
359193267Sjkim	}
360193267Sjkim	callout_deactivate(&tp->t_timers->tt_keep);
361306536Sjkim	if ((inp->inp_flags & INP_DROPPED) != 0) {
362193267Sjkim		INP_WUNLOCK(inp);
363193267Sjkim		INP_INFO_RUNLOCK(&V_tcbinfo);
364193267Sjkim		CURVNET_RESTORE();
365193267Sjkim		return;
366193267Sjkim	}
367193267Sjkim	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
368193267Sjkim		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
369193267Sjkim	KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0,
370306536Sjkim		("%s: tp %p keep callout should be running", __func__, tp));
371193267Sjkim	/*
372193267Sjkim	 * Keep-alive timer went off; send something
373193267Sjkim	 * or drop connection if idle for too long.
374193267Sjkim	 */
375193267Sjkim	TCPSTAT_INC(tcps_keeptimeo);
376193267Sjkim	if (tp->t_state < TCPS_ESTABLISHED)
377193267Sjkim		goto dropit;
378193267Sjkim	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
379193267Sjkim	    tp->t_state <= TCPS_CLOSING) {
380193267Sjkim		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
381193267Sjkim			goto dropit;
382193267Sjkim		/*
383193267Sjkim		 * Send a packet designed to force a response
384193267Sjkim		 * if the peer is up and reachable:
385193267Sjkim		 * either an ACK if the connection is still alive,
386193267Sjkim		 * or an RST if the peer has closed the connection
387193267Sjkim		 * due to timeout or reboot.
388193267Sjkim		 * Using sequence number tp->snd_una-1
389245582Sjkim		 * causes the transmitted zero-length segment
390193267Sjkim		 * to lie outside the receive window;
391193267Sjkim		 * by the protocol spec, this requires the
392193267Sjkim		 * correspondent TCP to respond.
393193267Sjkim		 */
394193267Sjkim		TCPSTAT_INC(tcps_keepprobe);
395193267Sjkim		t_template = tcpip_maketemplate(inp);
396193267Sjkim		if (t_template) {
397193267Sjkim			tcp_respond(tp, t_template->tt_ipgen,
398193267Sjkim				    &t_template->tt_t, (struct mbuf *)NULL,
399193267Sjkim				    tp->rcv_nxt, tp->snd_una - 1, 0);
400193267Sjkim			free(t_template, M_TEMP);
401193267Sjkim		}
402193267Sjkim		if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
403193267Sjkim		    tcp_timer_keep, tp)) {
404193267Sjkim			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
405193267Sjkim		}
406193267Sjkim	} else if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
407193267Sjkim		    tcp_timer_keep, tp)) {
408193267Sjkim			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
409193267Sjkim		}
410193267Sjkim
411193267Sjkim#ifdef TCPDEBUG
412193267Sjkim	if (inp->inp_socket->so_options & SO_DEBUG)
413193267Sjkim		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
414193267Sjkim			  PRU_SLOWTIMO);
415193267Sjkim#endif
416193267Sjkim	INP_WUNLOCK(inp);
417193267Sjkim	INP_INFO_RUNLOCK(&V_tcbinfo);
418193267Sjkim	CURVNET_RESTORE();
419193267Sjkim	return;
420193267Sjkim
421193267Sjkimdropit:
422193267Sjkim	TCPSTAT_INC(tcps_keepdrops);
423193267Sjkim	tp = tcp_drop(tp, ETIMEDOUT);
424193267Sjkim
425193267Sjkim#ifdef TCPDEBUG
426193267Sjkim	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
427193267Sjkim		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
428193267Sjkim			  PRU_SLOWTIMO);
429193267Sjkim#endif
430193267Sjkim	if (tp != NULL)
431193267Sjkim		INP_WUNLOCK(tp->t_inpcb);
432193267Sjkim	INP_INFO_RUNLOCK(&V_tcbinfo);
433193267Sjkim	CURVNET_RESTORE();
434193267Sjkim}
435193267Sjkim
436193267Sjkimvoid
437193267Sjkimtcp_timer_persist(void *xtp)
438193267Sjkim{
439193267Sjkim	struct tcpcb *tp = xtp;
440193267Sjkim	struct inpcb *inp;
441193267Sjkim	CURVNET_SET(tp->t_vnet);
442306536Sjkim#ifdef TCPDEBUG
443193267Sjkim	int ostate;
444193267Sjkim
445193267Sjkim	ostate = tp->t_state;
446193267Sjkim#endif
447193267Sjkim	INP_INFO_RLOCK(&V_tcbinfo);
448193267Sjkim	inp = tp->t_inpcb;
449193267Sjkim	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
450193267Sjkim	INP_WLOCK(inp);
451193267Sjkim	if (callout_pending(&tp->t_timers->tt_persist) ||
452193267Sjkim	    !callout_active(&tp->t_timers->tt_persist)) {
453193267Sjkim		INP_WUNLOCK(inp);
454193267Sjkim		INP_INFO_RUNLOCK(&V_tcbinfo);
455193267Sjkim		CURVNET_RESTORE();
456306536Sjkim		return;
457193267Sjkim	}
458193267Sjkim	callout_deactivate(&tp->t_timers->tt_persist);
459193267Sjkim	if ((inp->inp_flags & INP_DROPPED) != 0) {
460193267Sjkim		INP_WUNLOCK(inp);
461193267Sjkim		INP_INFO_RUNLOCK(&V_tcbinfo);
462193267Sjkim		CURVNET_RESTORE();
463193267Sjkim		return;
464193267Sjkim	}
465193267Sjkim	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
466193267Sjkim		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
467193267Sjkim	KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0,
468193267Sjkim		("%s: tp %p persist callout should be running", __func__, tp));
469193267Sjkim	/*
470193267Sjkim	 * Persistance timer into zero window.
471193267Sjkim	 * Force a byte to be output, if possible.
472193267Sjkim	 */
473193267Sjkim	TCPSTAT_INC(tcps_persisttimeo);
474193267Sjkim	/*
475193267Sjkim	 * Hack: if the peer is dead/unreachable, we do not
476306536Sjkim	 * time out if the window is closed.  After a full
477193267Sjkim	 * backoff, drop the connection if the idle time
478193267Sjkim	 * (no responses to probes) reaches the maximum
479193267Sjkim	 * backoff that we would use if retransmitting.
480193267Sjkim	 */
481193267Sjkim	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
482193267Sjkim	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
483193267Sjkim	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
484193267Sjkim		TCPSTAT_INC(tcps_persistdrop);
485193267Sjkim		tp = tcp_drop(tp, ETIMEDOUT);
486193267Sjkim		goto out;
487193267Sjkim	}
488193267Sjkim	/*
489193267Sjkim	 * If the user has closed the socket then drop a persisting
490193267Sjkim	 * connection after a much reduced timeout.
491193267Sjkim	 */
492193267Sjkim	if (tp->t_state > TCPS_CLOSE_WAIT &&
493231844Sjkim	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
494193267Sjkim		TCPSTAT_INC(tcps_persistdrop);
495231844Sjkim		tp = tcp_drop(tp, ETIMEDOUT);
496193267Sjkim		goto out;
497193267Sjkim	}
498193267Sjkim	tcp_setpersist(tp);
499193267Sjkim	tp->t_flags |= TF_FORCEDATA;
500193267Sjkim	(void) tcp_output(tp);
501193267Sjkim	tp->t_flags &= ~TF_FORCEDATA;
502193267Sjkim
503193267Sjkimout:
504245582Sjkim#ifdef TCPDEBUG
505193267Sjkim	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
506245582Sjkim		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
507245582Sjkim#endif
508193267Sjkim	if (tp != NULL)
509245582Sjkim		INP_WUNLOCK(inp);
510245582Sjkim	INP_INFO_RUNLOCK(&V_tcbinfo);
511245582Sjkim	CURVNET_RESTORE();
512245582Sjkim}
513245582Sjkim
514245582Sjkimvoid
515245582Sjkimtcp_timer_rexmt(void * xtp)
516245582Sjkim{
517245582Sjkim	struct tcpcb *tp = xtp;
518245582Sjkim	CURVNET_SET(tp->t_vnet);
519245582Sjkim	int rexmt;
520245582Sjkim	int headlocked;
521245582Sjkim	struct inpcb *inp;
522245582Sjkim#ifdef TCPDEBUG
523245582Sjkim	int ostate;
524245582Sjkim
525245582Sjkim	ostate = tp->t_state;
526245582Sjkim#endif
527245582Sjkim
528245582Sjkim	INP_INFO_RLOCK(&V_tcbinfo);
529245582Sjkim	inp = tp->t_inpcb;
530245582Sjkim	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
531245582Sjkim	INP_WLOCK(inp);
532193267Sjkim	if (callout_pending(&tp->t_timers->tt_rexmt) ||
533193267Sjkim	    !callout_active(&tp->t_timers->tt_rexmt)) {
534193267Sjkim		INP_WUNLOCK(inp);
535193267Sjkim		INP_INFO_RUNLOCK(&V_tcbinfo);
536193267Sjkim		CURVNET_RESTORE();
537193267Sjkim		return;
538193267Sjkim	}
539193267Sjkim	callout_deactivate(&tp->t_timers->tt_rexmt);
540245582Sjkim	if ((inp->inp_flags & INP_DROPPED) != 0) {
541193267Sjkim		INP_WUNLOCK(inp);
542245582Sjkim		INP_INFO_RUNLOCK(&V_tcbinfo);
543193267Sjkim		CURVNET_RESTORE();
544193267Sjkim		return;
545193267Sjkim	}
546193267Sjkim	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
547193267Sjkim		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
548193267Sjkim	KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0,
549193267Sjkim		("%s: tp %p rexmt callout should be running", __func__, tp));
550193267Sjkim	tcp_free_sackholes(tp);
551245582Sjkim	/*
552193267Sjkim	 * Retransmission timer went off.  Message has not
553193267Sjkim	 * been acked within retransmit interval.  Back off
554193267Sjkim	 * to a longer retransmit interval and retransmit one segment.
555193267Sjkim	 */
556193267Sjkim	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
557193267Sjkim		tp->t_rxtshift = TCP_MAXRXTSHIFT;
558193267Sjkim		TCPSTAT_INC(tcps_timeoutdrop);
559193267Sjkim
560193267Sjkim		tp = tcp_drop(tp, tp->t_softerror ?
561193267Sjkim			      tp->t_softerror : ETIMEDOUT);
562193267Sjkim		headlocked = 1;
563193267Sjkim		goto out;
564245582Sjkim	}
565245582Sjkim	INP_INFO_RUNLOCK(&V_tcbinfo);
566245582Sjkim	headlocked = 0;
567245582Sjkim	if (tp->t_state == TCPS_SYN_SENT) {
568306536Sjkim		/*
569306536Sjkim		 * If the SYN was retransmitted, indicate CWND to be
570193267Sjkim		 * limited to 1 segment in cc_conn_init().
571193267Sjkim		 */
572193267Sjkim		tp->snd_cwnd = 1;
573306536Sjkim	} else if (tp->t_rxtshift == 1) {
574306536Sjkim		/*
575306536Sjkim		 * first retransmit; record ssthresh and cwnd so they can
576306536Sjkim		 * be recovered if this turns out to be a "bad" retransmit.
577306536Sjkim		 * A retransmit is considered "bad" if an ACK for this
578306536Sjkim		 * segment is received within RTT/2 interval; the assumption
579306536Sjkim		 * here is that the ACK was already in flight.  See
580306536Sjkim		 * "On Estimating End-to-End Network Path Properties" by
581193267Sjkim		 * Allman and Paxson for more details.
582193267Sjkim		 */
583193267Sjkim		tp->snd_cwnd_prev = tp->snd_cwnd;
584193267Sjkim		tp->snd_ssthresh_prev = tp->snd_ssthresh;
585193267Sjkim		tp->snd_recover_prev = tp->snd_recover;
586193267Sjkim		if (IN_FASTRECOVERY(tp->t_flags))
587193267Sjkim			tp->t_flags |= TF_WASFRECOVERY;
588249663Sjkim		else
589245582Sjkim			tp->t_flags &= ~TF_WASFRECOVERY;
590306536Sjkim		if (IN_CONGRECOVERY(tp->t_flags))
591193267Sjkim			tp->t_flags |= TF_WASCRECOVERY;
592193267Sjkim		else
593245582Sjkim			tp->t_flags &= ~TF_WASCRECOVERY;
594193267Sjkim		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
595245582Sjkim		tp->t_flags |= TF_PREVVALID;
596193267Sjkim	} else
597193267Sjkim		tp->t_flags &= ~TF_PREVVALID;
598193267Sjkim	TCPSTAT_INC(tcps_rexmttimeo);
599306536Sjkim	if ((tp->t_state == TCPS_SYN_SENT) ||
600193267Sjkim	    (tp->t_state == TCPS_SYN_RECEIVED))
601193267Sjkim		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
602193267Sjkim	else
603245582Sjkim		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
604245582Sjkim	TCPT_RANGESET(tp->t_rxtcur, rexmt,
605245582Sjkim		      tp->t_rttmin, TCPTV_REXMTMAX);
606193267Sjkim
607245582Sjkim	/*
608245582Sjkim	 * We enter the path for PLMTUD if connection is established or, if
609193267Sjkim	 * connection is FIN_WAIT_1 status, reason for the last is that if
610245582Sjkim	 * amount of data we send is very small, we could send it in couple of
611250838Sjkim	 * packets and process straight to FIN. In that case we won't catch
612245582Sjkim	 * ESTABLISHED state.
613245582Sjkim	 */
614193267Sjkim	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
615245582Sjkim	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
616250838Sjkim		int optlen;
617245582Sjkim#ifdef INET6
618245582Sjkim		int isipv6;
619245582Sjkim#endif
620245582Sjkim
621245582Sjkim		/*
622193267Sjkim		 * Idea here is that at each stage of mtu probe (usually, 1448
623245582Sjkim		 * -> 1188 -> 524) should be given 2 chances to recover before
624193267Sjkim		 *  further clamping down. 'tp->t_rxtshift % 2 == 0' should
625245582Sjkim		 *  take care of that.
626245582Sjkim		 */
627245582Sjkim		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
628245582Sjkim		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
629245582Sjkim		    (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) {
630245582Sjkim			/*
631250838Sjkim			 * Enter Path MTU Black-hole Detection mechanism:
632245582Sjkim			 * - Disable Path MTU Discovery (IP "DF" bit).
633245582Sjkim			 * - Reduce MTU to lower value than what we
634245582Sjkim			 *   negotiated with peer.
635245582Sjkim			 */
636245582Sjkim			/* Record that we may have found a black hole. */
637245582Sjkim			tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
638245582Sjkim
639245582Sjkim			/* Keep track of previous MSS. */
640245582Sjkim			optlen = tp->t_maxopd - tp->t_maxseg;
641245582Sjkim			tp->t_pmtud_saved_maxopd = tp->t_maxopd;
642245582Sjkim
643245582Sjkim			/*
644193267Sjkim			 * Reduce the MSS to blackhole value or to the default
645193267Sjkim			 * in an attempt to retransmit.
646306536Sjkim			 */
647245582Sjkim#ifdef INET6
648245582Sjkim			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
649306536Sjkim			if (isipv6 &&
650193267Sjkim			    tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
651193267Sjkim				/* Use the sysctl tuneable blackhole MSS. */
652193267Sjkim				tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
653306536Sjkim				V_tcp_pmtud_blackhole_activated++;
654306536Sjkim			} else if (isipv6) {
655193267Sjkim				/* Use the default MSS. */
656193267Sjkim				tp->t_maxopd = V_tcp_v6mssdflt;
657306536Sjkim				/*
658193267Sjkim				 * Disable Path MTU Discovery when we switch to
659193267Sjkim				 * minmss.
660193267Sjkim				 */
661193267Sjkim				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
662193267Sjkim				V_tcp_pmtud_blackhole_activated_min_mss++;
663			}
664#endif
665#if defined(INET6) && defined(INET)
666			else
667#endif
668#ifdef INET
669			if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
670				/* Use the sysctl tuneable blackhole MSS. */
671				tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
672				V_tcp_pmtud_blackhole_activated++;
673			} else {
674				/* Use the default MSS. */
675				tp->t_maxopd = V_tcp_mssdflt;
676				/*
677				 * Disable Path MTU Discovery when we switch to
678				 * minmss.
679				 */
680				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
681				V_tcp_pmtud_blackhole_activated_min_mss++;
682			}
683#endif
684			tp->t_maxseg = tp->t_maxopd - optlen;
685			/*
686			 * Reset the slow-start flight size
687			 * as it may depend on the new MSS.
688			 */
689			if (CC_ALGO(tp)->conn_init != NULL)
690				CC_ALGO(tp)->conn_init(tp->ccv);
691		} else {
692			/*
693			 * If further retransmissions are still unsuccessful
694			 * with a lowered MTU, maybe this isn't a blackhole and
695			 * we restore the previous MSS and blackhole detection
696			 * flags.
697			 * The limit '6' is determined by giving each probe
698			 * stage (1448, 1188, 524) 2 chances to recover.
699			 */
700			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
701			    (tp->t_rxtshift > 6)) {
702				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
703				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
704				optlen = tp->t_maxopd - tp->t_maxseg;
705				tp->t_maxopd = tp->t_pmtud_saved_maxopd;
706				tp->t_maxseg = tp->t_maxopd - optlen;
707				V_tcp_pmtud_blackhole_failed++;
708				/*
709				 * Reset the slow-start flight size as it
710				 * may depend on the new MSS.
711				 */
712				if (CC_ALGO(tp)->conn_init != NULL)
713					CC_ALGO(tp)->conn_init(tp->ccv);
714			}
715		}
716	}
717
718	/*
719	 * Disable RFC1323 and SACK if we haven't got any response to
720	 * our third SYN to work-around some broken terminal servers
721	 * (most of which have hopefully been retired) that have bad VJ
722	 * header compression code which trashes TCP segments containing
723	 * unknown-to-them TCP options.
724	 */
725	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
726	    (tp->t_rxtshift == 3))
727		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
728	/*
729	 * If we backed off this far, our srtt estimate is probably bogus.
730	 * Clobber it so we'll take the next rtt measurement as our srtt;
731	 * move the current srtt into rttvar to keep the current
732	 * retransmit times until then.
733	 */
734	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
735#ifdef INET6
736		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
737			in6_losing(tp->t_inpcb);
738#endif
739		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
740		tp->t_srtt = 0;
741	}
742	tp->snd_nxt = tp->snd_una;
743	tp->snd_recover = tp->snd_max;
744	/*
745	 * Force a segment to be sent.
746	 */
747	tp->t_flags |= TF_ACKNOW;
748	/*
749	 * If timing a segment in this window, stop the timer.
750	 */
751	tp->t_rtttime = 0;
752
753	cc_cong_signal(tp, NULL, CC_RTO);
754
755	(void) tcp_output(tp);
756
757out:
758#ifdef TCPDEBUG
759	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
760		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
761			  PRU_SLOWTIMO);
762#endif
763	if (tp != NULL)
764		INP_WUNLOCK(inp);
765	if (headlocked)
766		INP_INFO_RUNLOCK(&V_tcbinfo);
767	CURVNET_RESTORE();
768}
769
770void
771tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
772{
773	struct callout *t_callout;
774	timeout_t *f_callout;
775	struct inpcb *inp = tp->t_inpcb;
776	int cpu = INP_CPU(inp);
777	uint32_t f_reset;
778
779#ifdef TCP_OFFLOAD
780	if (tp->t_flags & TF_TOE)
781		return;
782#endif
783
784	if (tp->t_timers->tt_flags & TT_STOPPED)
785		return;
786
787	switch (timer_type) {
788		case TT_DELACK:
789			t_callout = &tp->t_timers->tt_delack;
790			f_callout = tcp_timer_delack;
791			f_reset = TT_DELACK_RST;
792			break;
793		case TT_REXMT:
794			t_callout = &tp->t_timers->tt_rexmt;
795			f_callout = tcp_timer_rexmt;
796			f_reset = TT_REXMT_RST;
797			break;
798		case TT_PERSIST:
799			t_callout = &tp->t_timers->tt_persist;
800			f_callout = tcp_timer_persist;
801			f_reset = TT_PERSIST_RST;
802			break;
803		case TT_KEEP:
804			t_callout = &tp->t_timers->tt_keep;
805			f_callout = tcp_timer_keep;
806			f_reset = TT_KEEP_RST;
807			break;
808		case TT_2MSL:
809			t_callout = &tp->t_timers->tt_2msl;
810			f_callout = tcp_timer_2msl;
811			f_reset = TT_2MSL_RST;
812			break;
813		default:
814			panic("tp %p bad timer_type %#x", tp, timer_type);
815		}
816	if (delta == 0) {
817		if ((tp->t_timers->tt_flags & timer_type) &&
818		    callout_stop(t_callout) &&
819		    (tp->t_timers->tt_flags & f_reset)) {
820			tp->t_timers->tt_flags &= ~(timer_type | f_reset);
821		}
822	} else {
823		if ((tp->t_timers->tt_flags & timer_type) == 0) {
824			tp->t_timers->tt_flags |= (timer_type | f_reset);
825			callout_reset_on(t_callout, delta, f_callout, tp, cpu);
826		} else {
827			/* Reset already running callout on the same CPU. */
828			if (!callout_reset(t_callout, delta, f_callout, tp)) {
829				/*
830				 * Callout not cancelled, consider it as not
831				 * properly restarted. */
832				tp->t_timers->tt_flags &= ~f_reset;
833			}
834		}
835	}
836}
837
838int
839tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
840{
841	struct callout *t_callout;
842
843	switch (timer_type) {
844		case TT_DELACK:
845			t_callout = &tp->t_timers->tt_delack;
846			break;
847		case TT_REXMT:
848			t_callout = &tp->t_timers->tt_rexmt;
849			break;
850		case TT_PERSIST:
851			t_callout = &tp->t_timers->tt_persist;
852			break;
853		case TT_KEEP:
854			t_callout = &tp->t_timers->tt_keep;
855			break;
856		case TT_2MSL:
857			t_callout = &tp->t_timers->tt_2msl;
858			break;
859		default:
860			panic("tp %p bad timer_type %#x", tp, timer_type);
861		}
862	return callout_active(t_callout);
863}
864
865void
866tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
867{
868	struct callout *t_callout;
869	timeout_t *f_callout;
870	uint32_t f_reset;
871
872	tp->t_timers->tt_flags |= TT_STOPPED;
873
874	switch (timer_type) {
875		case TT_DELACK:
876			t_callout = &tp->t_timers->tt_delack;
877			f_callout = tcp_timer_delack_discard;
878			f_reset = TT_DELACK_RST;
879			break;
880		case TT_REXMT:
881			t_callout = &tp->t_timers->tt_rexmt;
882			f_callout = tcp_timer_rexmt_discard;
883			f_reset = TT_REXMT_RST;
884			break;
885		case TT_PERSIST:
886			t_callout = &tp->t_timers->tt_persist;
887			f_callout = tcp_timer_persist_discard;
888			f_reset = TT_PERSIST_RST;
889			break;
890		case TT_KEEP:
891			t_callout = &tp->t_timers->tt_keep;
892			f_callout = tcp_timer_keep_discard;
893			f_reset = TT_KEEP_RST;
894			break;
895		case TT_2MSL:
896			t_callout = &tp->t_timers->tt_2msl;
897			f_callout = tcp_timer_2msl_discard;
898			f_reset = TT_2MSL_RST;
899			break;
900		default:
901			panic("tp %p bad timer_type %#x", tp, timer_type);
902		}
903
904	if (tp->t_timers->tt_flags & timer_type) {
905		if (callout_stop(t_callout) &&
906		    (tp->t_timers->tt_flags & f_reset)) {
907			tp->t_timers->tt_flags &= ~(timer_type | f_reset);
908		} else {
909			/*
910			 * Can't stop the callout, defer tcpcb actual deletion
911			 * to the last tcp timer discard callout.
912			 * The TT_STOPPED flag will ensure that no tcp timer
913			 * callouts can be restarted on our behalf, and
914			 * past this point currently running callouts waiting
915			 * on inp lock will return right away after the
916			 * classical check for callout reset/stop events:
917			 * callout_pending() || !callout_active()
918			 */
919			callout_reset(t_callout, 1, f_callout, tp);
920		}
921	}
922}
923
/*
 * Convert a tick count to milliseconds using hz (ticks per second).
 * The multiplication is carried out in 64 bits so that large tick counts
 * do not wrap 32-bit arithmetic before the division by hz; the expression
 * therefore has type int64_t.
 */
#define	ticks_to_msecs(t)	((int64_t)(t) * 1000 / hz)
925
926void
927tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
928    struct xtcp_timer *xtimer)
929{
930	sbintime_t now;
931
932	bzero(xtimer, sizeof(*xtimer));
933	if (timer == NULL)
934		return;
935	now = getsbinuptime();
936	if (callout_active(&timer->tt_delack))
937		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
938	if (callout_active(&timer->tt_rexmt))
939		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
940	if (callout_active(&timer->tt_persist))
941		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
942	if (callout_active(&timer->tt_keep))
943		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
944	if (callout_active(&timer->tt_2msl))
945		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
946	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
947}
948