1/*-
2 * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
3 * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
4 * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * a) Redistributions of source code must retain the above copyright notice,
10 *    this list of conditions and the following disclaimer.
11 *
12 * b) Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in
14 *    the documentation and/or other materials provided with the distribution.
15 *
16 * c) Neither the name of Cisco Systems, Inc. nor the names of its
17 *    contributors may be used to endorse or promote products derived
18 *    from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30 * THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD$");
35
36#include <netinet/sctp_os.h>
37#include <netinet/sctp_var.h>
38#include <netinet/sctp_sysctl.h>
39#include <netinet/sctp_pcb.h>
40#include <netinet/sctp_header.h>
41#include <netinet/sctputil.h>
42#include <netinet/sctp_output.h>
43#include <netinet/sctp_input.h>
44#include <netinet/sctp_indata.h>
45#include <netinet/sctp_uio.h>
46#include <netinet/sctp_timer.h>
47#include <netinet/sctp_auth.h>
48#include <netinet/sctp_asconf.h>
49#include <netinet/sctp_dtrace_declare.h>
50
51#define SHIFT_MPTCP_MULTI_N 40
52#define SHIFT_MPTCP_MULTI_Z 16
53#define SHIFT_MPTCP_MULTI 8
54
55static void
56sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
57{
58	struct sctp_association *assoc;
59	uint32_t cwnd_in_mtu;
60
61	assoc = &stcb->asoc;
62	cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
63	if (cwnd_in_mtu == 0) {
64		/* Using 0 means that the value of RFC 4960 is used. */
65		net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
66	} else {
67		/*
68		 * We take the minimum of the burst limit and the initial
69		 * congestion window.
70		 */
71		if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst))
72			cwnd_in_mtu = assoc->max_burst;
73		net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
74	}
75	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
76	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
77		/* In case of resource pooling initialize appropriately */
78		net->cwnd /= assoc->numnets;
79		if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
80			net->cwnd = net->mtu - sizeof(struct sctphdr);
81		}
82	}
83	net->ssthresh = assoc->peers_rwnd;
84	SDT_PROBE(sctp, cwnd, net, init,
85	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
86	    0, net->cwnd);
87	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
88	    (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
89		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
90	}
91}
92
93static void
94sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
95    struct sctp_association *asoc)
96{
97	struct sctp_nets *net;
98	uint32_t t_ssthresh, t_cwnd;
99	uint64_t t_ucwnd_sbw;
100
101	/* MT FIXME: Don't compute this over and over again */
102	t_ssthresh = 0;
103	t_cwnd = 0;
104	t_ucwnd_sbw = 0;
105	if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
106	    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
107		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
108			t_ssthresh += net->ssthresh;
109			t_cwnd += net->cwnd;
110			if (net->lastsa > 0) {
111				t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) net->lastsa;
112			}
113		}
114		if (t_ucwnd_sbw == 0) {
115			t_ucwnd_sbw = 1;
116		}
117	}
118	/*-
119	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
120	 * (net->fast_retran_loss_recovery == 0)))
121	 */
122	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
123		if ((asoc->fast_retran_loss_recovery == 0) ||
124		    (asoc->sctp_cmt_on_off > 0)) {
125			/* out of a RFC2582 Fast recovery window? */
126			if (net->net_ack > 0) {
127				/*
128				 * per section 7.2.3, are there any
129				 * destinations that had a fast retransmit
130				 * to them. If so what we need to do is
131				 * adjust ssthresh and cwnd.
132				 */
133				struct sctp_tmit_chunk *lchk;
134				int old_cwnd = net->cwnd;
135
136				if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
137				    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
138					if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) {
139						net->ssthresh = (uint32_t) (((uint64_t) 4 *
140						    (uint64_t) net->mtu *
141						    (uint64_t) net->ssthresh) /
142						    (uint64_t) t_ssthresh);
143
144					}
145					if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2) {
146						uint32_t srtt;
147
148						srtt = net->lastsa;
149						/*
150						 * lastsa>>3;  we don't need
151						 * to devide ...
152						 */
153						if (srtt == 0) {
154							srtt = 1;
155						}
156						/*
157						 * Short Version => Equal to
158						 * Contel Version MBe
159						 */
160						net->ssthresh = (uint32_t) (((uint64_t) 4 *
161						    (uint64_t) net->mtu *
162						    (uint64_t) net->cwnd) /
163						    ((uint64_t) srtt *
164						    t_ucwnd_sbw));
165						 /* INCREASE FACTOR */ ;
166					}
167					if ((net->cwnd > t_cwnd / 2) &&
168					    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
169						net->ssthresh = net->cwnd - t_cwnd / 2;
170					}
171					if (net->ssthresh < net->mtu) {
172						net->ssthresh = net->mtu;
173					}
174				} else {
175					net->ssthresh = net->cwnd / 2;
176					if (net->ssthresh < (net->mtu * 2)) {
177						net->ssthresh = 2 * net->mtu;
178					}
179				}
180				net->cwnd = net->ssthresh;
181				SDT_PROBE(sctp, cwnd, net, fr,
182				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
183				    old_cwnd, net->cwnd);
184				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
185					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
186					    SCTP_CWND_LOG_FROM_FR);
187				}
188				lchk = TAILQ_FIRST(&asoc->send_queue);
189
190				net->partial_bytes_acked = 0;
191				/* Turn on fast recovery window */
192				asoc->fast_retran_loss_recovery = 1;
193				if (lchk == NULL) {
194					/* Mark end of the window */
195					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
196				} else {
197					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
198				}
199
200				/*
201				 * CMT fast recovery -- per destination
202				 * recovery variable.
203				 */
204				net->fast_retran_loss_recovery = 1;
205
206				if (lchk == NULL) {
207					/* Mark end of the window */
208					net->fast_recovery_tsn = asoc->sending_seq - 1;
209				} else {
210					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
211				}
212
213				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
214				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
215				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
216				    stcb->sctp_ep, stcb, net);
217			}
218		} else if (net->net_ack > 0) {
219			/*
220			 * Mark a peg that we WOULD have done a cwnd
221			 * reduction but RFC2582 prevented this action.
222			 */
223			SCTP_STAT_INCR(sctps_fastretransinrtt);
224		}
225	}
226}
227
228/* Defines for instantaneous bw decisions */
#define SCTP_INST_LOOSING 1	/* Losing to other flows */
230#define SCTP_INST_NEUTRAL 2	/* Neutral, no indication */
231#define SCTP_INST_GAINING 3	/* Gaining, step down possible */
232
233
234static int
235cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
236    uint64_t rtt_offset, uint64_t vtag, uint8_t inst_ind)
237{
238	uint64_t oth, probepoint;
239
240	probepoint = (((uint64_t) net->cwnd) << 32);
241	if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
242		/*
243		 * rtt increased we don't update bw.. so we don't update the
244		 * rtt either.
245		 */
246		/* Probe point 5 */
247		probepoint |= ((5 << 16) | 1);
248		SDT_PROBE(sctp, cwnd, net, rttvar,
249		    vtag,
250		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
251		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
252		    net->flight_size,
253		    probepoint);
254		if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
255			if (net->cc_mod.rtcc.last_step_state == 5)
256				net->cc_mod.rtcc.step_cnt++;
257			else
258				net->cc_mod.rtcc.step_cnt = 1;
259			net->cc_mod.rtcc.last_step_state = 5;
260			if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
261			    ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
262			    ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
263				/* Try a step down */
264				oth = net->cc_mod.rtcc.vol_reduce;
265				oth <<= 16;
266				oth |= net->cc_mod.rtcc.step_cnt;
267				oth <<= 16;
268				oth |= net->cc_mod.rtcc.last_step_state;
269				SDT_PROBE(sctp, cwnd, net, rttstep,
270				    vtag,
271				    ((net->cc_mod.rtcc.lbw << 32) | nbw),
272				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
273				    oth,
274				    probepoint);
275				if (net->cwnd > (4 * net->mtu)) {
276					net->cwnd -= net->mtu;
277					net->cc_mod.rtcc.vol_reduce++;
278				} else {
279					net->cc_mod.rtcc.step_cnt = 0;
280				}
281			}
282		}
283		return (1);
284	}
285	if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
286		/*
287		 * rtt decreased, there could be more room. we update both
288		 * the bw and the rtt here to lock this in as a good step
289		 * down.
290		 */
291		/* Probe point 6 */
292		probepoint |= ((6 << 16) | 0);
293		SDT_PROBE(sctp, cwnd, net, rttvar,
294		    vtag,
295		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
296		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
297		    net->flight_size,
298		    probepoint);
299		if (net->cc_mod.rtcc.steady_step) {
300			oth = net->cc_mod.rtcc.vol_reduce;
301			oth <<= 16;
302			oth |= net->cc_mod.rtcc.step_cnt;
303			oth <<= 16;
304			oth |= net->cc_mod.rtcc.last_step_state;
305			SDT_PROBE(sctp, cwnd, net, rttstep,
306			    vtag,
307			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
308			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
309			    oth,
310			    probepoint);
311			if ((net->cc_mod.rtcc.last_step_state == 5) &&
312			    (net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step)) {
313				/* Step down worked */
314				net->cc_mod.rtcc.step_cnt = 0;
315				return (1);
316			} else {
317				net->cc_mod.rtcc.last_step_state = 6;
318				net->cc_mod.rtcc.step_cnt = 0;
319			}
320		}
321		net->cc_mod.rtcc.lbw = nbw;
322		net->cc_mod.rtcc.lbw_rtt = net->rtt;
323		net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
324		if (inst_ind == SCTP_INST_GAINING)
325			return (1);
326		else if (inst_ind == SCTP_INST_NEUTRAL)
327			return (1);
328		else
329			return (0);
330	}
331	/*
332	 * Ok bw and rtt remained the same .. no update to any
333	 */
334	/* Probe point 7 */
335	probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq);
336	SDT_PROBE(sctp, cwnd, net, rttvar,
337	    vtag,
338	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
339	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
340	    net->flight_size,
341	    probepoint);
342	if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
343		if (net->cc_mod.rtcc.last_step_state == 5)
344			net->cc_mod.rtcc.step_cnt++;
345		else
346			net->cc_mod.rtcc.step_cnt = 1;
347		net->cc_mod.rtcc.last_step_state = 5;
348		if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
349		    ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
350		    ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
351			/* Try a step down */
352			if (net->cwnd > (4 * net->mtu)) {
353				net->cwnd -= net->mtu;
354				net->cc_mod.rtcc.vol_reduce++;
355				return (1);
356			} else {
357				net->cc_mod.rtcc.step_cnt = 0;
358			}
359		}
360	}
361	if (inst_ind == SCTP_INST_GAINING)
362		return (1);
363	else if (inst_ind == SCTP_INST_NEUTRAL)
364		return (1);
365	else
366		return ((int)net->cc_mod.rtcc.ret_from_eq);
367}
368
369static int
370cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t rtt_offset,
371    uint64_t vtag, uint8_t inst_ind)
372{
373	uint64_t oth, probepoint;
374
375	/* Bandwidth decreased. */
376	probepoint = (((uint64_t) net->cwnd) << 32);
377	if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
378		/* rtt increased */
379		/* Did we add more */
380		if ((net->cwnd > net->cc_mod.rtcc.cwnd_at_bw_set) &&
381		    (inst_ind != SCTP_INST_LOOSING)) {
382			/* We caused it maybe.. back off? */
383			/* PROBE POINT 1 */
384			probepoint |= ((1 << 16) | 1);
385			SDT_PROBE(sctp, cwnd, net, rttvar,
386			    vtag,
387			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
388			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
389			    net->flight_size,
390			    probepoint);
391			if (net->cc_mod.rtcc.ret_from_eq) {
392				/*
393				 * Switch over to CA if we are less
394				 * aggressive
395				 */
396				net->ssthresh = net->cwnd - 1;
397				net->partial_bytes_acked = 0;
398			}
399			return (1);
400		}
401		/* Probe point 2 */
402		probepoint |= ((2 << 16) | 0);
403		SDT_PROBE(sctp, cwnd, net, rttvar,
404		    vtag,
405		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
406		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
407		    net->flight_size,
408		    probepoint);
409		/* Someone else - fight for more? */
410		if (net->cc_mod.rtcc.steady_step) {
411			oth = net->cc_mod.rtcc.vol_reduce;
412			oth <<= 16;
413			oth |= net->cc_mod.rtcc.step_cnt;
414			oth <<= 16;
415			oth |= net->cc_mod.rtcc.last_step_state;
416			SDT_PROBE(sctp, cwnd, net, rttstep,
417			    vtag,
418			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
419			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
420			    oth,
421			    probepoint);
422			/*
423			 * Did we voluntarily give up some? if so take one
424			 * back please
425			 */
426			if ((net->cc_mod.rtcc.vol_reduce) &&
427			    (inst_ind != SCTP_INST_GAINING)) {
428				net->cwnd += net->mtu;
429				net->cc_mod.rtcc.vol_reduce--;
430			}
431			net->cc_mod.rtcc.last_step_state = 2;
432			net->cc_mod.rtcc.step_cnt = 0;
433		}
434		goto out_decision;
435	} else if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
436		/* bw & rtt decreased */
437		/* Probe point 3 */
438		probepoint |= ((3 << 16) | 0);
439		SDT_PROBE(sctp, cwnd, net, rttvar,
440		    vtag,
441		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
442		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
443		    net->flight_size,
444		    probepoint);
445		if (net->cc_mod.rtcc.steady_step) {
446			oth = net->cc_mod.rtcc.vol_reduce;
447			oth <<= 16;
448			oth |= net->cc_mod.rtcc.step_cnt;
449			oth <<= 16;
450			oth |= net->cc_mod.rtcc.last_step_state;
451			SDT_PROBE(sctp, cwnd, net, rttstep,
452			    vtag,
453			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
454			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
455			    oth,
456			    probepoint);
457			if ((net->cc_mod.rtcc.vol_reduce) &&
458			    (inst_ind != SCTP_INST_GAINING)) {
459				net->cwnd += net->mtu;
460				net->cc_mod.rtcc.vol_reduce--;
461			}
462			net->cc_mod.rtcc.last_step_state = 3;
463			net->cc_mod.rtcc.step_cnt = 0;
464		}
465		goto out_decision;
466	}
467	/* The bw decreased but rtt stayed the same */
468	/* Probe point 4 */
469	probepoint |= ((4 << 16) | 0);
470	SDT_PROBE(sctp, cwnd, net, rttvar,
471	    vtag,
472	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
473	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
474	    net->flight_size,
475	    probepoint);
476	if (net->cc_mod.rtcc.steady_step) {
477		oth = net->cc_mod.rtcc.vol_reduce;
478		oth <<= 16;
479		oth |= net->cc_mod.rtcc.step_cnt;
480		oth <<= 16;
481		oth |= net->cc_mod.rtcc.last_step_state;
482		SDT_PROBE(sctp, cwnd, net, rttstep,
483		    vtag,
484		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
485		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
486		    oth,
487		    probepoint);
488		if ((net->cc_mod.rtcc.vol_reduce) &&
489		    (inst_ind != SCTP_INST_GAINING)) {
490			net->cwnd += net->mtu;
491			net->cc_mod.rtcc.vol_reduce--;
492		}
493		net->cc_mod.rtcc.last_step_state = 4;
494		net->cc_mod.rtcc.step_cnt = 0;
495	}
496out_decision:
497	net->cc_mod.rtcc.lbw = nbw;
498	net->cc_mod.rtcc.lbw_rtt = net->rtt;
499	net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
500	if (inst_ind == SCTP_INST_GAINING) {
501		return (1);
502	} else {
503		return (0);
504	}
505}
506
507static int
508cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t vtag)
509{
510	uint64_t oth, probepoint;
511
512	/*
513	 * BW increased, so update and return 0, since all actions in our
514	 * table say to do the normal CC update. Note that we pay no
515	 * attention to the inst_ind since our overall sum is increasing.
516	 */
517	/* PROBE POINT 0 */
518	probepoint = (((uint64_t) net->cwnd) << 32);
519	SDT_PROBE(sctp, cwnd, net, rttvar,
520	    vtag,
521	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
522	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
523	    net->flight_size,
524	    probepoint);
525	if (net->cc_mod.rtcc.steady_step) {
526		oth = net->cc_mod.rtcc.vol_reduce;
527		oth <<= 16;
528		oth |= net->cc_mod.rtcc.step_cnt;
529		oth <<= 16;
530		oth |= net->cc_mod.rtcc.last_step_state;
531		SDT_PROBE(sctp, cwnd, net, rttstep,
532		    vtag,
533		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
534		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
535		    oth,
536		    probepoint);
537		net->cc_mod.rtcc.last_step_state = 0;
538		net->cc_mod.rtcc.step_cnt = 0;
539		net->cc_mod.rtcc.vol_reduce = 0;
540	}
541	net->cc_mod.rtcc.lbw = nbw;
542	net->cc_mod.rtcc.lbw_rtt = net->rtt;
543	net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
544	return (0);
545}
546
547/* RTCC Algoritm to limit growth of cwnd, return
548 * true if you want to NOT allow cwnd growth
549 */
550static int
551cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw)
552{
553	uint64_t bw_offset, rtt_offset;
554	uint64_t probepoint, rtt, vtag;
555	uint64_t bytes_for_this_rtt, inst_bw;
556	uint64_t div, inst_off;
557	int bw_shift;
558	uint8_t inst_ind;
559	int ret;
560
561	/*-
562	 * Here we need to see if we want
563	 * to limit cwnd growth due to increase
564	 * in overall rtt but no increase in bw.
565	 * We use the following table to figure
566	 * out what we should do. When we return
567	 * 0, cc update goes on as planned. If we
568	 * return 1, then no cc update happens and cwnd
569	 * stays where it is at.
570	 * ----------------------------------
571	 *   BW    |    RTT   | Action
572	 * *********************************
573	 *   INC   |    INC   | return 0
574	 * ----------------------------------
575	 *   INC   |    SAME  | return 0
576	 * ----------------------------------
577	 *   INC   |    DECR  | return 0
578	 * ----------------------------------
579	 *   SAME  |    INC   | return 1
580	 * ----------------------------------
581	 *   SAME  |    SAME  | return 1
582	 * ----------------------------------
583	 *   SAME  |    DECR  | return 0
584	 * ----------------------------------
585	 *   DECR  |    INC   | return 0 or 1 based on if we caused.
586	 * ----------------------------------
587	 *   DECR  |    SAME  | return 0
588	 * ----------------------------------
589	 *   DECR  |    DECR  | return 0
590	 * ----------------------------------
591	 *
592	 * We are a bit fuzz on what an increase or
593	 * decrease is. For BW it is the same if
594	 * it did not change within 1/64th. For
595	 * RTT it stayed the same if it did not
596	 * change within 1/32nd
597	 */
598	bw_shift = SCTP_BASE_SYSCTL(sctp_rttvar_bw);
599	rtt = stcb->asoc.my_vtag;
600	vtag = (rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport);
601	probepoint = (((uint64_t) net->cwnd) << 32);
602	rtt = net->rtt;
603	if (net->cc_mod.rtcc.rtt_set_this_sack) {
604		net->cc_mod.rtcc.rtt_set_this_sack = 0;
605		bytes_for_this_rtt = net->cc_mod.rtcc.bw_bytes - net->cc_mod.rtcc.bw_bytes_at_last_rttc;
606		net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
607		if (net->rtt) {
608			div = net->rtt / 1000;
609			if (div) {
610				inst_bw = bytes_for_this_rtt / div;
611				inst_off = inst_bw >> bw_shift;
612				if (inst_bw > nbw)
613					inst_ind = SCTP_INST_GAINING;
614				else if ((inst_bw + inst_off) < nbw)
615					inst_ind = SCTP_INST_LOOSING;
616				else
617					inst_ind = SCTP_INST_NEUTRAL;
618				probepoint |= ((0xb << 16) | inst_ind);
619			} else {
620				inst_ind = net->cc_mod.rtcc.last_inst_ind;
621				inst_bw = bytes_for_this_rtt / (uint64_t) (net->rtt);
622				/* Can't determine do not change */
623				probepoint |= ((0xc << 16) | inst_ind);
624			}
625		} else {
626			inst_ind = net->cc_mod.rtcc.last_inst_ind;
627			inst_bw = bytes_for_this_rtt;
628			/* Can't determine do not change */
629			probepoint |= ((0xd << 16) | inst_ind);
630		}
631		SDT_PROBE(sctp, cwnd, net, rttvar,
632		    vtag,
633		    ((nbw << 32) | inst_bw),
634		    ((net->cc_mod.rtcc.lbw_rtt << 32) | rtt),
635		    net->flight_size,
636		    probepoint);
637	} else {
638		/* No rtt measurement, use last one */
639		inst_ind = net->cc_mod.rtcc.last_inst_ind;
640	}
641	bw_offset = net->cc_mod.rtcc.lbw >> bw_shift;
642	if (nbw > net->cc_mod.rtcc.lbw + bw_offset) {
643		ret = cc_bw_increase(stcb, net, nbw, vtag);
644		goto out;
645	}
646	rtt_offset = net->cc_mod.rtcc.lbw_rtt >> SCTP_BASE_SYSCTL(sctp_rttvar_rtt);
647	if (nbw < net->cc_mod.rtcc.lbw - bw_offset) {
648		ret = cc_bw_decrease(stcb, net, nbw, rtt_offset, vtag, inst_ind);
649		goto out;
650	}
651	/*
652	 * If we reach here then we are in a situation where the bw stayed
653	 * the same.
654	 */
655	ret = cc_bw_same(stcb, net, nbw, rtt_offset, vtag, inst_ind);
656out:
657	net->cc_mod.rtcc.last_inst_ind = inst_ind;
658	return (ret);
659}
660
661static void
662sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
663    struct sctp_association *asoc,
664    int accum_moved, int reneged_all SCTP_UNUSED, int will_exit, int use_rtcc)
665{
666	struct sctp_nets *net;
667	int old_cwnd;
668	uint32_t t_ssthresh, t_cwnd, incr;
669	uint64_t t_ucwnd_sbw;
670	uint64_t t_path_mptcp;
671	uint64_t mptcp_like_alpha;
672	uint32_t srtt;
673	uint64_t max_path;
674
675	/* MT FIXME: Don't compute this over and over again */
676	t_ssthresh = 0;
677	t_cwnd = 0;
678	t_ucwnd_sbw = 0;
679	t_path_mptcp = 0;
680	mptcp_like_alpha = 1;
681	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
682	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2) ||
683	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_MPTCP)) {
684		max_path = 0;
685		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
686			t_ssthresh += net->ssthresh;
687			t_cwnd += net->cwnd;
688			/* lastsa>>3;  we don't need to devide ... */
689			srtt = net->lastsa;
690			if (srtt > 0) {
691				uint64_t tmp;
692
693				t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) srtt;
694				t_path_mptcp += (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_Z) /
695				    (((uint64_t) net->mtu) * (uint64_t) srtt);
696				tmp = (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_N) /
697				    ((uint64_t) net->mtu * (uint64_t) (srtt * srtt));
698				if (tmp > max_path) {
699					max_path = tmp;
700				}
701			}
702		}
703		if (t_path_mptcp > 0) {
704			mptcp_like_alpha = max_path / (t_path_mptcp * t_path_mptcp);
705		} else {
706			mptcp_like_alpha = 1;
707		}
708	}
709	if (t_ssthresh == 0) {
710		t_ssthresh = 1;
711	}
712	if (t_ucwnd_sbw == 0) {
713		t_ucwnd_sbw = 1;
714	}
715	/******************************/
716	/* update cwnd and Early FR   */
717	/******************************/
718	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
719
720#ifdef JANA_CMT_FAST_RECOVERY
721		/*
722		 * CMT fast recovery code. Need to debug.
723		 */
724		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
725			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
726			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
727				net->will_exit_fast_recovery = 1;
728			}
729		}
730#endif
731		/* if nothing was acked on this destination skip it */
732		if (net->net_ack == 0) {
733			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
734				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
735			}
736			continue;
737		}
738#ifdef JANA_CMT_FAST_RECOVERY
739		/*
740		 * CMT fast recovery code
741		 */
742		/*
743		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
744		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
745		 * } else if (sctp_cmt_on_off == 0 &&
746		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
747		 */
748#endif
749
750		if (asoc->fast_retran_loss_recovery &&
751		    (will_exit == 0) &&
752		    (asoc->sctp_cmt_on_off == 0)) {
753			/*
754			 * If we are in loss recovery we skip any cwnd
755			 * update
756			 */
757			return;
758		}
759		/*
760		 * Did any measurements go on for this network?
761		 */
762		if (use_rtcc && (net->cc_mod.rtcc.tls_needs_set > 0)) {
763			uint64_t nbw;
764
765			/*
766			 * At this point our bw_bytes has been updated by
767			 * incoming sack information.
768			 *
769			 * But our bw may not yet be set.
770			 *
771			 */
772			if ((net->cc_mod.rtcc.new_tot_time / 1000) > 0) {
773				nbw = net->cc_mod.rtcc.bw_bytes / (net->cc_mod.rtcc.new_tot_time / 1000);
774			} else {
775				nbw = net->cc_mod.rtcc.bw_bytes;
776			}
777			if (net->cc_mod.rtcc.lbw) {
778				if (cc_bw_limit(stcb, net, nbw)) {
779					/* Hold here, no update */
780					continue;
781				}
782			} else {
783				uint64_t vtag, probepoint;
784
785				probepoint = (((uint64_t) net->cwnd) << 32);
786				probepoint |= ((0xa << 16) | 0);
787				vtag = (net->rtt << 32) |
788				    (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
789				    (stcb->rport);
790
791				SDT_PROBE(sctp, cwnd, net, rttvar,
792				    vtag,
793				    nbw,
794				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
795				    net->flight_size,
796				    probepoint);
797				net->cc_mod.rtcc.lbw = nbw;
798				net->cc_mod.rtcc.lbw_rtt = net->rtt;
799				if (net->cc_mod.rtcc.rtt_set_this_sack) {
800					net->cc_mod.rtcc.rtt_set_this_sack = 0;
801					net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
802				}
803			}
804		}
805		/*
806		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
807		 * moved.
808		 */
809		if (accum_moved ||
810		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
811			/* If the cumulative ack moved we can proceed */
812			if (net->cwnd <= net->ssthresh) {
813				/* We are in slow start */
814				if (net->flight_size + net->net_ack >= net->cwnd) {
815					uint32_t limit;
816
817					old_cwnd = net->cwnd;
818					switch (asoc->sctp_cmt_on_off) {
819					case SCTP_CMT_RPV1:
820						limit = (uint32_t) (((uint64_t) net->mtu *
821						    (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
822						    (uint64_t) net->ssthresh) /
823						    (uint64_t) t_ssthresh);
824						incr = (uint32_t) (((uint64_t) net->net_ack *
825						    (uint64_t) net->ssthresh) /
826						    (uint64_t) t_ssthresh);
827						if (incr > limit) {
828							incr = limit;
829						}
830						if (incr == 0) {
831							incr = 1;
832						}
833						break;
834					case SCTP_CMT_RPV2:
835						/*
836						 * lastsa>>3;  we don't need
837						 * to divide ...
838						 */
839						srtt = net->lastsa;
840						if (srtt == 0) {
841							srtt = 1;
842						}
843						limit = (uint32_t) (((uint64_t) net->mtu *
844						    (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
845						    (uint64_t) net->cwnd) /
846						    ((uint64_t) srtt * t_ucwnd_sbw));
847						/* INCREASE FACTOR */
848						incr = (uint32_t) (((uint64_t) net->net_ack *
849						    (uint64_t) net->cwnd) /
850						    ((uint64_t) srtt * t_ucwnd_sbw));
851						/* INCREASE FACTOR */
852						if (incr > limit) {
853							incr = limit;
854						}
855						if (incr == 0) {
856							incr = 1;
857						}
858						break;
859					case SCTP_CMT_MPTCP:
860						limit = (uint32_t) (((uint64_t) net->mtu *
861						    mptcp_like_alpha *
862						    (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) >>
863						    SHIFT_MPTCP_MULTI);
864						incr = (uint32_t) (((uint64_t) net->net_ack *
865						    mptcp_like_alpha) >>
866						    SHIFT_MPTCP_MULTI);
867						if (incr > limit) {
868							incr = limit;
869						}
870						if (incr > net->net_ack) {
871							incr = net->net_ack;
872						}
873						if (incr > net->mtu) {
874							incr = net->mtu;
875						}
876						break;
877					default:
878						incr = net->net_ack;
879						if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) {
880							incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable);
881						}
882						break;
883					}
884					net->cwnd += incr;
885					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
886						sctp_log_cwnd(stcb, net, incr,
887						    SCTP_CWND_LOG_FROM_SS);
888					}
889					SDT_PROBE(sctp, cwnd, net, ack,
890					    stcb->asoc.my_vtag,
891					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
892					    net,
893					    old_cwnd, net->cwnd);
894				} else {
895					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
896						sctp_log_cwnd(stcb, net, net->net_ack,
897						    SCTP_CWND_LOG_NOADV_SS);
898					}
899				}
900			} else {
901				/* We are in congestion avoidance */
902				/*
903				 * Add to pba
904				 */
905				net->partial_bytes_acked += net->net_ack;
906
907				if ((net->flight_size + net->net_ack >= net->cwnd) &&
908				    (net->partial_bytes_acked >= net->cwnd)) {
909					net->partial_bytes_acked -= net->cwnd;
910					old_cwnd = net->cwnd;
911					switch (asoc->sctp_cmt_on_off) {
912					case SCTP_CMT_RPV1:
913						incr = (uint32_t) (((uint64_t) net->mtu *
914						    (uint64_t) net->ssthresh) /
915						    (uint64_t) t_ssthresh);
916						if (incr == 0) {
917							incr = 1;
918						}
919						break;
920					case SCTP_CMT_RPV2:
921						/*
922						 * lastsa>>3;  we don't need
923						 * to divide ...
924						 */
925						srtt = net->lastsa;
926						if (srtt == 0) {
927							srtt = 1;
928						}
929						incr = (uint32_t) ((uint64_t) net->mtu *
930						    (uint64_t) net->cwnd /
931						    ((uint64_t) srtt *
932						    t_ucwnd_sbw));
933						/* INCREASE FACTOR */
934						if (incr == 0) {
935							incr = 1;
936						}
937						break;
938					case SCTP_CMT_MPTCP:
939						incr = (uint32_t) ((mptcp_like_alpha *
940						    (uint64_t) net->cwnd) >>
941						    SHIFT_MPTCP_MULTI);
942						if (incr > net->mtu) {
943							incr = net->mtu;
944						}
945						break;
946					default:
947						incr = net->mtu;
948						break;
949					}
950					net->cwnd += incr;
951					SDT_PROBE(sctp, cwnd, net, ack,
952					    stcb->asoc.my_vtag,
953					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
954					    net,
955					    old_cwnd, net->cwnd);
956					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
957						sctp_log_cwnd(stcb, net, net->mtu,
958						    SCTP_CWND_LOG_FROM_CA);
959					}
960				} else {
961					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
962						sctp_log_cwnd(stcb, net, net->net_ack,
963						    SCTP_CWND_LOG_NOADV_CA);
964					}
965				}
966			}
967		} else {
968			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
969				sctp_log_cwnd(stcb, net, net->mtu,
970				    SCTP_CWND_LOG_NO_CUMACK);
971			}
972		}
973	}
974}
975
976static void
977sctp_cwnd_update_exit_pf_common(struct sctp_tcb *stcb, struct sctp_nets *net)
978{
979	int old_cwnd;
980
981	old_cwnd = net->cwnd;
982	net->cwnd = net->mtu;
983	SDT_PROBE(sctp, cwnd, net, ack,
984	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
985	    old_cwnd, net->cwnd);
986	SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
987	    (void *)net, net->cwnd);
988}
989
990
991static void
992sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
993{
994	int old_cwnd = net->cwnd;
995	uint32_t t_ssthresh, t_cwnd;
996	uint64_t t_ucwnd_sbw;
997
998	/* MT FIXME: Don't compute this over and over again */
999	t_ssthresh = 0;
1000	t_cwnd = 0;
1001	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
1002	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
1003		struct sctp_nets *lnet;
1004		uint32_t srtt;
1005
1006		t_ucwnd_sbw = 0;
1007		TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
1008			t_ssthresh += lnet->ssthresh;
1009			t_cwnd += lnet->cwnd;
1010			srtt = lnet->lastsa;
1011			/* lastsa>>3;  we don't need to divide ... */
1012			if (srtt > 0) {
1013				t_ucwnd_sbw += (uint64_t) lnet->cwnd / (uint64_t) srtt;
1014			}
1015		}
1016		if (t_ssthresh < 1) {
1017			t_ssthresh = 1;
1018		}
1019		if (t_ucwnd_sbw < 1) {
1020			t_ucwnd_sbw = 1;
1021		}
1022		if (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) {
1023			net->ssthresh = (uint32_t) (((uint64_t) 4 *
1024			    (uint64_t) net->mtu *
1025			    (uint64_t) net->ssthresh) /
1026			    (uint64_t) t_ssthresh);
1027		} else {
1028			uint64_t cc_delta;
1029
1030			srtt = net->lastsa;
1031			/* lastsa>>3;  we don't need to divide ... */
1032			if (srtt == 0) {
1033				srtt = 1;
1034			}
1035			cc_delta = t_ucwnd_sbw * (uint64_t) srtt / 2;
1036			if (cc_delta < t_cwnd) {
1037				net->ssthresh = (uint32_t) ((uint64_t) t_cwnd - cc_delta);
1038			} else {
1039				net->ssthresh = net->mtu;
1040			}
1041		}
1042		if ((net->cwnd > t_cwnd / 2) &&
1043		    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
1044			net->ssthresh = net->cwnd - t_cwnd / 2;
1045		}
1046		if (net->ssthresh < net->mtu) {
1047			net->ssthresh = net->mtu;
1048		}
1049	} else {
1050		net->ssthresh = max(net->cwnd / 2, 4 * net->mtu);
1051	}
1052	net->cwnd = net->mtu;
1053	net->partial_bytes_acked = 0;
1054	SDT_PROBE(sctp, cwnd, net, to,
1055	    stcb->asoc.my_vtag,
1056	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
1057	    net,
1058	    old_cwnd, net->cwnd);
1059	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1060		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
1061	}
1062}
1063
1064static void
1065sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *net,
1066    int in_window, int num_pkt_lost, int use_rtcc)
1067{
1068	int old_cwnd = net->cwnd;
1069
1070	if ((use_rtcc) && (net->lan_type == SCTP_LAN_LOCAL) && (net->cc_mod.rtcc.use_dccc_ecn)) {
1071		/* Data center Congestion Control */
1072		if (in_window == 0) {
1073			/*
1074			 * Go to CA with the cwnd at the point we sent the
1075			 * TSN that was marked with a CE.
1076			 */
1077			if (net->ecn_prev_cwnd < net->cwnd) {
1078				/* Restore to prev cwnd */
1079				net->cwnd = net->ecn_prev_cwnd - (net->mtu * num_pkt_lost);
1080			} else {
1081				/* Just cut in 1/2 */
1082				net->cwnd /= 2;
1083			}
1084			/* Drop to CA */
1085			net->ssthresh = net->cwnd - (num_pkt_lost * net->mtu);
1086			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1087				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
1088			}
1089		} else {
1090			/*
1091			 * Further tuning down required over the drastic
1092			 * orginal cut
1093			 */
1094			net->ssthresh -= (net->mtu * num_pkt_lost);
1095			net->cwnd -= (net->mtu * num_pkt_lost);
1096			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1097				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
1098			}
1099		}
1100		SCTP_STAT_INCR(sctps_ecnereducedcwnd);
1101	} else {
1102		if (in_window == 0) {
1103			SCTP_STAT_INCR(sctps_ecnereducedcwnd);
1104			net->ssthresh = net->cwnd / 2;
1105			if (net->ssthresh < net->mtu) {
1106				net->ssthresh = net->mtu;
1107				/*
1108				 * here back off the timer as well, to slow
1109				 * us down
1110				 */
1111				net->RTO <<= 1;
1112			}
1113			net->cwnd = net->ssthresh;
1114			SDT_PROBE(sctp, cwnd, net, ecn,
1115			    stcb->asoc.my_vtag,
1116			    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
1117			    net,
1118			    old_cwnd, net->cwnd);
1119			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1120				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
1121			}
1122		}
1123	}
1124
1125}
1126
1127static void
1128sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
1129    struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
1130    uint32_t * bottle_bw, uint32_t * on_queue)
1131{
1132	uint32_t bw_avail;
1133	unsigned int incr;
1134	int old_cwnd = net->cwnd;
1135
1136	/* get bottle neck bw */
1137	*bottle_bw = ntohl(cp->bottle_bw);
1138	/* and whats on queue */
1139	*on_queue = ntohl(cp->current_onq);
1140	/*
1141	 * adjust the on-queue if our flight is more it could be that the
1142	 * router has not yet gotten data "in-flight" to it
1143	 */
1144	if (*on_queue < net->flight_size) {
1145		*on_queue = net->flight_size;
1146	}
1147	/* rtt is measured in micro seconds, bottle_bw in bytes per second */
1148	bw_avail = (uint32_t) (((uint64_t) (*bottle_bw) * net->rtt) / (uint64_t) 1000000);
1149	if (bw_avail > *bottle_bw) {
1150		/*
1151		 * Cap the growth to no more than the bottle neck. This can
1152		 * happen as RTT slides up due to queues. It also means if
1153		 * you have more than a 1 second RTT with a empty queue you
1154		 * will be limited to the bottle_bw per second no matter if
1155		 * other points have 1/2 the RTT and you could get more
1156		 * out...
1157		 */
1158		bw_avail = *bottle_bw;
1159	}
1160	if (*on_queue > bw_avail) {
1161		/*
1162		 * No room for anything else don't allow anything else to be
1163		 * "added to the fire".
1164		 */
1165		int seg_inflight, seg_onqueue, my_portion;
1166
1167		net->partial_bytes_acked = 0;
1168		/* how much are we over queue size? */
1169		incr = *on_queue - bw_avail;
1170		if (stcb->asoc.seen_a_sack_this_pkt) {
1171			/*
1172			 * undo any cwnd adjustment that the sack might have
1173			 * made
1174			 */
1175			net->cwnd = net->prev_cwnd;
1176		}
1177		/* Now how much of that is mine? */
1178		seg_inflight = net->flight_size / net->mtu;
1179		seg_onqueue = *on_queue / net->mtu;
1180		my_portion = (incr * seg_inflight) / seg_onqueue;
1181
1182		/* Have I made an adjustment already */
1183		if (net->cwnd > net->flight_size) {
1184			/*
1185			 * for this flight I made an adjustment we need to
1186			 * decrease the portion by a share our previous
1187			 * adjustment.
1188			 */
1189			int diff_adj;
1190
1191			diff_adj = net->cwnd - net->flight_size;
1192			if (diff_adj > my_portion)
1193				my_portion = 0;
1194			else
1195				my_portion -= diff_adj;
1196		}
1197		/*
1198		 * back down to the previous cwnd (assume we have had a sack
1199		 * before this packet). minus what ever portion of the
1200		 * overage is my fault.
1201		 */
1202		net->cwnd -= my_portion;
1203
1204		/* we will NOT back down more than 1 MTU */
1205		if (net->cwnd <= net->mtu) {
1206			net->cwnd = net->mtu;
1207		}
1208		/* force into CA */
1209		net->ssthresh = net->cwnd - 1;
1210	} else {
1211		/*
1212		 * Take 1/4 of the space left or max burst up .. whichever
1213		 * is less.
1214		 */
1215		incr = (bw_avail - *on_queue) >> 2;
1216		if ((stcb->asoc.max_burst > 0) &&
1217		    (stcb->asoc.max_burst * net->mtu < incr)) {
1218			incr = stcb->asoc.max_burst * net->mtu;
1219		}
1220		net->cwnd += incr;
1221	}
1222	if (net->cwnd > bw_avail) {
1223		/* We can't exceed the pipe size */
1224		net->cwnd = bw_avail;
1225	}
1226	if (net->cwnd < net->mtu) {
1227		/* We always have 1 MTU */
1228		net->cwnd = net->mtu;
1229	}
1230	if (net->cwnd - old_cwnd != 0) {
1231		/* log only changes */
1232		SDT_PROBE(sctp, cwnd, net, pd,
1233		    stcb->asoc.my_vtag,
1234		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
1235		    net,
1236		    old_cwnd, net->cwnd);
1237		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1238			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
1239			    SCTP_CWND_LOG_FROM_SAT);
1240		}
1241	}
1242}
1243
1244static void
1245sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
1246    struct sctp_nets *net, int burst_limit)
1247{
1248	int old_cwnd = net->cwnd;
1249
1250	if (net->ssthresh < net->cwnd)
1251		net->ssthresh = net->cwnd;
1252	if (burst_limit) {
1253		net->cwnd = (net->flight_size + (burst_limit * net->mtu));
1254		SDT_PROBE(sctp, cwnd, net, bl,
1255		    stcb->asoc.my_vtag,
1256		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
1257		    net,
1258		    old_cwnd, net->cwnd);
1259		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1260			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
1261		}
1262	}
1263}
1264
/*
 * SACK handler of the default module: forwards to the common routine
 * with the RTCC algorithm switched off.
 */
static void
sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	const int use_rtcc = 0;	/* zero disables the rtcc algorithm */

	sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all,
	    will_exit, use_rtcc);
}
1273
/*
 * ECN-Echo handler of the default module: forwards to the common
 * routine with the RTCC algorithm switched off.
 */
static void
sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost)
{
	const int use_rtcc = 0;	/* zero disables the rtcc algorithm */

	sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost,
	    use_rtcc);
}
1281
/* Here starts the RTCCVAR type CC invented by RRS, which
 * is a slight modification of RFC 2581. We reuse a common routine
 * or two, since these algorithms are so close and need to
 * remain the same.
 */
static void
sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost)
{
	const int use_rtcc = 1;	/* non-zero selects the rtcc variant */

	sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost,
	    use_rtcc);
}
1293
1294
1295static
1296void
1297sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net,
1298    struct sctp_tmit_chunk *tp1)
1299{
1300	net->cc_mod.rtcc.bw_bytes += tp1->send_size;
1301}
1302
1303static void
1304sctp_cwnd_prepare_rtcc_net_for_sack(struct sctp_tcb *stcb SCTP_UNUSED,
1305    struct sctp_nets *net)
1306{
1307	if (net->cc_mod.rtcc.tls_needs_set > 0) {
1308		/* We had a bw measurment going on */
1309		struct timeval ltls;
1310
1311		SCTP_GETPTIME_TIMEVAL(&ltls);
1312		timevalsub(&ltls, &net->cc_mod.rtcc.tls);
1313		net->cc_mod.rtcc.new_tot_time = (ltls.tv_sec * 1000000) + ltls.tv_usec;
1314	}
1315}
1316
1317static void
1318sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb,
1319    struct sctp_nets *net)
1320{
1321	uint64_t vtag, probepoint;
1322
1323	if (net->cc_mod.rtcc.lbw) {
1324		/* Clear the old bw.. we went to 0 in-flight */
1325		vtag = (net->rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
1326		    (stcb->rport);
1327		probepoint = (((uint64_t) net->cwnd) << 32);
1328		/* Probe point 8 */
1329		probepoint |= ((8 << 16) | 0);
1330		SDT_PROBE(sctp, cwnd, net, rttvar,
1331		    vtag,
1332		    ((net->cc_mod.rtcc.lbw << 32) | 0),
1333		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
1334		    net->flight_size,
1335		    probepoint);
1336		net->cc_mod.rtcc.lbw_rtt = 0;
1337		net->cc_mod.rtcc.cwnd_at_bw_set = 0;
1338		net->cc_mod.rtcc.lbw = 0;
1339		net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
1340		net->cc_mod.rtcc.vol_reduce = 0;
1341		net->cc_mod.rtcc.bw_tot_time = 0;
1342		net->cc_mod.rtcc.bw_bytes = 0;
1343		net->cc_mod.rtcc.tls_needs_set = 0;
1344		if (net->cc_mod.rtcc.steady_step) {
1345			net->cc_mod.rtcc.vol_reduce = 0;
1346			net->cc_mod.rtcc.step_cnt = 0;
1347			net->cc_mod.rtcc.last_step_state = 0;
1348		}
1349		if (net->cc_mod.rtcc.ret_from_eq) {
1350			/* less aggressive one - reset cwnd too */
1351			uint32_t cwnd_in_mtu, cwnd;
1352
1353			cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
1354			if (cwnd_in_mtu == 0) {
1355				/*
1356				 * Using 0 means that the value of RFC 4960
1357				 * is used.
1358				 */
1359				cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
1360			} else {
1361				/*
1362				 * We take the minimum of the burst limit
1363				 * and the initial congestion window.
1364				 */
1365				if ((stcb->asoc.max_burst > 0) && (cwnd_in_mtu > stcb->asoc.max_burst))
1366					cwnd_in_mtu = stcb->asoc.max_burst;
1367				cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
1368			}
1369			if (net->cwnd > cwnd) {
1370				/*
1371				 * Only set if we are not a timeout (i.e.
1372				 * down to 1 mtu)
1373				 */
1374				net->cwnd = cwnd;
1375			}
1376		}
1377	}
1378}
1379
1380static void
1381sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb,
1382    struct sctp_nets *net)
1383{
1384	uint64_t vtag, probepoint;
1385
1386	sctp_set_initial_cc_param(stcb, net);
1387	stcb->asoc.use_precise_time = 1;
1388	probepoint = (((uint64_t) net->cwnd) << 32);
1389	probepoint |= ((9 << 16) | 0);
1390	vtag = (net->rtt << 32) |
1391	    (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
1392	    (stcb->rport);
1393	SDT_PROBE(sctp, cwnd, net, rttvar,
1394	    vtag,
1395	    0,
1396	    0,
1397	    0,
1398	    probepoint);
1399	net->cc_mod.rtcc.lbw_rtt = 0;
1400	net->cc_mod.rtcc.cwnd_at_bw_set = 0;
1401	net->cc_mod.rtcc.vol_reduce = 0;
1402	net->cc_mod.rtcc.lbw = 0;
1403	net->cc_mod.rtcc.vol_reduce = 0;
1404	net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
1405	net->cc_mod.rtcc.bw_tot_time = 0;
1406	net->cc_mod.rtcc.bw_bytes = 0;
1407	net->cc_mod.rtcc.tls_needs_set = 0;
1408	net->cc_mod.rtcc.ret_from_eq = SCTP_BASE_SYSCTL(sctp_rttvar_eqret);
1409	net->cc_mod.rtcc.steady_step = SCTP_BASE_SYSCTL(sctp_steady_step);
1410	net->cc_mod.rtcc.use_dccc_ecn = SCTP_BASE_SYSCTL(sctp_use_dccc_ecn);
1411	net->cc_mod.rtcc.step_cnt = 0;
1412	net->cc_mod.rtcc.last_step_state = 0;
1413
1414
1415}
1416
1417static int
1418sctp_cwnd_rtcc_socket_option(struct sctp_tcb *stcb, int setorget,
1419    struct sctp_cc_option *cc_opt)
1420{
1421	struct sctp_nets *net;
1422
1423	if (setorget == 1) {
1424		/* a set */
1425		if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
1426			if ((cc_opt->aid_value.assoc_value != 0) &&
1427			    (cc_opt->aid_value.assoc_value != 1)) {
1428				return (EINVAL);
1429			}
1430			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
1431				net->cc_mod.rtcc.ret_from_eq = cc_opt->aid_value.assoc_value;
1432			}
1433		} else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
1434			if ((cc_opt->aid_value.assoc_value != 0) &&
1435			    (cc_opt->aid_value.assoc_value != 1)) {
1436				return (EINVAL);
1437			}
1438			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
1439				net->cc_mod.rtcc.use_dccc_ecn = cc_opt->aid_value.assoc_value;
1440			}
1441		} else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
1442			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
1443				net->cc_mod.rtcc.steady_step = cc_opt->aid_value.assoc_value;
1444			}
1445		} else {
1446			return (EINVAL);
1447		}
1448	} else {
1449		/* a get */
1450		if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
1451			net = TAILQ_FIRST(&stcb->asoc.nets);
1452			if (net == NULL) {
1453				return (EFAULT);
1454			}
1455			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.ret_from_eq;
1456		} else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
1457			net = TAILQ_FIRST(&stcb->asoc.nets);
1458			if (net == NULL) {
1459				return (EFAULT);
1460			}
1461			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.use_dccc_ecn;
1462		} else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
1463			net = TAILQ_FIRST(&stcb->asoc.nets);
1464			if (net == NULL) {
1465				return (EFAULT);
1466			}
1467			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.steady_step;
1468		} else {
1469			return (EINVAL);
1470		}
1471	}
1472	return (0);
1473}
1474
1475static void
1476sctp_cwnd_update_rtcc_packet_transmitted(struct sctp_tcb *stcb SCTP_UNUSED,
1477    struct sctp_nets *net)
1478{
1479	if (net->cc_mod.rtcc.tls_needs_set == 0) {
1480		SCTP_GETPTIME_TIMEVAL(&net->cc_mod.rtcc.tls);
1481		net->cc_mod.rtcc.tls_needs_set = 2;
1482	}
1483}
1484
/*
 * SACK handler of the RTCC module: forwards to the common routine with
 * the RTCC algorithm switched on.
 */
static void
sctp_cwnd_update_rtcc_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	const int use_rtcc = 1;	/* one enables the rtcc algorithm */

	sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all,
	    will_exit, use_rtcc);
}
1493
1494static void
1495sctp_rtt_rtcc_calculated(struct sctp_tcb *stcb SCTP_UNUSED,
1496    struct sctp_nets *net,
1497    struct timeval *now SCTP_UNUSED)
1498{
1499	net->cc_mod.rtcc.rtt_set_this_sack = 1;
1500}
1501
/* Here starts Sally Floyd's HS-TCP */
1503
/* One row of the HS-TCP adjustment table (sctp_cwnd_adjust). */
struct sctp_hs_raise_drop {
	int32_t cwnd;		/* row threshold; compared against net->cwnd >> 10 */
	int32_t increase;	/* growth step; shifted left by 10 before it is added to cwnd */
	int32_t drop_percent;	/* percentage of cwnd taken off on a decrease */
};
1509
/* Number of rows in sctp_cwnd_adjust. */
#define SCTP_HS_TABLE_SIZE 73

/*
 * HS-TCP adjustment table: { cwnd, increase, drop_percent } per row,
 * listed by ascending cwnd threshold.  sctp_hs_cwnd_increase() and
 * sctp_hs_cwnd_decrease() compare net->cwnd >> 10 against the cwnd
 * column; below row 0 they fall back to their "normal mode".
 * The trailing comment on each row is its index.
 */
struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
	{38, 1, 50},		/* 0   */
	{118, 2, 44},		/* 1   */
	{221, 3, 41},		/* 2   */
	{347, 4, 38},		/* 3   */
	{495, 5, 37},		/* 4   */
	{663, 6, 35},		/* 5   */
	{851, 7, 34},		/* 6   */
	{1058, 8, 33},		/* 7   */
	{1284, 9, 32},		/* 8   */
	{1529, 10, 31},		/* 9   */
	{1793, 11, 30},		/* 10  */
	{2076, 12, 29},		/* 11  */
	{2378, 13, 28},		/* 12  */
	{2699, 14, 28},		/* 13  */
	{3039, 15, 27},		/* 14  */
	{3399, 16, 27},		/* 15  */
	{3778, 17, 26},		/* 16  */
	{4177, 18, 26},		/* 17  */
	{4596, 19, 25},		/* 18  */
	{5036, 20, 25},		/* 19  */
	{5497, 21, 24},		/* 20  */
	{5979, 22, 24},		/* 21  */
	{6483, 23, 23},		/* 22  */
	{7009, 24, 23},		/* 23  */
	{7558, 25, 22},		/* 24  */
	{8130, 26, 22},		/* 25  */
	{8726, 27, 22},		/* 26  */
	{9346, 28, 21},		/* 27  */
	{9991, 29, 21},		/* 28  */
	{10661, 30, 21},	/* 29  */
	{11358, 31, 20},	/* 30  */
	{12082, 32, 20},	/* 31  */
	{12834, 33, 20},	/* 32  */
	{13614, 34, 19},	/* 33  */
	{14424, 35, 19},	/* 34  */
	{15265, 36, 19},	/* 35  */
	{16137, 37, 19},	/* 36  */
	{17042, 38, 18},	/* 37  */
	{17981, 39, 18},	/* 38  */
	{18955, 40, 18},	/* 39  */
	{19965, 41, 17},	/* 40  */
	{21013, 42, 17},	/* 41  */
	{22101, 43, 17},	/* 42  */
	{23230, 44, 17},	/* 43  */
	{24402, 45, 16},	/* 44  */
	{25618, 46, 16},	/* 45  */
	{26881, 47, 16},	/* 46  */
	{28193, 48, 16},	/* 47  */
	{29557, 49, 15},	/* 48  */
	{30975, 50, 15},	/* 49  */
	{32450, 51, 15},	/* 50  */
	{33986, 52, 15},	/* 51  */
	{35586, 53, 14},	/* 52  */
	{37253, 54, 14},	/* 53  */
	{38992, 55, 14},	/* 54  */
	{40808, 56, 14},	/* 55  */
	{42707, 57, 13},	/* 56  */
	{44694, 58, 13},	/* 57  */
	{46776, 59, 13},	/* 58  */
	{48961, 60, 13},	/* 59  */
	{51258, 61, 13},	/* 60  */
	{53677, 62, 12},	/* 61  */
	{56230, 63, 12},	/* 62  */
	{58932, 64, 12},	/* 63  */
	{61799, 65, 12},	/* 64  */
	{64851, 66, 11},	/* 65  */
	{68113, 67, 11},	/* 66  */
	{71617, 68, 11},	/* 67  */
	{75401, 69, 10},	/* 68  */
	{79517, 70, 10},	/* 69  */
	{84035, 71, 10},	/* 70  */
	{89053, 72, 10},	/* 71  */
	{94717, 73, 9}		/* 72  */
};
1587
1588static void
1589sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
1590{
1591	int cur_val, i, indx, incr;
1592
1593	cur_val = net->cwnd >> 10;
1594	indx = SCTP_HS_TABLE_SIZE - 1;
1595
1596	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
1597		/* normal mode */
1598		if (net->net_ack > net->mtu) {
1599			net->cwnd += net->mtu;
1600			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1601				sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
1602			}
1603		} else {
1604			net->cwnd += net->net_ack;
1605			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1606				sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
1607			}
1608		}
1609	} else {
1610		for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
1611			if (cur_val < sctp_cwnd_adjust[i].cwnd) {
1612				indx = i;
1613				break;
1614			}
1615		}
1616		net->last_hs_used = indx;
1617		incr = ((sctp_cwnd_adjust[indx].increase) << 10);
1618		net->cwnd += incr;
1619		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1620			sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
1621		}
1622	}
1623}
1624
1625static void
1626sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
1627{
1628	int cur_val, i, indx;
1629	int old_cwnd = net->cwnd;
1630
1631	cur_val = net->cwnd >> 10;
1632	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
1633		/* normal mode */
1634		net->ssthresh = net->cwnd / 2;
1635		if (net->ssthresh < (net->mtu * 2)) {
1636			net->ssthresh = 2 * net->mtu;
1637		}
1638		net->cwnd = net->ssthresh;
1639	} else {
1640		/* drop by the proper amount */
1641		net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
1642		    sctp_cwnd_adjust[net->last_hs_used].drop_percent);
1643		net->cwnd = net->ssthresh;
1644		/* now where are we */
1645		indx = net->last_hs_used;
1646		cur_val = net->cwnd >> 10;
1647		/* reset where we are in the table */
1648		if (cur_val < sctp_cwnd_adjust[0].cwnd) {
1649			/* feel out of hs */
1650			net->last_hs_used = 0;
1651		} else {
1652			for (i = indx; i >= 1; i--) {
1653				if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
1654					break;
1655				}
1656			}
1657			net->last_hs_used = indx;
1658		}
1659	}
1660	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1661		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
1662	}
1663}
1664
1665static void
1666sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
1667    struct sctp_association *asoc)
1668{
1669	struct sctp_nets *net;
1670
1671	/*
1672	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
1673	 * (net->fast_retran_loss_recovery == 0)))
1674	 */
1675	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
1676		if ((asoc->fast_retran_loss_recovery == 0) ||
1677		    (asoc->sctp_cmt_on_off > 0)) {
1678			/* out of a RFC2582 Fast recovery window? */
1679			if (net->net_ack > 0) {
1680				/*
1681				 * per section 7.2.3, are there any
1682				 * destinations that had a fast retransmit
1683				 * to them. If so what we need to do is
1684				 * adjust ssthresh and cwnd.
1685				 */
1686				struct sctp_tmit_chunk *lchk;
1687
1688				sctp_hs_cwnd_decrease(stcb, net);
1689
1690				lchk = TAILQ_FIRST(&asoc->send_queue);
1691
1692				net->partial_bytes_acked = 0;
1693				/* Turn on fast recovery window */
1694				asoc->fast_retran_loss_recovery = 1;
1695				if (lchk == NULL) {
1696					/* Mark end of the window */
1697					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
1698				} else {
1699					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
1700				}
1701
1702				/*
1703				 * CMT fast recovery -- per destination
1704				 * recovery variable.
1705				 */
1706				net->fast_retran_loss_recovery = 1;
1707
1708				if (lchk == NULL) {
1709					/* Mark end of the window */
1710					net->fast_recovery_tsn = asoc->sending_seq - 1;
1711				} else {
1712					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
1713				}
1714
1715				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
1716				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
1717				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
1718				    stcb->sctp_ep, stcb, net);
1719			}
1720		} else if (net->net_ack > 0) {
1721			/*
1722			 * Mark a peg that we WOULD have done a cwnd
1723			 * reduction but RFC2582 prevented this action.
1724			 */
1725			SCTP_STAT_INCR(sctps_fastretransinrtt);
1726		}
1727	}
1728}
1729
1730static void
1731sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
1732    struct sctp_association *asoc,
1733    int accum_moved, int reneged_all SCTP_UNUSED, int will_exit)
1734{
1735	struct sctp_nets *net;
1736
1737	/******************************/
1738	/* update cwnd and Early FR   */
1739	/******************************/
1740	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
1741
1742#ifdef JANA_CMT_FAST_RECOVERY
1743		/*
1744		 * CMT fast recovery code. Need to debug.
1745		 */
1746		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
1747			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
1748			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
1749				net->will_exit_fast_recovery = 1;
1750			}
1751		}
1752#endif
1753		/* if nothing was acked on this destination skip it */
1754		if (net->net_ack == 0) {
1755			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
1756				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
1757			}
1758			continue;
1759		}
1760#ifdef JANA_CMT_FAST_RECOVERY
1761		/*
1762		 * CMT fast recovery code
1763		 */
1764		/*
1765		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
1766		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
1767		 * } else if (sctp_cmt_on_off == 0 &&
1768		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
1769		 */
1770#endif
1771
1772		if (asoc->fast_retran_loss_recovery &&
1773		    (will_exit == 0) &&
1774		    (asoc->sctp_cmt_on_off == 0)) {
1775			/*
1776			 * If we are in loss recovery we skip any cwnd
1777			 * update
1778			 */
1779			return;
1780		}
1781		/*
1782		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
1783		 * moved.
1784		 */
1785		if (accum_moved ||
1786		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
1787			/* If the cumulative ack moved we can proceed */
1788			if (net->cwnd <= net->ssthresh) {
1789				/* We are in slow start */
1790				if (net->flight_size + net->net_ack >= net->cwnd) {
1791
1792					sctp_hs_cwnd_increase(stcb, net);
1793
1794				} else {
1795					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
1796						sctp_log_cwnd(stcb, net, net->net_ack,
1797						    SCTP_CWND_LOG_NOADV_SS);
1798					}
1799				}
1800			} else {
1801				/* We are in congestion avoidance */
1802				net->partial_bytes_acked += net->net_ack;
1803				if ((net->flight_size + net->net_ack >= net->cwnd) &&
1804				    (net->partial_bytes_acked >= net->cwnd)) {
1805					net->partial_bytes_acked -= net->cwnd;
1806					net->cwnd += net->mtu;
1807					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1808						sctp_log_cwnd(stcb, net, net->mtu,
1809						    SCTP_CWND_LOG_FROM_CA);
1810					}
1811				} else {
1812					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
1813						sctp_log_cwnd(stcb, net, net->net_ack,
1814						    SCTP_CWND_LOG_NOADV_CA);
1815					}
1816				}
1817			}
1818		} else {
1819			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
1820				sctp_log_cwnd(stcb, net, net->mtu,
1821				    SCTP_CWND_LOG_NO_CUMACK);
1822			}
1823		}
1824	}
1825}
1826
1827
1828/*
1829 * H-TCP congestion control. The algorithm is detailed in:
1830 * R.N.Shorten, D.J.Leith:
1831 *   "H-TCP: TCP for high-speed and long-distance networks"
1832 *   Proc. PFLDnet, Argonne, 2004.
1833 * http://www.hamilton.ie/net/htcp3.pdf
1834 */
1835
1836
/* Non-zero: htcp_alpha_update() rescales its increase factor by minRTT. */
static int use_rtt_scaling = 1;
/*
 * Non-zero: measure_achieved_throughput() keeps its bandwidth estimate
 * and htcp_beta_update() checks maxB against old_maxB.
 */
static int use_bandwidth_switch = 1;
1839
/*
 * Returns non-zero when seq1 lies in the interval [seq2, seq3],
 * evaluated with wrap-around (modulo 2^32) arithmetic.
 */
static inline int
between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	const uint32_t span = seq3 - seq2;	/* interval length */
	const uint32_t offset = seq1 - seq2;	/* distance of seq1 from the start */

	return (offset <= span);
}
1845
1846static inline uint32_t
1847htcp_cong_time(struct htcp *ca)
1848{
1849	return (sctp_get_tick_count() - ca->last_cong);
1850}
1851
1852static inline uint32_t
1853htcp_ccount(struct htcp *ca)
1854{
1855	return (htcp_cong_time(ca) / ca->minRTT);
1856}
1857
1858static inline void
1859htcp_reset(struct htcp *ca)
1860{
1861	ca->undo_last_cong = ca->last_cong;
1862	ca->undo_maxRTT = ca->maxRTT;
1863	ca->undo_old_maxB = ca->old_maxB;
1864	ca->last_cong = sctp_get_tick_count();
1865}
1866
1867#ifdef SCTP_NOT_USED
1868
1869static uint32_t
1870htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
1871{
1872	net->cc_mod.htcp_ca.last_cong = net->cc_mod.htcp_ca.undo_last_cong;
1873	net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.undo_maxRTT;
1874	net->cc_mod.htcp_ca.old_maxB = net->cc_mod.htcp_ca.undo_old_maxB;
1875	return (max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->cc_mod.htcp_ca.beta) * net->mtu));
1876}
1877
1878#endif
1879
1880static inline void
1881measure_rtt(struct sctp_nets *net)
1882{
1883	uint32_t srtt = net->lastsa >> SCTP_RTT_SHIFT;
1884
1885	/* keep track of minimum RTT seen so far, minRTT is zero at first */
1886	if (net->cc_mod.htcp_ca.minRTT > srtt || !net->cc_mod.htcp_ca.minRTT)
1887		net->cc_mod.htcp_ca.minRTT = srtt;
1888
1889	/* max RTT */
1890	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->cc_mod.htcp_ca) > 3) {
1891		if (net->cc_mod.htcp_ca.maxRTT < net->cc_mod.htcp_ca.minRTT)
1892			net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.minRTT;
1893		if (net->cc_mod.htcp_ca.maxRTT < srtt && srtt <= net->cc_mod.htcp_ca.maxRTT + MSEC_TO_TICKS(20))
1894			net->cc_mod.htcp_ca.maxRTT = srtt;
1895	}
1896}
1897
1898static void
1899measure_achieved_throughput(struct sctp_nets *net)
1900{
1901	uint32_t now = sctp_get_tick_count();
1902
1903	if (net->fast_retran_ip == 0)
1904		net->cc_mod.htcp_ca.bytes_acked = net->net_ack;
1905
1906	if (!use_bandwidth_switch)
1907		return;
1908
1909	/* achieved throughput calculations */
1910	/* JRS - not 100% sure of this statement */
1911	if (net->fast_retran_ip == 1) {
1912		net->cc_mod.htcp_ca.bytecount = 0;
1913		net->cc_mod.htcp_ca.lasttime = now;
1914		return;
1915	}
1916	net->cc_mod.htcp_ca.bytecount += net->net_ack;
1917	if ((net->cc_mod.htcp_ca.bytecount >= net->cwnd - (((net->cc_mod.htcp_ca.alpha >> 7) ? (net->cc_mod.htcp_ca.alpha >> 7) : 1) * net->mtu)) &&
1918	    (now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT) &&
1919	    (net->cc_mod.htcp_ca.minRTT > 0)) {
1920		uint32_t cur_Bi = net->cc_mod.htcp_ca.bytecount / net->mtu * hz / (now - net->cc_mod.htcp_ca.lasttime);
1921
1922		if (htcp_ccount(&net->cc_mod.htcp_ca) <= 3) {
1923			/* just after backoff */
1924			net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi = cur_Bi;
1925		} else {
1926			net->cc_mod.htcp_ca.Bi = (3 * net->cc_mod.htcp_ca.Bi + cur_Bi) / 4;
1927			if (net->cc_mod.htcp_ca.Bi > net->cc_mod.htcp_ca.maxB)
1928				net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi;
1929			if (net->cc_mod.htcp_ca.minB > net->cc_mod.htcp_ca.maxB)
1930				net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB;
1931		}
1932		net->cc_mod.htcp_ca.bytecount = 0;
1933		net->cc_mod.htcp_ca.lasttime = now;
1934	}
1935}
1936
1937static inline void
1938htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
1939{
1940	if (use_bandwidth_switch) {
1941		uint32_t maxB = ca->maxB;
1942		uint32_t old_maxB = ca->old_maxB;
1943
1944		ca->old_maxB = ca->maxB;
1945
1946		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
1947			ca->beta = BETA_MIN;
1948			ca->modeswitch = 0;
1949			return;
1950		}
1951	}
1952	if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) {
1953		ca->beta = (minRTT << 7) / maxRTT;
1954		if (ca->beta < BETA_MIN)
1955			ca->beta = BETA_MIN;
1956		else if (ca->beta > BETA_MAX)
1957			ca->beta = BETA_MAX;
1958	} else {
1959		ca->beta = BETA_MIN;
1960		ca->modeswitch = 1;
1961	}
1962}
1963
1964static inline void
1965htcp_alpha_update(struct htcp *ca)
1966{
1967	uint32_t minRTT = ca->minRTT;
1968	uint32_t factor = 1;
1969	uint32_t diff = htcp_cong_time(ca);
1970
1971	if (diff > (uint32_t) hz) {
1972		diff -= hz;
1973		factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
1974	}
1975	if (use_rtt_scaling && minRTT) {
1976		uint32_t scale = (hz << 3) / (10 * minRTT);
1977
1978		scale = min(max(scale, 1U << 2), 10U << 3);	/* clamping ratio to
1979								 * interval [0.5,10]<<3 */
1980		factor = (factor << 3) / scale;
1981		if (!factor)
1982			factor = 1;
1983	}
1984	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
1985	if (!ca->alpha)
1986		ca->alpha = ALPHA_BASE;
1987}
1988
1989/* After we have the rtt data to calculate beta, we'd still prefer to wait one
1990 * rtt before we adjust our beta to ensure we are working from a consistent
1991 * data.
1992 *
1993 * This function should be called when we hit a congestion event since only at
1994 * that point do we really have a real sense of maxRTT (the queues en route
1995 * were getting just too full now).
1996 */
1997static void
1998htcp_param_update(struct sctp_nets *net)
1999{
2000	uint32_t minRTT = net->cc_mod.htcp_ca.minRTT;
2001	uint32_t maxRTT = net->cc_mod.htcp_ca.maxRTT;
2002
2003	htcp_beta_update(&net->cc_mod.htcp_ca, minRTT, maxRTT);
2004	htcp_alpha_update(&net->cc_mod.htcp_ca);
2005
2006	/*
2007	 * add slowly fading memory for maxRTT to accommodate routing
2008	 * changes etc
2009	 */
2010	if (minRTT > 0 && maxRTT > minRTT)
2011		net->cc_mod.htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
2012}
2013
2014static uint32_t
2015htcp_recalc_ssthresh(struct sctp_nets *net)
2016{
2017	htcp_param_update(net);
2018	return (max(((net->cwnd / net->mtu * net->cc_mod.htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu));
2019}
2020
2021static void
2022htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
2023{
2024	/*-
2025	 * How to handle these functions?
2026         *	if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
2027	 *		return;
2028	 */
2029	if (net->cwnd <= net->ssthresh) {
2030		/* We are in slow start */
2031		if (net->flight_size + net->net_ack >= net->cwnd) {
2032			if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
2033				net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
2034				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
2035					sctp_log_cwnd(stcb, net, net->mtu,
2036					    SCTP_CWND_LOG_FROM_SS);
2037				}
2038			} else {
2039				net->cwnd += net->net_ack;
2040				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
2041					sctp_log_cwnd(stcb, net, net->net_ack,
2042					    SCTP_CWND_LOG_FROM_SS);
2043				}
2044			}
2045		} else {
2046			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
2047				sctp_log_cwnd(stcb, net, net->net_ack,
2048				    SCTP_CWND_LOG_NOADV_SS);
2049			}
2050		}
2051	} else {
2052		measure_rtt(net);
2053
2054		/*
2055		 * In dangerous area, increase slowly. In theory this is
2056		 * net->cwnd += alpha / net->cwnd
2057		 */
2058		/* What is snd_cwnd_cnt?? */
2059		if (((net->partial_bytes_acked / net->mtu * net->cc_mod.htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
2060			/*-
2061			 * Does SCTP have a cwnd clamp?
2062			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
2063			 */
2064			net->cwnd += net->mtu;
2065			net->partial_bytes_acked = 0;
2066			htcp_alpha_update(&net->cc_mod.htcp_ca);
2067			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
2068				sctp_log_cwnd(stcb, net, net->mtu,
2069				    SCTP_CWND_LOG_FROM_CA);
2070			}
2071		} else {
2072			net->partial_bytes_acked += net->net_ack;
2073			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
2074				sctp_log_cwnd(stcb, net, net->net_ack,
2075				    SCTP_CWND_LOG_NOADV_CA);
2076			}
2077		}
2078
2079		net->cc_mod.htcp_ca.bytes_acked = net->mtu;
2080	}
2081}
2082
#ifdef SCTP_NOT_USED
/*
 * Lower bound on the congestion window: the destination's current ssthresh.
 * Only compiled when SCTP_NOT_USED is defined; stcb is not consulted.
 */
static uint32_t
htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	const uint32_t lower_bound = net->ssthresh;

	return (lower_bound);
}

#endif
2092
2093static void
2094htcp_init(struct sctp_nets *net)
2095{
2096	memset(&net->cc_mod.htcp_ca, 0, sizeof(struct htcp));
2097	net->cc_mod.htcp_ca.alpha = ALPHA_BASE;
2098	net->cc_mod.htcp_ca.beta = BETA_MIN;
2099	net->cc_mod.htcp_ca.bytes_acked = net->mtu;
2100	net->cc_mod.htcp_ca.last_cong = sctp_get_tick_count();
2101}
2102
2103static void
2104sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
2105{
2106	/*
2107	 * We take the max of the burst limit times a MTU or the
2108	 * INITIAL_CWND. We then limit this to 4 MTU's of sending.
2109	 */
2110	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
2111	net->ssthresh = stcb->asoc.peers_rwnd;
2112	htcp_init(net);
2113
2114	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
2115		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
2116	}
2117}
2118
2119static void
2120sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
2121    struct sctp_association *asoc,
2122    int accum_moved, int reneged_all SCTP_UNUSED, int will_exit)
2123{
2124	struct sctp_nets *net;
2125
2126	/******************************/
2127	/* update cwnd and Early FR   */
2128	/******************************/
2129	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
2130
2131#ifdef JANA_CMT_FAST_RECOVERY
2132		/*
2133		 * CMT fast recovery code. Need to debug.
2134		 */
2135		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
2136			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
2137			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
2138				net->will_exit_fast_recovery = 1;
2139			}
2140		}
2141#endif
2142		/* if nothing was acked on this destination skip it */
2143		if (net->net_ack == 0) {
2144			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
2145				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
2146			}
2147			continue;
2148		}
2149#ifdef JANA_CMT_FAST_RECOVERY
2150		/*
2151		 * CMT fast recovery code
2152		 */
2153		/*
2154		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
2155		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
2156		 * } else if (sctp_cmt_on_off == 0 &&
2157		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
2158		 */
2159#endif
2160
2161		if (asoc->fast_retran_loss_recovery &&
2162		    will_exit == 0 &&
2163		    (asoc->sctp_cmt_on_off == 0)) {
2164			/*
2165			 * If we are in loss recovery we skip any cwnd
2166			 * update
2167			 */
2168			return;
2169		}
2170		/*
2171		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
2172		 * moved.
2173		 */
2174		if (accum_moved ||
2175		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
2176			htcp_cong_avoid(stcb, net);
2177			measure_achieved_throughput(net);
2178		} else {
2179			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
2180				sctp_log_cwnd(stcb, net, net->mtu,
2181				    SCTP_CWND_LOG_NO_CUMACK);
2182			}
2183		}
2184	}
2185}
2186
2187static void
2188sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
2189    struct sctp_association *asoc)
2190{
2191	struct sctp_nets *net;
2192
2193	/*
2194	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
2195	 * (net->fast_retran_loss_recovery == 0)))
2196	 */
2197	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
2198		if ((asoc->fast_retran_loss_recovery == 0) ||
2199		    (asoc->sctp_cmt_on_off > 0)) {
2200			/* out of a RFC2582 Fast recovery window? */
2201			if (net->net_ack > 0) {
2202				/*
2203				 * per section 7.2.3, are there any
2204				 * destinations that had a fast retransmit
2205				 * to them. If so what we need to do is
2206				 * adjust ssthresh and cwnd.
2207				 */
2208				struct sctp_tmit_chunk *lchk;
2209				int old_cwnd = net->cwnd;
2210
2211				/* JRS - reset as if state were changed */
2212				htcp_reset(&net->cc_mod.htcp_ca);
2213				net->ssthresh = htcp_recalc_ssthresh(net);
2214				net->cwnd = net->ssthresh;
2215				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
2216					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
2217					    SCTP_CWND_LOG_FROM_FR);
2218				}
2219				lchk = TAILQ_FIRST(&asoc->send_queue);
2220
2221				net->partial_bytes_acked = 0;
2222				/* Turn on fast recovery window */
2223				asoc->fast_retran_loss_recovery = 1;
2224				if (lchk == NULL) {
2225					/* Mark end of the window */
2226					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
2227				} else {
2228					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
2229				}
2230
2231				/*
2232				 * CMT fast recovery -- per destination
2233				 * recovery variable.
2234				 */
2235				net->fast_retran_loss_recovery = 1;
2236
2237				if (lchk == NULL) {
2238					/* Mark end of the window */
2239					net->fast_recovery_tsn = asoc->sending_seq - 1;
2240				} else {
2241					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
2242				}
2243
2244				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
2245				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
2246				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
2247				    stcb->sctp_ep, stcb, net);
2248			}
2249		} else if (net->net_ack > 0) {
2250			/*
2251			 * Mark a peg that we WOULD have done a cwnd
2252			 * reduction but RFC2582 prevented this action.
2253			 */
2254			SCTP_STAT_INCR(sctps_fastretransinrtt);
2255		}
2256	}
2257}
2258
2259static void
2260sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
2261    struct sctp_nets *net)
2262{
2263	int old_cwnd = net->cwnd;
2264
2265	/* JRS - reset as if the state were being changed to timeout */
2266	htcp_reset(&net->cc_mod.htcp_ca);
2267	net->ssthresh = htcp_recalc_ssthresh(net);
2268	net->cwnd = net->mtu;
2269	net->partial_bytes_acked = 0;
2270	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
2271		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
2272	}
2273}
2274
2275static void
2276sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
2277    struct sctp_nets *net, int in_window, int num_pkt_lost SCTP_UNUSED)
2278{
2279	int old_cwnd;
2280
2281	old_cwnd = net->cwnd;
2282
2283	/* JRS - reset hctp as if state changed */
2284	if (in_window == 0) {
2285		htcp_reset(&net->cc_mod.htcp_ca);
2286		SCTP_STAT_INCR(sctps_ecnereducedcwnd);
2287		net->ssthresh = htcp_recalc_ssthresh(net);
2288		if (net->ssthresh < net->mtu) {
2289			net->ssthresh = net->mtu;
2290			/* here back off the timer as well, to slow us down */
2291			net->RTO <<= 1;
2292		}
2293		net->cwnd = net->ssthresh;
2294		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
2295			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
2296		}
2297	}
2298}
2299
2300struct sctp_cc_functions sctp_cc_functions[] = {
2301	{
2302		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
2303		.sctp_cwnd_update_after_sack = sctp_cwnd_update_after_sack,
2304		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
2305		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
2306		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
2307		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
2308		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
2309		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
2310	},
2311	{
2312		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
2313		.sctp_cwnd_update_after_sack = sctp_hs_cwnd_update_after_sack,
2314		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
2315		.sctp_cwnd_update_after_fr = sctp_hs_cwnd_update_after_fr,
2316		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
2317		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
2318		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
2319		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
2320	},
2321	{
2322		.sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param,
2323		.sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack,
2324		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
2325		.sctp_cwnd_update_after_fr = sctp_htcp_cwnd_update_after_fr,
2326		.sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout,
2327		.sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo,
2328		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
2329		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
2330	},
2331	{
2332		.sctp_set_initial_cc_param = sctp_set_rtcc_initial_cc_param,
2333		.sctp_cwnd_update_after_sack = sctp_cwnd_update_rtcc_after_sack,
2334		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
2335		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
2336		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
2337		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_rtcc_after_ecn_echo,
2338		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
2339		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
2340		.sctp_cwnd_update_packet_transmitted = sctp_cwnd_update_rtcc_packet_transmitted,
2341		.sctp_cwnd_update_tsn_acknowledged = sctp_cwnd_update_rtcc_tsn_acknowledged,
2342		.sctp_cwnd_new_transmission_begins = sctp_cwnd_new_rtcc_transmission_begins,
2343		.sctp_cwnd_prepare_net_for_sack = sctp_cwnd_prepare_rtcc_net_for_sack,
2344		.sctp_cwnd_socket_option = sctp_cwnd_rtcc_socket_option,
2345		.sctp_rtt_calculated = sctp_rtt_rtcc_calculated
2346	}
2347};
2348