Lines Matching defs:to

172  * connection back to using the "default" TCP stack that is
178 * functions role in life is to mandate the connection supports SACK
181 * state is simplified due to the fact that the original do_segment
206 static uint8_t rack_req_del_mss = 18; /* How many segments need to be sent in a recovery episode to do policer_detection */
209 static uint32_t rack_gp_gain_req = 1200; /* Amount percent wise required to gain to record a round as "gaining" */
222 static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */
252 static int32_t rack_bw_multipler = 0; /* Limit on fill cw's jump up to be this x gp_est */
266 static int32_t rack_sack_not_required = 1; /* set to one to allow non-sack to use rack */
311 static uint16_t rack_atexit_prtt_hbp = 130; /* Clamp to 130% on exit prtt if highly buffered path */
312 static uint16_t rack_atexit_prtt = 130; /* Clamp to 100% on exit prtt if non highly buffered path */
316 static uint32_t rack_probertt_use_min_rtt_entry = 1; /* Use the min to calculate the goal else gp_srtt */
323 static uint32_t rack_min_probertt_hold = 40000; /* Equal to delayed ack time */
326 static uint32_t rack_min_rtt_movement = 250000; /* Must move at least 250ms (in microseconds) to count as a lowering */
330 static int32_t rack_hbp_thresh = 3; /* what is the divisor max_rtt/min_rtt to decide a hbp */
333 static int32_t rack_max_per_above = 30; /* When we go to increment stop if above 100+this% */
345 * as potential caps when adjustments are made to the timely
358 static int32_t rack_per_lower_bound = 50; /* Don't allow to drop below this multiplier */
359 static int32_t rack_per_upper_bound_ss = 0; /* Don't allow SS to grow above this */
360 static int32_t rack_per_upper_bound_ca = 0; /* Don't allow CA to grow above this */
458 struct socket *so, struct tcpcb *tp, struct tcpopt *to,
519 rack_log_ack(struct tcpcb *tp, struct tcpopt *to,
523 rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len,
538 struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm,
554 struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type, tcp_seq th_ack);
558 struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
566 struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
570 struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
574 struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
578 struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
582 struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
586 struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
590 struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
594 struct socket *so, struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen,
726 * Use the swap utility placing in 3 for flex8 to id a
739 * Use the swap utility placing in 4 for flex8 to id a
876 printf("Rack has no stats counters to clear (use 1 to clear all stats in sysctl node)\n");
931 "What percentage above goodput do we clamp CA/SS to at exit on high-BDP path 110%");
936 "What percentage above goodput do we clamp CA/SS to at exit on a non high-BDP path 100%");
951 "What percentage of goodput do we allow the multiplier to fall to");
971 "Maximum number of gp_srtt's to hold in drain waiting for flight to reach goal");
976 "We must drain this many gp_srtt's waiting for flight to reach goal");
981 "Should we use the min-rtt to calculate the goal rtt (else gp_srtt) at entry");
986 "How to set cwnd at exit, 0 - dynamic, 1 - use min-rtt, 2 - use curgprtt, 3 - entry gp-rtt");
1016 "How much is the minimum movement in rtt to count as a drop for probertt purposes");
1048 "How many rounds before we need to do a PCM measurement");
1058 "How much do we have to increase the GP to record the round 1200 = 120.0");
1088 "What is the default divisor given to the rl code?");
1113 "If non zero, what percentage of goodput to pace at in slow start");
1118 "If non zero, what percentage of goodput to pace at in congestion avoidance");
1123 "If non zero, what percentage of goodput to pace at in recovery");
1143 "If set we apply this value to the absolute rate cap used by pacing");
1165 "How many times does snd_wnd need to be bigger than pace_max_seg so we will hold off and get more acks?");
1170 "Do we always precheck the hdwr pacing queue to avoid ENOBUF's?");
1190 "Should RACK attempt to use hw pacing?");
1205 "Do we need a minimum estimate of this many bytes per second in order to engage hw pacing?");
1215 "Do we allow hw pacing to lower the rate selected?");
1267 "Rack timely lowest percentage we allow GP multiplier to fall to");
1272 "Profile 5 upper bound to timely gain");
1278 "Rack timely highest percentage we allow GP multiplier in SS to raise to (0 is no upperbound)");
1283 "Rack timely highest percentage we allow GP multiplier to CA raise to (0 is no upperbound)");
1298 "Rack timely what threshold do we count to before another boost during b/w decent");
1318 "Rack timely when deciding if to backoff on a loss, do we use under max rtt else min");
1356 "Since we do early recovery, do we override the l_abc to a value, if so what?");
1459 "Minimum RTO in microseconds -- set with caution below 1000 due to TLP");
1501 "What is the goal BDP to measure");
1506 "What is the goal BDP to measure");
1565 "Percentage of retransmits we need to be a possible policer (499 = 49.9 percent)");
1570 "What threshold of average retransmits needed to recover a lost packet (1 - 169 aka 21 = 2.1)?");
1575 "What threshold of Median retransmits needed to recover a lost packet (1 - 16)?");
1590 "How many MSS must be delivered during recovery to engage policer detection?");
1644 "How do we process dsack with respect to rack timers, bit field, 3 is standards based?");
1649 "What is the maximum number of MSS we allow to be added back if prr can't send all its data?");
1669 "Should RACK try to use the shared cwnd on connections where allowed");
1694 "Do we allow rack to run on connections not supporting SACK");
1718 "Highest sack to ack ratio seen");
1723 "Highest move to non-move ratio seen");
1822 &rack_hw_pace_init_fail, "Total number of times we failed to initialize hw pacing");
1828 &rack_hw_pace_lost, "Total number of times we failed to initialize hw pacing");
1858 "Total number of times the rack to expired");
1913 "Total allocations dropped due to limit");
1919 "Connections with allocations dropped due to limit");
1925 "Split allocations dropped due to limit");
1989 "Total times we had to walk whole list for sack processing");
1995 "Total times we had to walk whole list due to a restart");
2098 * The very noisy two need to only come out when
2108 * also needs to come out on the point and the log.
2120 /* Convert our ms to a microsecond */
2182 * Compose bbr_state to be a bit wise 0000ADHF
2215 /* Convert our ms to a microsecond */
2223 * We did not get a new Rules Applied to set so
2231 * Overlapping send case, we switched to a new
2266 * Compose bbr_state to be a bit wise 0000ADHF
2330 * is in bw_rate_cap, but we need to look at
2355 * Now lets find the amount of data left to send.
2357 * Now ideally we want to use the end_seq to figure out how much more
2408 /* Lets set in a smaller mss possibly here to match our rate-cap */
2436 /* Lets set in a smaller mss possibly here to match our rate-cap */
2463 * we have an srtt, use the tcp IW (10) to
2518 /* If we don't have one then equate it to the gp_bw */
2537 * we are using to do this, so we do that here in the opposite
2574 /* Directed to use the configured rate */
2603 * 2 = a dsack round begins, persist is reset to 16.
2691 * We allow rack_per_of_gp_xx to dictate our bw rate we want.
2729 * above the highest rate. We need to know the maxbw for the interface
2792 rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot, uint8_t which)
2800 log.u_bbr.flex2 = to;
2997 * applying to the srtt algorithm in
3004 /* Convert our ms to a microsecond */
3048 /* Convert our ms to a microsecond */
3075 /* Convert our ms to a microsecond */
3316 /* No you can't use 1, it's for the real to cancel */
3492 * Dig in to our aux rsm's (the last two) since
3493 * UMA failed to get us one.
3520 /* wrapper to allocate a sendmap entry, subject to a specific limit */
3555 * we get our list down to the limit.
3595 /* Make sure we are not going to overrun our count limit of 0xff */
3620 * Now we have a number of factors to consider.
3625 * but we allow it too to be more.
3626 * 3) We want to make sure a measurement last N useconds (if
3629 * We handle the first concern here by trying to create a data
3652 /* Now we need to round up to the nearest MSS */
3666 * to attempt to get the window that is
3668 * low b/w connections and we don't want to
3688 * Has enough time passed for the GP measurement to be valid?
3702 * but it is something to contemplate in the
3728 * We are up to the app limited send point
3729 * we have to measure irrespective of the time..
3741 * This is to prevent cloudiness of when the last send
3805 * Before we increase we need to know if
3810 * rate to push us faster there is no sense of
3817 * maximum pacing overage to give us a max allowable rate.
3854 * like to try to achieve (plus some wiggle room).
3864 * get any more b/w. There is no sense of trying to
3878 * When we drag bottom, we want to assure
3880 * we want to restore it to at least that.
3918 * gasp at trying to not lose out
3919 * to a new-reno flow.
3928 * that the count is 0 based so we have to add one.
4010 * In order to do the floating point calculations above we
4262 * A window protocol must be able to have 4 packets
4263 * outstanding as the floor in order to function
4280 * to N segments outstanding and hold that for
4282 * rate to a set percentage (70 by default) and hold
4301 * we want to get what info (if any) is available
4339 * we want to get what info (if any) is available
4347 * We don't have enough data to make a measurement.
4356 * to be limited by the slower pacing rate.
4358 * We need to mark these as app-limited so we
4367 * Go out to the end app limited and mark
4369 * to this guy.
4379 * Now, we need to examine our pacing rate multipliers.
4380 * If its under 100%, we need to kick it back up to
4383 * Note setting clamp_atexit_prtt to 0 has the effect
4384 * of setting CA/SS to 100% always at exit (which is
4403 * Lets set rtt_diff to 0, so that we will get a "boost"
4412 * If configured to, set the cwnd and ssthresh to
4421 /* Set to min rtt */
4425 /* Set to current gp rtt */
4429 /* Set to entry gp rtt */
4442 * cwnd space for timely to work.
4459 * and should pick the min we can to
4468 /* There is a percentage here to boost */
4567 /* Restore back to our rate we want to pace at in prtt */
4622 * to just do timely no bonus up stuff in that case.
4624 * There are two configurations, set to 1, and we
4627 * to 100 and then decrement per timely.
4640 * makes timely unusable unless we want to get shoved
4645 * would like to stay somewhat contained and not
4659 * to try to gain back (we include override to
4721 /* You get a set number of pushes if timely is trying to reduce */
4733 * so use timely to decide.
4794 * Use a gradient to find it the timely gradient
4798 * anything below or equal to 0 will be
4832 * the gp range or up to it.
4902 /* Nothing outstanding yet, nothing to do here */
4908 * data. We need to walk through up until we get
4909 * to gp_seq marking so that no rsm is set incorrectly
4922 * Need to find the GP seq, if rsm is
4931 * Now we may need to mark already sent rsm, ahead of
4935 * returned in the previous block). So we go to the next, and continue
4995 * from being influenced by ack artifacts to come up with too
4997 * bytes in most of our measurements hopefully that is less likely to
5031 * get to the client. Anything larger is not valid.
5038 * talking to my 1Gbps link in S.C. Now measuring say 150,000
5043 * A better way I realized is to look at what the maximum
5048 * and then feeding them all at once to our endpoint
5070 * defined minimum GP (defaulting to 10 which hopefully is the
5080 * MSS worth) to be recorded as the timestamp.
5085 /* Also lets fill previous for our first measurement to be neutral */
5095 * We now need to calculate the Timely like status so
5105 * We don't want a probertt to be counted
5107 * expect to be reducing the RTT when we
5128 * Another option here would be to
5153 * where we transition from un-paced to paced.
5170 /* We have collected enough to move forward */
5177 * We want to take 1/wma of the goodput and add in to 7/8th
5182 * But we must be careful not to take too much i.e. if the
5186 * 10ms rtt we only want to take a much smaller portion.
5196 * Strange why did t_srtt go back to zero?
5206 * as done originally leads to some undesired results.
5208 * too much to the WMA. Thus I changed it if you are doing
5209 * dynamic adjustments to only do the apportioned adjustment
5211 * measurements just get their weight (defaulting to 1/8)
5212 * add to the WMA. We may want to think about changing
5213 * this to always do that for both sides i.e. dynamic
5215 * were playing with this I did not want to change the
5234 * to account for more than 1/2 of the
5236 * where utim becomes huge compared to
5253 * of time. The idea here is to prevent a small
5291 * or first-slowstart that ensues. If we ever needed to watch
5292 * growth of gp outside of that period all we need to do is
5326 * Test to see if its gone up enough
5327 * to set the round count up to now. Note
5334 * We went up enough to record the round.
5416 * API to deal with chained VOIs.
5429 * were to where we want to go?
5441 * Yep there is enough outstanding to make a measurement here.
5456 /* There is a full window to gain info from */
5459 /* We can only measure up to the applimited point */
5463 * We don't have enough to make a measurement.
5474 * this means we need to have the data available
5484 * Now we need to find the timestamp of the send at tp->gput_seq
5493 * Move back to include the earlier part
5517 * If we don't find the rsm due to some
5538 * To make sure proper timestamp merging occurs, we need to clear
5573 * We will ack all the data, time to end any
5575 * new is sent. Note we need to use the actual
5637 /* XXXLAS: Find a way to live without this */
5658 * acks, we won't be able to send this generally happens
5693 * room to send <or> we are pacing and prr
5694 * is disabled we will want to see if we
5695 * can send data (by setting r_wanted_output to
5766 * to fit into the long-term b/w.
5777 * First is there enough packets delivered during recovery to make
5787 * amount of data in a recovery to let us fall through and double check
5791 * Once you are declared to be policed. this block of code cannot be
5816 * take for us to pace out two of our policer_max_seg's?
5850 /* We place a min on the cap which defaults to 1Mbps */
5866 * to get more than (rack_policing_do_bw_comp+1) x del-rate
5958 * dropped and having to be retransmitted the maximum times
5966 /* Has enough rounds progressed for us to re-measure? */
5983 * continue to grow. This then provides more and more times when
5984 * we are not pacing to the policer rate. This lets us compensate
5985 * for when we hit a false positive and those flows continue to
5990 * The alternative to this is to instead whenever we pace due to
5991 * policing in rack_policed_sending we could add the amount len paced to the
5996 * after things got back to normal (assuming that what caused the
6005 * the flag to be cleared (reverted) then we need
6006 * to adjust this to not do multi-counting.
6022 /* We must be able to send our max-seg or else chaos ensues */
6077 * We need to be stricter on the RR config so
6093 * we need to possibly update the values.
6131 /* We must be able to send our max-seg or else chaos ensues */
6153 * to get the amount acked prior to recovery correct.
6208 * We are allowed to add back to the cwnd the amount we did
6340 * Allow ECN reaction on ACK to CWR, if
6422 * clamps it down below the initial window raise it to the initial
6434 * - Our last ack wasn't a 0-sized window. We never want to delay
6495 * lro is the flag we use to determine if we have seen reordering.
6504 * the connection to always be subject to reordering and just set lro
6505 * to 1.
6666 * does not exist we fallback to the srtt (which
6682 * Check to see that we don't need to fall into recovery. We will
6683 * need to do so if our oldest transmit is past the time we should
6740 * events we need to stop the running timer (if its running) before
6743 uint32_t thresh, exp, to, srtt, time_since_sent, tstmp_touse;
6750 /* All timers have been stopped none are to run */
6772 * We want to discount it the smallest amount.
6774 * recently thats the discount we want to use (now - timer time).
6776 * we want to use that (now - oldest-packet-last_transmit_time).
6790 to = tp->t_rxtcur;
6791 if (to > time_since_sent)
6792 to -= time_since_sent;
6794 to = rack->r_ctl.rc_min_to;
6795 if (to == 0)
6796 to = 1;
6802 * We have to put a ceiling on the rxt timer
6815 /* Reduce timeout to the keep value if needed */
6816 if (max_time < to)
6817 to = max_time;
6819 return (to);
6830 /* Convert from ms to usecs */
6849 * not enough ack's to yet get our next
6853 * get to use the rack-cheat.
6862 to = exp - cts;
6863 if (to < rack->r_ctl.rc_min_to) {
6864 to = rack->r_ctl.rc_min_to;
6869 to = rack->r_ctl.rc_min_to;
6874 /* Ok we need to do a TLP not RACK */
6886 /* We found no rsm to TLP with. */
6907 * If another stack has run and set srtt to 1,
6919 * rack RTT has spiked we want to use
6929 to = thresh - time_since_sent;
6931 to = rack->r_ctl.rc_min_to;
6941 if (to < rack_tlp_min) {
6942 to = rack_tlp_min;
6944 if (to > TICKS_2_USEC(TCPTV_REXMTMAX)) {
6946 * If the TLP time works out to larger than the max
6957 if (to == 0)
6958 to = 1;
6959 return (to);
7169 * Note if ack's are allowed to wake us do not
7188 * And we have to have some time
7250 * If no timers are going to run and we will fall off the hptsi
7251 * wheel, we resort to a keep-alive timer if its configured.
7268 * note that this is probably not going to
7278 * We want to instead not wake up a long time from
7279 * now but to wake up about the time we would
7328 * flags have important meanings to what happens when
7349 * timer running, then we don't even want a sack to wake
7350 * us since the rack timer has to expire before we can send.
7403 * Arrange for the hpts to kick back in after the
7418 * With respect to t_flags2(?) here, lets let any new acks wake
7455 /* Got up to all that were marked sack-passed */
7475 * appropriate thing to check if we need to do a RACK retransmit.
7483 * This timer simply provides an internal trigger to send out data.
7498 /* We need to stroke any lost that are now declared as lost */
7505 * we are allowing the rack timer to
7525 * detected to attempt to detect
7526 * quicker. Normally we want to do this
7544 * it, KASSERT that. Adjust the orig_m_len to
7650 /* Push bit must go to the right edge as well */
7656 * Now we need to find nrsm's new location in the mbuf chain
7659 * chain to find the right position, it may be the same mbuf
7679 * to merge these is to move the right
7681 * is any reason we need to try to find
7736 * When merging two RSM's we also need to consider the ack time and keep
7738 * one we will want to be using.
7744 /* Transfer the split limit to the map we free */
7754 * TLP Timer, here we simply setup what segment we want to
7784 * need to figure out how to force a full MSS segment out.
7812 * its time to abandon the measurement.
7825 * there is an oldest to send jump to the need_retran.
7840 /* not enough to fill a MTU */
7866 * Ok we need to arrange the last un-acked segment to be re-sent, or
7901 * We need to split this the last segment in two.
7908 * No memory to split, we will just exit and punt
7909 * off to the RXT timer.
7943 * Delayed ack Timer, here we simply need to setup the
8010 * Persistence timer into zero window. Force a byte to be output, if
8017 * the idle time (no responses to probes) reaches the maximum
8088 * Send a packet designed to force a response if the peer is
8091 * connection due to timeout or reboot. Using sequence
8093 * segment to lie outside the receive window; by the
8094 * protocol spec, this requires the correspondent TCP to
8146 * Ideally we would like to be able to
8154 * Also we really need to stick them back in sequence
8167 /* We must re-add it back to the tlist */
8198 * needs to go out as retransmits until
8199 * we retransmit up to snd_max.
8230 * Now convert to rack's internal format,
8236 * We want a chance to stay in slowstart as
8245 * We also want to assure a IW worth of
8254 * we will setup to retransmit the lowest seq number outstanding.
8269 * It is unlikely to be of any use (the network is
8293 * to slowstart back to the level.
8308 /* Nothing outstanding .. nothing to do */
8322 * have a KEEPINIT timer we need to check the first transmit
8323 * against now to see if we have exceeded the KEEPINIT time
8332 /* Ok we have something outstanding to test keepinit with */
8343 * retransmit interval. Back off to a longer retransmit interval
8362 /* XXXGL: previously t_softerror was cast to uint16_t */
8369 * If the SYN was retransmitted, indicate CWND to be limited
8370 * to 1 segment in cc_conn_init().
8376 * recovered if this turns out to be a "bad" retransmit. A
8380 * End-to-End Network Path Properties" by Allman and Paxson
8403 * of packets and process straight to FIN. In that case we won't
8418 * 1448 -> 1188 -> 524) should be given 2 chances to recover
8429 * Reduce MTU to lower value than what we negotiated
8440 * Reduce the MSS to blackhole value or to the
8441 * default in an attempt to retransmit.
8454 * to minmss.
8473 * to minmss.
8486 * chances to recover.
8497 * subject to attack in such a case.
8508 * Disable RFC1323 and SACK if we haven't got any response to
8509 * our third SYN to work-around some broken terminal servers
8512 * unknown-to-them TCP options.
8520 * move the current srtt into rttvar to keep the current retransmit
8554 * to handle the GPUT request?
8564 * buffer that have been sent to cover this
8588 * we don't want to check the timer. It may
8590 * want to send the retransmit (if its ready).
8618 * alarm, go back to sleep. We make sure we don't have
8730 * segment, we would add to the rc_cnt_of_retran[2] the value of
8761 * The rack_unpeg_rxt is used when we go to retransmit a segment
8764 * above rack_peg_rxt() prior to calling that and incrementing
8768 * we would have subtracted 3 from rc_cnt_of_retran[1] to remove
8777 * This effectively moves the count from rc_cnt_of_retran[1] to
8876 /* We have retransmitted due to the SACK pass */
8918 * have to split this into what was transmitted and what was not.
8929 * So here we are going to take the original rsm and make it what we
8932 * we retransmitted 5 bytes i.e. 1, 5. The original piece shrinks to
8959 rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len,
8971 * Add to the RACK log of packets in flight or retransmitted. If
8975 * Retransmissions will increment the count and move the ts to its
8977 * won't be able to effectively use the ACK for an RTT on a retran.
9011 * The call to rack_log_output is made before bumping
9022 /* Are sending an old segment to induce an ack (keep-alive)? */
9052 * we tried to wait.
9103 * we need to walkout to the correct location.
9160 * If we reach here its a retransmission and we need to find it.
9202 * copy rsm to nrsm and then trim the front of rsm
9203 * to not include this part.
9342 /* We are to use the lowest RTT seen in a single ack */
9345 /* We are to use the highest RTT seen in a single ack */
9348 /* We are to use the average RTT seen in a single ack */
9361 * With no RTT we have to accept
9412 * measurement. We don't need to do any tricks with shifting
9437 * variance to half the rtt (so our first retransmit happens
9459 /* Send in the millisecond rtt as close to the path RTT as we can get */
9466 /* Send in the microsecond rtt as close to the path RTT as we can get */
9479 * statistical, we have to test that we don't drop below the minimum
9494 * Apply to filter the inbound us-rtt at us_cts.
9516 * to the time that we would have entered probe-rtt.
9517 * This is probably due to the fact that a peer flow
9537 struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type, tcp_seq th_ack)
9584 /* Kick the RTT to the CC */
9593 * We need to setup what our confidence
9611 * in the RTT. We probably need to examine this algorithm
9612 * a bit more and enhance it to account for the delay
9655 * so we need to clear these to avoid incorrect handling.
9661 if (to && (to->to_flags & TOF_TS) &&
9663 (to->to_tsecr) &&
9670 if (rack_ts_to_msec(rsm->r_tim_lastsent[i]) == to->to_tsecr) {
9676 * Kick the RTT to the CC, here
9693 * want to update our rack_rtt.
9726 rack_log_rtt_sendmap(rack, i, rsm->r_tim_lastsent[i], to->to_tsecr);
9748 * don't want to update our rack_rtt. We in
9749 * theory (in future) might want to think about reverting our
9781 * Mark the SACK_PASSED flag on all entries prior to rsm send wise.
9854 * to start our next measurement. We need
9855 * to record the new gput_ts as here and
9897 * For SACK's we also want to use the end
9899 * we want to target anything after that
9907 * not a sack, we need to move all the
9908 * way up to where this ack cum-ack moves
9909 * to.
9922 * may be equal to s_rsm if the RACK_USE_BEG was set. For the other
9933 /* If we hit here we have to have *not* sent tp->gput_seq */
9950 * We are moving the goal post, we may be able to clear the
9972 * because its too small to gain us anything we
10005 /* It has to be a sub-part of the original TLP recorded */
10011 struct tcpopt *to, struct rack_sendmap **prsm, uint32_t cts,
10052 * if we need to save off the start/end
10085 * Need to split this in two pieces the before and after,
10123 * to reflect the piece being sacked and
10129 * We want to end like so:
10134 * use to update all the gizmos.
10148 * important in GP measurements to have the
10175 /* We don't need to adjust rsm, it did not change */
10183 rack_update_rtt(tp, rack, nrsm, to, cts, SACKED, 0);
10205 * Now we want to go up from rsm (the
10206 * one left un-acked) to the next one
10239 * need to split the map. We enter like
10251 * rsm to nrsm, so the next block
10279 /* Position us to point to the new nrsm that starts the sack blk */
10290 /* A partial sack to a already sacked block */
10296 * reposition the start to the
10323 * if we need to save off the start/end
10354 rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0);
10398 * to the next block in the tail queue hash table.
10421 * if we need to save off the start/end
10472 * Goal, we want the right remainder of rsm to shrink
10473 * in place and span from (rsm->r_start = end) to rsm->r_end.
10474 * We want to expand prev to go all the way
10475 * to prev->r_end <- end.
10494 /* Now adjust nrsm (stack copy) to be
10502 * important in GP measurements to have the
10539 * to prev). Update the rtt and changed
10542 rack_update_rtt(tp, rack, nrsm, to, cts, SACKED, 0);
10581 * if we need to save off the start/end
10648 rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0);
10821 * it does below) or worse and harder to track it may shrink.
10823 * This last makes it impossible to track at the time of
10837 * can happen to conserve the sendmap size. That breaks
10839 * estimate. So to prevent that and keep it working with
10841 * types to be merged. I.e. if two sends are in the GP window
10842 * then its ok to merge them together. If two sends are not
10843 * in the GP window its ok to merge them together too. Though
10887 * for a range you are not going to measure. We project
10888 * out how far and then sometimes modify that to be
10890 * that does not belong to the range included.
10903 rack_process_to_cumack(struct tcpcb *tp, struct tcp_rack *rack, register uint32_t th_ack, uint32_t cts, struct tcpopt *to, uint64_t acktime)
10907 * The ACK point is advancing to th_ack, we must drop off
10940 * a complication, we have to wait to age it out until
10953 * It is safe to start aging TLP's out.
10971 * It is safe to start aging TLP's send.
11008 rack_update_rtt(tp, rack, rsm, to, cts, CUM_ACKED, th_ack);
11016 * to think of it as a reordered segment. This may
11062 * which can happen due to reordering.
11122 (to->to_flags & TOF_TS) &&
11123 (to->to_tsecr != 0) &&
11126 * We can use the timestamp to see
11131 if (to->to_tsecr == rack_ts_to_msec(rsm->r_tim_lastsent[0])) {
11139 /* Free back to zone */
11148 * The peer has moved snd_una up to
11154 * given us snd_una up to (rsm->r_end).
11155 * We need to undo the acked markings here.
11157 * Note we have to look to make sure th_ack is
11183 * which can happen due to reordering. In this
11218 /* Now we need to move our offset forward too */
11229 /* Now do we need to move the mbuf fwd too? */
11246 * we should be able to walk the mbuf's and find our place.
11250 * hits to access the socket buffer. And even more puzzling is that
11282 * might want to revert the congestion state if nothing
11299 * incorrectly due to reordering!
11315 * the first recovery. We want to be able to slow-start
11316 * back to this level. The ssthresh from the timeout
11318 * to be min(cwnd=1mss, 2mss). Which makes it basically
11389 * nothing else is missing we need to revert.
11426 * and thus we don't need more room to send anything.
11479 rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered_recovery, int dup_ack_struck,
11523 rack_process_to_cumack(tp, rack, th_ack, cts, to,
11526 if ((to->to_flags & TOF_SACK) == 0) {
11531 * with no SACK, add to the changes so
11544 for (i = 0; i < to->to_nsacks; i++) {
11545 bcopy((to->to_sacks + i * TCPOLEN_SACK),
11587 /* Nothing to sack, but we need to update counts */
11592 /* Only one, we don't need to sort */
11646 * First lets look to see if
11658 acked = rack_proc_sack_blk(tp, rack, &sack_blocks[0], to, &rsm, cts, segsiz);
11666 * a normal implementation to happen
11669 * to 1 block and the next to be retransmitted
11671 * are acked). Count this as ACK'd data to boost
11688 acked = rack_proc_sack_blk(tp, rack, &sack_blocks[i], to, &rsm, cts, segsiz);
11720 * When we enter recovery we need to assure we send
11750 * to override.
11767 * We need to skip anything already set
11768 * to be retransmitted.
11783 * Here we see if we need to retransmit. For
11816 * timer to expire. While you were waiting all of the acknowledgments
11818 * way underneath the bottleneck to the point where our Goodput
11825 * We also check to see if we are whittling down to just one segment
11826 * outstanding. If this occurs and we have room to send in our cwnd/rwnd
11828 * we need to speed up slightly.
11838 * we were still waiting on the pacer to expire.
11840 * This means we need to boost the b/w in
11841 * addition to any earlier boosting of
11866 * available to sanity check things.
11943 /* Convert our ms to a microsecond */
11973 * Fill in flex7 to be CHD (catchup|hybrid|DGP)
11981 * Compose bbr_state to be a bit wise 0000ADHF
12067 * set up an entry we need to proceed. If
12123 * time to when the request arrived.
12128 * than one request for a tm) we want to use now, the point
12135 * so we want to use arrival time as our base.
12167 * We need to reset the max pace segs if we have a
12194 /* Time to update the track. */
12205 * was wrong. This is usually due to TLS having
12210 * guys end to match the end of this send. That
12234 * that amount. What we would ideally like to
12237 * acked amount. However we have to gate that
12240 * (i.e. we don't want to exceed the rwnd of the peer).
12271 * Here any sendmap entry that points to the
12272 * beginning mbuf must be adjusted to the correct
12275 * 2) snd_una adjusted to its new position.
12283 * that mbuf. We must use the sb functions to do this
12287 * to go in and trim off the mbufs that we freed (of course
12309 /* The very first RSM's mbuf must point to the head mbuf in the sb */
12311 ("Rack:%p sb:%p rsm:%p -- first rsm mbuf not aligned to sb",
12314 /* one to adjust */
12381 * Originally I was just going to find the th_ack associated
12384 * need to find all entries that are completed by th_ack not
12423 * Check to see if we are freeing what we are pointing to send wise
12424 * if so be sure to NULL the pointer so we know we are no longer
12425 * set to anything.
12450 * Return value of 1, we do not need to call rack_process_data().
12453 * its unlocked and probably unsafe to touch the TCB.
12457 struct tcpcb *tp, struct tcpopt *to,
12484 if (SEQ_GEQ(th->th_ack, tp->snd_una) || to->to_nsacks) {
12498 ((to->to_flags & TOF_SACK) == 0)) {
12502 rack_log_ack(tp, to, th, ((in_rec == 0) && IN_FASTRECOVERY(tp->t_flags)),
12508 * Old ack, behind (or duplicate to) the last one rcv'd
12527 * to non-starred state, increment snd_una for ACK of SYN,
12554 * within our recovery window, then it was a mistake to do the
12556 * ssthresh, and proceed to transmit where we left off.
12573 * If it is the RXT timer we want to
12596 * remember to restart (more output or persist). If there is more
12597 * data to be acked, restart retransmit timer, using current
12627 * we need to clear the rto_from_rec flag.
12702 * peer sent data (now or in the past), time to
12785 /* Nothing to do maybe the peer ack'ed it all */
12789 /* Now do we need to split this one? */
12863 * and compare that to our send window.
12942 /* Make sure we output to start the timer */
12956 * data to send. Enter persists.
12974 * presented to the user (this happens in tcp_usrreq.c, case
12989 * queue with control block tp. Set thflags to whether
12991 * the common case inline (segment is the next to be
13057 * XXX: Due to the header drop above "th" is
13187 struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
13199 * the timestamp. NOTE that the test is modified according to the
13211 if (__predict_false((to->to_flags & TOF_TS) &&
13212 (TSTMP_LT(to->to_tsval, tp->ts_recent)))) {
13221 if ((to->to_flags & TOF_TS) != 0 &&
13224 tp->ts_recent = to->to_tsval;
13229 * reassembly queue and we have enough buffer space to take it.
13260 * Pull snd_wl1 up to prevent seq wrap relative to th_seq.
13264 * Pull rcv_up up to prevent seq wrap relative to rcv_nxt.
13271 /* Add data to socket buffer. */
13304 * This subfunction is used to try to highly optimize the
13306 * in sequence to remain in the fast-path. We also add
13307 * in the __predict's to attempt to help the compiler.
13314 struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
13323 /* Old ack, behind (or duplicate to) the last one rcv'd */
13338 if ((to->to_flags & TOF_TS) && __predict_false(TSTMP_LT(to->to_tsval, tp->ts_recent))) {
13357 rack_log_ack(tp, to, th, 0, 0, NULL, NULL);
13383 * data to send. Enter persists.
13389 * the timestamp. NOTE that the test is modified according to the
13392 if ((to->to_flags & TOF_TS) != 0 &&
13395 tp->ts_recent = to->to_tsval;
13423 hhook_run_tcp_est_in(tp, th, to);
13434 /* Note we want to hold the sb lock through the sendmap adjust */
13436 /* Wake up the socket if we have room to write more */
13446 * If it is the RXT timer we want to
13470 * Pull snd_wl2 up to prevent seq wrap relative to th_ack.
13481 * If data are ready to send, let tcp_output decide between more
13521 struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
13540 * capable. if SYN has been acked change to ESTABLISHED else
13541 * SYN_RCVD state arrange for segment to be acked (eventually)
13619 * equal we don't need to do that
13628 * set it up to be what we send (send it next).
13670 * Advance th->th_seq to correspond to first data byte. If data,
13671 * trim to stay within window, dropping FIN if necessary.
13686 * remote host used T/TCP to validate the SYN, our data will be
13691 /* For syn-sent we need to possibly update the rtt */
13692 if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) {
13696 t = (mcts - to->to_tsecr) * HPTS_USEC_IN_MSEC;
13699 rack_log_rtt_sample_calc(rack, t, (to->to_tsecr * 1000), (mcts * 1000), 4);
13703 if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen))
13705 /* We may have changed to FIN_WAIT_1 above */
13708 * In FIN_WAIT_1 STATE in addition to the processing
13716 * timer is contrary to the specification,
13745 struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
13797 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
13798 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
13803 * In the SYN-RECEIVED state, validate that the packet belongs to
13804 * this connection before trimming the data to fit the receive
13814 if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
13826 * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ
13828 * SEG.Len, This modified check allows us to overcome RFC1323's
13833 if ((to->to_flags & TOF_TS) != 0 &&
13838 tp->ts_recent = to->to_tsval;
13890 * Account for the ACK of our SYN prior to
13898 * not, do so now to pass queued data to user.
13910 /* For syn-recv we need to possibly update the rtt */
13911 if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) {
13915 t = (mcts - to->to_tsecr) * HPTS_USEC_IN_MSEC;
13918 rack_log_rtt_sample_calc(rack, t, (to->to_tsecr * 1000), (mcts * 1000), 5);
13922 if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) {
13926 /* We could have went to FIN_WAIT_1 (or EST) above */
13928 * In FIN_WAIT_1 STATE in addition to the processing for the
13936 * to the specification, but if we don't get a FIN
13963 struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
13985 if (__predict_true(((to->to_flags & TOF_SACK) == 0)) &&
13990 if (rack_fastack(m, th, so, tp, to, drop_hdrlen, tlen,
13995 if (rack_do_fastnewdata(m, th, so, tp, to, drop_hdrlen, tlen,
14021 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
14022 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
14026 if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
14038 * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ
14040 * SEG.Len, This modified check allows us to overcome RFC1323's
14045 if ((to->to_flags & TOF_TS) != 0 &&
14050 tp->ts_recent = to->to_tsval;
14074 if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val, orig_tlen)) {
14096 struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
14122 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
14123 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
14127 if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
14139 * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ
14141 * SEG.Len, This modified check allows us to overcome RFC1323's
14146 if ((to->to_flags & TOF_TS) != 0 &&
14151 tp->ts_recent = to->to_tsval;
14175 if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, NULL, thflags, &ret_val, orig_tlen)) {
14225 struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
14253 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
14254 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
14258 if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
14277 * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ
14279 * SEG.Len, This modified check allows us to overcome RFC1323's
14284 if ((to->to_flags & TOF_TS) != 0 &&
14289 tp->ts_recent = to->to_tsval;
14312 if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) {
14318 * proceed. Starting the timer is contrary to the
14353 struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
14381 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
14382 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
14386 if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
14398 * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ
14400 * SEG.Len, This modified check allows us to overcome RFC1323's
14405 if ((to->to_flags & TOF_TS) != 0 &&
14410 tp->ts_recent = to->to_tsval;
14433 if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) {
14460 struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
14488 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
14489 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
14494 if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
14506 * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ
14508 * SEG.Len, This modified check allows us to overcome RFC1323's
14513 if ((to->to_flags & TOF_TS) != 0 &&
14518 tp->ts_recent = to->to_tsval;
14541 if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) {
14568 struct tcpcb *tp, struct tcpopt *to, int32_t drop_hdrlen, int32_t tlen,
14597 if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
14598 TSTMP_LT(to->to_tsval, tp->ts_recent)) {
14602 if (_ctf_drop_checks(to, m, th, tp, &tlen, &thflags, &drop_hdrlen, &ret_val,
14621 * modify the segment boundary check to be Last.ACK.Sent <= SEG.SEQ
14623 * SEG.Len, This modified check allows us to overcome RFC1323's
14628 if ((to->to_flags & TOF_TS) != 0 &&
14633 tp->ts_recent = to->to_tsval;
14656 if (rack_process_ack(m, th, so, tp, to, tiwin, tlen, &ourfinisacked, thflags, &ret_val, orig_tlen)) {
14751 * If we have nothing limit us to bursting
14902 * the size to whatever the pace_max_segments
14905 * 2 - Use pacer min granularity as a guide to
14910 * that b/w) and then round it up to the next
15052 * need to validate time units and t_flags2.
15101 * to not refer to tp->t_fb_ptr. This has the old rack
15191 * so that all we have to do is copy over
15310 * We need to allocate memory but cant. The INP and INP_INFO
15312 * scheme to drop the locks fails :(
15342 * or switch off this stack, we will know to go restore
15415 * We initialize to all ones so we never match 0
15469 * to defer this until the first send.
15484 /* When dynamic adjustment is on CA needs to start at 100% */
15524 /* Basically this means rack timers are extended based on dsack by up to (2 * srtt) */
15563 * need to adjust a few things.
15574 * has not yet occured) so we need to make
15581 * we want ssthresh to be unlimited. Setting
15582 * it to the rwnd (which the default stack does
15584 * since we want to be in SS and grow both the
15586 * we set it to the rwnd then as the peer grows its
15589 * Its far better to raise it up high (this takes the
15634 * bit decimal so we have to carefully convert
15635 * these to get the full precision.
15704 * to start. Zero means no timer (no keepalive
15757 * you have to get to ESTAB or beyond to tell.
15767 * to deal with that) <or> all is acknowledged and we
15769 * would want to switch to rack after all data is acknowledged
15843 * Lets take a different approach to purging just
15962 * call send or were bounced out due to the
15992 /* We are supposed to have delayed ack up and we do */
16020 * If the delayed ack was going to go off
16021 * before the rtx/tlp/rack timer were going to
16030 * We will force the hpts to be stopped if any, and restart
16031 * with the slot set to what was in the saved slot.
16095 * data to send. Enter persists.
16223 * For sane logging we need to play a little trick.
16225 * snd_una to high_seq, but since compressed acks are
16258 * was not responded to. In such cases we have a
16273 * a response to our window probe. And that
16309 * also do common things we might need to do when
16363 * If we have enabled PCM, then we need to
16364 * check if the round has adanced to the state
16384 * For DGP an initial startup check. We want to validate
16444 * in the incoming mbuf. We also need to still pay attention
16445 * to nxt_pkt since there may be another packet after this
16458 struct tcpopt to_holder, *to = NULL;
16481 * to handle the GPUT request?
16491 * buffer that have been sent to cover this
16501 to = &to_holder;
16502 to->to_flags = 0;
16558 to->to_flags = TOF_TS;
16560 to->to_tsecr = ae->ts_echo;
16561 to->to_tsval = ae->ts_value;
16563 * If echoed timestamp is later than the current time, fall back to
16568 to->to_tsecr = 0;
16590 to->to_flags = 0;
16670 /* If the window changed setup to update */
16693 rack_process_to_cumack(tp, rack, ae->ack, cts, to,
16708 /* And lets be sure to commit the rtt measurements for this ack */
16724 /* Tend to any collapsed window */
16736 * The draft (v3) calls for us to use SEQ_GEQ, but that
16742 * have to have all the ack's processed in queue to know
16764 * to non-starred state, increment snd_una for ACK of SYN,
16790 * we need to clear the rto_from_rec flag.
16846 /* Note we want to hold the sb lock through the sendmap adjust */
16848 /* Wake up the socket if we have room to write more */
16869 * If the RXT timer is running we want to
16904 * peer sent data (not now in the past), time to
16931 * a call to tcp_output(). That way since we
16944 * proceed. Starting the timer is contrary to the
16958 * We don't change to fin-wait-2 if we have our fin acked
16965 /* Wake up the socket if we have room to write more */
17128 struct tcpopt to;
17157 * Check to see if we need to skip any output plans. This
17159 * must process the ack coming in but need to defer sending
17178 * but we do have to check the flags in the inp, it
17179 * could be, if a sack is present, we want to be awoken and
17188 * timestamp option, we will want to queue
17190 * and will need to change with accurate ECN.
17204 * It is unrealistic to think we can pace in less than
17207 * ahead and allow output to be "early". We will attempt to
17208 * make up for it in any pacing time we try to apply on
17257 * doing i.e. pinning to this CPU and then doing the accounting
17260 * exit. I have choosen to not do the critical enter since
17262 * us (line above this if) to the same CPU with sched_pin(). This
17264 * interupt but we won't be moved to another CPU.
17269 * to the time the interupt processing time plus the ack processing
17272 ack_val_set = tcp_do_ack_accounting(tp, th, &to, tiwin,
17279 memset(&to, 0, sizeof(to));
17280 tcp_dooptions(&to, (u_char *)(th + 1),
17291 * can subject us to an attack.
17293 to.to_flags &= ~TOF_SACK;
17301 * to handle the GPUT request?
17311 * buffer that have been sent to cover this
17430 if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS) &&
17439 * timer. XXX: This should be done after segment validation to
17455 * TCP ECN processing. XXXJTL: If we ever use ECN, we need to move
17456 * this to occur after we've validated the segment.
17464 * If echoed timestamp is later than the current time, fall back to
17468 if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
17469 to.to_tsecr -= tp->ts_offset;
17470 if (TSTMP_GT(to.to_tsecr, ms_cts))
17471 to.to_tsecr = 0;
17474 (to.to_flags & TOF_TS) &&
17475 (TSTMP_GEQ(to.to_tsecr, rack->r_ctl.last_rcv_tstmp_for_rtt))) {
17480 * data to do an RTT. We set a flag when we first
17481 * sent this TS to the peer. We now have it back
17482 * and have an RTT to share. We log it as a conf
17496 * If its the first time in we need to take care of options and
17510 * According to RFC1323 the window field in a SYN (i.e., a
17512 * this is traditional behavior, may need to be cleaned up.
17517 if ((to.to_flags & TOF_SCALE) &&
17520 tp->snd_scale = to.to_wscale;
17525 * next incoming segment to the scaled value.
17529 if ((to.to_flags & TOF_TS) &&
17532 tp->ts_recent = to.to_tsval;
17536 if (to.to_flags & TOF_MSS) {
17537 tcp_mss(tp, to.to_mss);
17540 (to.to_flags & TOF_SACKPERM) == 0)
17543 if (to.to_flags & TOF_FASTOPEN) {
17546 if (to.to_flags & TOF_MSS)
17547 mss = to.to_mss;
17554 to.to_tfo_len, to.to_tfo_cookie);
17615 tp, &to, drop_hdrlen,
17637 /* We have enough to set in the pacing segment size */
17645 * If we set the ack_val_se to what ack processing we are doing
17646 * we also want to track how many cycles we burned. Note
17650 * 0xf cannot be returned and is what we initialize it too to
17685 * the remaining time (slot_remaining) to restart the timer.
17695 * The draft (v3) calls for us to use SEQ_GEQ, but that
17701 * have to have all the ack's processed in queue to know
17800 /* Return the next guy to be re-transmitted */
17997 * The cwnd is collapsed to
17999 * Lets drop back to the lt-bw.
18007 * If we are in recover our cwnd needs to be less for
18020 * Apply Timely factor to increase/decrease the
18082 /* Adjust to any cap */
18089 * We want to limit fill-cw to the some multiplier
18176 * Ok fill_bw holds our mythical b/w to fill the cwnd
18208 * We may want to pace depending on if we are going
18229 * Into the reserve to get a full policer_max_seg
18230 * so we set the len to that and eat into
18239 /* We have to wait some */
18257 * are we close enough to the end of the bucket that we need to pace? If so
18275 /* we need to pace */
18321 * into the function to find out if we are
18341 * the peer to have a gap in data sending.
18349 * instead we use the pacer to mitigate bursts.
18365 * Calculate how long this will take to drain, if
18366 * the calculation comes out to zero, thats ok we
18367 * will use send_a_lot to possibly spin around for
18368 * more increasing tot_len_this_send to the point
18369 * that its going to require a pace, or we hit the
18392 /* RRS: We insert non-paced call to stats here for len */
18407 /* no way to yet do an estimate */
18434 * No way yet to make a b/w estimate or
18440 /* We need to account for all the overheads */
18470 * Ok we need to release it, we
18488 * Ok we need to release it, we
18506 /* Lets re-allow attempting to setup pacing */
18521 * We want to pace at our rate *or* faster to
18522 * fill the cwnd to the max if its not full.
18525 /* Re-check to make sure we are not exceeding our max b/w */
18535 * Ok we need to release it, we
18555 * Lets attempt to turn on hardware pacing
18578 /* Do we need to adjust our rate? */
18585 * having to do with the previous time
18607 * do allow hardware pacing to be restarted.
18648 /* We just need to adjust the segment size */
18668 * to be longer than the SRTT of the path. If it is
18686 /* RRS: We insert paced call to stats here for len and rate_wanted */
18693 * goes to send then either the nic is out
18696 * completely. Lets add to the pacing time.
18726 * this means we need to have the data available
18761 * initial-windows worth of data to
18791 * the last ack that arrived (no need to
18811 * If we have a marker pointer to the last one that is
18812 * app limited we can use that, but we need to set
18818 * We want to get to the rsm that is either
18825 /* Have to use the next one */
18853 * Ok in this path we need to use the r_end now
18858 * We also need to adjust up the sendtime
18859 * to the send of the next data after my_rsm.
18879 * so the next seq out needs to skip the first
18960 /* We never want to go over our peers rcv-window */
19086 * It is cheaper to just add the segments
19087 * than it is to take the cache miss to look
19144 * len has fell to 0.
19165 * This is the case where the next mbuf went to NULL. This
19200 * it, KASSERT that. Adjust the orig_m_len to
19307 * and 0 is empty. So how best to make this into
19360 * us to prohibit us from sending too much (usually its 1MSS).
19373 struct tcpopt to;
19407 to.to_flags = 0;
19421 to.to_tsval = ms_cts + tp->ts_offset;
19422 to.to_tsecr = tp->ts_recent;
19423 to.to_flags = TOF_TS;
19428 to.to_flags |= TOF_SIGNATURE;
19430 optlen = tcp_addoptions(&to, opt);
19457 /* Establish the len to send */
19545 * shorten it to no longer need tso. Lets
19590 if (to.to_flags & TOF_SIGNATURE) {
19598 (u_char *)(th + 1) + (to.to_signature - opt)) != 0) {
19699 /* Time to copy in our header */
19742 * When we have a retransmit we want to log the
19792 /* Move snd_nxt to snd_max so we don't have false retransmissions */
19803 rack_log_output(tp, &to, len, rsm->r_start, flags, error, rack_to_usec_ts(tv),
19888 * size is not optimally adjusted to the actual network conditions
19895 * The criteria to step up the send buffer one notch are:
19898 * 2. send buffer is filled to 7/8th with data (so we actually
19899 * have data to make use of it);
19905 * we want to avoid hitting cache lines in the rack_fast_output()
19943 * Enter to do fast output. We are given that the sched_pin is
19946 * we know how many more bytes needs to be sent (presumably either
19947 * during pacing or to fill the cwnd and that was greater than
19948 * the max-burst). We have how much to send and all the info we
19949 * need to just send.
19963 struct tcpopt to;
19998 to.to_flags = 0;
20001 to.to_tsval = ms_cts + tp->ts_offset;
20002 to.to_tsecr = tp->ts_recent;
20003 to.to_flags = TOF_TS;
20008 to.to_flags |= TOF_SIGNATURE;
20010 optlen = tcp_addoptions(&to, opt);
20038 /* Establish the len to send */
20105 * shorten it to no longer need tso. Lets
20150 if (to.to_flags & TOF_SIGNATURE) {
20158 (u_char *)(th + 1) + (to.to_signature - opt)) != 0) {
20253 /* Time to copy in our header */
20344 * Need to record what we have since we are
20358 rack_log_output(tp, &to, len, tp->snd_max, flags, error, rack_to_usec_ts(tv),
20458 /* Round down to the nearest pace_max_seg */
20508 * the segment to be put on the wire.
20514 * It has been sacked, lets move to the
20546 * needs to grow. It can go no higher than policer_bucket_size.
20578 * note we do need to check this for exceeding
20579 * our max segment size due to the fact that
20627 struct tcpopt to;
20846 * (SYN, RST) to send, then transmit; otherwise, investigate
20880 * record the time but if possible add back to
20918 * pacing in order to turn PRR off. We do this
20921 * tried to do it while setting rack_no_prr on.
20930 * to factor this in to see if a PCM is needed.
21038 * for us to retransmit it. Move up the collapse point,
21039 * since this rsm has its chance to retransmit now.
21090 * cheat and we skipped to a TLP and it
21120 * was marked to be retransmitted with the smaller
21124 * to all be marked must. We can use the tmap to
21143 * We can use the tmap to find them.
21223 /* Check to see if there is room */
21291 * Now in theory snd_max may be equal to snd_una, if so
21325 * For prr=off, we need to send only 1 MSS
21327 * be arriving that causes us to send retransmits and
21328 * we don't want to be on a long pace due to a larger send
21347 * on PRR to dictate what we will send in the form of
21355 /* Check to see if we have the data */
21384 * likely the PRR algorithm is not going to
21393 * send empty's the socket buffer we want to
21395 * for our prr_sndcnt to get bigger.
21430 * we tune it down to a smaller value?
21437 * We are not allowed to send. How long
21438 * do we need to pace for i.e. how long
21439 * before len is available to send?
21478 * Be careful not to send data and/or FIN on SYN segments. This
21479 * measure is needed to prevent interoperability problems with not
21517 * We have nothing to send, or the window shrank, or
21518 * is closed, do we need to go into persists?
21580 * the last bytes of the sb to go out even if
21592 * room to send at least N pace_max_seg, the cwnd is greater
21593 * than or equal to a full pacing segments plus 4 mss and we have 2 or
21596 * We don't want to send instead we need to get more ack's in to
21597 * allow us to send a full pacing segment. Normally, if we are pacing
21601 * can delay the acks, which is why we want to make sure we
21602 * have cwnd space to have a bit more than a max pace segments in flight.
21607 * 2 MSS out in response to the acks being clocked in which
21608 * defeats the point of hw-pacing (i.e. to help us get
21626 * makes it impossible to transmit any options which vary per
21631 * the right thing below to provide length of just ip options and thus
21632 * checking for ipoptlen is enough to decide if ip options are present.
21689 * limited the window size) - we need to retransmit
21698 * to flush a buffer queued with moretocome. XXX
21726 * out because we want to try to have at least two
21727 * packets inflight to not be caught by delayed ack.
21736 * Window updates are important when we close our window due to a
21739 * remote end starts to send again the ACK clock takes over and
21744 * to be sent. We also should avoid sending a flurry of window
21753 * situations the ACK's to new incoming data will carry further
21820 * yet done so, then we need to send.
21828 * No reason to send a segment, just return.
21848 * The idea behind that is instead of having to have
21849 * the peer wait for the delayed-ack timer to run off
21872 ((optlen == TCPOLEN_TSTAMP_APPA) && (to.to_flags & TOF_TS)))) {
21881 /* Assure when we leave that snd_nxt will point to top */
21959 * to end the measurement window in
21967 * configured to end the measurement
21983 * If we get this out in logs we need to
21997 * There is not enough to measure.
22014 * Go out to the end app limited and mark
22016 * to this guy.
22031 /* Check if we need to go into persists or not */
22037 /* Yes lets make sure to move to persist before timer-start */
22082 * We only want to do this once with the hw_check_queue,
22084 * we come around to again, the flag will be clear.
22106 * need to make it so all data
22120 * The idea behind that is instead of having to have
22121 * the peer wait for the delayed-ack timer to run off
22153 * set not to do any options. NOTE: we assume that the IP/TCP header
22168 * no rsm to use, then we look at various bits,
22185 * Compute options for segment. We only have to care about SYN and
22189 to.to_flags = 0;
22193 to.to_mss = tcp_mssopt(&inp->inp_inc);
22195 to.to_mss -= V_tcp_udp_tunneling_overhead;
22196 to.to_flags |= TOF_MSS;
22202 * have caused the original SYN or SYN|ACK to have
22208 to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
22209 to.to_tfo_cookie =
22211 to.to_flags |= TOF_FASTOPEN;
22214 to.to_tfo_len =
22216 to.to_tfo_cookie =
22218 to.to_flags |= TOF_FASTOPEN;
22221 * If we wind up having more data to
22233 to.to_wscale = tp->request_r_scale;
22234 to.to_flags |= TOF_SCALE;
22256 to.to_tsval = ts_to_use + tp->ts_offset;
22257 to.to_tsecr = tp->ts_recent;
22258 to.to_flags |= TOF_TS;
22271 /* Subtract 1 from seq to force a response */
22283 to.to_flags |= TOF_SACKPERM;
22286 to.to_flags |= TOF_SACK;
22287 to.to_nsacks = tp->rcv_numsacks;
22288 to.to_sacks = (u_char *)tp->sackblks;
22294 to.to_flags |= TOF_SIGNATURE;
22298 hdrlen += optlen = tcp_addoptions(&to, opt);
22300 * If we wanted a TFO option to be added, but it was unable
22301 * to fit, ensure no data is sent.
22304 !(to.to_flags & TOF_FASTOPEN))
22396 * implementations to clear the FIN flag on all but
22406 * Since we don't have enough space to put
22437 * This KASSERT is here to catch edge cases at a well defined place.
22448 * Check to see if we need to send a challenge ack.
22459 * The idea behind that is instead of having to have
22460 * the peer wait for the delayed-ack timer to run off
22468 * Grab a header mbuf, attaching a copy of data to be transmitted,
22505 * Start the m_copy functions from the closest mbuf to the
22516 * sndptr to help remember the next place in
22528 * set it to NULL if its a retransmission since
22529 * we don't want to change the sb remembered
22547 * shorten it to no longer need tso. Lets
22589 * data to the user when a buffer fills or a PUSH comes in.)
22730 * According to RFC1323 the window field in a SYN (i.e., a <SYN> or
22744 * window. This may cause the remote transmitter to stall. This
22745 * flag tells soreceive() to disable delayed acknowledgements when
22747 * attempting to read more data than can be buffered prior to
22756 /* Now are we using fsb?, if so copy the template data to the mbuf */
22768 * We need to grab the correct pointers into the mbuf
22771 * to the mbuf base pointer (cpto).
22796 if (to.to_flags & TOF_SIGNATURE) {
22804 (u_char *)(th + 1) + (to.to_signature - opt)) != 0) {
22816 * ip6_plen is not need to be filled now, and will be filled
22879 hhook_run_tcp_est_out(tp, th, &to, len, tso);
22886 /* We're getting ready to send; log now. */
22928 * When we have a retransmit we want to log the
22963 * Fill in IP length and desired time to live and send to IP level.
22964 * There should be a better way to handle ttl and tos; we could keep
22965 * them in the template, but need a way to checksum without them.
22975 * user might want to change the value via setsockopt. Also,
22984 * to include the option header lengths as well.
23021 * packet. This might not be the best thing to do according
23022 * to RFC3390 Section 2. However the tcp hostcache migitates
23026 * NB: Don't set DF on small MTU/MSS to have a safe
23066 rack_log_output(tp, &to, len, rack_seq, (uint8_t) flags, error,
23084 * Need to record what we have since we are
23169 * It is used to advance snd_max if we have a new transmit.
23193 * continue to check the count.
23234 * This is only relevant in case of switching back to
23246 * If we are doing FO we need to update the mbuf position and subtract
23248 * we thus want to send a DSACK.
23250 * XXXRRS: This brings to mind a ?, when we send a DSACK block is TSO
23251 * turned off? If not then we are going to echo multiple DSACK blocks
23274 * not get enough data in the TSO to meet the
23294 * not get enough data in the TSO to meet the
23319 * the hpts. Everything else will just have to retransmit
23322 * In any case, we do not want to loop around for another
23343 * Pace us right away to retry in a some
23366 * to send segments changed to another or lowered
23439 /* Do we need to turn off sendalot? */
23513 ((optlen == TCPOLEN_TSTAMP_APPA) && (to.to_flags & TOF_TS)))) {
23541 ((optlen == TCPOLEN_TSTAMP_APPA) && (to.to_flags & TOF_TS)))) {
23558 /* Assure when we leave that snd_nxt will point to top */
23612 * The MTU has changed we need to resend everything
23624 /* Mark all inflight to needing to be rxt'd */
23630 /* We don't use snd_nxt to retransmit */
23710 * Profile 6 tweaks DGP so that it will apply to
23712 * to replace DGP. It gets then the max(dgp-rate, fillcw(discounted).
23716 /* This changes things back to the default settings */
23743 /* Basically this means rack timers are extended based on dsack by up to (2 * srtt) */
23772 /* When dynamic adjustment is on CA needs to start at 100% */
23867 /* Failed to turn pacing on */
23874 * Now we must switch to hybrid mode as well which also
23875 * means moving to regular pacing.
23997 * User wants to set a custom beta.
24021 * to have the new slop.
24039 * to be used on restoral/
24106 * If we are doing DGP we need to switch
24107 * to using the pacing limit.
24114 * Now change up the flags and counts to be correct.
24184 * The 10th bit is used to turn on the
24336 /* Change from kbits per second to bytes per second */
24373 * Now change up the flags and counts to be correct.
24604 /* Allow PRR to send more than one seg */
24693 * Must be between 2 - 80% to be a reduction else
24751 * the parent is the inpcb given. We want to
24752 * apply a read-lock to the parent (we are already
24776 /* Now copy out anything we wish to inherit i.e. things in socket-options */
24779 /* Profile 1 had to be set via sock opt */
25258 * has to revalidate that the connection is still valid for the socket
25397 /* Filter off all unknown options to the base stack */
25411 * We truncate it down to 32 bits for the socket-option trace this
25440 /* No memory to defer, fail */
25518 * impact to this routine.
25537 * to 50 for 50% i.e. the cwnd is reduced to 50% of its previous value
25545 * to 80 for 80% i.e. the cwnd is reduced by 20% of its previous value when
25559 * we don't want to report the saved values.
25690 /* convert to kbits per sec */
25710 /* Allow PRR to send more than one seg */
25898 printf("Failed to add sysctl node\n");
25907 printf("Failed to register %s stack name for "
25915 printf("Failed to register rack module -- err:%d\n", err);