1139823Simp/*- 2130989Sps * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 3169469Srwatson * The Regents of the University of California. 4169469Srwatson * All rights reserved. 5130989Sps * 6130989Sps * Redistribution and use in source and binary forms, with or without 7130989Sps * modification, are permitted provided that the following conditions 8130989Sps * are met: 9130989Sps * 1. Redistributions of source code must retain the above copyright 10130989Sps * notice, this list of conditions and the following disclaimer. 11130989Sps * 2. Redistributions in binary form must reproduce the above copyright 12130989Sps * notice, this list of conditions and the following disclaimer in the 13130989Sps * documentation and/or other materials provided with the distribution. 14130989Sps * 4. Neither the name of the University nor the names of its contributors 15130989Sps * may be used to endorse or promote products derived from this software 16130989Sps * without specific prior written permission. 17130989Sps * 18130989Sps * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19130989Sps * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20130989Sps * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21130989Sps * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22130989Sps * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23130989Sps * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24130989Sps * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25130989Sps * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26130989Sps * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27130989Sps * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28130989Sps * SUCH DAMAGE. 29130989Sps * 30130989Sps * @(#)tcp_sack.c 8.12 (Berkeley) 5/24/95 31130989Sps */ 32130989Sps 33139823Simp/*- 34130989Sps * @@(#)COPYRIGHT 1.1 (NRL) 17 January 1995 35130989Sps * 36130989Sps * NRL grants permission for redistribution and use in source and binary 37130989Sps * forms, with or without modification, of the software and documentation 38130989Sps * created at NRL provided that the following conditions are met: 39130989Sps * 40130989Sps * 1. Redistributions of source code must retain the above copyright 41130989Sps * notice, this list of conditions and the following disclaimer. 42130989Sps * 2. Redistributions in binary form must reproduce the above copyright 43130989Sps * notice, this list of conditions and the following disclaimer in the 44130989Sps * documentation and/or other materials provided with the distribution. 45130989Sps * 3. All advertising materials mentioning features or use of this software 46130989Sps * must display the following acknowledgements: 47133874Srwatson * This product includes software developed by the University of 48133874Srwatson * California, Berkeley and its contributors. 49133874Srwatson * This product includes software developed at the Information 50133874Srwatson * Technology Division, US Naval Research Laboratory. 51130989Sps * 4. Neither the name of the NRL nor the names of its contributors 52130989Sps * may be used to endorse or promote products derived from this software 53130989Sps * without specific prior written permission. 54130989Sps * 55130989Sps * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 56130989Sps * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 57130989Sps * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 58130989Sps * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 59130989Sps * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 60130989Sps * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 61130989Sps * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 62130989Sps * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 63130989Sps * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 64130989Sps * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 65130989Sps * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 66130989Sps * 67130989Sps * The views and conclusions contained in the software and documentation 68130989Sps * are those of the authors and should not be interpreted as representing 69130989Sps * official policies, either expressed or implied, of the US Naval 70130989Sps * Research Laboratory (NRL). 71130989Sps */ 72169469Srwatson 73169469Srwatson#include <sys/cdefs.h> 74169469Srwatson__FBSDID("$FreeBSD$"); 75169469Srwatson 76130989Sps#include "opt_inet.h" 77130989Sps#include "opt_inet6.h" 78130989Sps#include "opt_tcpdebug.h" 79130989Sps 80130989Sps#include <sys/param.h> 81130989Sps#include <sys/systm.h> 82130989Sps#include <sys/kernel.h> 83130989Sps#include <sys/sysctl.h> 84130989Sps#include <sys/malloc.h> 85130989Sps#include <sys/mbuf.h> 86130989Sps#include <sys/proc.h> /* for proc0 declaration */ 87130989Sps#include <sys/protosw.h> 88130989Sps#include <sys/socket.h> 89130989Sps#include <sys/socketvar.h> 90130989Sps#include <sys/syslog.h> 91130989Sps#include <sys/systm.h> 92130989Sps 93130989Sps#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */ 94130989Sps 95130989Sps#include <vm/uma.h> 96130989Sps 97130989Sps#include <net/if.h> 98130989Sps#include <net/route.h> 99196019Srwatson#include <net/vnet.h> 100130989Sps 101130989Sps#include <netinet/in.h> 102130989Sps#include <netinet/in_systm.h> 103130989Sps#include <netinet/ip.h> 104130989Sps#include <netinet/in_var.h> 105130989Sps#include <netinet/in_pcb.h> 106130989Sps#include <netinet/ip_var.h> 107130989Sps#include <netinet/ip6.h> 108130989Sps#include <netinet/icmp6.h> 109130989Sps#include <netinet6/nd6.h> 110130989Sps#include <netinet6/ip6_var.h> 111130989Sps#include <netinet6/in6_pcb.h> 112130989Sps#include <netinet/tcp.h> 113130989Sps#include <netinet/tcp_fsm.h> 114130989Sps#include <netinet/tcp_seq.h> 115130989Sps#include <netinet/tcp_timer.h> 116130989Sps#include <netinet/tcp_var.h> 117130989Sps#include <netinet6/tcp6_var.h> 118130989Sps#include <netinet/tcpip.h> 119130989Sps#ifdef TCPDEBUG 120130989Sps#include <netinet/tcp_debug.h> 121130989Sps#endif /* TCPDEBUG */ 122130989Sps 123130989Sps#include <machine/in_cksum.h> 124130989Sps 125195699SrwatsonVNET_DECLARE(struct uma_zone *, sack_hole_zone); 126195727Srwatson#define V_sack_hole_zone VNET(sack_hole_zone) 127195699Srwatson 128136151SpsSYSCTL_NODE(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, 0, "TCP SACK"); 129207369SbzVNET_DEFINE(int, tcp_do_sack) = 1; 130207369Sbz#define V_tcp_do_sack VNET(tcp_do_sack) 131195699SrwatsonSYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, enable, CTLFLAG_RW, 132195699Srwatson &VNET_NAME(tcp_do_sack), 0, "Enable/Disable TCP SACK support"); 133136151Sps 134207369SbzVNET_DEFINE(int, tcp_sack_maxholes) = 128; 135207369Sbz#define V_tcp_sack_maxholes VNET(tcp_sack_maxholes) 136195699SrwatsonSYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, maxholes, CTLFLAG_RW, 137195699Srwatson &VNET_NAME(tcp_sack_maxholes), 0, 138143339Sps "Maximum number of TCP SACK holes allowed per connection"); 139143339Sps 140207369SbzVNET_DEFINE(int, tcp_sack_globalmaxholes) = 65536; 141207369Sbz#define V_tcp_sack_globalmaxholes VNET(tcp_sack_globalmaxholes) 142195699SrwatsonSYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, globalmaxholes, CTLFLAG_RW, 143195699Srwatson &VNET_NAME(tcp_sack_globalmaxholes), 0, 144143339Sps "Global maximum number of TCP SACK holes"); 145143339Sps 146207369SbzVNET_DEFINE(int, tcp_sack_globalholes) = 0; 147207369Sbz#define V_tcp_sack_globalholes VNET(tcp_sack_globalholes) 148195699SrwatsonSYSCTL_VNET_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_RD, 149195699Srwatson &VNET_NAME(tcp_sack_globalholes), 0, 150143339Sps "Global number of TCP SACK holes currently allocated"); 151145244Sps 152130989Sps/* 153169469Srwatson * This function is called upon receipt of new valid data (while not in 154169469Srwatson * header prediction mode), and it updates the ordered list of sacks. 155130989Sps */ 156130989Spsvoid 157145244Spstcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end) 158130989Sps{ 159130989Sps /* 160130989Sps * First reported block MUST be the most recent one. Subsequent 161130989Sps * blocks SHOULD be in the order in which they arrived at the 162130989Sps * receiver. These two conditions make the implementation fully 163130989Sps * compliant with RFC 2018. 164130989Sps */ 165145244Sps struct sackblk head_blk, saved_blks[MAX_SACK_BLKS]; 166145244Sps int num_head, num_saved, i; 167130989Sps 168178285Srwatson INP_WLOCK_ASSERT(tp->t_inpcb); 169145244Sps 170169469Srwatson /* Check arguments. */ 171145244Sps KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end")); 172145244Sps 173145244Sps /* SACK block for the received segment. */ 174145244Sps head_blk.start = rcv_start; 175145244Sps head_blk.end = rcv_end; 176145244Sps 177130989Sps /* 178169469Srwatson * Merge updated SACK blocks into head_blk, and save unchanged SACK 179169469Srwatson * blocks into saved_blks[]. num_saved will have the number of the 180169469Srwatson * saved SACK blocks. 181130989Sps */ 182145244Sps num_saved = 0; 183130989Sps for (i = 0; i < tp->rcv_numsacks; i++) { 184145244Sps tcp_seq start = tp->sackblks[i].start; 185145244Sps tcp_seq end = tp->sackblks[i].end; 186145244Sps if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) { 187130989Sps /* 188145244Sps * Discard this SACK block. 189130989Sps */ 190145244Sps } else if (SEQ_LEQ(head_blk.start, end) && 191145244Sps SEQ_GEQ(head_blk.end, start)) { 192145244Sps /* 193169469Srwatson * Merge this SACK block into head_blk. This SACK 194169469Srwatson * block itself will be discarded. 195145244Sps */ 196145244Sps if (SEQ_GT(head_blk.start, start)) 197145244Sps head_blk.start = start; 198145244Sps if (SEQ_LT(head_blk.end, end)) 199145244Sps head_blk.end = end; 200145244Sps } else { 201145244Sps /* 202145244Sps * Save this SACK block. 203145244Sps */ 204145244Sps saved_blks[num_saved].start = start; 205145244Sps saved_blks[num_saved].end = end; 206145244Sps num_saved++; 207130989Sps } 208130989Sps } 209145244Sps 210145244Sps /* 211145244Sps * Update SACK list in tp->sackblks[]. 212145244Sps */ 213145244Sps num_head = 0; 214145244Sps if (SEQ_GT(head_blk.start, tp->rcv_nxt)) { 215145244Sps /* 216169469Srwatson * The received data segment is an out-of-order segment. Put 217169469Srwatson * head_blk at the top of SACK list. 218145244Sps */ 219145244Sps tp->sackblks[0] = head_blk; 220145244Sps num_head = 1; 221145244Sps /* 222145244Sps * If the number of saved SACK blocks exceeds its limit, 223145244Sps * discard the last SACK block. 224145244Sps */ 225145244Sps if (num_saved >= MAX_SACK_BLKS) 226145244Sps num_saved--; 227130989Sps } 228145244Sps if (num_saved > 0) { 229145244Sps /* 230145244Sps * Copy the saved SACK blocks back. 231145244Sps */ 232145244Sps bcopy(saved_blks, &tp->sackblks[num_head], 233145244Sps sizeof(struct sackblk) * num_saved); 234145244Sps } 235145244Sps 236145244Sps /* Save the number of SACK blocks. */ 237145244Sps tp->rcv_numsacks = num_head + num_saved; 238130989Sps} 239130989Sps 240130989Sps/* 241130989Sps * Delete all receiver-side SACK information. 242130989Sps */ 243130989Spsvoid 244169454Srwatsontcp_clean_sackreport(struct tcpcb *tp) 245130989Sps{ 246130989Sps int i; 247130989Sps 248178285Srwatson INP_WLOCK_ASSERT(tp->t_inpcb); 249130989Sps tp->rcv_numsacks = 0; 250130989Sps for (i = 0; i < MAX_SACK_BLKS; i++) 251130989Sps tp->sackblks[i].start = tp->sackblks[i].end=0; 252130989Sps} 253130989Sps 254130989Sps/* 255146304Sps * Allocate struct sackhole. 256146304Sps */ 257146304Spsstatic struct sackhole * 258146304Spstcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end) 259146304Sps{ 260146304Sps struct sackhole *hole; 261146304Sps 262181803Sbz if (tp->snd_numholes >= V_tcp_sack_maxholes || 263181803Sbz V_tcp_sack_globalholes >= V_tcp_sack_globalmaxholes) { 264190948Srwatson TCPSTAT_INC(tcps_sack_sboverflow); 265146304Sps return NULL; 266146304Sps } 267146304Sps 268190787Szec hole = (struct sackhole *)uma_zalloc(V_sack_hole_zone, M_NOWAIT); 269146304Sps if (hole == NULL) 270146304Sps return NULL; 271146304Sps 272146304Sps hole->start = start; 273146304Sps hole->end = end; 274146304Sps hole->rxmit = start; 275146304Sps 276146304Sps tp->snd_numholes++; 277195655Slstewart atomic_add_int(&V_tcp_sack_globalholes, 1); 278146304Sps 279146304Sps return hole; 280146304Sps} 281146304Sps 282146304Sps/* 283146304Sps * Free struct sackhole. 284146304Sps */ 285146304Spsstatic void 286146304Spstcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole) 287146304Sps{ 288169469Srwatson 289190787Szec uma_zfree(V_sack_hole_zone, hole); 290146304Sps 291146304Sps tp->snd_numholes--; 292195655Slstewart atomic_subtract_int(&V_tcp_sack_globalholes, 1); 293146304Sps 294146304Sps KASSERT(tp->snd_numholes >= 0, ("tp->snd_numholes >= 0")); 295181803Sbz KASSERT(V_tcp_sack_globalholes >= 0, ("tcp_sack_globalholes >= 0")); 296146304Sps} 297146304Sps 298146304Sps/* 299146953Sps * Insert new SACK hole into scoreboard. 300146953Sps */ 301146953Spsstatic struct sackhole * 302146953Spstcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end, 303167785Sandre struct sackhole *after) 304146953Sps{ 305146953Sps struct sackhole *hole; 306146953Sps 307146953Sps /* Allocate a new SACK hole. */ 308146953Sps hole = tcp_sackhole_alloc(tp, start, end); 309146953Sps if (hole == NULL) 310146953Sps return NULL; 311146953Sps 312169469Srwatson /* Insert the new SACK hole into scoreboard. */ 313146953Sps if (after != NULL) 314146953Sps TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink); 315146953Sps else 316146953Sps TAILQ_INSERT_TAIL(&tp->snd_holes, hole, scblink); 317146953Sps 318146953Sps /* Update SACK hint. */ 319146953Sps if (tp->sackhint.nexthole == NULL) 320146953Sps tp->sackhint.nexthole = hole; 321146953Sps 322146953Sps return hole; 323146953Sps} 324146953Sps 325146953Sps/* 326146953Sps * Remove SACK hole from scoreboard. 327146953Sps */ 328146953Spsstatic void 329146953Spstcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole) 330146953Sps{ 331169469Srwatson 332146953Sps /* Update SACK hint. */ 333146953Sps if (tp->sackhint.nexthole == hole) 334146953Sps tp->sackhint.nexthole = TAILQ_NEXT(hole, scblink); 335146953Sps 336146953Sps /* Remove this SACK hole. */ 337146953Sps TAILQ_REMOVE(&tp->snd_holes, hole, scblink); 338146953Sps 339146953Sps /* Free this SACK hole. */ 340146953Sps tcp_sackhole_free(tp, hole); 341146953Sps} 342146953Sps 343146953Sps/* 344147637Sps * Process cumulative ACK and the TCP SACK option to update the scoreboard. 345147637Sps * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of 346147637Sps * the sequence space). 347130989Sps */ 348147637Spsvoid 349147637Spstcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) 350130989Sps{ 351145370Sps struct sackhole *cur, *temp; 352147637Sps struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; 353146953Sps int i, j, num_sack_blks; 354130989Sps 355178285Srwatson INP_WLOCK_ASSERT(tp->t_inpcb); 356147637Sps 357147637Sps num_sack_blks = 0; 358146552Sps /* 359147637Sps * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist, 360147637Sps * treat [SND.UNA, SEG.ACK) as if it is a SACK block. 361146552Sps */ 362147637Sps if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) { 363147637Sps sack_blocks[num_sack_blks].start = tp->snd_una; 364147637Sps sack_blocks[num_sack_blks++].end = th_ack; 365147637Sps } 366147637Sps /* 367169469Srwatson * Append received valid SACK blocks to sack_blocks[], but only if we 368169469Srwatson * received new blocks from the other side. 369147637Sps */ 370167888Sandre if (to->to_flags & TOF_SACK) { 371167888Sandre for (i = 0; i < to->to_nsacks; i++) { 372167888Sandre bcopy((to->to_sacks + i * TCPOLEN_SACK), 373167888Sandre &sack, sizeof(sack)); 374167888Sandre sack.start = ntohl(sack.start); 375167888Sandre sack.end = ntohl(sack.end); 376167888Sandre if (SEQ_GT(sack.end, sack.start) && 377167888Sandre SEQ_GT(sack.start, tp->snd_una) && 378167888Sandre SEQ_GT(sack.start, th_ack) && 379167888Sandre SEQ_LT(sack.start, tp->snd_max) && 380167888Sandre SEQ_GT(sack.end, tp->snd_una) && 381167888Sandre SEQ_LEQ(sack.end, tp->snd_max)) 382167888Sandre sack_blocks[num_sack_blks++] = sack; 383167888Sandre } 384146552Sps } 385147637Sps /* 386169469Srwatson * Return if SND.UNA is not advanced and no valid SACK block is 387169469Srwatson * received. 388147637Sps */ 389146552Sps if (num_sack_blks == 0) 390147637Sps return; 391147637Sps 392147637Sps /* 393169469Srwatson * Sort the SACK blocks so we can update the scoreboard with just one 394169469Srwatson * pass. The overhead of sorting upto 4+1 elements is less than 395169469Srwatson * making upto 4+1 passes over the scoreboard. 396147637Sps */ 397146552Sps for (i = 0; i < num_sack_blks; i++) { 398146552Sps for (j = i + 1; j < num_sack_blks; j++) { 399146953Sps if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) { 400146552Sps sack = sack_blocks[i]; 401146552Sps sack_blocks[i] = sack_blocks[j]; 402146552Sps sack_blocks[j] = sack; 403146552Sps } 404130989Sps } 405146552Sps } 406146552Sps if (TAILQ_EMPTY(&tp->snd_holes)) 407146552Sps /* 408146630Sps * Empty scoreboard. Need to initialize snd_fack (it may be 409146552Sps * uninitialized or have a bogus value). Scoreboard holes 410169469Srwatson * (from the sack blocks received) are created later below 411169469Srwatson * (in the logic that adds holes to the tail of the 412169469Srwatson * scoreboard). 413146552Sps */ 414147637Sps tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack); 415146552Sps /* 416169469Srwatson * In the while-loop below, incoming SACK blocks (sack_blocks[]) and 417169469Srwatson * SACK holes (snd_holes) are traversed from their tails with just 418169469Srwatson * one pass in order to reduce the number of compares especially when 419169469Srwatson * the bandwidth-delay product is large. 420169469Srwatson * 421146953Sps * Note: Typically, in the first RTT of SACK recovery, the highest 422146953Sps * three or four SACK blocks with the same ack number are received. 423146953Sps * In the second RTT, if retransmitted data segments are not lost, 424146953Sps * the highest three or four SACK blocks with ack number advancing 425146953Sps * are received. 426146953Sps */ 427146953Sps sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */ 428216753Slstewart tp->sackhint.last_sack_ack = sblkp->end; 429146953Sps if (SEQ_LT(tp->snd_fack, sblkp->start)) { 430146953Sps /* 431169469Srwatson * The highest SACK block is beyond fack. Append new SACK 432169469Srwatson * hole at the tail. If the second or later highest SACK 433169469Srwatson * blocks are also beyond the current fack, they will be 434169469Srwatson * inserted by way of hole splitting in the while-loop below. 435146953Sps */ 436147169Sps temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL); 437152655Sps if (temp != NULL) { 438152655Sps tp->snd_fack = sblkp->end; 439152655Sps /* Go to the previous sack block. */ 440152655Sps sblkp--; 441152655Sps } else { 442152655Sps /* 443152655Sps * We failed to add a new hole based on the current 444152655Sps * sack block. Skip over all the sack blocks that 445169469Srwatson * fall completely to the right of snd_fack and 446169469Srwatson * proceed to trim the scoreboard based on the 447169469Srwatson * remaining sack blocks. This also trims the 448169469Srwatson * scoreboard for th_ack (which is sack_blocks[0]). 449152655Sps */ 450152655Sps while (sblkp >= sack_blocks && 451152655Sps SEQ_LT(tp->snd_fack, sblkp->start)) 452152655Sps sblkp--; 453152655Sps if (sblkp >= sack_blocks && 454152655Sps SEQ_LT(tp->snd_fack, sblkp->end)) 455152655Sps tp->snd_fack = sblkp->end; 456152655Sps } 457146953Sps } else if (SEQ_LT(tp->snd_fack, sblkp->end)) 458146953Sps /* fack is advanced. */ 459146953Sps tp->snd_fack = sblkp->end; 460169469Srwatson /* We must have at least one SACK hole in scoreboard. */ 461169469Srwatson KASSERT(!TAILQ_EMPTY(&tp->snd_holes), 462169469Srwatson ("SACK scoreboard must not be empty")); 463169469Srwatson cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */ 464146953Sps /* 465146552Sps * Since the incoming sack blocks are sorted, we can process them 466146552Sps * making one sweep of the scoreboard. 467146552Sps */ 468152655Sps while (sblkp >= sack_blocks && cur != NULL) { 469146953Sps if (SEQ_GEQ(sblkp->start, cur->end)) { 470146552Sps /* 471169469Srwatson * SACKs data beyond the current hole. Go to the 472169469Srwatson * previous sack block. 473146552Sps */ 474146953Sps sblkp--; 475130989Sps continue; 476130989Sps } 477146953Sps if (SEQ_LEQ(sblkp->end, cur->start)) { 478146552Sps /* 479169469Srwatson * SACKs data before the current hole. Go to the 480169469Srwatson * previous hole. 481146552Sps */ 482146953Sps cur = TAILQ_PREV(cur, sackhole_head, scblink); 483146552Sps continue; 484146552Sps } 485146552Sps tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start); 486146552Sps KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, 487169469Srwatson ("sackhint bytes rtx >= 0")); 488146953Sps if (SEQ_LEQ(sblkp->start, cur->start)) { 489169469Srwatson /* Data acks at least the beginning of hole. */ 490146953Sps if (SEQ_GEQ(sblkp->end, cur->end)) { 491169469Srwatson /* Acks entire hole, so delete hole. */ 492146552Sps temp = cur; 493146953Sps cur = TAILQ_PREV(cur, sackhole_head, scblink); 494146953Sps tcp_sackhole_remove(tp, temp); 495146552Sps /* 496169469Srwatson * The sack block may ack all or part of the 497169469Srwatson * next hole too, so continue onto the next 498169469Srwatson * hole. 499146552Sps */ 500130989Sps continue; 501146552Sps } else { 502169469Srwatson /* Move start of hole forward. */ 503146953Sps cur->start = sblkp->end; 504146552Sps cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); 505130989Sps } 506146552Sps } else { 507169469Srwatson /* Data acks at least the end of hole. */ 508146953Sps if (SEQ_GEQ(sblkp->end, cur->end)) { 509169469Srwatson /* Move end of hole backward. */ 510146953Sps cur->end = sblkp->start; 511130989Sps cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); 512146123Sps } else { 513130989Sps /* 514169469Srwatson * ACKs some data in middle of a hole; need 515169469Srwatson * to split current hole 516130989Sps */ 517146953Sps temp = tcp_sackhole_insert(tp, sblkp->end, 518169469Srwatson cur->end, cur); 519146123Sps if (temp != NULL) { 520146953Sps if (SEQ_GT(cur->rxmit, temp->rxmit)) { 521146304Sps temp->rxmit = cur->rxmit; 522146953Sps tp->sackhint.sack_bytes_rexmit 523169469Srwatson += (temp->rxmit 524169469Srwatson - temp->start); 525146953Sps } 526146953Sps cur->end = sblkp->start; 527146953Sps cur->rxmit = SEQ_MIN(cur->rxmit, 528169469Srwatson cur->end); 529146123Sps } 530130989Sps } 531146552Sps } 532146552Sps tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start); 533147061Sps /* 534147061Sps * Testing sblkp->start against cur->start tells us whether 535147061Sps * we're done with the sack block or the sack hole. 536147061Sps * Accordingly, we advance one or the other. 537147061Sps */ 538147061Sps if (SEQ_LEQ(sblkp->start, cur->start)) 539147061Sps cur = TAILQ_PREV(cur, sackhole_head, scblink); 540147061Sps else 541147061Sps sblkp--; 542146552Sps } 543130989Sps} 544130989Sps 545130989Sps/* 546147637Sps * Free all SACK holes to clear the scoreboard. 547130989Sps */ 548130989Spsvoid 549130989Spstcp_free_sackholes(struct tcpcb *tp) 550130989Sps{ 551145370Sps struct sackhole *q; 552130989Sps 553178285Srwatson INP_WLOCK_ASSERT(tp->t_inpcb); 554146953Sps while ((q = TAILQ_FIRST(&tp->snd_holes)) != NULL) 555146953Sps tcp_sackhole_remove(tp, q); 556146123Sps tp->sackhint.sack_bytes_rexmit = 0; 557146304Sps 558146304Sps KASSERT(tp->snd_numholes == 0, ("tp->snd_numholes == 0")); 559146953Sps KASSERT(tp->sackhint.nexthole == NULL, 560146953Sps ("tp->sackhint.nexthole == NULL")); 561130989Sps} 562130989Sps 563130989Sps/* 564169469Srwatson * Partial ack handling within a sack recovery episode. Keeping this very 565169469Srwatson * simple for now. When a partial ack is received, force snd_cwnd to a value 566169469Srwatson * that will allow the sender to transmit no more than 2 segments. If 567169469Srwatson * necessary, a better scheme can be adopted at a later point, but for now, 568169469Srwatson * the goal is to prevent the sender from bursting a large amount of data in 569169469Srwatson * the midst of sack recovery. 570130989Sps */ 571130989Spsvoid 572167785Sandretcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th) 573130989Sps{ 574141928Sps int num_segs = 1; 575130989Sps 576178285Srwatson INP_WLOCK_ASSERT(tp->t_inpcb); 577168615Sandre tcp_timer_activate(tp, TT_REXMT, 0); 578130989Sps tp->t_rtttime = 0; 579169469Srwatson /* Send one or 2 segments based on how much new data was acked. */ 580220105Sweongyo if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) >= 2) 581141928Sps num_segs = 2; 582146123Sps tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit + 583169469Srwatson (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg); 584145087Sps if (tp->snd_cwnd > tp->snd_ssthresh) 585145087Sps tp->snd_cwnd = tp->snd_ssthresh; 586130989Sps tp->t_flags |= TF_ACKNOW; 587130989Sps (void) tcp_output(tp); 588130989Sps} 589130989Sps 590157569Smohans#if 0 591130989Sps/* 592169469Srwatson * Debug version of tcp_sack_output() that walks the scoreboard. Used for 593146123Sps * now to sanity check the hint. 594130989Sps */ 595146123Spsstatic struct sackhole * 596146123Spstcp_sack_output_debug(struct tcpcb *tp, int *sack_bytes_rexmt) 597130989Sps{ 598145370Sps struct sackhole *p; 599130989Sps 600178285Srwatson INP_WLOCK_ASSERT(tp->t_inpcb); 601136151Sps *sack_bytes_rexmt = 0; 602145370Sps TAILQ_FOREACH(p, &tp->snd_holes, scblink) { 603130989Sps if (SEQ_LT(p->rxmit, p->end)) { 604130989Sps if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */ 605130989Sps continue; 606130989Sps } 607136151Sps *sack_bytes_rexmt += (p->rxmit - p->start); 608136151Sps break; 609130989Sps } 610136151Sps *sack_bytes_rexmt += (p->rxmit - p->start); 611130989Sps } 612136151Sps return (p); 613130989Sps} 614157569Smohans#endif 615130989Sps 616130989Sps/* 617146123Sps * Returns the next hole to retransmit and the number of retransmitted bytes 618169469Srwatson * from the scoreboard. We store both the next hole and the number of 619146123Sps * retransmitted bytes as hints (and recompute these on the fly upon SACK/ACK 620169469Srwatson * reception). This avoids scoreboard traversals completely. 621146123Sps * 622169469Srwatson * The loop here will traverse *at most* one link. Here's the argument. For 623169469Srwatson * the loop to traverse more than 1 link before finding the next hole to 624169469Srwatson * retransmit, we would need to have at least 1 node following the current 625169469Srwatson * hint with (rxmit == end). But, for all holes following the current hint, 626169469Srwatson * (start == rxmit), since we have not yet retransmitted from them. 627169469Srwatson * Therefore, in order to traverse more 1 link in the loop below, we need to 628169469Srwatson * have at least one node following the current hint with (start == rxmit == 629169469Srwatson * end). But that can't happen, (start == end) means that all the data in 630169469Srwatson * that hole has been sacked, in which case, the hole would have been removed 631169469Srwatson * from the scoreboard. 632146123Sps */ 633146123Spsstruct sackhole * 634146123Spstcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt) 635146123Sps{ 636157569Smohans struct sackhole *hole = NULL; 637146123Sps 638178285Srwatson INP_WLOCK_ASSERT(tp->t_inpcb); 639146123Sps *sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit; 640146123Sps hole = tp->sackhint.nexthole; 641146123Sps if (hole == NULL || SEQ_LT(hole->rxmit, hole->end)) 642146123Sps goto out; 643146123Sps while ((hole = TAILQ_NEXT(hole, scblink)) != NULL) { 644146123Sps if (SEQ_LT(hole->rxmit, hole->end)) { 645146123Sps tp->sackhint.nexthole = hole; 646146123Sps break; 647146123Sps } 648146123Sps } 649146123Spsout: 650146123Sps return (hole); 651146123Sps} 652146123Sps 653146123Sps/* 654130989Sps * After a timeout, the SACK list may be rebuilt. This SACK information 655130989Sps * should be used to avoid retransmitting SACKed data. This function 656130989Sps * traverses the SACK list to see if snd_nxt should be moved forward. 657130989Sps */ 658130989Spsvoid 659130989Spstcp_sack_adjust(struct tcpcb *tp) 660130989Sps{ 661145370Sps struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes); 662145244Sps 663178285Srwatson INP_WLOCK_ASSERT(tp->t_inpcb); 664130989Sps if (cur == NULL) 665130989Sps return; /* No holes */ 666146630Sps if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack)) 667130989Sps return; /* We're already beyond any SACKed blocks */ 668169469Srwatson /*- 669130989Sps * Two cases for which we want to advance snd_nxt: 670130989Sps * i) snd_nxt lies between end of one hole and beginning of another 671146630Sps * ii) snd_nxt lies between end of last hole and snd_fack 672130989Sps */ 673145370Sps while ((p = TAILQ_NEXT(cur, scblink)) != NULL) { 674130989Sps if (SEQ_LT(tp->snd_nxt, cur->end)) 675130989Sps return; 676145370Sps if (SEQ_GEQ(tp->snd_nxt, p->start)) 677145370Sps cur = p; 678130989Sps else { 679145370Sps tp->snd_nxt = p->start; 680130989Sps return; 681130989Sps } 682130989Sps } 683130989Sps if (SEQ_LT(tp->snd_nxt, cur->end)) 684130989Sps return; 685146630Sps tp->snd_nxt = tp->snd_fack; 686130989Sps} 687