1/*- 2 * Copyright (c) 1984, 1985, 1986, 1987, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2004-2009 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * Copyright (c) 1995, Mike Mitchell 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the University of 45 * California, Berkeley and its contributors. 46 * 4. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)spx_usrreq.h 63 */ 64 65#include <sys/cdefs.h> 66__FBSDID("$FreeBSD$"); 67 68#include <sys/param.h> 69#include <sys/lock.h> 70#include <sys/kernel.h> 71#include <sys/malloc.h> 72#include <sys/mbuf.h> 73#include <sys/mutex.h> 74#include <sys/proc.h> 75#include <sys/protosw.h> 76#include <sys/signalvar.h> 77#include <sys/socket.h> 78#include <sys/socketvar.h> 79#include <sys/sx.h> 80#include <sys/systm.h> 81 82#include <net/route.h> 83#include <netinet/tcp_fsm.h> 84 85#include <netipx/ipx.h> 86#include <netipx/ipx_pcb.h> 87#include <netipx/ipx_var.h> 88#include <netipx/spx.h> 89#include <netipx/spx_debug.h> 90#include <netipx/spx_timer.h> 91#include <netipx/spx_var.h> 92 93static int spx_use_delack = 0; 94static int spxrexmtthresh = 3; 95 96static MALLOC_DEFINE(M_SPXREASSQ, "spxreassq", "SPX reassembly queue entry"); 97 98/* 99 * Flesh pending queued segments on SPX close. 100 */ 101void 102spx_reass_flush(struct spxpcb *cb) 103{ 104 struct spx_q *q; 105 106 while ((q = LIST_FIRST(&cb->s_q)) != NULL) { 107 LIST_REMOVE(q, sq_entry); 108 m_freem(q->sq_msi); 109 free(q, M_SPXREASSQ); 110 } 111} 112 113/* 114 * Initialize SPX segment reassembly queue on SPX socket open. 115 */ 116void 117spx_reass_init(struct spxpcb *cb) 118{ 119 120 LIST_INIT(&cb->s_q); 121} 122 123/* 124 * This is structurally similar to the tcp reassembly routine but its 125 * function is somewhat different: it merely queues packets up, and 126 * suppresses duplicates. 127 */ 128int 129spx_reass(struct spxpcb *cb, struct mbuf *msi, struct spx *si) 130{ 131 struct spx_q *q, *q_new, *q_temp; 132 struct mbuf *m; 133 struct socket *so = cb->s_ipxpcb->ipxp_socket; 134 char packetp = cb->s_flags & SF_HI; 135 int incr; 136 char wakeup = 0; 137 138 IPX_LOCK_ASSERT(cb->s_ipxpcb); 139 140 if (si == SI(0)) 141 goto present; 142 143 /* 144 * Update our news from them. 145 */ 146 if (si->si_cc & SPX_SA) 147 cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW); 148 if (SSEQ_GT(si->si_alo, cb->s_ralo)) 149 cb->s_flags |= SF_WIN; 150 if (SSEQ_LEQ(si->si_ack, cb->s_rack)) { 151 if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) { 152 spxstat.spxs_rcvdupack++; 153 154 /* 155 * If this is a completely duplicate ack and other 156 * conditions hold, we assume a packet has been 157 * dropped and retransmit it exactly as in 158 * tcp_input(). 159 */ 160 if (si->si_ack != cb->s_rack || 161 si->si_alo != cb->s_ralo) 162 cb->s_dupacks = 0; 163 else if (++cb->s_dupacks == spxrexmtthresh) { 164 u_short onxt = cb->s_snxt; 165 int cwnd = cb->s_cwnd; 166 167 cb->s_snxt = si->si_ack; 168 cb->s_cwnd = CUNIT; 169 cb->s_force = 1 + SPXT_REXMT; 170 spx_output(cb, NULL); 171 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur; 172 cb->s_rtt = 0; 173 if (cwnd >= 4 * CUNIT) 174 cb->s_cwnd = cwnd / 2; 175 if (SSEQ_GT(onxt, cb->s_snxt)) 176 cb->s_snxt = onxt; 177 return (1); 178 } 179 } else 180 cb->s_dupacks = 0; 181 goto update_window; 182 } 183 cb->s_dupacks = 0; 184 185 /* 186 * If our correspondent acknowledges data we haven't sent TCP would 187 * drop the packet after acking. We'll be a little more permissive. 188 */ 189 if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) { 190 spxstat.spxs_rcvacktoomuch++; 191 si->si_ack = cb->s_smax + 1; 192 } 193 spxstat.spxs_rcvackpack++; 194 195 /* 196 * If transmit timer is running and timed sequence number was acked, 197 * update smoothed round trip time. See discussion of algorithm in 198 * tcp_input.c 199 */ 200 if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) { 201 spxstat.spxs_rttupdated++; 202 if (cb->s_srtt != 0) { 203 short delta; 204 delta = cb->s_rtt - (cb->s_srtt >> 3); 205 if ((cb->s_srtt += delta) <= 0) 206 cb->s_srtt = 1; 207 if (delta < 0) 208 delta = -delta; 209 delta -= (cb->s_rttvar >> 2); 210 if ((cb->s_rttvar += delta) <= 0) 211 cb->s_rttvar = 1; 212 } else { 213 /* 214 * No rtt measurement yet. 215 */ 216 cb->s_srtt = cb->s_rtt << 3; 217 cb->s_rttvar = cb->s_rtt << 1; 218 } 219 cb->s_rtt = 0; 220 cb->s_rxtshift = 0; 221 SPXT_RANGESET(cb->s_rxtcur, 222 ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1, 223 SPXTV_MIN, SPXTV_REXMTMAX); 224 } 225 226 /* 227 * If all outstanding data is acked, stop retransmit timer and 228 * remember to restart (more output or persist). If there is more 229 * data to be acked, restart retransmit timer, using current 230 * (possibly backed-off) value; 231 */ 232 if (si->si_ack == cb->s_smax + 1) { 233 cb->s_timer[SPXT_REXMT] = 0; 234 cb->s_flags |= SF_RXT; 235 } else if (cb->s_timer[SPXT_PERSIST] == 0) 236 cb->s_timer[SPXT_REXMT] = cb->s_rxtcur; 237 238 /* 239 * When new data is acked, open the congestion window. If the window 240 * gives us less than ssthresh packets in flight, open exponentially 241 * (maxseg at a time). Otherwise open linearly (maxseg^2 / cwnd at a 242 * time). 243 */ 244 incr = CUNIT; 245 if (cb->s_cwnd > cb->s_ssthresh) 246 incr = max(incr * incr / cb->s_cwnd, 1); 247 cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx); 248 249 /* 250 * Trim Acked data from output queue. 251 */ 252 SOCKBUF_LOCK(&so->so_snd); 253 while ((m = so->so_snd.sb_mb) != NULL) { 254 if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack)) 255 sbdroprecord_locked(&so->so_snd); 256 else 257 break; 258 } 259 sowwakeup_locked(so); 260 cb->s_rack = si->si_ack; 261update_window: 262 if (SSEQ_LT(cb->s_snxt, cb->s_rack)) 263 cb->s_snxt = cb->s_rack; 264 if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq && 265 (SSEQ_LT(cb->s_swl2, si->si_ack))) || 266 (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) { 267 /* keep track of pure window updates */ 268 if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack 269 && SSEQ_LT(cb->s_ralo, si->si_alo)) { 270 spxstat.spxs_rcvwinupd++; 271 spxstat.spxs_rcvdupack--; 272 } 273 cb->s_ralo = si->si_alo; 274 cb->s_swl1 = si->si_seq; 275 cb->s_swl2 = si->si_ack; 276 cb->s_swnd = (1 + si->si_alo - si->si_ack); 277 if (cb->s_swnd > cb->s_smxw) 278 cb->s_smxw = cb->s_swnd; 279 cb->s_flags |= SF_WIN; 280 } 281 282 /* 283 * If this packet number is higher than that which we have allocated 284 * refuse it, unless urgent. 285 */ 286 if (SSEQ_GT(si->si_seq, cb->s_alo)) { 287 if (si->si_cc & SPX_SP) { 288 spxstat.spxs_rcvwinprobe++; 289 return (1); 290 } else 291 spxstat.spxs_rcvpackafterwin++; 292 if (si->si_cc & SPX_OB) { 293 if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) 294 return (1); /* else queue this packet; */ 295 } else { 296#ifdef BROKEN 297 /* 298 * XXXRW: This is broken on at least one count: 299 * spx_close() will free the ipxp and related parts, 300 * which are then touched by spx_input() after the 301 * return from spx_reass(). 302 */ 303 /*struct socket *so = cb->s_ipxpcb->ipxp_socket; 304 if (so->so_state && SS_NOFDREF) { 305 spx_close(cb); 306 } else 307 would crash system*/ 308#endif 309 spx_istat.notyet++; 310 return (1); 311 } 312 } 313 314 /* 315 * If this is a system packet, we don't need to queue it up, and 316 * won't update acknowledge #. 317 */ 318 if (si->si_cc & SPX_SP) 319 return (1); 320 321 /* 322 * We have already seen this packet, so drop. 323 */ 324 if (SSEQ_LT(si->si_seq, cb->s_ack)) { 325 spx_istat.bdreas++; 326 spxstat.spxs_rcvduppack++; 327 if (si->si_seq == cb->s_ack - 1) 328 spx_istat.lstdup++; 329 return (1); 330 } 331 332 /* 333 * Loop through all packets queued up to insert in appropriate 334 * sequence. 335 */ 336 q_new = malloc(sizeof(*q_new), M_SPXREASSQ, M_NOWAIT | M_ZERO); 337 if (q_new == NULL) 338 return (1); 339 q_new->sq_si = si; 340 q_new->sq_msi = msi; 341 LIST_FOREACH(q, &cb->s_q, sq_entry) { 342 if (si->si_seq == q->sq_si->si_seq) { 343 free(q_new, M_SPXREASSQ); 344 spxstat.spxs_rcvduppack++; 345 return (1); 346 } 347 if (SSEQ_LT(si->si_seq, q->sq_si->si_seq)) { 348 spxstat.spxs_rcvoopack++; 349 break; 350 } 351 } 352 if (q != NULL) 353 LIST_INSERT_BEFORE(q, q_new, sq_entry); 354 else 355 LIST_INSERT_HEAD(&cb->s_q, q_new, sq_entry); 356 357 /* 358 * If this packet is urgent, inform process 359 */ 360 if (si->si_cc & SPX_OB) { 361 cb->s_iobc = ((char *)si)[1 + sizeof(*si)]; 362 sohasoutofband(so); 363 cb->s_oobflags |= SF_IOOB; 364 } 365present: 366#define SPINC sizeof(struct spxhdr) 367 SOCKBUF_LOCK(&so->so_rcv); 368 369 /* 370 * Loop through all packets queued up to update acknowledge number, 371 * and present all acknowledged data to user; if in packet interface 372 * mode, show packet headers. 373 */ 374 LIST_FOREACH_SAFE(q, &cb->s_q, sq_entry, q_temp) { 375 struct spx *qsi; 376 struct mbuf *mqsi; 377 378 qsi = q->sq_si; 379 mqsi = q->sq_msi; 380 if (qsi->si_seq == cb->s_ack) { 381 cb->s_ack++; 382 if (qsi->si_cc & SPX_OB) { 383 cb->s_oobflags &= ~SF_IOOB; 384 if (so->so_rcv.sb_cc) 385 so->so_oobmark = so->so_rcv.sb_cc; 386 else 387 so->so_rcv.sb_state |= SBS_RCVATMARK; 388 } 389 LIST_REMOVE(q, sq_entry); 390 free(q, M_SPXREASSQ); 391 wakeup = 1; 392 spxstat.spxs_rcvpack++; 393#ifdef SF_NEWCALL 394 if (cb->s_flags2 & SF_NEWCALL) { 395 struct spxhdr *sp = 396 mtod(mqsi, struct spxhdr *); 397 u_char dt = sp->spx_dt; 398 399 spx_newchecks[4]++; 400 if (dt != cb->s_rhdr.spx_dt) { 401 struct mbuf *mm = 402 m_getclr(M_NOWAIT, MT_CONTROL); 403 spx_newchecks[0]++; 404 if (mm != NULL) { 405 u_short *s = 406 mtod(mm, u_short *); 407 cb->s_rhdr.spx_dt = dt; 408 mm->m_len = 5; /*XXX*/ 409 s[0] = 5; 410 s[1] = 1; 411 *(u_char *)(&s[2]) = dt; 412 sbappend_locked(&so->so_rcv, mm); 413 } 414 } 415 if (sp->spx_cc & SPX_OB) { 416 MCHTYPE(mqsi, MT_OOBDATA); 417 spx_newchecks[1]++; 418 so->so_oobmark = 0; 419 so->so_rcv.sb_state &= ~SBS_RCVATMARK; 420 } 421 if (packetp == 0) { 422 mqsi->m_data += SPINC; 423 mqsi->m_len -= SPINC; 424 mqsi->m_pkthdr.len -= SPINC; 425 } 426 if ((sp->spx_cc & SPX_EM) || packetp) { 427 sbappendrecord_locked(&so->so_rcv, 428 mqsi); 429 spx_newchecks[9]++; 430 } else 431 sbappend_locked(&so->so_rcv, mqsi); 432 } else 433#endif 434 if (packetp) 435 sbappendrecord_locked(&so->so_rcv, mqsi); 436 else { 437 cb->s_rhdr = *mtod(mqsi, struct spxhdr *); 438 mqsi->m_data += SPINC; 439 mqsi->m_len -= SPINC; 440 mqsi->m_pkthdr.len -= SPINC; 441 sbappend_locked(&so->so_rcv, mqsi); 442 } 443 } else 444 break; 445 } 446 if (wakeup) 447 sorwakeup_locked(so); 448 else 449 SOCKBUF_UNLOCK(&so->so_rcv); 450 return (0); 451} 452