1/*-
2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 *	The Regents of the University of California.
4 * Copyright (c) 2004-2009 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * Copyright (c) 1995, Mike Mitchell
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the University of
45 *	California, Berkeley and its contributors.
46 * 4. Neither the name of the University nor the names of its contributors
47 *    may be used to endorse or promote products derived from this software
48 *    without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 *
62 *	@(#)spx_usrreq.h
63 */
64
65#include <sys/cdefs.h>
66__FBSDID("$FreeBSD$");
67
68#include <sys/param.h>
69#include <sys/lock.h>
70#include <sys/kernel.h>
71#include <sys/malloc.h>
72#include <sys/mbuf.h>
73#include <sys/mutex.h>
74#include <sys/proc.h>
75#include <sys/protosw.h>
76#include <sys/signalvar.h>
77#include <sys/socket.h>
78#include <sys/socketvar.h>
79#include <sys/sx.h>
80#include <sys/systm.h>
81
82#include <net/route.h>
83#include <netinet/tcp_fsm.h>
84
85#include <netipx/ipx.h>
86#include <netipx/ipx_pcb.h>
87#include <netipx/ipx_var.h>
88#include <netipx/spx.h>
89#include <netipx/spx_debug.h>
90#include <netipx/spx_timer.h>
91#include <netipx/spx_var.h>
92
/*
 * When non-zero, a received packet carrying SPX_SA makes spx_reass() set
 * SF_DELACK on the connection instead of SF_ACKNOW.
 */
static int	spx_use_delack = 0;
/*
 * Value the per-connection duplicate-ack counter (s_dupacks) must reach
 * for spx_reass() to retransmit from the acknowledged sequence number.
 */
static int	spxrexmtthresh = 3;

/* malloc type used for the struct spx_q reassembly queue entries. */
static MALLOC_DEFINE(M_SPXREASSQ, "spxreassq", "SPX reassembly queue entry");
97
98/*
99 * Flesh pending queued segments on SPX close.
100 */
101void
102spx_reass_flush(struct spxpcb *cb)
103{
104	struct spx_q *q;
105
106	while ((q = LIST_FIRST(&cb->s_q)) != NULL) {
107		LIST_REMOVE(q, sq_entry);
108		m_freem(q->sq_msi);
109		free(q, M_SPXREASSQ);
110	}
111}
112
113/*
114 * Initialize SPX segment reassembly queue on SPX socket open.
115 */
116void
117spx_reass_init(struct spxpcb *cb)
118{
119
120	LIST_INIT(&cb->s_q);
121}
122
123/*
124 * This is structurally similar to the tcp reassembly routine but its
125 * function is somewhat different: it merely queues packets up, and
126 * suppresses duplicates.
127 */
128int
129spx_reass(struct spxpcb *cb, struct mbuf *msi, struct spx *si)
130{
131	struct spx_q *q, *q_new, *q_temp;
132	struct mbuf *m;
133	struct socket *so = cb->s_ipxpcb->ipxp_socket;
134	char packetp = cb->s_flags & SF_HI;
135	int incr;
136	char wakeup = 0;
137
138	IPX_LOCK_ASSERT(cb->s_ipxpcb);
139
140	if (si == SI(0))
141		goto present;
142
143	/*
144	 * Update our news from them.
145	 */
146	if (si->si_cc & SPX_SA)
147		cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
148	if (SSEQ_GT(si->si_alo, cb->s_ralo))
149		cb->s_flags |= SF_WIN;
150	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
151		if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
152			spxstat.spxs_rcvdupack++;
153
154			/*
155			 * If this is a completely duplicate ack and other
156			 * conditions hold, we assume a packet has been
157			 * dropped and retransmit it exactly as in
158			 * tcp_input().
159			 */
160			if (si->si_ack != cb->s_rack ||
161			    si->si_alo != cb->s_ralo)
162				cb->s_dupacks = 0;
163			else if (++cb->s_dupacks == spxrexmtthresh) {
164				u_short onxt = cb->s_snxt;
165				int cwnd = cb->s_cwnd;
166
167				cb->s_snxt = si->si_ack;
168				cb->s_cwnd = CUNIT;
169				cb->s_force = 1 + SPXT_REXMT;
170				spx_output(cb, NULL);
171				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
172				cb->s_rtt = 0;
173				if (cwnd >= 4 * CUNIT)
174					cb->s_cwnd = cwnd / 2;
175				if (SSEQ_GT(onxt, cb->s_snxt))
176					cb->s_snxt = onxt;
177				return (1);
178			}
179		} else
180			cb->s_dupacks = 0;
181		goto update_window;
182	}
183	cb->s_dupacks = 0;
184
185	/*
186	 * If our correspondent acknowledges data we haven't sent TCP would
187	 * drop the packet after acking.  We'll be a little more permissive.
188	 */
189	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
190		spxstat.spxs_rcvacktoomuch++;
191		si->si_ack = cb->s_smax + 1;
192	}
193	spxstat.spxs_rcvackpack++;
194
195	/*
196	 * If transmit timer is running and timed sequence number was acked,
197	 * update smoothed round trip time.  See discussion of algorithm in
198	 * tcp_input.c
199	 */
200	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
201		spxstat.spxs_rttupdated++;
202		if (cb->s_srtt != 0) {
203			short delta;
204			delta = cb->s_rtt - (cb->s_srtt >> 3);
205			if ((cb->s_srtt += delta) <= 0)
206				cb->s_srtt = 1;
207			if (delta < 0)
208				delta = -delta;
209			delta -= (cb->s_rttvar >> 2);
210			if ((cb->s_rttvar += delta) <= 0)
211				cb->s_rttvar = 1;
212		} else {
213			/*
214			 * No rtt measurement yet.
215			 */
216			cb->s_srtt = cb->s_rtt << 3;
217			cb->s_rttvar = cb->s_rtt << 1;
218		}
219		cb->s_rtt = 0;
220		cb->s_rxtshift = 0;
221		SPXT_RANGESET(cb->s_rxtcur,
222			((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
223			SPXTV_MIN, SPXTV_REXMTMAX);
224	}
225
226	/*
227	 * If all outstanding data is acked, stop retransmit timer and
228	 * remember to restart (more output or persist).  If there is more
229	 * data to be acked, restart retransmit timer, using current
230	 * (possibly backed-off) value;
231	 */
232	if (si->si_ack == cb->s_smax + 1) {
233		cb->s_timer[SPXT_REXMT] = 0;
234		cb->s_flags |= SF_RXT;
235	} else if (cb->s_timer[SPXT_PERSIST] == 0)
236		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
237
238	/*
239	 * When new data is acked, open the congestion window.  If the window
240	 * gives us less than ssthresh packets in flight, open exponentially
241	 * (maxseg at a time).  Otherwise open linearly (maxseg^2 / cwnd at a
242	 * time).
243	 */
244	incr = CUNIT;
245	if (cb->s_cwnd > cb->s_ssthresh)
246		incr = max(incr * incr / cb->s_cwnd, 1);
247	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
248
249	/*
250	 * Trim Acked data from output queue.
251	 */
252	SOCKBUF_LOCK(&so->so_snd);
253	while ((m = so->so_snd.sb_mb) != NULL) {
254		if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
255			sbdroprecord_locked(&so->so_snd);
256		else
257			break;
258	}
259	sowwakeup_locked(so);
260	cb->s_rack = si->si_ack;
261update_window:
262	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
263		cb->s_snxt = cb->s_rack;
264	if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
265	    (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
266	     (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
267		/* keep track of pure window updates */
268		if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
269		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
270			spxstat.spxs_rcvwinupd++;
271			spxstat.spxs_rcvdupack--;
272		}
273		cb->s_ralo = si->si_alo;
274		cb->s_swl1 = si->si_seq;
275		cb->s_swl2 = si->si_ack;
276		cb->s_swnd = (1 + si->si_alo - si->si_ack);
277		if (cb->s_swnd > cb->s_smxw)
278			cb->s_smxw = cb->s_swnd;
279		cb->s_flags |= SF_WIN;
280	}
281
282	/*
283	 * If this packet number is higher than that which we have allocated
284	 * refuse it, unless urgent.
285	 */
286	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
287		if (si->si_cc & SPX_SP) {
288			spxstat.spxs_rcvwinprobe++;
289			return (1);
290		} else
291			spxstat.spxs_rcvpackafterwin++;
292		if (si->si_cc & SPX_OB) {
293			if (SSEQ_GT(si->si_seq, cb->s_alo + 60))
294				return (1); /* else queue this packet; */
295		} else {
296#ifdef BROKEN
297			/*
298			 * XXXRW: This is broken on at least one count:
299			 * spx_close() will free the ipxp and related parts,
300			 * which are then touched by spx_input() after the
301			 * return from spx_reass().
302			 */
303			/*struct socket *so = cb->s_ipxpcb->ipxp_socket;
304			if (so->so_state && SS_NOFDREF) {
305				spx_close(cb);
306			} else
307				       would crash system*/
308#endif
309			spx_istat.notyet++;
310			return (1);
311		}
312	}
313
314	/*
315	 * If this is a system packet, we don't need to queue it up, and
316	 * won't update acknowledge #.
317	 */
318	if (si->si_cc & SPX_SP)
319		return (1);
320
321	/*
322	 * We have already seen this packet, so drop.
323	 */
324	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
325		spx_istat.bdreas++;
326		spxstat.spxs_rcvduppack++;
327		if (si->si_seq == cb->s_ack - 1)
328			spx_istat.lstdup++;
329		return (1);
330	}
331
332	/*
333	 * Loop through all packets queued up to insert in appropriate
334	 * sequence.
335	 */
336	q_new = malloc(sizeof(*q_new), M_SPXREASSQ, M_NOWAIT | M_ZERO);
337	if (q_new == NULL)
338		return (1);
339	q_new->sq_si = si;
340	q_new->sq_msi = msi;
341	LIST_FOREACH(q, &cb->s_q, sq_entry) {
342		if (si->si_seq == q->sq_si->si_seq) {
343			free(q_new, M_SPXREASSQ);
344			spxstat.spxs_rcvduppack++;
345			return (1);
346		}
347		if (SSEQ_LT(si->si_seq, q->sq_si->si_seq)) {
348			spxstat.spxs_rcvoopack++;
349			break;
350		}
351	}
352	if (q != NULL)
353		LIST_INSERT_BEFORE(q, q_new, sq_entry);
354	else
355		LIST_INSERT_HEAD(&cb->s_q, q_new, sq_entry);
356
357	/*
358	 * If this packet is urgent, inform process
359	 */
360	if (si->si_cc & SPX_OB) {
361		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
362		sohasoutofband(so);
363		cb->s_oobflags |= SF_IOOB;
364	}
365present:
366#define SPINC sizeof(struct spxhdr)
367	SOCKBUF_LOCK(&so->so_rcv);
368
369	/*
370	 * Loop through all packets queued up to update acknowledge number,
371	 * and present all acknowledged data to user; if in packet interface
372	 * mode, show packet headers.
373	 */
374	LIST_FOREACH_SAFE(q, &cb->s_q, sq_entry, q_temp) {
375		struct spx *qsi;
376		struct mbuf *mqsi;
377
378		qsi = q->sq_si;
379		mqsi = q->sq_msi;
380		if (qsi->si_seq == cb->s_ack) {
381			cb->s_ack++;
382			if (qsi->si_cc & SPX_OB) {
383				cb->s_oobflags &= ~SF_IOOB;
384				if (so->so_rcv.sb_cc)
385					so->so_oobmark = so->so_rcv.sb_cc;
386				else
387					so->so_rcv.sb_state |= SBS_RCVATMARK;
388			}
389			LIST_REMOVE(q, sq_entry);
390			free(q, M_SPXREASSQ);
391			wakeup = 1;
392			spxstat.spxs_rcvpack++;
393#ifdef SF_NEWCALL
394			if (cb->s_flags2 & SF_NEWCALL) {
395				struct spxhdr *sp =
396				    mtod(mqsi, struct spxhdr *);
397				u_char dt = sp->spx_dt;
398
399				spx_newchecks[4]++;
400				if (dt != cb->s_rhdr.spx_dt) {
401					struct mbuf *mm =
402					   m_getclr(M_NOWAIT, MT_CONTROL);
403					spx_newchecks[0]++;
404					if (mm != NULL) {
405						u_short *s =
406							mtod(mm, u_short *);
407						cb->s_rhdr.spx_dt = dt;
408						mm->m_len = 5; /*XXX*/
409						s[0] = 5;
410						s[1] = 1;
411						*(u_char *)(&s[2]) = dt;
412						sbappend_locked(&so->so_rcv, mm);
413					}
414				}
415				if (sp->spx_cc & SPX_OB) {
416					MCHTYPE(mqsi, MT_OOBDATA);
417					spx_newchecks[1]++;
418					so->so_oobmark = 0;
419					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
420				}
421				if (packetp == 0) {
422					mqsi->m_data += SPINC;
423					mqsi->m_len -= SPINC;
424					mqsi->m_pkthdr.len -= SPINC;
425				}
426				if ((sp->spx_cc & SPX_EM) || packetp) {
427					sbappendrecord_locked(&so->so_rcv,
428					    mqsi);
429					spx_newchecks[9]++;
430				} else
431					sbappend_locked(&so->so_rcv, mqsi);
432			} else
433#endif
434			if (packetp)
435				sbappendrecord_locked(&so->so_rcv, mqsi);
436			else {
437				cb->s_rhdr = *mtod(mqsi, struct spxhdr *);
438				mqsi->m_data += SPINC;
439				mqsi->m_len -= SPINC;
440				mqsi->m_pkthdr.len -= SPINC;
441				sbappend_locked(&so->so_rcv, mqsi);
442			}
443		  } else
444			break;
445	}
446	if (wakeup)
447		sorwakeup_locked(so);
448	else
449		SOCKBUF_UNLOCK(&so->so_rcv);
450	return (0);
451}
452