spx_reass.c revision 157068
1/*-
2 * Copyright (c) 1984, 1985, 1986, 1987, 1993
3 *	The Regents of the University of California.
4 * Copyright (c) 1995, Mike Mitchell
5 * Copyright (c) 2004-2006 Robert N. M. Watson
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 *	@(#)spx_usrreq.h
37 */
38
39#include <sys/cdefs.h>
40__FBSDID("$FreeBSD: head/sys/netipx/spx_usrreq.c 157068 2006-03-24 00:22:25Z rwatson $");
41
42#include <sys/param.h>
43#include <sys/lock.h>
44#include <sys/malloc.h>
45#include <sys/mbuf.h>
46#include <sys/mutex.h>
47#include <sys/proc.h>
48#include <sys/protosw.h>
49#include <sys/signalvar.h>
50#include <sys/socket.h>
51#include <sys/socketvar.h>
52#include <sys/sx.h>
53#include <sys/systm.h>
54
55#include <net/route.h>
56#include <netinet/tcp_fsm.h>
57
58#include <netipx/ipx.h>
59#include <netipx/ipx_pcb.h>
60#include <netipx/ipx_var.h>
61#include <netipx/spx.h>
62#include <netipx/spx_debug.h>
63#include <netipx/spx_timer.h>
64#include <netipx/spx_var.h>
65
66/*
67 * SPX protocol implementation.
68 */
/*
 * File-scope protocol state and tunables.
 */
/* Set to 1 by spx_init(); presumably the initial send sequence seed. */
69static u_short 	spx_iss;
/* Ad-hoc debug counters bumped at various points in the send/receive paths. */
70static u_short	spx_newchecks[50];
/*
 * When non-zero, a packet reaching a listening pcb with a non-zero
 * destination id or sequence number takes the dropwithreset path.
 */
71static int	spx_hardnosed;
/* Non-zero: answer SPX_SA requests with SF_DELACK instead of SF_ACKNOW. */
72static int	spx_use_delack = 0;
/* Non-zero: call spx_trace() for every pcb, not only SO_DEBUG sockets. */
73static int	traceallspxs = 0;
/* Statistics; the `spxstat' macro below aliases its newstats member. */
74static struct	spx_istat spx_istat;
/* Number of consecutive duplicate acks that triggers a retransmission. */
75static int	spxrexmtthresh = 3;
76
77/* Following was struct spxstat spxstat; */
78#ifndef spxstat
79#define spxstat spx_istat.newstats
80#endif
81
/* Timer multiplier indexed by s_rxtshift (used by spx_setpersist()). */
82static const int spx_backoff[SPX_MAXRXTSHIFT+1] =
83    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
84
/* Internal protocol helpers. */
85static	void spx_close(struct spxpcb *cb);
86static	void spx_disconnect(struct spxpcb *cb);
87static	void spx_drop(struct spxpcb *cb, int errno);
88static	int spx_output(struct spxpcb *cb, struct mbuf *m0);
89static	int spx_reass(struct spxpcb *cb, struct spx *si);
90static	void spx_setpersist(struct spxpcb *cb);
91static	void spx_template(struct spxpcb *cb);
92static	struct spxpcb *spx_timers(struct spxpcb *cb, int timer);
93static	void spx_usrclosed(struct spxpcb *cb);
94
/* Socket-layer entry points referenced from the pr_usrreqs tables below. */
95static	int spx_usr_abort(struct socket *so);
96static	int spx_accept(struct socket *so, struct sockaddr **nam);
97static	int spx_attach(struct socket *so, int proto, struct thread *td);
98static	int spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td);
99static	int spx_connect(struct socket *so, struct sockaddr *nam,
100			struct thread *td);
101static	int spx_detach(struct socket *so);
102static	int spx_usr_disconnect(struct socket *so);
103static	int spx_listen(struct socket *so, int backlog, struct thread *td);
104static	int spx_rcvd(struct socket *so, int flags);
105static	int spx_rcvoob(struct socket *so, struct mbuf *m, int flags);
106static	int spx_send(struct socket *so, int flags, struct mbuf *m,
107		     struct sockaddr *addr, struct mbuf *control,
108		     struct thread *td);
109static	int spx_shutdown(struct socket *so);
110static	int spx_sp_attach(struct socket *so, int proto, struct thread *td);
111
/*
 * User-request switch for SPX sockets.  The control, peeraddr and sockaddr
 * requests are served by the generic IPX routines; everything else is
 * handled by the spx_* functions in this file.
 */
112struct	pr_usrreqs spx_usrreqs = {
113	.pru_abort =		spx_usr_abort,
114	.pru_accept =		spx_accept,
115	.pru_attach =		spx_attach,
116	.pru_bind =		spx_bind,
117	.pru_connect =		spx_connect,
118	.pru_control =		ipx_control,
119	.pru_detach =		spx_detach,
120	.pru_disconnect =	spx_usr_disconnect,
121	.pru_listen =		spx_listen,
122	.pru_peeraddr =		ipx_peeraddr,
123	.pru_rcvd =		spx_rcvd,
124	.pru_rcvoob =		spx_rcvoob,
125	.pru_send =		spx_send,
126	.pru_shutdown =		spx_shutdown,
127	.pru_sockaddr =		ipx_sockaddr,
128};
129
/*
 * Second user-request switch.  It differs from spx_usrreqs only in the
 * attach entry, which is spx_sp_attach() instead of spx_attach().
 */
130struct	pr_usrreqs spx_usrreq_sps = {
131	.pru_abort =		spx_usr_abort,
132	.pru_accept =		spx_accept,
133	.pru_attach =		spx_sp_attach,
134	.pru_bind =		spx_bind,
135	.pru_connect =		spx_connect,
136	.pru_control =		ipx_control,
137	.pru_detach =		spx_detach,
138	.pru_disconnect =	spx_usr_disconnect,
139	.pru_listen =		spx_listen,
140	.pru_peeraddr =		ipx_peeraddr,
141	.pru_rcvd =		spx_rcvd,
142	.pru_rcvoob =		spx_rcvoob,
143	.pru_send =		spx_send,
144	.pru_shutdown =		spx_shutdown,
145	.pru_sockaddr =		ipx_sockaddr,
146};
147
148void
149spx_init(void)
150{
151
152	spx_iss = 1; /* WRONG !! should fish it out of TODR */
153}
154
/*
 * spx_input: process one received SPX packet.
 *
 *   m     the packet; its data is read as a struct spx header.
 *   ipxp  the IPX pcb the packet was delivered to (must not be NULL).
 *
 * The caller holds the pcb list lock and the ipxp lock (both asserted
 * below); every exit path of this function releases both.  The sequence,
 * acknowledge and allocation fields are converted to host byte order in
 * place.  For a listening socket a new socket is created with sonewconn()
 * and processing continues on the new socket's pcb.  After the connection
 * state switch the leading sizeof(struct ipx) bytes are trimmed from the
 * mbuf and the packet is passed to spx_reass(); the mbuf is freed here when
 * spx_reass() returns non-zero.
 */
155void
156spx_input(struct mbuf *m, struct ipxpcb *ipxp)
157{
158	struct spxpcb *cb;
159	struct spx *si = mtod(m, struct spx *);
160	struct socket *so;
161	struct spx spx_savesi;
162	int dropsocket = 0;
163	short ostate = 0;
164
165	spxstat.spxs_rcvtotal++;
166	KASSERT(ipxp != NULL, ("spx_input: NULL ipxpcb"));
167
168	/*
169	 * spx_input() assumes that the caller will hold both the pcb list
170	 * lock and also the ipxp lock.  spx_input() will release both before
171	 * returning, and may in fact trade in the ipxp lock for another pcb
172	 * lock following sonewconn().
173	 */
174	IPX_LIST_LOCK_ASSERT();
175	IPX_LOCK_ASSERT(ipxp);
176
177	cb = ipxtospxpcb(ipxp);
	/*
	 * NOTE(review): on this path spx_savesi has not been assigned yet, but
	 * the code at ``bad'' passes it to spx_trace() when cb is NULL.
	 */
178	if (cb == NULL)
179		goto bad;
180
	/* Make the whole SPX header contiguous in the first mbuf. */
181	if (m->m_len < sizeof(*si)) {
182		if ((m = m_pullup(m, sizeof(*si))) == NULL) {
183			IPX_UNLOCK(ipxp);
184			IPX_LIST_UNLOCK();
185			spxstat.spxs_rcvshort++;
186			return;
187		}
188		si = mtod(m, struct spx *);
189	}
	/* Header fields are kept in host byte order from here on. */
190	si->si_seq = ntohs(si->si_seq);
191	si->si_ack = ntohs(si->si_ack);
192	si->si_alo = ntohs(si->si_alo);
193
194	so = ipxp->ipxp_socket;
195
	/* Snapshot state and header so the trace shows the pre-processing view. */
196	if (so->so_options & SO_DEBUG || traceallspxs) {
197		ostate = cb->s_state;
198		spx_savesi = *si;
199	}
200	if (so->so_options & SO_ACCEPTCONN) {
201		struct spxpcb *ocb = cb;
202
203		so = sonewconn(so, 0);
204		if (so == NULL) {
205			goto drop;
206		}
207		/*
208		 * This is ugly, but ....
209		 *
210		 * Mark socket as temporary until we're
211		 * committed to keeping it.  The code at
212		 * ``drop'' and ``dropwithreset'' check the
213		 * flag dropsocket to see if the temporary
214		 * socket created here should be discarded.
215		 * We mark the socket as discardable until
216		 * we're committed to it below in TCPS_LISTEN.
217		 */
218		dropsocket++;
		/* Trade the listener's pcb lock for the new socket's pcb lock. */
219		IPX_UNLOCK(ipxp);
220		ipxp = (struct ipxpcb *)so->so_pcb;
221		IPX_LOCK(ipxp);
222		ipxp->ipxp_laddr = si->si_dna;
223		cb = ipxtospxpcb(ipxp);
224		cb->s_mtu = ocb->s_mtu;		/* preserve sockopts */
225		cb->s_flags = ocb->s_flags;	/* preserve sockopts */
226		cb->s_flags2 = ocb->s_flags2;	/* preserve sockopts */
227		cb->s_state = TCPS_LISTEN;
228	}
229
230	/*
231	 * Packet received on connection.
232	 * reset idle time and keep-alive timer;
233	 */
234	cb->s_idle = 0;
235	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
236
	/*
	 * Connection set-up state machine.  There is no default case: in any
	 * other state control continues directly after the switch.
	 */
237	switch (cb->s_state) {
238
239	case TCPS_LISTEN:{
240		struct sockaddr_ipx *sipx, ssipx;
241		struct ipx_addr laddr;
242
243		/*
244		 * If somebody here was carrying on a conversation
245		 * and went away, and his pen pal thinks he can
246		 * still talk, we get the misdirected packet.
247		 */
248		if (spx_hardnosed && (si->si_did != 0 || si->si_seq != 0)) {
249			spx_istat.gonawy++;
250			goto dropwithreset;
251		}
252		sipx = &ssipx;
253		bzero(sipx, sizeof *sipx);
254		sipx->sipx_len = sizeof(*sipx);
255		sipx->sipx_family = AF_IPX;
256		sipx->sipx_addr = si->si_sna;
		/* Remember the local address so it can be restored on failure. */
257		laddr = ipxp->ipxp_laddr;
258		if (ipx_nullhost(laddr))
259			ipxp->ipxp_laddr = si->si_dna;
260		if (ipx_pcbconnect(ipxp, (struct sockaddr *)sipx, &thread0)) {
261			ipxp->ipxp_laddr = laddr;
262			spx_istat.noconn++;
263			goto drop;
264		}
265		spx_template(cb);
266		dropsocket = 0;		/* committed to socket */
267		cb->s_did = si->si_sid;
268		cb->s_rack = si->si_ack;
269		cb->s_ralo = si->si_alo;
	/*
	 * THREEWAYSHAKE is defined unconditionally here, so the LISTEN case
	 * ends at the break below and TCPS_SYN_RECEIVED is a separate case.
	 */
270#define THREEWAYSHAKE
271#ifdef THREEWAYSHAKE
272		cb->s_state = TCPS_SYN_RECEIVED;
273		cb->s_force = 1 + SPXT_KEEP;
274		spxstat.spxs_accepts++;
275		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
276		}
277		break;
278	/*
279	 * This state means that we have heard a response
280	 * to our acceptance of their connection
281	 * It is probably logically unnecessary in this
282	 * implementation.
283	 */
284	 case TCPS_SYN_RECEIVED: {
285		if (si->si_did != cb->s_sid) {
286			spx_istat.wrncon++;
287			goto drop;
288		}
289#endif
290		ipxp->ipxp_fport =  si->si_sport;
291		cb->s_timer[SPXT_REXMT] = 0;
292		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
293		soisconnected(so);
294		cb->s_state = TCPS_ESTABLISHED;
295		spxstat.spxs_accepts++;
296		}
297		break;
298
299	/*
300	 * This state means that we have gotten a response
301	 * to our attempt to establish a connection.
302	 * We fill in the data from the other side,
303	 * telling us which port to respond to, instead of the well-
304	 * known one we might have sent to in the first place.
305	 * We also require that this is a response to our
306	 * connection id.
307	 */
308	case TCPS_SYN_SENT:
309		if (si->si_did != cb->s_sid) {
310			spx_istat.notme++;
311			goto drop;
312		}
313		spxstat.spxs_connects++;
314		cb->s_did = si->si_sid;
315		cb->s_rack = si->si_ack;
316		cb->s_ralo = si->si_alo;
317		cb->s_dport = ipxp->ipxp_fport =  si->si_sport;
318		cb->s_timer[SPXT_REXMT] = 0;
319		cb->s_flags |= SF_ACKNOW;
320		soisconnected(so);
321		cb->s_state = TCPS_ESTABLISHED;
322		/* Use roundtrip time of connection request for initial rtt */
323		if (cb->s_rtt) {
324			cb->s_srtt = cb->s_rtt << 3;
325			cb->s_rttvar = cb->s_rtt << 1;
326			SPXT_RANGESET(cb->s_rxtcur,
327			    ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
328			    SPXTV_MIN, SPXTV_REXMTMAX);
329			    cb->s_rtt = 0;
330		}
331	}
332	if (so->so_options & SO_DEBUG || traceallspxs)
333		spx_trace(SA_INPUT, (u_char)ostate, cb, &spx_savesi, 0);
334
	/* Trim sizeof(struct ipx) bytes from the front of the packet. */
335	m->m_len -= sizeof(struct ipx);
336	m->m_pkthdr.len -= sizeof(struct ipx);
337	m->m_data += sizeof(struct ipx);
338
	/* A non-zero return means spx_reass() did not keep or free the mbuf. */
339	if (spx_reass(cb, si)) {
340		m_freem(m);
341	}
	/* Send now if something is forced or an ack/window/rexmit is pending. */
342	if (cb->s_force || (cb->s_flags & (SF_ACKNOW|SF_WIN|SF_RXT)))
343		spx_output(cb, NULL);
344	cb->s_flags &= ~(SF_WIN|SF_RXT);
345	IPX_UNLOCK(ipxp);
346	IPX_LIST_UNLOCK();
347	return;
348
349dropwithreset:
350	IPX_UNLOCK(ipxp);
	/*
	 * If the socket made by sonewconn() was never committed, take it off
	 * the listener's incomplete queue and abort it.
	 */
351	if (dropsocket) {
352		struct socket *head;
353		ACCEPT_LOCK();
354		KASSERT((so->so_qstate & SQ_INCOMP) != 0,
355		    ("spx_input: nascent socket not SQ_INCOMP on soabort()"));
356		head = so->so_head;
357		TAILQ_REMOVE(&head->so_incomp, so, so_list);
358		head->so_incqlen--;
359		so->so_qstate &= ~SQ_INCOMP;
360		so->so_head = NULL;
361		ACCEPT_UNLOCK();
362		soabort(so);
363		cb = NULL;
364	}
365	IPX_LIST_UNLOCK();
	/*
	 * NOTE(review): the three fields are byte-swapped again and the mbuf
	 * is then freed immediately; no reply packet is built on this path.
	 */
366	si->si_seq = ntohs(si->si_seq);
367	si->si_ack = ntohs(si->si_ack);
368	si->si_alo = ntohs(si->si_alo);
369	m_freem(dtom(si));
370	if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
371	    traceallspxs)
372		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
373	return;
374
	/*
	 * NOTE(review): unlike dropwithreset, this path does not examine
	 * dropsocket, although the comment near sonewconn() says it does.
	 */
375drop:
376bad:
377	if (cb == NULL || cb->s_ipxpcb->ipxp_socket->so_options & SO_DEBUG ||
378            traceallspxs)
379		spx_trace(SA_DROP, (u_char)ostate, cb, &spx_savesi, 0);
380	IPX_UNLOCK(ipxp);
381	IPX_LIST_UNLOCK();
382	m_freem(m);
383}
384
385/*
386 * This is structurally similar to the tcp reassembly routine
387 * but its function is somewhat different:  It merely queues
388 * packets up, and suppresses duplicates.
 *
 * cb is the connection; si is the header of the received packet with its
 * sequence, acknowledge and allocation fields in host byte order.  When si
 * is SI(0) no packet is processed and only the already queued data is
 * presented to the socket.
 *
 * Returns 1 when the packet was neither queued nor freed here (spx_input()
 * frees the mbuf in that case) and 0 when it was queued or already freed.
 * The pcb lock of cb must be held (asserted below).
389 */
390static int
391spx_reass(struct spxpcb *cb, struct spx *si)
392{
393	struct spx_q *q;
394	struct mbuf *m;
395	struct socket *so = cb->s_ipxpcb->ipxp_socket;
	/* Non-zero when SF_HI is set; headers are then left on delivered data. */
396	char packetp = cb->s_flags & SF_HI;
397	int incr;
398	char wakeup = 0;
399
400	IPX_LOCK_ASSERT(cb->s_ipxpcb);
401
402	if (si == SI(0))
403		goto present;
404	/*
405	 * Update our news from them.
406	 */
407	if (si->si_cc & SPX_SA)
408		cb->s_flags |= (spx_use_delack ? SF_DELACK : SF_ACKNOW);
409	if (SSEQ_GT(si->si_alo, cb->s_ralo))
410		cb->s_flags |= SF_WIN;
	/* Ack does not advance s_rack: only duplicate-ack accounting applies. */
411	if (SSEQ_LEQ(si->si_ack, cb->s_rack)) {
412		if ((si->si_cc & SPX_SP) && cb->s_rack != (cb->s_smax + 1)) {
413			spxstat.spxs_rcvdupack++;
414			/*
415			 * If this is a completely duplicate ack
416			 * and other conditions hold, we assume
417			 * a packet has been dropped and retransmit
418			 * it exactly as in tcp_input().
419			 */
420			if (si->si_ack != cb->s_rack ||
421			    si->si_alo != cb->s_ralo)
422				cb->s_dupacks = 0;
423			else if (++cb->s_dupacks == spxrexmtthresh) {
				/*
				 * Retransmit from the acked point with a
				 * one-unit congestion window, then restore
				 * s_snxt and halve the saved window if it
				 * was at least 4 * CUNIT.
				 */
424				u_short onxt = cb->s_snxt;
425				int cwnd = cb->s_cwnd;
426
427				cb->s_snxt = si->si_ack;
428				cb->s_cwnd = CUNIT;
429				cb->s_force = 1 + SPXT_REXMT;
430				spx_output(cb, NULL);
431				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
432				cb->s_rtt = 0;
433				if (cwnd >= 4 * CUNIT)
434					cb->s_cwnd = cwnd / 2;
435				if (SSEQ_GT(onxt, cb->s_snxt))
436					cb->s_snxt = onxt;
437				return (1);
438			}
439		} else
440			cb->s_dupacks = 0;
441		goto update_window;
442	}
443	cb->s_dupacks = 0;
444	/*
445	 * If our correspondent acknowledges data we haven't sent
446	 * TCP would drop the packet after acking.  We'll be a little
447	 * more permissive
448	 */
449	if (SSEQ_GT(si->si_ack, (cb->s_smax + 1))) {
450		spxstat.spxs_rcvacktoomuch++;
451		si->si_ack = cb->s_smax + 1;
452	}
453	spxstat.spxs_rcvackpack++;
454	/*
455	 * If transmit timer is running and timed sequence
456	 * number was acked, update smoothed round trip time.
457	 * See discussion of algorithm in tcp_input.c
458	 */
459	if (cb->s_rtt && SSEQ_GT(si->si_ack, cb->s_rtseq)) {
460		spxstat.spxs_rttupdated++;
461		if (cb->s_srtt != 0) {
462			short delta;
463			delta = cb->s_rtt - (cb->s_srtt >> 3);
464			if ((cb->s_srtt += delta) <= 0)
465				cb->s_srtt = 1;
466			if (delta < 0)
467				delta = -delta;
468			delta -= (cb->s_rttvar >> 2);
469			if ((cb->s_rttvar += delta) <= 0)
470				cb->s_rttvar = 1;
471		} else {
472			/*
473			 * No rtt measurement yet
474			 */
475			cb->s_srtt = cb->s_rtt << 3;
476			cb->s_rttvar = cb->s_rtt << 1;
477		}
478		cb->s_rtt = 0;
479		cb->s_rxtshift = 0;
480		SPXT_RANGESET(cb->s_rxtcur,
481			((cb->s_srtt >> 2) + cb->s_rttvar) >> 1,
482			SPXTV_MIN, SPXTV_REXMTMAX);
483	}
484	/*
485	 * If all outstanding data is acked, stop retransmit
486	 * timer and remember to restart (more output or persist).
487	 * If there is more data to be acked, restart retransmit
488	 * timer, using current (possibly backed-off) value;
489	 */
490	if (si->si_ack == cb->s_smax + 1) {
491		cb->s_timer[SPXT_REXMT] = 0;
492		cb->s_flags |= SF_RXT;
493	} else if (cb->s_timer[SPXT_PERSIST] == 0)
494		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
495	/*
496	 * When new data is acked, open the congestion window.
497	 * If the window gives us less than ssthresh packets
498	 * in flight, open exponentially (maxseg at a time).
499	 * Otherwise open linearly (maxseg^2 / cwnd at a time).
500	 */
501	incr = CUNIT;
502	if (cb->s_cwnd > cb->s_ssthresh)
503		incr = max(incr * incr / cb->s_cwnd, 1);
504	cb->s_cwnd = min(cb->s_cwnd + incr, cb->s_cwmx);
505	/*
506	 * Trim Acked data from output queue.
507	 */
	/* presumably sowwakeup_locked() below drops this lock -- TODO confirm */
508	SOCKBUF_LOCK(&so->so_snd);
509	while ((m = so->so_snd.sb_mb) != NULL) {
510		if (SSEQ_LT((mtod(m, struct spx *))->si_seq, si->si_ack))
511			sbdroprecord_locked(&so->so_snd);
512		else
513			break;
514	}
515	sowwakeup_locked(so);
516	cb->s_rack = si->si_ack;
517update_window:
518	if (SSEQ_LT(cb->s_snxt, cb->s_rack))
519		cb->s_snxt = cb->s_rack;
520	if (SSEQ_LT(cb->s_swl1, si->si_seq) || ((cb->s_swl1 == si->si_seq &&
521	    (SSEQ_LT(cb->s_swl2, si->si_ack))) ||
522	     (cb->s_swl2 == si->si_ack && SSEQ_LT(cb->s_ralo, si->si_alo)))) {
523		/* keep track of pure window updates */
524		if ((si->si_cc & SPX_SP) && cb->s_swl2 == si->si_ack
525		    && SSEQ_LT(cb->s_ralo, si->si_alo)) {
526			spxstat.spxs_rcvwinupd++;
527			spxstat.spxs_rcvdupack--;
528		}
529		cb->s_ralo = si->si_alo;
530		cb->s_swl1 = si->si_seq;
531		cb->s_swl2 = si->si_ack;
532		cb->s_swnd = (1 + si->si_alo - si->si_ack);
533		if (cb->s_swnd > cb->s_smxw)
534			cb->s_smxw = cb->s_swnd;
535		cb->s_flags |= SF_WIN;
536	}
537	/*
538	 * If this packet number is higher than that which
539	 * we have allocated refuse it, unless urgent
540	 */
541	if (SSEQ_GT(si->si_seq, cb->s_alo)) {
542		if (si->si_cc & SPX_SP) {
543			spxstat.spxs_rcvwinprobe++;
544			return (1);
545		} else
546			spxstat.spxs_rcvpackafterwin++;
547		if (si->si_cc & SPX_OB) {
			/* Out-of-band data more than 60 past s_alo is discarded. */
548			if (SSEQ_GT(si->si_seq, cb->s_alo + 60)) {
549				m_freem(dtom(si));
550				return (0);
551			} /* else queue this packet; */
552		} else {
553#ifdef BROKEN
554			/*
555			 * XXXRW: This is broken on at least one count:
556			 * spx_close() will free the ipxp and related parts,
557			 * which are then touched by spx_input() after the
558			 * return from spx_reass().
559			 */
560			/*struct socket *so = cb->s_ipxpcb->ipxp_socket;
561			if (so->so_state && SS_NOFDREF) {
562				spx_close(cb);
563			} else
564				       would crash system*/
565#endif
566			spx_istat.notyet++;
567			m_freem(dtom(si));
568			return (0);
569		}
570	}
571	/*
572	 * If this is a system packet, we don't need to
573	 * queue it up, and won't update acknowledge #
574	 */
575	if (si->si_cc & SPX_SP) {
576		return (1);
577	}
578	/*
579	 * We have already seen this packet, so drop.
580	 */
581	if (SSEQ_LT(si->si_seq, cb->s_ack)) {
582		spx_istat.bdreas++;
583		spxstat.spxs_rcvduppack++;
584		if (si->si_seq == cb->s_ack - 1)
585			spx_istat.lstdup++;
586		return (1);
587	}
588	/*
589	 * Loop through all packets queued up to insert in
590	 * appropriate sequence.
591	 */
592	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
593		if (si->si_seq == SI(q)->si_seq) {
594			spxstat.spxs_rcvduppack++;
595			return (1);
596		}
597		if (SSEQ_LT(si->si_seq, SI(q)->si_seq)) {
598			spxstat.spxs_rcvoopack++;
599			break;
600		}
601	}
	/* Link the packet in just before q (the queue head if the loop ran out). */
602	insque(si, q->si_prev);
603	/*
604	 * If this packet is urgent, inform process
605	 */
606	if (si->si_cc & SPX_OB) {
		/*
		 * NOTE(review): this reads the byte at offset 1 + sizeof(*si)
		 * from the start of the header -- confirm that is the intended
		 * out-of-band data byte.
		 */
607		cb->s_iobc = ((char *)si)[1 + sizeof(*si)];
608		sohasoutofband(so);
609		cb->s_oobflags |= SF_IOOB;
610	}
611present:
612#define SPINC sizeof(struct spxhdr)
613	SOCKBUF_LOCK(&so->so_rcv);
614	/*
615	 * Loop through all packets queued up to update acknowledge
616	 * number, and present all acknowledged data to user;
617	 * If in packet interface mode, show packet headers.
618	 */
619	for (q = cb->s_q.si_next; q != &cb->s_q; q = q->si_next) {
620		  if (SI(q)->si_seq == cb->s_ack) {
621			cb->s_ack++;
622			m = dtom(q);
623			if (SI(q)->si_cc & SPX_OB) {
624				cb->s_oobflags &= ~SF_IOOB;
625				if (so->so_rcv.sb_cc)
626					so->so_oobmark = so->so_rcv.sb_cc;
627				else
628					so->so_rcv.sb_state |= SBS_RCVATMARK;
629			}
			/*
			 * Step back before unlinking so that the loop's
			 * q = q->si_next moves on to the entry that followed
			 * the removed one.
			 */
630			q = q->si_prev;
631			remque(q->si_next);
632			wakeup = 1;
633			spxstat.spxs_rcvpack++;
634#ifdef SF_NEWCALL
635			if (cb->s_flags2 & SF_NEWCALL) {
636				struct spxhdr *sp = mtod(m, struct spxhdr *);
637				u_char dt = sp->spx_dt;
638				spx_newchecks[4]++;
				/* Datastream type changed: queue a control mbuf carrying it. */
639				if (dt != cb->s_rhdr.spx_dt) {
640					struct mbuf *mm =
641					   m_getclr(M_DONTWAIT, MT_CONTROL);
642					spx_newchecks[0]++;
643					if (mm != NULL) {
644						u_short *s =
645							mtod(mm, u_short *);
646						cb->s_rhdr.spx_dt = dt;
647						mm->m_len = 5; /*XXX*/
648						s[0] = 5;
649						s[1] = 1;
650						*(u_char *)(&s[2]) = dt;
651						sbappend_locked(&so->so_rcv, mm);
652					}
653				}
654				if (sp->spx_cc & SPX_OB) {
655					MCHTYPE(m, MT_OOBDATA);
656					spx_newchecks[1]++;
657					so->so_oobmark = 0;
658					so->so_rcv.sb_state &= ~SBS_RCVATMARK;
659				}
660				if (packetp == 0) {
661					m->m_data += SPINC;
662					m->m_len -= SPINC;
663					m->m_pkthdr.len -= SPINC;
664				}
665				if ((sp->spx_cc & SPX_EM) || packetp) {
666					sbappendrecord_locked(&so->so_rcv, m);
667					spx_newchecks[9]++;
668				} else
669					sbappend_locked(&so->so_rcv, m);
670			} else
671#endif
672			if (packetp) {
673				sbappendrecord_locked(&so->so_rcv, m);
674			} else {
				/* Save the header, then strip it before appending the data. */
675				cb->s_rhdr = *mtod(m, struct spxhdr *);
676				m->m_data += SPINC;
677				m->m_len -= SPINC;
678				m->m_pkthdr.len -= SPINC;
679				sbappend_locked(&so->so_rcv, m);
680			}
681		  } else
682			break;
683	}
	/* presumably sorwakeup_locked() releases the so_rcv lock -- TODO confirm */
684	if (wakeup)
685		sorwakeup_locked(so);
686	else
687		SOCKBUF_UNLOCK(&so->so_rcv);
688	return (0);
689}
690
/*
 * Control-input hook for SPX.  No control message is acted upon at
 * present; all three arguments are ignored.
 */
void
spx_ctlinput(int cmd, struct sockaddr *arg_as_sa, void *dummy)
{
	/* Currently, nothing. */
}
697
698static int
699spx_output(struct spxpcb *cb, struct mbuf *m0)
700{
701	struct socket *so = cb->s_ipxpcb->ipxp_socket;
702	struct mbuf *m;
703	struct spx *si = NULL;
704	struct sockbuf *sb = &so->so_snd;
705	int len = 0, win, rcv_win;
706	short span, off, recordp = 0;
707	u_short alo;
708	int error = 0, sendalot;
709#ifdef notdef
710	int idle;
711#endif
712	struct mbuf *mprev;
713
714	IPX_LOCK_ASSERT(cb->s_ipxpcb);
715
716	if (m0 != NULL) {
717		int mtu = cb->s_mtu;
718		int datalen;
719		/*
720		 * Make sure that packet isn't too big.
721		 */
722		for (m = m0; m != NULL; m = m->m_next) {
723			mprev = m;
724			len += m->m_len;
725			if (m->m_flags & M_EOR)
726				recordp = 1;
727		}
728		datalen = (cb->s_flags & SF_HO) ?
729				len - sizeof(struct spxhdr) : len;
730		if (datalen > mtu) {
731			if (cb->s_flags & SF_PI) {
732				m_freem(m0);
733				return (EMSGSIZE);
734			} else {
735				int oldEM = cb->s_cc & SPX_EM;
736
737				cb->s_cc &= ~SPX_EM;
738				while (len > mtu) {
739					/*
740					 * Here we are only being called
741					 * from usrreq(), so it is OK to
742					 * block.
743					 */
744					m = m_copym(m0, 0, mtu, M_TRYWAIT);
745					if (cb->s_flags & SF_NEWCALL) {
746					    struct mbuf *mm = m;
747					    spx_newchecks[7]++;
748					    while (mm != NULL) {
749						mm->m_flags &= ~M_EOR;
750						mm = mm->m_next;
751					    }
752					}
753					error = spx_output(cb, m);
754					if (error) {
755						cb->s_cc |= oldEM;
756						m_freem(m0);
757						return (error);
758					}
759					m_adj(m0, mtu);
760					len -= mtu;
761				}
762				cb->s_cc |= oldEM;
763			}
764		}
765		/*
766		 * Force length even, by adding a "garbage byte" if
767		 * necessary.
768		 */
769		if (len & 1) {
770			m = mprev;
771			if (M_TRAILINGSPACE(m) >= 1)
772				m->m_len++;
773			else {
774				struct mbuf *m1 = m_get(M_DONTWAIT, MT_DATA);
775
776				if (m1 == NULL) {
777					m_freem(m0);
778					return (ENOBUFS);
779				}
780				m1->m_len = 1;
781				*(mtod(m1, u_char *)) = 0;
782				m->m_next = m1;
783			}
784		}
785		m = m_gethdr(M_DONTWAIT, MT_DATA);
786		if (m == NULL) {
787			m_freem(m0);
788			return (ENOBUFS);
789		}
790		/*
791		 * Fill in mbuf with extended SP header
792		 * and addresses and length put into network format.
793		 */
794		MH_ALIGN(m, sizeof(struct spx));
795		m->m_len = sizeof(struct spx);
796		m->m_next = m0;
797		si = mtod(m, struct spx *);
798		si->si_i = *cb->s_ipx;
799		si->si_s = cb->s_shdr;
800		if ((cb->s_flags & SF_PI) && (cb->s_flags & SF_HO)) {
801			struct spxhdr *sh;
802			if (m0->m_len < sizeof(*sh)) {
803				if((m0 = m_pullup(m0, sizeof(*sh))) == NULL) {
804					m_free(m);
805					m_freem(m0);
806					return (EINVAL);
807				}
808				m->m_next = m0;
809			}
810			sh = mtod(m0, struct spxhdr *);
811			si->si_dt = sh->spx_dt;
812			si->si_cc |= sh->spx_cc & SPX_EM;
813			m0->m_len -= sizeof(*sh);
814			m0->m_data += sizeof(*sh);
815			len -= sizeof(*sh);
816		}
817		len += sizeof(*si);
818		if ((cb->s_flags2 & SF_NEWCALL) && recordp) {
819			si->si_cc |= SPX_EM;
820			spx_newchecks[8]++;
821		}
822		if (cb->s_oobflags & SF_SOOB) {
823			/*
824			 * Per jqj@cornell:
825			 * make sure OB packets convey exactly 1 byte.
826			 * If the packet is 1 byte or larger, we
827			 * have already guaranted there to be at least
828			 * one garbage byte for the checksum, and
829			 * extra bytes shouldn't hurt!
830			 */
831			if (len > sizeof(*si)) {
832				si->si_cc |= SPX_OB;
833				len = (1 + sizeof(*si));
834			}
835		}
836		si->si_len = htons((u_short)len);
837		m->m_pkthdr.len = ((len - 1) | 1) + 1;
838		/*
839		 * queue stuff up for output
840		 */
841		sbappendrecord(sb, m);
842		cb->s_seq++;
843	}
844#ifdef notdef
845	idle = (cb->s_smax == (cb->s_rack - 1));
846#endif
847again:
848	sendalot = 0;
849	off = cb->s_snxt - cb->s_rack;
850	win = min(cb->s_swnd, (cb->s_cwnd / CUNIT));
851
852	/*
853	 * If in persist timeout with window of 0, send a probe.
854	 * Otherwise, if window is small but nonzero
855	 * and timer expired, send what we can and go into
856	 * transmit state.
857	 */
858	if (cb->s_force == 1 + SPXT_PERSIST) {
859		if (win != 0) {
860			cb->s_timer[SPXT_PERSIST] = 0;
861			cb->s_rxtshift = 0;
862		}
863	}
864	span = cb->s_seq - cb->s_rack;
865	len = min(span, win) - off;
866
867	if (len < 0) {
868		/*
869		 * Window shrank after we went into it.
870		 * If window shrank to 0, cancel pending
871		 * restransmission and pull s_snxt back
872		 * to (closed) window.  We will enter persist
873		 * state below.  If the widndow didn't close completely,
874		 * just wait for an ACK.
875		 */
876		len = 0;
877		if (win == 0) {
878			cb->s_timer[SPXT_REXMT] = 0;
879			cb->s_snxt = cb->s_rack;
880		}
881	}
882	if (len > 1)
883		sendalot = 1;
884	rcv_win = sbspace(&so->so_rcv);
885
886	/*
887	 * Send if we owe peer an ACK.
888	 */
889	if (cb->s_oobflags & SF_SOOB) {
890		/*
891		 * must transmit this out of band packet
892		 */
893		cb->s_oobflags &= ~ SF_SOOB;
894		sendalot = 1;
895		spxstat.spxs_sndurg++;
896		goto found;
897	}
898	if (cb->s_flags & SF_ACKNOW)
899		goto send;
900	if (cb->s_state < TCPS_ESTABLISHED)
901		goto send;
902	/*
903	 * Silly window can't happen in spx.
904	 * Code from tcp deleted.
905	 */
906	if (len)
907		goto send;
908	/*
909	 * Compare available window to amount of window
910	 * known to peer (as advertised window less
911	 * next expected input.)  If the difference is at least two
912	 * packets or at least 35% of the mximum possible window,
913	 * then want to send a window update to peer.
914	 */
915	if (rcv_win > 0) {
916		u_short delta =  1 + cb->s_alo - cb->s_ack;
917		int adv = rcv_win - (delta * cb->s_mtu);
918
919		if ((so->so_rcv.sb_cc == 0 && adv >= (2 * cb->s_mtu)) ||
920		    (100 * adv / so->so_rcv.sb_hiwat >= 35)) {
921			spxstat.spxs_sndwinup++;
922			cb->s_flags |= SF_ACKNOW;
923			goto send;
924		}
925
926	}
927	/*
928	 * Many comments from tcp_output.c are appropriate here
929	 * including . . .
930	 * If send window is too small, there is data to transmit, and no
931	 * retransmit or persist is pending, then go to persist state.
932	 * If nothing happens soon, send when timer expires:
933	 * if window is nonzero, transmit what we can,
934	 * otherwise send a probe.
935	 */
936	if (so->so_snd.sb_cc && cb->s_timer[SPXT_REXMT] == 0 &&
937		cb->s_timer[SPXT_PERSIST] == 0) {
938			cb->s_rxtshift = 0;
939			spx_setpersist(cb);
940	}
941	/*
942	 * No reason to send a packet, just return.
943	 */
944	cb->s_outx = 1;
945	return (0);
946
947send:
948	/*
949	 * Find requested packet.
950	 */
951	si = 0;
952	if (len > 0) {
953		cb->s_want = cb->s_snxt;
954		for (m = sb->sb_mb; m != NULL; m = m->m_act) {
955			si = mtod(m, struct spx *);
956			if (SSEQ_LEQ(cb->s_snxt, si->si_seq))
957				break;
958		}
959	found:
960		if (si != NULL) {
961			if (si->si_seq == cb->s_snxt)
962					cb->s_snxt++;
963				else
964					spxstat.spxs_sndvoid++, si = 0;
965		}
966	}
967	/*
968	 * update window
969	 */
970	if (rcv_win < 0)
971		rcv_win = 0;
972	alo = cb->s_ack - 1 + (rcv_win / ((short)cb->s_mtu));
973	if (SSEQ_LT(alo, cb->s_alo))
974		alo = cb->s_alo;
975
976	if (si != NULL) {
977		/*
978		 * must make a copy of this packet for
979		 * ipx_output to monkey with
980		 */
981		m = m_copy(dtom(si), 0, (int)M_COPYALL);
982		if (m == NULL) {
983			return (ENOBUFS);
984		}
985		si = mtod(m, struct spx *);
986		if (SSEQ_LT(si->si_seq, cb->s_smax))
987			spxstat.spxs_sndrexmitpack++;
988		else
989			spxstat.spxs_sndpack++;
990	} else if (cb->s_force || cb->s_flags & SF_ACKNOW) {
991		/*
992		 * Must send an acknowledgement or a probe
993		 */
994		if (cb->s_force)
995			spxstat.spxs_sndprobe++;
996		if (cb->s_flags & SF_ACKNOW)
997			spxstat.spxs_sndacks++;
998		m = m_gethdr(M_DONTWAIT, MT_DATA);
999		if (m == NULL)
1000			return (ENOBUFS);
1001		/*
1002		 * Fill in mbuf with extended SP header
1003		 * and addresses and length put into network format.
1004		 */
1005		MH_ALIGN(m, sizeof(struct spx));
1006		m->m_len = sizeof(*si);
1007		m->m_pkthdr.len = sizeof(*si);
1008		si = mtod(m, struct spx *);
1009		si->si_i = *cb->s_ipx;
1010		si->si_s = cb->s_shdr;
1011		si->si_seq = cb->s_smax + 1;
1012		si->si_len = htons(sizeof(*si));
1013		si->si_cc |= SPX_SP;
1014	} else {
1015		cb->s_outx = 3;
1016		if (so->so_options & SO_DEBUG || traceallspxs)
1017			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1018		return (0);
1019	}
1020	/*
1021	 * Stuff checksum and output datagram.
1022	 */
1023	if ((si->si_cc & SPX_SP) == 0) {
1024		if (cb->s_force != (1 + SPXT_PERSIST) ||
1025		    cb->s_timer[SPXT_PERSIST] == 0) {
1026			/*
1027			 * If this is a new packet and we are not currently
1028			 * timing anything, time this one.
1029			 */
1030			if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1031				cb->s_smax = si->si_seq;
1032				if (cb->s_rtt == 0) {
1033					spxstat.spxs_segstimed++;
1034					cb->s_rtseq = si->si_seq;
1035					cb->s_rtt = 1;
1036				}
1037			}
1038			/*
1039			 * Set rexmt timer if not currently set,
1040			 * Initial value for retransmit timer is smoothed
1041			 * round-trip time + 2 * round-trip time variance.
1042			 * Initialize shift counter which is used for backoff
1043			 * of retransmit time.
1044			 */
1045			if (cb->s_timer[SPXT_REXMT] == 0 &&
1046			    cb->s_snxt != cb->s_rack) {
1047				cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1048				if (cb->s_timer[SPXT_PERSIST]) {
1049					cb->s_timer[SPXT_PERSIST] = 0;
1050					cb->s_rxtshift = 0;
1051				}
1052			}
1053		} else if (SSEQ_LT(cb->s_smax, si->si_seq)) {
1054			cb->s_smax = si->si_seq;
1055		}
1056	} else if (cb->s_state < TCPS_ESTABLISHED) {
1057		if (cb->s_rtt == 0)
1058			cb->s_rtt = 1; /* Time initial handshake */
1059		if (cb->s_timer[SPXT_REXMT] == 0)
1060			cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1061	}
1062	{
1063		/*
1064		 * Do not request acks when we ack their data packets or
1065		 * when we do a gratuitous window update.
1066		 */
1067		if (((si->si_cc & SPX_SP) == 0) || cb->s_force)
1068				si->si_cc |= SPX_SA;
1069		si->si_seq = htons(si->si_seq);
1070		si->si_alo = htons(alo);
1071		si->si_ack = htons(cb->s_ack);
1072
1073		if (ipxcksum) {
1074			si->si_sum = ipx_cksum(m, ntohs(si->si_len));
1075		} else
1076			si->si_sum = 0xffff;
1077
1078		cb->s_outx = 4;
1079		if (so->so_options & SO_DEBUG || traceallspxs)
1080			spx_trace(SA_OUTPUT, cb->s_state, cb, si, 0);
1081
1082		if (so->so_options & SO_DONTROUTE)
1083			error = ipx_outputfl(m, NULL, IPX_ROUTETOIF);
1084		else
1085			error = ipx_outputfl(m, &cb->s_ipxpcb->ipxp_route, 0);
1086	}
1087	if (error) {
1088		return (error);
1089	}
1090	spxstat.spxs_sndtotal++;
1091	/*
1092	 * Data sent (as far as we can tell).
1093	 * If this advertises a larger window than any other segment,
1094	 * then remember the size of the advertized window.
1095	 * Any pending ACK has now been sent.
1096	 */
1097	cb->s_force = 0;
1098	cb->s_flags &= ~(SF_ACKNOW|SF_DELACK);
1099	if (SSEQ_GT(alo, cb->s_alo))
1100		cb->s_alo = alo;
1101	if (sendalot)
1102		goto again;
1103	cb->s_outx = 5;
1104	return (0);
1105}
1106
1107static int spx_do_persist_panics = 0;
1108
1109static void
1110spx_setpersist(struct spxpcb *cb)
1111{
1112	int t = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1113
1114	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1115
1116	if (cb->s_timer[SPXT_REXMT] && spx_do_persist_panics)
1117		panic("spx_output REXMT");
1118	/*
1119	 * Start/restart persistance timer.
1120	 */
1121	SPXT_RANGESET(cb->s_timer[SPXT_PERSIST],
1122	    t*spx_backoff[cb->s_rxtshift],
1123	    SPXTV_PERSMIN, SPXTV_PERSMAX);
1124	if (cb->s_rxtshift < SPX_MAXRXTSHIFT)
1125		cb->s_rxtshift++;
1126}
1127
1128int
1129spx_ctloutput(struct socket *so, struct sockopt *sopt)
1130{
1131	struct ipxpcb *ipxp = sotoipxpcb(so);
1132	struct spxpcb *cb;
1133	int mask, error;
1134	short soptval;
1135	u_short usoptval;
1136	int optval;
1137
1138	error = 0;
1139
1140	if (sopt->sopt_level != IPXPROTO_SPX) {
1141		/* This will have to be changed when we do more general
1142		   stacking of protocols */
1143		return (ipx_ctloutput(so, sopt));
1144	}
1145	if (ipxp == NULL)
1146		return (EINVAL);
1147	else
1148		cb = ipxtospxpcb(ipxp);
1149
1150	switch (sopt->sopt_dir) {
1151	case SOPT_GET:
1152		switch (sopt->sopt_name) {
1153		case SO_HEADERS_ON_INPUT:
1154			mask = SF_HI;
1155			goto get_flags;
1156
1157		case SO_HEADERS_ON_OUTPUT:
1158			mask = SF_HO;
1159		get_flags:
1160			/* Unlocked read. */
1161			soptval = cb->s_flags & mask;
1162			error = sooptcopyout(sopt, &soptval, sizeof soptval);
1163			break;
1164
1165		case SO_MTU:
1166			/* Unlocked read. */
1167			usoptval = cb->s_mtu;
1168			error = sooptcopyout(sopt, &usoptval, sizeof usoptval);
1169			break;
1170
1171		case SO_LAST_HEADER:
1172			/* Unlocked read. */
1173			error = sooptcopyout(sopt, &cb->s_rhdr,
1174					     sizeof cb->s_rhdr);
1175			break;
1176
1177		case SO_DEFAULT_HEADERS:
1178			/* Unlocked read. */
1179			error = sooptcopyout(sopt, &cb->s_shdr,
1180					     sizeof cb->s_shdr);
1181			break;
1182
1183		default:
1184			error = ENOPROTOOPT;
1185		}
1186		break;
1187
1188	case SOPT_SET:
1189		switch (sopt->sopt_name) {
1190			/* XXX why are these shorts on get and ints on set?
1191			   that doesn't make any sense... */
1192		case SO_HEADERS_ON_INPUT:
1193			mask = SF_HI;
1194			goto set_head;
1195
1196		case SO_HEADERS_ON_OUTPUT:
1197			mask = SF_HO;
1198		set_head:
1199			error = sooptcopyin(sopt, &optval, sizeof optval,
1200					    sizeof optval);
1201			if (error)
1202				break;
1203
1204			IPX_LOCK(ipxp);
1205			if (cb->s_flags & SF_PI) {
1206				if (optval)
1207					cb->s_flags |= mask;
1208				else
1209					cb->s_flags &= ~mask;
1210			} else error = EINVAL;
1211			IPX_UNLOCK(ipxp);
1212			break;
1213
1214		case SO_MTU:
1215			error = sooptcopyin(sopt, &usoptval, sizeof usoptval,
1216					    sizeof usoptval);
1217			if (error)
1218				break;
1219			/* Unlocked write. */
1220			cb->s_mtu = usoptval;
1221			break;
1222
1223#ifdef SF_NEWCALL
1224		case SO_NEWCALL:
1225			error = sooptcopyin(sopt, &optval, sizeof optval,
1226					    sizeof optval);
1227			if (error)
1228				break;
1229			IPX_LOCK(ipxp);
1230			if (optval) {
1231				cb->s_flags2 |= SF_NEWCALL;
1232				spx_newchecks[5]++;
1233			} else {
1234				cb->s_flags2 &= ~SF_NEWCALL;
1235				spx_newchecks[6]++;
1236			}
1237			IPX_UNLOCK(ipxp);
1238			break;
1239#endif
1240
1241		case SO_DEFAULT_HEADERS:
1242			{
1243				struct spxhdr sp;
1244
1245				error = sooptcopyin(sopt, &sp, sizeof sp,
1246						    sizeof sp);
1247				if (error)
1248					break;
1249				IPX_LOCK(ipxp);
1250				cb->s_dt = sp.spx_dt;
1251				cb->s_cc = sp.spx_cc & SPX_EM;
1252				IPX_UNLOCK(ipxp);
1253			}
1254			break;
1255
1256		default:
1257			error = ENOPROTOOPT;
1258		}
1259		break;
1260	}
1261	return (error);
1262}
1263
1264static int
1265spx_usr_abort(struct socket *so)
1266{
1267	struct ipxpcb *ipxp;
1268	struct spxpcb *cb;
1269
1270	ipxp = sotoipxpcb(so);
1271	cb = ipxtospxpcb(ipxp);
1272
1273	IPX_LIST_LOCK();
1274	IPX_LOCK(ipxp);
1275	spx_drop(cb, ECONNABORTED);
1276	IPX_LIST_UNLOCK();
1277	return (0);
1278}
1279
1280/*
1281 * Accept a connection.  Essentially all the work is
1282 * done at higher levels; just return the address
1283 * of the peer, storing through addr.
1284 */
1285static int
1286spx_accept(struct socket *so, struct sockaddr **nam)
1287{
1288	struct ipxpcb *ipxp;
1289	struct sockaddr_ipx *sipx, ssipx;
1290
1291	ipxp = sotoipxpcb(so);
1292	sipx = &ssipx;
1293	bzero(sipx, sizeof *sipx);
1294	sipx->sipx_len = sizeof *sipx;
1295	sipx->sipx_family = AF_IPX;
1296	IPX_LOCK(ipxp);
1297	sipx->sipx_addr = ipxp->ipxp_faddr;
1298	IPX_UNLOCK(ipxp);
1299	*nam = sodupsockaddr((struct sockaddr *)sipx, M_WAITOK);
1300	return (0);
1301}
1302
1303static int
1304spx_attach(struct socket *so, int proto, struct thread *td)
1305{
1306	struct ipxpcb *ipxp;
1307	struct spxpcb *cb;
1308	struct mbuf *mm;
1309	struct sockbuf *sb;
1310	int error;
1311
1312	ipxp = sotoipxpcb(so);
1313	cb = ipxtospxpcb(ipxp);
1314
1315	IPX_LIST_LOCK();
1316	error = ipx_pcballoc(so, &ipxpcb_list, td);
1317	if (error)
1318		goto spx_attach_end;
1319	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1320		error = soreserve(so, (u_long) 3072, (u_long) 3072);
1321		if (error)
1322			goto spx_attach_end;
1323	}
1324	ipxp = sotoipxpcb(so);
1325
1326	MALLOC(cb, struct spxpcb *, sizeof *cb, M_PCB, M_NOWAIT | M_ZERO);
1327
1328	if (cb == NULL) {
1329		error = ENOBUFS;
1330		goto spx_attach_end;
1331	}
1332	sb = &so->so_snd;
1333
1334	mm = m_getclr(M_DONTWAIT, MT_DATA);
1335	if (mm == NULL) {
1336		FREE(cb, M_PCB);
1337		error = ENOBUFS;
1338		goto spx_attach_end;
1339	}
1340	cb->s_ipx = mtod(mm, struct ipx *);
1341	cb->s_state = TCPS_LISTEN;
1342	cb->s_smax = -1;
1343	cb->s_swl1 = -1;
1344	cb->s_q.si_next = cb->s_q.si_prev = &cb->s_q;
1345	cb->s_ipxpcb = ipxp;
1346	cb->s_mtu = 576 - sizeof(struct spx);
1347	cb->s_cwnd = sbspace(sb) * CUNIT / cb->s_mtu;
1348	cb->s_ssthresh = cb->s_cwnd;
1349	cb->s_cwmx = sbspace(sb) * CUNIT / (2 * sizeof(struct spx));
1350	/* Above is recomputed when connecting to account
1351	   for changed buffering or mtu's */
1352	cb->s_rtt = SPXTV_SRTTBASE;
1353	cb->s_rttvar = SPXTV_SRTTDFLT << 2;
1354	SPXT_RANGESET(cb->s_rxtcur,
1355	    ((SPXTV_SRTTBASE >> 2) + (SPXTV_SRTTDFLT << 2)) >> 1,
1356	    SPXTV_MIN, SPXTV_REXMTMAX);
1357	ipxp->ipxp_pcb = (caddr_t)cb;
1358spx_attach_end:
1359	IPX_LIST_UNLOCK();
1360	return (error);
1361}
1362
/*
 * Bind the socket's ipxpcb to the address in nam.
 *
 * Takes the pcb list lock and the pcb lock around ipx_pcbbind() and
 * returns whatever ipx_pcbbind() returns.
 */
static int
spx_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct ipxpcb *ipxp = sotoipxpcb(so);
	int rv;

	IPX_LIST_LOCK();
	IPX_LOCK(ipxp);
	rv = ipx_pcbbind(ipxp, nam, td);
	IPX_UNLOCK(ipxp);
	IPX_LIST_UNLOCK();

	return (rv);
}
1378
1379/*
1380 * Initiate connection to peer.
1381 * Enter SYN_SENT state, and mark socket as connecting.
1382 * Start keep-alive timer, setup prototype header,
1383 * Send initial system packet requesting connection.
1384 */
1385static int
1386spx_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
1387{
1388	struct ipxpcb *ipxp;
1389	struct spxpcb *cb;
1390	int error;
1391
1392	ipxp = sotoipxpcb(so);
1393	cb = ipxtospxpcb(ipxp);
1394
1395	IPX_LIST_LOCK();
1396	IPX_LOCK(ipxp);
1397	if (ipxp->ipxp_lport == 0) {
1398		error = ipx_pcbbind(ipxp, NULL, td);
1399		if (error)
1400			goto spx_connect_end;
1401	}
1402	error = ipx_pcbconnect(ipxp, nam, td);
1403	if (error)
1404		goto spx_connect_end;
1405	soisconnecting(so);
1406	spxstat.spxs_connattempt++;
1407	cb->s_state = TCPS_SYN_SENT;
1408	cb->s_did = 0;
1409	spx_template(cb);
1410	cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1411	cb->s_force = 1 + SPXTV_KEEP;
1412	/*
1413	 * Other party is required to respond to
1414	 * the port I send from, but he is not
1415	 * required to answer from where I am sending to,
1416	 * so allow wildcarding.
1417	 * original port I am sending to is still saved in
1418	 * cb->s_dport.
1419	 */
1420	ipxp->ipxp_fport = 0;
1421	error = spx_output(cb, NULL);
1422spx_connect_end:
1423	IPX_UNLOCK(ipxp);
1424	IPX_LIST_UNLOCK();
1425	return (error);
1426}
1427
1428static int
1429spx_detach(struct socket *so)
1430{
1431	struct ipxpcb *ipxp;
1432	struct spxpcb *cb;
1433
1434	ipxp = sotoipxpcb(so);
1435	cb = ipxtospxpcb(ipxp);
1436
1437	IPX_LIST_LOCK();
1438	IPX_LOCK(ipxp);
1439	if (cb->s_state > TCPS_LISTEN)
1440		spx_disconnect(cb);
1441	else
1442		spx_close(cb);
1443	IPX_LIST_UNLOCK();
1444	return (0);
1445}
1446
/*
 * Disconnect request from the socket layer.
 *
 * Connection-closing handshaking at the spx level may be implemented
 * optionally later; this is the hook where it would go.  For now the
 * request goes straight to spx_disconnect(), after which cb is invalid,
 * so the pcb lock taken here is not released.  Always returns 0.
 */
static int
spx_usr_disconnect(struct socket *so)
{
	struct ipxpcb *ipxp = sotoipxpcb(so);
	struct spxpcb *cb = ipxtospxpcb(ipxp);

	IPX_LIST_LOCK();
	IPX_LOCK(ipxp);
	spx_disconnect(cb);
	IPX_LIST_UNLOCK();

	return (0);
}
1467
1468static int
1469spx_listen(struct socket *so, int backlog, struct thread *td)
1470{
1471	int error;
1472	struct ipxpcb *ipxp;
1473	struct spxpcb *cb;
1474
1475	error = 0;
1476	ipxp = sotoipxpcb(so);
1477	cb = ipxtospxpcb(ipxp);
1478
1479	IPX_LIST_LOCK();
1480	IPX_LOCK(ipxp);
1481	SOCK_LOCK(so);
1482	error = solisten_proto_check(so);
1483	if (error == 0 && ipxp->ipxp_lport == 0)
1484		error = ipx_pcbbind(ipxp, NULL, td);
1485	if (error == 0) {
1486		cb->s_state = TCPS_LISTEN;
1487		solisten_proto(so, backlog);
1488	}
1489	SOCK_UNLOCK(so);
1490	IPX_UNLOCK(ipxp);
1491	IPX_LIST_UNLOCK();
1492	return (error);
1493}
1494
1495/*
1496 * After a receive, possibly send acknowledgment
1497 * updating allocation.
1498 */
1499static int
1500spx_rcvd(struct socket *so, int flags)
1501{
1502	struct ipxpcb *ipxp;
1503	struct spxpcb *cb;
1504
1505	ipxp = sotoipxpcb(so);
1506	cb = ipxtospxpcb(ipxp);
1507
1508	IPX_LOCK(ipxp);
1509	cb->s_flags |= SF_RVD;
1510	spx_output(cb, NULL);
1511	cb->s_flags &= ~SF_RVD;
1512	IPX_UNLOCK(ipxp);
1513	return (0);
1514}
1515
1516static int
1517spx_rcvoob(struct socket *so, struct mbuf *m, int flags)
1518{
1519	struct ipxpcb *ipxp;
1520	struct spxpcb *cb;
1521
1522	ipxp = sotoipxpcb(so);
1523	cb = ipxtospxpcb(ipxp);
1524
1525	SOCKBUF_LOCK(&so->so_rcv);
1526	if ((cb->s_oobflags & SF_IOOB) || so->so_oobmark ||
1527	    (so->so_rcv.sb_state & SBS_RCVATMARK)) {
1528		SOCKBUF_UNLOCK(&so->so_rcv);
1529		m->m_len = 1;
1530		/* Unlocked read. */
1531		*mtod(m, caddr_t) = cb->s_iobc;
1532		return (0);
1533	}
1534	SOCKBUF_UNLOCK(&so->so_rcv);
1535	return (EINVAL);
1536}
1537
1538static int
1539spx_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
1540    struct mbuf *controlp, struct thread *td)
1541{
1542	int error;
1543	struct ipxpcb *ipxp;
1544	struct spxpcb *cb;
1545
1546	error = 0;
1547	ipxp = sotoipxpcb(so);
1548	cb = ipxtospxpcb(ipxp);
1549
1550	IPX_LOCK(ipxp);
1551	if (flags & PRUS_OOB) {
1552		if (sbspace(&so->so_snd) < -512) {
1553			error = ENOBUFS;
1554			goto spx_send_end;
1555		}
1556		cb->s_oobflags |= SF_SOOB;
1557	}
1558	if (controlp != NULL) {
1559		u_short *p = mtod(controlp, u_short *);
1560		spx_newchecks[2]++;
1561		if ((p[0] == 5) && (p[1] == 1)) { /* XXXX, for testing */
1562			cb->s_shdr.spx_dt = *(u_char *)(&p[2]);
1563			spx_newchecks[3]++;
1564		}
1565		m_freem(controlp);
1566	}
1567	controlp = NULL;
1568	error = spx_output(cb, m);
1569	m = NULL;
1570spx_send_end:
1571	IPX_UNLOCK(ipxp);
1572	if (controlp != NULL)
1573		m_freem(controlp);
1574	if (m != NULL)
1575		m_freem(m);
1576	return (error);
1577}
1578
/*
 * Shut down the sending side of the socket.
 *
 * The socket is marked as unable to send more, and the connection is then
 * passed to spx_usrclosed().  cb is invalid after that call (see its
 * comment), so the pcb lock taken here is not released.  Always returns 0.
 */
static int
spx_shutdown(struct socket *so)
{
	struct ipxpcb *ipxp = sotoipxpcb(so);
	struct spxpcb *cb = ipxtospxpcb(ipxp);

	socantsendmore(so);

	IPX_LIST_LOCK();
	IPX_LOCK(ipxp);
	spx_usrclosed(cb);
	IPX_LIST_UNLOCK();

	return (0);
}
1595
1596static int
1597spx_sp_attach(struct socket *so, int proto, struct thread *td)
1598{
1599	int error;
1600	struct ipxpcb *ipxp;
1601
1602	error = spx_attach(so, proto, td);
1603	if (error == 0) {
1604		ipxp = sotoipxpcb(so);
1605		((struct spxpcb *)ipxp->ipxp_pcb)->s_flags |=
1606					(SF_HI | SF_HO | SF_PI);
1607	}
1608	return (error);
1609}
1610
1611/*
1612 * Create template to be used to send spx packets on a connection.
1613 * Called after host entry created, fills
1614 * in a skeletal spx header (choosing connection id),
1615 * minimizing the amount of work necessary when the connection is used.
1616 */
1617static void
1618spx_template(struct spxpcb *cb)
1619{
1620	struct ipxpcb *ipxp = cb->s_ipxpcb;
1621	struct ipx *ipx = cb->s_ipx;
1622	struct sockbuf *sb = &(ipxp->ipxp_socket->so_snd);
1623
1624	IPX_LOCK_ASSERT(ipxp);
1625
1626	ipx->ipx_pt = IPXPROTO_SPX;
1627	ipx->ipx_sna = ipxp->ipxp_laddr;
1628	ipx->ipx_dna = ipxp->ipxp_faddr;
1629	cb->s_sid = htons(spx_iss);
1630	spx_iss += SPX_ISSINCR/2;
1631	cb->s_alo = 1;
1632	cb->s_cwnd = (sbspace(sb) * CUNIT) / cb->s_mtu;
1633	cb->s_ssthresh = cb->s_cwnd; /* Try to expand fast to full complement
1634					of large packets */
1635	cb->s_cwmx = (sbspace(sb) * CUNIT) / (2 * sizeof(struct spx));
1636	cb->s_cwmx = max(cb->s_cwmx, cb->s_cwnd);
1637		/* But allow for lots of little packets as well */
1638}
1639
1640/*
1641 * Close a SPIP control block:
1642 *	discard spx control block itself
1643 *	discard ipx protocol control block
1644 *	wake up any sleepers
1645 * cb will always be invalid after this call.
1646 */
1647void
1648spx_close(struct spxpcb *cb)
1649{
1650	struct spx_q *s;
1651	struct ipxpcb *ipxp = cb->s_ipxpcb;
1652	struct socket *so = ipxp->ipxp_socket;
1653	struct mbuf *m;
1654
1655	IPX_LIST_LOCK_ASSERT();
1656	IPX_LOCK_ASSERT(ipxp);
1657
1658	s = cb->s_q.si_next;
1659	while (s != &(cb->s_q)) {
1660		s = s->si_next;
1661		m = dtom(s->si_prev);
1662		remque(s->si_prev);
1663		m_freem(m);
1664	}
1665	m_free(dtom(cb->s_ipx));
1666	FREE(cb, M_PCB);
1667	ipxp->ipxp_pcb = NULL;
1668	soisdisconnected(so);
1669	ipx_pcbdetach(ipxp);
1670	spxstat.spxs_closed++;
1671}
1672
1673/*
1674 *	Someday we may do level 3 handshaking
1675 *	to close a connection or send a xerox style error.
1676 *	For now, just close.
1677 * cb will always be invalid after this call.
1678 */
1679static void
1680spx_usrclosed(struct spxpcb *cb)
1681{
1682
1683	IPX_LIST_LOCK_ASSERT();
1684	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1685
1686	spx_close(cb);
1687}
1688
1689/*
1690 * cb will always be invalid after this call.
1691 */
1692static void
1693spx_disconnect(struct spxpcb *cb)
1694{
1695
1696	IPX_LIST_LOCK_ASSERT();
1697	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1698
1699	spx_close(cb);
1700}
1701
1702/*
1703 * Drop connection, reporting
1704 * the specified error.
1705 * cb will always be invalid after this call.
1706 */
1707static void
1708spx_drop(struct spxpcb *cb, int errno)
1709{
1710	struct socket *so = cb->s_ipxpcb->ipxp_socket;
1711
1712	IPX_LIST_LOCK_ASSERT();
1713	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1714
1715	/*
1716	 * someday, in the xerox world
1717	 * we will generate error protocol packets
1718	 * announcing that the socket has gone away.
1719	 */
1720	if (TCPS_HAVERCVDSYN(cb->s_state)) {
1721		spxstat.spxs_drops++;
1722		cb->s_state = TCPS_CLOSED;
1723		/*tcp_output(cb);*/
1724	} else
1725		spxstat.spxs_conndrops++;
1726	so->so_error = errno;
1727	spx_close(cb);
1728}
1729
1730/*
1731 * Fast timeout routine for processing delayed acks
1732 */
1733void
1734spx_fasttimo(void)
1735{
1736	struct ipxpcb *ipxp;
1737	struct spxpcb *cb;
1738
1739	IPX_LIST_LOCK();
1740	LIST_FOREACH(ipxp, &ipxpcb_list, ipxp_list) {
1741		IPX_LOCK(ipxp);
1742		if ((cb = (struct spxpcb *)ipxp->ipxp_pcb) != NULL &&
1743		    (cb->s_flags & SF_DELACK)) {
1744			cb->s_flags &= ~SF_DELACK;
1745			cb->s_flags |= SF_ACKNOW;
1746			spxstat.spxs_delack++;
1747			spx_output(cb, NULL);
1748		}
1749		IPX_UNLOCK(ipxp);
1750	}
1751	IPX_LIST_UNLOCK();
1752}
1753
1754/*
1755 * spx protocol timeout routine called every 500 ms.
1756 * Updates the timers in all active pcb's and
1757 * causes finite state machine actions if timers expire.
1758 */
1759void
1760spx_slowtimo(void)
1761{
1762	struct ipxpcb *ip, *ip_temp;
1763	struct spxpcb *cb;
1764	int i;
1765
1766	/*
1767	 * Search through tcb's and update active timers.  Note that timers
1768	 * may free the ipxpcb, so be sure to handle that case.
1769	 *
1770	 * spx_timers() may remove an ipxpcb entry, so we have to be ready to
1771	 * continue despite that.  The logic here is a bit obfuscated.
1772	 */
1773	IPX_LIST_LOCK();
1774	LIST_FOREACH_SAFE(ip, &ipxpcb_list, ipxp_list, ip_temp) {
1775		cb = ipxtospxpcb(ip);
1776		if (cb == NULL)
1777			continue;
1778		IPX_LOCK(cb->s_ipxpcb);
1779		for (i = 0; i < SPXT_NTIMERS; i++) {
1780			if (cb->s_timer[i] && --cb->s_timer[i] == 0) {
1781				/*
1782				 * spx_timers() returns (NULL) if it free'd
1783				 * the pcb.
1784				 */
1785				cb = spx_timers(cb, i);
1786				if (cb == NULL)
1787					break;
1788			}
1789		}
1790		if (cb != NULL) {
1791			cb->s_idle++;
1792			if (cb->s_rtt)
1793				cb->s_rtt++;
1794			IPX_UNLOCK(cb->s_ipxpcb);
1795		}
1796	}
1797	spx_iss += SPX_ISSINCR/PR_SLOWHZ;		/* increment iss */
1798	IPX_LIST_UNLOCK();
1799}
1800
1801/*
1802 * SPX timer processing.
1803 */
1804static struct spxpcb *
1805spx_timers(struct spxpcb *cb, int timer)
1806{
1807	long rexmt;
1808	int win;
1809
1810	IPX_LIST_LOCK_ASSERT();
1811	IPX_LOCK_ASSERT(cb->s_ipxpcb);
1812
1813	cb->s_force = 1 + timer;
1814	switch (timer) {
1815
1816	/*
1817	 * 2 MSL timeout in shutdown went off.  TCP deletes connection
1818	 * control block.
1819	 */
1820	case SPXT_2MSL:
1821		printf("spx: SPXT_2MSL went off for no reason\n");
1822		cb->s_timer[timer] = 0;
1823		break;
1824
1825	/*
1826	 * Retransmission timer went off.  Message has not
1827	 * been acked within retransmit interval.  Back off
1828	 * to a longer retransmit interval and retransmit one packet.
1829	 */
1830	case SPXT_REXMT:
1831		if (++cb->s_rxtshift > SPX_MAXRXTSHIFT) {
1832			cb->s_rxtshift = SPX_MAXRXTSHIFT;
1833			spxstat.spxs_timeoutdrop++;
1834			spx_drop(cb, ETIMEDOUT);
1835			cb = NULL;
1836			break;
1837		}
1838		spxstat.spxs_rexmttimeo++;
1839		rexmt = ((cb->s_srtt >> 2) + cb->s_rttvar) >> 1;
1840		rexmt *= spx_backoff[cb->s_rxtshift];
1841		SPXT_RANGESET(cb->s_rxtcur, rexmt, SPXTV_MIN, SPXTV_REXMTMAX);
1842		cb->s_timer[SPXT_REXMT] = cb->s_rxtcur;
1843		/*
1844		 * If we have backed off fairly far, our srtt
1845		 * estimate is probably bogus.  Clobber it
1846		 * so we'll take the next rtt measurement as our srtt;
1847		 * move the current srtt into rttvar to keep the current
1848		 * retransmit times until then.
1849		 */
1850		if (cb->s_rxtshift > SPX_MAXRXTSHIFT / 4 ) {
1851			cb->s_rttvar += (cb->s_srtt >> 2);
1852			cb->s_srtt = 0;
1853		}
1854		cb->s_snxt = cb->s_rack;
1855		/*
1856		 * If timing a packet, stop the timer.
1857		 */
1858		cb->s_rtt = 0;
1859		/*
1860		 * See very long discussion in tcp_timer.c about congestion
1861		 * window and sstrhesh
1862		 */
1863		win = min(cb->s_swnd, (cb->s_cwnd/CUNIT)) / 2;
1864		if (win < 2)
1865			win = 2;
1866		cb->s_cwnd = CUNIT;
1867		cb->s_ssthresh = win * CUNIT;
1868		spx_output(cb, NULL);
1869		break;
1870
1871	/*
1872	 * Persistance timer into zero window.
1873	 * Force a probe to be sent.
1874	 */
1875	case SPXT_PERSIST:
1876		spxstat.spxs_persisttimeo++;
1877		spx_setpersist(cb);
1878		spx_output(cb, NULL);
1879		break;
1880
1881	/*
1882	 * Keep-alive timer went off; send something
1883	 * or drop connection if idle for too long.
1884	 */
1885	case SPXT_KEEP:
1886		spxstat.spxs_keeptimeo++;
1887		if (cb->s_state < TCPS_ESTABLISHED)
1888			goto dropit;
1889		if (cb->s_ipxpcb->ipxp_socket->so_options & SO_KEEPALIVE) {
1890		    	if (cb->s_idle >= SPXTV_MAXIDLE)
1891				goto dropit;
1892			spxstat.spxs_keepprobe++;
1893			spx_output(cb, NULL);
1894		} else
1895			cb->s_idle = 0;
1896		cb->s_timer[SPXT_KEEP] = SPXTV_KEEP;
1897		break;
1898	dropit:
1899		spxstat.spxs_keepdrops++;
1900		spx_drop(cb, ETIMEDOUT);
1901		cb = NULL;
1902		break;
1903	}
1904	return (cb);
1905}
1906