1237263Snp/*- 2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc. 3237263Snp * All rights reserved. 4237263Snp * 5237263Snp * Redistribution and use in source and binary forms, with or without 6237263Snp * modification, are permitted provided that the following conditions 7237263Snp * are met: 8237263Snp * 1. Redistributions of source code must retain the above copyright 9237263Snp * notice, this list of conditions and the following disclaimer. 10237263Snp * 2. Redistributions in binary form must reproduce the above copyright 11237263Snp * notice, this list of conditions and the following disclaimer in the 12237263Snp * documentation and/or other materials provided with the distribution. 13237263Snp * 14237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17237263Snp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24237263Snp * SUCH DAMAGE. 25237263Snp */ 26174641Skmacy 27174641Skmacy#include <sys/cdefs.h> 28174641Skmacy__FBSDID("$FreeBSD$"); 29174641Skmacy 30237263Snp#include "opt_inet.h" 31237263Snp 32237263Snp#ifdef TCP_OFFLOAD 33174641Skmacy#include <sys/param.h> 34174641Skmacy#include <sys/systm.h> 35174641Skmacy#include <sys/fcntl.h> 36174641Skmacy#include <sys/kernel.h> 37174641Skmacy#include <sys/limits.h> 38176472Skmacy#include <sys/ktr.h> 39174641Skmacy#include <sys/lock.h> 40174641Skmacy#include <sys/mbuf.h> 41174641Skmacy#include <sys/mutex.h> 42181011Skmacy#include <sys/sockstate.h> 43181011Skmacy#include <sys/sockopt.h> 44174641Skmacy#include <sys/socket.h> 45237263Snp#include <sys/socketvar.h> 46181039Sps#include <sys/sockbuf.h> 47174641Skmacy#include <sys/sysctl.h> 48174641Skmacy#include <sys/syslog.h> 49174641Skmacy#include <sys/protosw.h> 50174641Skmacy#include <sys/priv.h> 51237263Snp#include <sys/sglist.h> 52237263Snp#include <sys/taskqueue.h> 53183289Skmacy 54174641Skmacy#include <net/if.h> 55237263Snp#include <net/ethernet.h> 56174641Skmacy#include <net/route.h> 57174641Skmacy 58174641Skmacy#include <netinet/in.h> 59174641Skmacy#include <netinet/in_pcb.h> 60174641Skmacy#include <netinet/in_systm.h> 61174641Skmacy#include <netinet/in_var.h> 62174641Skmacy 63174641Skmacy#include <netinet/ip.h> 64174641Skmacy#include <netinet/tcp_var.h> 65237263Snp#define TCPSTATES 66174641Skmacy#include <netinet/tcp_fsm.h> 67237263Snp#include <netinet/toecore.h> 68174641Skmacy#include <netinet/tcp_seq.h> 69176472Skmacy#include <netinet/tcp_timer.h> 70174641Skmacy#include <net/route.h> 71174641Skmacy 72237263Snp#include "cxgb_include.h" 73237263Snp#include "ulp/tom/cxgb_l2t.h" 74237263Snp#include "ulp/tom/cxgb_tom.h" 75237263Snp#include "ulp/tom/cxgb_toepcb.h" 76174641Skmacy 77237263SnpVNET_DECLARE(int, tcp_do_autosndbuf); 78237263Snp#define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) 79237263SnpVNET_DECLARE(int, tcp_autosndbuf_inc); 80237263Snp#define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) 81237263SnpVNET_DECLARE(int, tcp_autosndbuf_max); 82237263Snp#define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) 83237263SnpVNET_DECLARE(int, tcp_do_autorcvbuf); 84237263Snp#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) 85237263SnpVNET_DECLARE(int, tcp_autorcvbuf_inc); 86237263Snp#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) 87237263SnpVNET_DECLARE(int, tcp_autorcvbuf_max); 88237263Snp#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) 89237263Snpextern int always_keepalive; 90237263Snp 91174641Skmacy/* 92174641Skmacy * For ULP connections HW may add headers, e.g., for digests, that aren't part 93174641Skmacy * of the messages sent by the host but that are part of the TCP payload and 94174641Skmacy * therefore consume TCP sequence space. Tx connection parameters that 95174641Skmacy * operate in TCP sequence space are affected by the HW additions and need to 96174641Skmacy * compensate for them to accurately track TCP sequence numbers. This array 97174641Skmacy * contains the compensating extra lengths for ULP packets. It is indexed by 98174641Skmacy * a packet's ULP submode. 99174641Skmacy */ 100174641Skmacyconst unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8}; 101174641Skmacy 102174641Skmacy/* 103174641Skmacy * Max receive window supported by HW in bytes. Only a small part of it can 104174641Skmacy * be set through option0, the rest needs to be set through RX_DATA_ACK. 105174641Skmacy */ 106174641Skmacy#define MAX_RCV_WND ((1U << 27) - 1) 107174641Skmacy 108174641Skmacy/* 109174641Skmacy * Min receive window. We want it to be large enough to accommodate receive 110174641Skmacy * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. 111174641Skmacy */ 112174641Skmacy#define MIN_RCV_WND (24 * 1024U) 113178302Skmacy#define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS) 114174641Skmacy 115237263Snpstatic void t3_release_offload_resources(struct toepcb *); 116237263Snpstatic void send_reset(struct toepcb *toep); 117174641Skmacy 118237263Snp/* 119237263Snp * Called after the last CPL for the toepcb has been received. 120237263Snp * 121237263Snp * The inp must be wlocked on entry and is unlocked (or maybe destroyed) by the 122237263Snp * time this function exits. 123237263Snp */ 124237263Snpstatic int 125237263Snptoepcb_release(struct toepcb *toep) 126237263Snp{ 127237263Snp struct inpcb *inp = toep->tp_inp; 128237263Snp struct toedev *tod = toep->tp_tod; 129237263Snp struct tom_data *td = t3_tomdata(tod); 130237263Snp int rc; 131178302Skmacy 132237263Snp INP_WLOCK_ASSERT(inp); 133237263Snp KASSERT(!(toep->tp_flags & TP_CPL_DONE), 134237263Snp ("%s: double release?", __func__)); 135174641Skmacy 136237263Snp CTR2(KTR_CXGB, "%s: tid %d", __func__, toep->tp_tid); 137174641Skmacy 138237263Snp toep->tp_flags |= TP_CPL_DONE; 139237263Snp toep->tp_inp = NULL; 140178302Skmacy 141237263Snp mtx_lock(&td->toep_list_lock); 142237263Snp TAILQ_REMOVE(&td->toep_list, toep, link); 143237263Snp mtx_unlock(&td->toep_list_lock); 144176472Skmacy 145237263Snp if (!(toep->tp_flags & TP_ATTACHED)) 146237263Snp t3_release_offload_resources(toep); 147237263Snp 148237263Snp rc = in_pcbrele_wlocked(inp); 149237263Snp if (!rc) 150237263Snp INP_WUNLOCK(inp); 151237263Snp return (rc); 152174641Skmacy} 153174641Skmacy 154237263Snp/* 155237263Snp * One sided detach. The tcpcb is going away and we need to unhook the toepcb 156237263Snp * hanging off it. If the TOE driver is also done with the toepcb we'll release 157237263Snp * all offload resources. 158237263Snp */ 159174641Skmacystatic void 160237263Snptoepcb_detach(struct inpcb *inp) 161174641Skmacy{ 162237263Snp struct toepcb *toep; 163237263Snp struct tcpcb *tp; 164174641Skmacy 165237263Snp KASSERT(inp, ("%s: inp is NULL", __func__)); 166237263Snp INP_WLOCK_ASSERT(inp); 167174641Skmacy 168237263Snp tp = intotcpcb(inp); 169237263Snp toep = tp->t_toe; 170174641Skmacy 171237263Snp KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 172237263Snp KASSERT(toep->tp_flags & TP_ATTACHED, ("%s: not attached", __func__)); 173178302Skmacy 174237263Snp CTR6(KTR_CXGB, "%s: %s %u, toep %p, inp %p, tp %p", __func__, 175237263Snp tp->t_state == TCPS_SYN_SENT ? "atid" : "tid", toep->tp_tid, 176237263Snp toep, inp, tp); 177174641Skmacy 178237263Snp tp->t_toe = NULL; 179237263Snp tp->t_flags &= ~TF_TOE; 180237263Snp toep->tp_flags &= ~TP_ATTACHED; 181174641Skmacy 182237263Snp if (toep->tp_flags & TP_CPL_DONE) 183237263Snp t3_release_offload_resources(toep); 184174641Skmacy} 185174641Skmacy 186237263Snpvoid 187237263Snpt3_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp) 188174641Skmacy{ 189174641Skmacy 190237263Snp toepcb_detach(tp->t_inpcb); 191174641Skmacy} 192174641Skmacy 193237263Snpstatic int 194237263Snpalloc_atid(struct tid_info *t, void *ctx) 195174641Skmacy{ 196237263Snp int atid = -1; 197174641Skmacy 198237263Snp mtx_lock(&t->atid_lock); 199237263Snp if (t->afree) { 200237263Snp union active_open_entry *p = t->afree; 201176472Skmacy 202237263Snp atid = (p - t->atid_tab) + t->atid_base; 203237263Snp t->afree = p->next; 204237263Snp p->ctx = ctx; 205237263Snp t->atids_in_use++; 206174641Skmacy } 207237263Snp mtx_unlock(&t->atid_lock); 208174641Skmacy 209237263Snp return (atid); 210237263Snp} 211178302Skmacy 212237263Snpstatic void 213237263Snpfree_atid(struct tid_info *t, int atid) 214237263Snp{ 215237263Snp union active_open_entry *p = atid2entry(t, atid); 216178302Skmacy 217237263Snp mtx_lock(&t->atid_lock); 218237263Snp p->next = t->afree; 219237263Snp t->afree = p; 220237263Snp t->atids_in_use--; 221237263Snp mtx_unlock(&t->atid_lock); 222237263Snp} 223178302Skmacy 224237263Snpvoid 225237263Snpinsert_tid(struct tom_data *td, void *ctx, unsigned int tid) 226237263Snp{ 227237263Snp struct tid_info *t = &td->tid_maps; 228178302Skmacy 229237263Snp t->tid_tab[tid] = ctx; 230237263Snp atomic_add_int(&t->tids_in_use, 1); 231237263Snp} 232174641Skmacy 233237263Snpvoid 234237263Snpupdate_tid(struct tom_data *td, void *ctx, unsigned int tid) 235237263Snp{ 236237263Snp struct tid_info *t = &td->tid_maps; 237174641Skmacy 238237263Snp t->tid_tab[tid] = ctx; 239237263Snp} 240178302Skmacy 241237263Snpvoid 242237263Snpremove_tid(struct tom_data *td, unsigned int tid) 243178302Skmacy{ 244237263Snp struct tid_info *t = &td->tid_maps; 245178302Skmacy 246237263Snp t->tid_tab[tid] = NULL; 247237263Snp atomic_add_int(&t->tids_in_use, -1); 248178302Skmacy} 249174641Skmacy 250237263Snp/* use ctx as a next pointer in the tid release list */ 251237263Snpvoid 252237263Snpqueue_tid_release(struct toedev *tod, unsigned int tid) 253174641Skmacy{ 254237263Snp struct tom_data *td = t3_tomdata(tod); 255237263Snp void **p = &td->tid_maps.tid_tab[tid]; 256237263Snp struct adapter *sc = tod->tod_softc; 257174641Skmacy 258237263Snp mtx_lock(&td->tid_release_lock); 259237263Snp *p = td->tid_release_list; 260237263Snp td->tid_release_list = p; 261237263Snp if (!*p) 262237263Snp taskqueue_enqueue(sc->tq, &td->tid_release_task); 263237263Snp mtx_unlock(&td->tid_release_lock); 264174641Skmacy} 265174641Skmacy 266174641Skmacy/* 267237263Snp * Populate a TID_RELEASE WR. 268174641Skmacy */ 269237263Snpstatic inline void 270237263Snpmk_tid_release(struct cpl_tid_release *cpl, unsigned int tid) 271174641Skmacy{ 272174641Skmacy 273237263Snp cpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 274237263Snp OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); 275174641Skmacy} 276174641Skmacy 277237263Snpvoid 278237263Snprelease_tid(struct toedev *tod, unsigned int tid, int qset) 279174641Skmacy{ 280237263Snp struct tom_data *td = t3_tomdata(tod); 281237263Snp struct adapter *sc = tod->tod_softc; 282174641Skmacy struct mbuf *m; 283237263Snp struct cpl_tid_release *cpl; 284237263Snp#ifdef INVARIANTS 285237263Snp struct tid_info *t = &td->tid_maps; 286237263Snp#endif 287174641Skmacy 288237263Snp KASSERT(tid >= 0 && tid < t->ntids, 289237263Snp ("%s: tid=%d, ntids=%d", __func__, tid, t->ntids)); 290237263Snp 291237263Snp m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl); 292237263Snp if (m) { 293237263Snp mk_tid_release(cpl, tid); 294237263Snp t3_offload_tx(sc, m); 295237263Snp remove_tid(td, tid); 296237263Snp } else 297237263Snp queue_tid_release(tod, tid); 298237263Snp 299174641Skmacy} 300174641Skmacy 301176472Skmacyvoid 302237263Snpt3_process_tid_release_list(void *data, int pending) 303176472Skmacy{ 304176472Skmacy struct mbuf *m; 305237263Snp struct tom_data *td = data; 306237263Snp struct adapter *sc = td->tod.tod_softc; 307174641Skmacy 308237263Snp mtx_lock(&td->tid_release_lock); 309237263Snp while (td->tid_release_list) { 310237263Snp void **p = td->tid_release_list; 311237263Snp unsigned int tid = p - td->tid_maps.tid_tab; 312237263Snp struct cpl_tid_release *cpl; 313176472Skmacy 314237263Snp td->tid_release_list = (void **)*p; 315237263Snp m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, cpl); /* qs 0 here */ 316237263Snp if (m == NULL) 317237263Snp break; /* XXX: who reschedules the release task? */ 318237263Snp mtx_unlock(&td->tid_release_lock); 319237263Snp mk_tid_release(cpl, tid); 320237263Snp t3_offload_tx(sc, m); 321237263Snp remove_tid(td, tid); 322237263Snp mtx_lock(&td->tid_release_lock); 323237263Snp } 324237263Snp mtx_unlock(&td->tid_release_lock); 325176472Skmacy} 326176472Skmacy 327176472Skmacystatic void 328237263Snpclose_conn(struct adapter *sc, struct toepcb *toep) 329176472Skmacy{ 330237263Snp struct mbuf *m; 331237263Snp struct cpl_close_con_req *req; 332176472Skmacy 333237263Snp if (toep->tp_flags & TP_FIN_SENT) 334237263Snp return; 335176472Skmacy 336237263Snp m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req); 337237263Snp if (m == NULL) 338237263Snp CXGB_UNIMPLEMENTED(); 339176472Skmacy 340237263Snp req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); 341237263Snp req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid)); 342237263Snp OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, toep->tp_tid)); 343237263Snp req->rsvd = 0; 344176472Skmacy 345237263Snp toep->tp_flags |= TP_FIN_SENT; 346237263Snp t3_offload_tx(sc, m); 347176472Skmacy} 348176472Skmacy 349237263Snpstatic inline void 350237263Snpmake_tx_data_wr(struct socket *so, struct tx_data_wr *req, int len, 351237263Snp struct mbuf *tail) 352176472Skmacy{ 353237263Snp struct tcpcb *tp = so_sototcpcb(so); 354237263Snp struct toepcb *toep = tp->t_toe; 355237263Snp struct sockbuf *snd; 356176472Skmacy 357237263Snp inp_lock_assert(tp->t_inpcb); 358237263Snp snd = so_sockbuf_snd(so); 359176472Skmacy 360237263Snp req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); 361237263Snp req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid)); 362237263Snp /* len includes the length of any HW ULP additions */ 363237263Snp req->len = htonl(len); 364237263Snp req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx)); 365237263Snp /* V_TX_ULP_SUBMODE sets both the mode and submode */ 366237263Snp req->flags = htonl(V_TX_ULP_SUBMODE(toep->tp_ulp_mode) | V_TX_URG(0) | 367237263Snp V_TX_SHOVE(!(tp->t_flags & TF_MORETOCOME) && (tail ? 0 : 1))); 368237263Snp req->sndseq = htonl(tp->snd_nxt); 369237263Snp if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) { 370237263Snp struct adapter *sc = toep->tp_tod->tod_softc; 371237263Snp int cpu_idx = sc->rrss_map[toep->tp_qset]; 372176472Skmacy 373237263Snp req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT | 374237263Snp V_TX_CPU_IDX(cpu_idx)); 375176472Skmacy 376237263Snp /* Sendbuffer is in units of 32KB. */ 377237263Snp if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE) 378237263Snp req->param |= htonl(V_TX_SNDBUF(VNET(tcp_autosndbuf_max) >> 15)); 379237263Snp else 380237263Snp req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15)); 381176472Skmacy 382237263Snp toep->tp_flags |= TP_DATASENT; 383237263Snp } 384176472Skmacy} 385176472Skmacy 386176472Skmacy/* 387237263Snp * TOM_XXX_DUPLICATION sgl_len, calc_tx_descs, calc_tx_descs_ofld, mbuf_wrs, etc. 388237263Snp * TOM_XXX_MOVE to some common header file. 389174641Skmacy */ 390174641Skmacy/* 391237263Snp * IMM_LEN: # of bytes that can be tx'd as immediate data. There are 16 flits 392237263Snp * in a tx desc; subtract 3 for tx_data_wr (including the WR header), and 1 more 393237263Snp * for the second gen bit flit. This leaves us with 12 flits. 394237263Snp * 395237263Snp * descs_to_sgllen: # of SGL entries that can fit into the given # of tx descs. 396237263Snp * The first desc has a tx_data_wr (which includes the WR header), the rest have 397237263Snp * the WR header only. All descs have the second gen bit flit. 398237263Snp * 399237263Snp * sgllen_to_descs: # of tx descs used up by an sgl of given length. The first 400237263Snp * desc has a tx_data_wr (which includes the WR header), the rest have the WR 401237263Snp * header only. All descs have the second gen bit flit. 402237263Snp * 403237263Snp * flits_to_sgllen: # of SGL entries that can be fit in the given # of flits. 404237263Snp * 405174641Skmacy */ 406237263Snp#define IMM_LEN 96 407237263Snpstatic int descs_to_sgllen[TX_MAX_DESC + 1] = {0, 8, 17, 26, 35}; 408237263Snpstatic int sgllen_to_descs[TX_MAX_SEGS] = { 409237263Snp 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, /* 0 - 9 */ 410237263Snp 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, /* 10 - 19 */ 411237263Snp 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, /* 20 - 29 */ 412237263Snp 4, 4, 4, 4, 4, 4 /* 30 - 35 */ 413237263Snp}; 414237263Snp#if 0 415237263Snpstatic int flits_to_sgllen[TX_DESC_FLITS + 1] = { 416237263Snp 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10 417237263Snp}; 418237263Snp#endif 419237263Snp#if SGE_NUM_GENBITS != 2 420237263Snp#error "SGE_NUM_GENBITS really must be 2" 421237263Snp#endif 422237263Snp 423237263Snpint 424237263Snpt3_push_frames(struct socket *so, int req_completion) 425174641Skmacy{ 426237263Snp struct tcpcb *tp = so_sototcpcb(so); 427174641Skmacy struct toepcb *toep = tp->t_toe; 428237263Snp struct mbuf *m0, *sndptr, *m; 429237263Snp struct toedev *tod = toep->tp_tod; 430237263Snp struct adapter *sc = tod->tod_softc; 431237263Snp int bytes, ndesc, total_bytes = 0, mlen; 432237263Snp struct sockbuf *snd; 433237263Snp struct sglist *sgl; 434237263Snp struct ofld_hdr *oh; 435237263Snp caddr_t dst; 436237263Snp struct tx_data_wr *wr; 437174641Skmacy 438237263Snp inp_lock_assert(tp->t_inpcb); 439178302Skmacy 440237263Snp snd = so_sockbuf_snd(so); 441237263Snp SOCKBUF_LOCK(snd); 442174641Skmacy 443178302Skmacy /* 444237263Snp * Autosize the send buffer. 445174641Skmacy */ 446237263Snp if (snd->sb_flags & SB_AUTOSIZE && VNET(tcp_do_autosndbuf)) { 447237263Snp if (snd->sb_cc >= (snd->sb_hiwat / 8 * 7) && 448237263Snp snd->sb_cc < VNET(tcp_autosndbuf_max)) { 449237263Snp if (!sbreserve_locked(snd, min(snd->sb_hiwat + 450237263Snp VNET(tcp_autosndbuf_inc), VNET(tcp_autosndbuf_max)), 451237263Snp so, curthread)) 452237263Snp snd->sb_flags &= ~SB_AUTOSIZE; 453237263Snp } 454237263Snp } 455174641Skmacy 456237263Snp if (toep->tp_m_last && toep->tp_m_last == snd->sb_sndptr) 457237263Snp sndptr = toep->tp_m_last->m_next; 458237263Snp else 459237263Snp sndptr = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb; 460174641Skmacy 461237263Snp /* Nothing to send or no WRs available for sending data */ 462237263Snp if (toep->tp_wr_avail == 0 || sndptr == NULL) 463237263Snp goto out; 464174641Skmacy 465237263Snp /* Something to send and at least 1 WR available */ 466237263Snp while (toep->tp_wr_avail && sndptr != NULL) { 467174641Skmacy 468237263Snp m0 = m_gethdr(M_NOWAIT, MT_DATA); 469237263Snp if (m0 == NULL) 470237263Snp break; 471237263Snp oh = mtod(m0, struct ofld_hdr *); 472237263Snp wr = (void *)(oh + 1); 473237263Snp dst = (void *)(wr + 1); 474174641Skmacy 475237263Snp m0->m_pkthdr.len = m0->m_len = sizeof(*oh) + sizeof(*wr); 476237263Snp oh->flags = V_HDR_CTRL(CPL_PRIORITY_DATA) | F_HDR_DF | 477237263Snp V_HDR_QSET(toep->tp_qset); 478174641Skmacy 479237263Snp /* 480237263Snp * Try to construct an immediate data WR if possible. Stuff as 481237263Snp * much data into it as possible, one whole mbuf at a time. 482237263Snp */ 483237263Snp mlen = sndptr->m_len; 484237263Snp ndesc = bytes = 0; 485237263Snp while (mlen <= IMM_LEN - bytes) { 486237263Snp bcopy(sndptr->m_data, dst, mlen); 487237263Snp bytes += mlen; 488237263Snp dst += mlen; 489174641Skmacy 490237263Snp if (!(sndptr = sndptr->m_next)) 491237263Snp break; 492237263Snp mlen = sndptr->m_len; 493237263Snp } 494174641Skmacy 495237263Snp if (bytes) { 496174641Skmacy 497237263Snp /* Was able to fit 'bytes' bytes in an immediate WR */ 498177530Skmacy 499237263Snp ndesc = 1; 500237263Snp make_tx_data_wr(so, wr, bytes, sndptr); 501174641Skmacy 502237263Snp m0->m_len += bytes; 503237263Snp m0->m_pkthdr.len = m0->m_len; 504174641Skmacy 505237263Snp } else { 506237263Snp int wr_avail = min(toep->tp_wr_avail, TX_MAX_DESC); 507174641Skmacy 508237263Snp /* Need to make an SGL */ 509174641Skmacy 510237263Snp sgl = sglist_alloc(descs_to_sgllen[wr_avail], M_NOWAIT); 511237263Snp if (sgl == NULL) 512237263Snp break; 513177530Skmacy 514237263Snp for (m = sndptr; m != NULL; m = m->m_next) { 515237263Snp if ((mlen = m->m_len) > 0) { 516237263Snp if (sglist_append(sgl, m->m_data, mlen)) 517237263Snp break; 518237263Snp } 519237263Snp bytes += mlen; 520237263Snp } 521237263Snp sndptr = m; 522237263Snp if (bytes == 0) { 523237263Snp sglist_free(sgl); 524237263Snp break; 525237263Snp } 526237263Snp ndesc = sgllen_to_descs[sgl->sg_nseg]; 527237263Snp oh->flags |= F_HDR_SGL; 528237263Snp oh->sgl = sgl; 529237263Snp make_tx_data_wr(so, wr, bytes, sndptr); 530237263Snp } 531178302Skmacy 532237263Snp oh->flags |= V_HDR_NDESC(ndesc); 533237263Snp oh->plen = bytes; 534174641Skmacy 535237263Snp snd->sb_sndptr = sndptr; 536237263Snp snd->sb_sndptroff += bytes; 537237263Snp if (sndptr == NULL) { 538237263Snp snd->sb_sndptr = snd->sb_mbtail; 539237263Snp snd->sb_sndptroff -= snd->sb_mbtail->m_len; 540237263Snp toep->tp_m_last = snd->sb_mbtail; 541237263Snp } else 542237263Snp toep->tp_m_last = NULL; 543177530Skmacy 544237263Snp total_bytes += bytes; 545174641Skmacy 546237263Snp toep->tp_wr_avail -= ndesc; 547237263Snp toep->tp_wr_unacked += ndesc; 548174641Skmacy 549237263Snp if ((req_completion && toep->tp_wr_unacked == ndesc) || 550237263Snp toep->tp_wr_unacked >= toep->tp_wr_max / 2) { 551237263Snp wr->wr.wrh_hi |= htonl(F_WR_COMPL); 552237263Snp toep->tp_wr_unacked = 0; 553237263Snp } 554174641Skmacy 555237263Snp enqueue_wr(toep, m0); 556237263Snp l2t_send(sc, m0, toep->tp_l2t); 557237263Snp } 558237263Snpout: 559237263Snp SOCKBUF_UNLOCK(snd); 560174641Skmacy 561237263Snp if (sndptr == NULL && (toep->tp_flags & TP_SEND_FIN)) 562237263Snp close_conn(sc, toep); 563174641Skmacy 564237263Snp return (total_bytes); 565174641Skmacy} 566174641Skmacy 567237263Snpstatic int 568237263Snpsend_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) 569174641Skmacy{ 570174641Skmacy struct mbuf *m; 571237263Snp struct cpl_rx_data_ack *req; 572237263Snp uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 573174641Skmacy 574237263Snp m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_CONTROL, req); 575237263Snp if (m == NULL) 576237263Snp return (0); 577174641Skmacy 578237263Snp req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 579237263Snp req->wr.wrh_lo = 0; 580237263Snp OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid)); 581237263Snp req->credit_dack = htonl(dack | V_RX_CREDITS(credits)); 582237263Snp t3_offload_tx(sc, m); 583237263Snp return (credits); 584174641Skmacy} 585174641Skmacy 586174641Skmacyvoid 587237263Snpt3_rcvd(struct toedev *tod, struct tcpcb *tp) 588174641Skmacy{ 589237263Snp struct adapter *sc = tod->tod_softc; 590237263Snp struct inpcb *inp = tp->t_inpcb; 591237263Snp struct socket *so = inp->inp_socket; 592237263Snp struct sockbuf *so_rcv = &so->so_rcv; 593237263Snp struct toepcb *toep = tp->t_toe; 594237263Snp int must_send; 595178302Skmacy 596237263Snp INP_WLOCK_ASSERT(inp); 597174641Skmacy 598237263Snp SOCKBUF_LOCK(so_rcv); 599237263Snp KASSERT(toep->tp_enqueued >= so_rcv->sb_cc, 600237263Snp ("%s: so_rcv->sb_cc > enqueued", __func__)); 601237263Snp toep->tp_rx_credits += toep->tp_enqueued - so_rcv->sb_cc; 602237263Snp toep->tp_enqueued = so_rcv->sb_cc; 603237263Snp SOCKBUF_UNLOCK(so_rcv); 604174641Skmacy 605237263Snp must_send = toep->tp_rx_credits + 16384 >= tp->rcv_wnd; 606237263Snp if (must_send || toep->tp_rx_credits >= 15 * 1024) { 607237263Snp int credits; 608178302Skmacy 609237263Snp credits = send_rx_credits(sc, toep, toep->tp_rx_credits); 610237263Snp toep->tp_rx_credits -= credits; 611237263Snp tp->rcv_wnd += credits; 612237263Snp tp->rcv_adv += credits; 613237263Snp } 614177340Skmacy} 615177340Skmacy 616237263Snpstatic int 617237263Snpdo_rx_urg_notify(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 618174641Skmacy{ 619237263Snp struct adapter *sc = qs->adap; 620237263Snp struct tom_data *td = sc->tom_softc; 621237263Snp struct cpl_rx_urg_notify *hdr = mtod(m, void *); 622237263Snp unsigned int tid = GET_TID(hdr); 623237263Snp struct toepcb *toep = lookup_tid(&td->tid_maps, tid); 624174641Skmacy 625237263Snp log(LOG_ERR, "%s: tid %u inp %p", __func__, tid, toep->tp_inp); 626174641Skmacy 627237263Snp m_freem(m); 628237263Snp return (0); 629174641Skmacy} 630174641Skmacy 631237263Snpint 632237263Snpt3_send_fin(struct toedev *tod, struct tcpcb *tp) 633174641Skmacy{ 634237263Snp struct toepcb *toep = tp->t_toe; 635237263Snp struct inpcb *inp = tp->t_inpcb; 636237263Snp struct socket *so = inp_inpcbtosocket(inp); 637237263Snp#if defined(KTR) 638237263Snp unsigned int tid = toep->tp_tid; 639237263Snp#endif 640174641Skmacy 641237263Snp INP_INFO_WLOCK_ASSERT(&V_tcbinfo); 642237263Snp INP_WLOCK_ASSERT(inp); 643174641Skmacy 644237263Snp CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep, 645237263Snp toep->tp_flags); 646174641Skmacy 647237263Snp toep->tp_flags |= TP_SEND_FIN; 648237263Snp t3_push_frames(so, 1); 649174641Skmacy 650237263Snp return (0); 651174641Skmacy} 652174641Skmacy 653174641Skmacyint 654237263Snpt3_tod_output(struct toedev *tod, struct tcpcb *tp) 655174641Skmacy{ 656237263Snp struct inpcb *inp = tp->t_inpcb; 657237263Snp struct socket *so = inp->inp_socket; 658174641Skmacy 659237263Snp t3_push_frames(so, 1); 660237263Snp return (0); 661174641Skmacy} 662174641Skmacy 663237263Snp/* What mtu_idx to use, given a 4-tuple and/or an MSS cap */ 664237263Snpint 665237263Snpfind_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss) 666174641Skmacy{ 667237263Snp unsigned short *mtus = &sc->params.mtus[0]; 668237263Snp int i = 0, mss; 669178302Skmacy 670237263Snp KASSERT(inc != NULL || pmss > 0, 671237263Snp ("%s: at least one of inc/pmss must be specified", __func__)); 672174641Skmacy 673237263Snp mss = inc ? tcp_mssopt(inc) : pmss; 674237263Snp if (pmss > 0 && mss > pmss) 675237263Snp mss = pmss; 676174641Skmacy 677237263Snp while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40) 678237263Snp ++i; 679174641Skmacy 680174641Skmacy return (i); 681174641Skmacy} 682174641Skmacy 683174641Skmacystatic inline void 684237263Snppurge_wr_queue(struct toepcb *toep) 685174641Skmacy{ 686237263Snp struct mbuf *m; 687237263Snp struct ofld_hdr *oh; 688174641Skmacy 689237263Snp while ((m = mbufq_dequeue(&toep->wr_list)) != NULL) { 690237263Snp oh = mtod(m, struct ofld_hdr *); 691237263Snp if (oh->flags & F_HDR_SGL) 692237263Snp sglist_free(oh->sgl); 693237263Snp m_freem(m); 694237263Snp } 695174641Skmacy} 696174641Skmacy 697174641Skmacy/* 698237263Snp * Release cxgb(4) and T3 resources held by an offload connection (TID, L2T 699237263Snp * entry, etc.) 700174641Skmacy */ 701174641Skmacystatic void 702174641Skmacyt3_release_offload_resources(struct toepcb *toep) 703174641Skmacy{ 704237263Snp struct toedev *tod = toep->tp_tod; 705237263Snp struct tom_data *td = t3_tomdata(tod); 706174641Skmacy 707237263Snp /* 708237263Snp * The TOM explicitly detaches its toepcb from the system's inp before 709237263Snp * it releases the offload resources. 710237263Snp */ 711237263Snp if (toep->tp_inp) { 712237263Snp panic("%s: inp %p still attached to toepcb %p", 713237263Snp __func__, toep->tp_inp, toep); 714237263Snp } 715174641Skmacy 716237263Snp if (toep->tp_wr_avail != toep->tp_wr_max) 717174641Skmacy purge_wr_queue(toep); 718174641Skmacy 719174641Skmacy if (toep->tp_l2t) { 720237263Snp l2t_release(td->l2t, toep->tp_l2t); 721174641Skmacy toep->tp_l2t = NULL; 722174641Skmacy } 723174641Skmacy 724237263Snp if (toep->tp_tid >= 0) 725237263Snp release_tid(tod, toep->tp_tid, toep->tp_qset); 726174641Skmacy 727237263Snp toepcb_free(toep); 728174641Skmacy} 729174641Skmacy 730174641Skmacy/* 731237263Snp * Determine the receive window size for a socket. 732174641Skmacy */ 733237263Snpunsigned long 734237263Snpselect_rcv_wnd(struct socket *so) 735174641Skmacy{ 736237263Snp unsigned long wnd; 737237263Snp 738237263Snp SOCKBUF_LOCK_ASSERT(&so->so_rcv); 739237263Snp 740237263Snp wnd = sbspace(&so->so_rcv); 741237263Snp if (wnd < MIN_RCV_WND) 742237263Snp wnd = MIN_RCV_WND; 743237263Snp 744237263Snp return min(wnd, MAX_RCV_WND); 745237263Snp} 746237263Snp 747237263Snpint 748237263Snpselect_rcv_wscale(void) 749237263Snp{ 750174641Skmacy int wscale = 0; 751237263Snp unsigned long space = sb_max; 752174641Skmacy 753174641Skmacy if (space > MAX_RCV_WND) 754174641Skmacy space = MAX_RCV_WND; 755174641Skmacy 756237263Snp while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space) 757237263Snp wscale++; 758176472Skmacy 759176472Skmacy return (wscale); 760174641Skmacy} 761174641Skmacy 762237263Snp 763174641Skmacy/* 764237263Snp * Set up the socket for TCP offload. 765174641Skmacy */ 766237263Snpvoid 767237263Snpoffload_socket(struct socket *so, struct toepcb *toep) 768174641Skmacy{ 769237263Snp struct toedev *tod = toep->tp_tod; 770237263Snp struct tom_data *td = t3_tomdata(tod); 771237263Snp struct inpcb *inp = sotoinpcb(so); 772237263Snp struct tcpcb *tp = intotcpcb(inp); 773174641Skmacy 774237263Snp INP_WLOCK_ASSERT(inp); 775176472Skmacy 776237263Snp /* Update socket */ 777237263Snp SOCKBUF_LOCK(&so->so_snd); 778237263Snp so_sockbuf_snd(so)->sb_flags |= SB_NOCOALESCE; 779237263Snp SOCKBUF_UNLOCK(&so->so_snd); 780237263Snp SOCKBUF_LOCK(&so->so_rcv); 781237263Snp so_sockbuf_rcv(so)->sb_flags |= SB_NOCOALESCE; 782237263Snp SOCKBUF_UNLOCK(&so->so_rcv); 783174641Skmacy 784237263Snp /* Update TCP PCB */ 785237263Snp tp->tod = toep->tp_tod; 786174641Skmacy tp->t_toe = toep; 787237263Snp tp->t_flags |= TF_TOE; 788176472Skmacy 789237263Snp /* Install an extra hold on inp */ 790237263Snp toep->tp_inp = inp; 791237263Snp toep->tp_flags |= TP_ATTACHED; 792237263Snp in_pcbref(inp); 793174641Skmacy 794237263Snp /* Add the TOE PCB to the active list */ 795237263Snp mtx_lock(&td->toep_list_lock); 796237263Snp TAILQ_INSERT_HEAD(&td->toep_list, toep, link); 797237263Snp mtx_unlock(&td->toep_list_lock); 798174641Skmacy} 799174641Skmacy 800237263Snp/* This is _not_ the normal way to "unoffload" a socket. */ 801237263Snpvoid 802237263Snpundo_offload_socket(struct socket *so) 803174641Skmacy{ 804237263Snp struct inpcb *inp = sotoinpcb(so); 805237263Snp struct tcpcb *tp = intotcpcb(inp); 806237263Snp struct toepcb *toep = tp->t_toe; 807237263Snp struct toedev *tod = toep->tp_tod; 808237263Snp struct tom_data *td = t3_tomdata(tod); 809174641Skmacy 810237263Snp INP_WLOCK_ASSERT(inp); 811174641Skmacy 812237263Snp so_sockbuf_snd(so)->sb_flags &= ~SB_NOCOALESCE; 813237263Snp so_sockbuf_rcv(so)->sb_flags &= ~SB_NOCOALESCE; 814174641Skmacy 815237263Snp tp->tod = NULL; 816237263Snp tp->t_toe = NULL; 817237263Snp tp->t_flags &= ~TF_TOE; 818174641Skmacy 819237263Snp toep->tp_inp = NULL; 820237263Snp toep->tp_flags &= ~TP_ATTACHED; 821237263Snp if (in_pcbrele_wlocked(inp)) 822237263Snp panic("%s: inp freed.", __func__); 823237263Snp 824237263Snp mtx_lock(&td->toep_list_lock); 825237263Snp TAILQ_REMOVE(&td->toep_list, toep, link); 826237263Snp mtx_unlock(&td->toep_list_lock); 827174641Skmacy} 828176472Skmacy 829237263Snp/* 830237263Snp * Socket could be a listening socket, and we may not have a toepcb at all at 831237263Snp * this time. 832237263Snp */ 833237263Snpuint32_t 834237263Snpcalc_opt0h(struct socket *so, int mtu_idx, int rscale, struct l2t_entry *e) 835176472Skmacy{ 836237263Snp uint32_t opt0h = F_TCAM_BYPASS | V_WND_SCALE(rscale) | 837237263Snp V_MSS_IDX(mtu_idx); 838176472Skmacy 839237263Snp if (so != NULL) { 840237263Snp struct inpcb *inp = sotoinpcb(so); 841237263Snp struct tcpcb *tp = intotcpcb(inp); 842237263Snp int keepalive = always_keepalive || 843237263Snp so_options_get(so) & SO_KEEPALIVE; 844237263Snp 845237263Snp opt0h |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0); 846237263Snp opt0h |= V_KEEP_ALIVE(keepalive != 0); 847237263Snp } 848237263Snp 849237263Snp if (e != NULL) 850237263Snp opt0h |= V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx); 851237263Snp 852237263Snp return (htobe32(opt0h)); 853176472Skmacy} 854176472Skmacy 855237263Snpuint32_t 856237263Snpcalc_opt0l(struct socket *so, int rcv_bufsize) 857174641Skmacy{ 858237263Snp uint32_t opt0l = V_ULP_MODE(ULP_MODE_NONE) | V_RCV_BUFSIZ(rcv_bufsize); 859176472Skmacy 860237263Snp KASSERT(rcv_bufsize <= M_RCV_BUFSIZ, 861237263Snp ("%s: rcv_bufsize (%d) is too high", __func__, rcv_bufsize)); 862237263Snp 863237263Snp if (so != NULL) /* optional because noone cares about IP TOS */ 864237263Snp opt0l |= V_TOS(INP_TOS(sotoinpcb(so))); 865237263Snp 866237263Snp return (htobe32(opt0l)); 867174641Skmacy} 868174641Skmacy 869174641Skmacy/* 870174641Skmacy * Convert an ACT_OPEN_RPL status to an errno. 871174641Skmacy */ 872174641Skmacystatic int 873174641Skmacyact_open_rpl_status_to_errno(int status) 874174641Skmacy{ 875174641Skmacy switch (status) { 876174641Skmacy case CPL_ERR_CONN_RESET: 877174641Skmacy return (ECONNREFUSED); 878174641Skmacy case CPL_ERR_ARP_MISS: 879174641Skmacy return (EHOSTUNREACH); 880174641Skmacy case CPL_ERR_CONN_TIMEDOUT: 881174641Skmacy return (ETIMEDOUT); 882174641Skmacy case CPL_ERR_TCAM_FULL: 883239511Snp return (EAGAIN); 884174641Skmacy case CPL_ERR_CONN_EXIST: 885174641Skmacy log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n"); 886239511Snp return (EAGAIN); 887174641Skmacy default: 888174641Skmacy return (EIO); 889174641Skmacy } 890174641Skmacy} 891174641Skmacy 892174641Skmacy/* 893174641Skmacy * Return whether a failed active open has allocated a TID 894174641Skmacy */ 895174641Skmacystatic inline int 896174641Skmacyact_open_has_tid(int status) 897174641Skmacy{ 898174641Skmacy return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && 899174641Skmacy status != CPL_ERR_ARP_MISS; 900174641Skmacy} 901174641Skmacy 902174641Skmacy/* 903237263Snp * Active open failed. 904174641Skmacy */ 905174641Skmacystatic int 906237263Snpdo_act_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 907174641Skmacy{ 908237263Snp struct adapter *sc = qs->adap; 909237263Snp struct tom_data *td = sc->tom_softc; 910237263Snp struct toedev *tod = &td->tod; 911237263Snp struct cpl_act_open_rpl *rpl = mtod(m, void *); 912237263Snp unsigned int atid = G_TID(ntohl(rpl->atid)); 913237263Snp struct toepcb *toep = lookup_atid(&td->tid_maps, atid); 914237263Snp struct inpcb *inp = toep->tp_inp; 915239511Snp int s = rpl->status, rc; 916237263Snp 917237263Snp CTR3(KTR_CXGB, "%s: atid %u, status %u ", __func__, atid, s); 918237263Snp 919237263Snp free_atid(&td->tid_maps, atid); 920237263Snp toep->tp_tid = -1; 921237263Snp 922237263Snp if (act_open_has_tid(s)) 923237263Snp queue_tid_release(tod, GET_TID(rpl)); 924237263Snp 925239511Snp rc = act_open_rpl_status_to_errno(s); 926239511Snp if (rc != EAGAIN) 927237263Snp INP_INFO_WLOCK(&V_tcbinfo); 928239511Snp INP_WLOCK(inp); 929239511Snp toe_connect_failed(tod, inp, rc); 930239511Snp toepcb_release(toep); /* unlocks inp */ 931239511Snp if (rc != EAGAIN) 932237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 933237263Snp 934237263Snp m_freem(m); 935174641Skmacy return (0); 936174641Skmacy} 937174641Skmacy 938174641Skmacy/* 939237263Snp * Send an active open request. 940174641Skmacy * 941237263Snp * State of affairs on entry: 942237263Snp * soisconnecting (so_state |= SS_ISCONNECTING) 943237263Snp * tcbinfo not locked (this has changed - used to be WLOCKed) 944237263Snp * inp WLOCKed 945237263Snp * tp->t_state = TCPS_SYN_SENT 946237263Snp * rtalloc1, RT_UNLOCK on rt. 947174641Skmacy */ 948174641Skmacyint 949237263Snpt3_connect(struct toedev *tod, struct socket *so, 950174641Skmacy struct rtentry *rt, struct sockaddr *nam) 951174641Skmacy{ 952237263Snp struct mbuf *m = NULL; 953237263Snp struct l2t_entry *e = NULL; 954237263Snp struct tom_data *td = t3_tomdata(tod); 955237263Snp struct adapter *sc = tod->tod_softc; 956237263Snp struct cpl_act_open_req *cpl; 957237263Snp struct inpcb *inp = sotoinpcb(so); 958174641Skmacy struct tcpcb *tp = intotcpcb(inp); 959237263Snp struct toepcb *toep; 960237263Snp int atid = -1, mtu_idx, rscale, cpu_idx, qset; 961237263Snp struct sockaddr *gw; 962237263Snp struct ifnet *ifp = rt->rt_ifp; 963237263Snp struct port_info *pi = ifp->if_softc; /* XXX wrong for VLAN etc. */ 964174641Skmacy 965237263Snp INP_WLOCK_ASSERT(inp); 966237263Snp 967237263Snp toep = toepcb_alloc(tod); 968174641Skmacy if (toep == NULL) 969237263Snp goto failed; 970174641Skmacy 971237263Snp atid = alloc_atid(&td->tid_maps, toep); 972237263Snp if (atid < 0) 973237263Snp goto failed; 974174641Skmacy 975237263Snp qset = pi->first_qset + (arc4random() % pi->nqsets); 976174641Skmacy 977237263Snp m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl); 978237263Snp if (m == NULL) 979237263Snp goto failed; 980178302Skmacy 981237263Snp gw = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam; 982237263Snp e = t3_l2t_get(pi, ifp, gw); 983237263Snp if (e == NULL) 984237263Snp goto failed; 985174641Skmacy 986237263Snp toep->tp_l2t = e; 987237263Snp toep->tp_tid = atid; /* used to double check response */ 988237263Snp toep->tp_qset = qset; 989174641Skmacy 990237263Snp SOCKBUF_LOCK(&so->so_rcv); 991237263Snp /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ 992237263Snp toep->tp_rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); 993237263Snp SOCKBUF_UNLOCK(&so->so_rcv); 994174641Skmacy 995237263Snp offload_socket(so, toep); 996174641Skmacy 997237263Snp /* 998237263Snp * The kernel sets request_r_scale based on sb_max whereas we need to 999237263Snp * take hardware's MAX_RCV_WND into account too. This is normally a 1000237263Snp * no-op as MAX_RCV_WND is much larger than the default sb_max. 1001237263Snp */ 1002237263Snp if (tp->t_flags & TF_REQ_SCALE) 1003237263Snp rscale = tp->request_r_scale = select_rcv_wscale(); 1004237263Snp else 1005237263Snp rscale = 0; 1006237263Snp mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0); 1007237263Snp cpu_idx = sc->rrss_map[qset]; 1008174641Skmacy 1009237263Snp cpl->wr.wrh_hi = htobe32(V_WR_OP(FW_WROPCODE_FORWARD)); 1010237263Snp cpl->wr.wrh_lo = 0; 1011237263Snp OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid)); 1012237263Snp inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, 1013237263Snp &cpl->peer_port); 1014237263Snp cpl->opt0h = calc_opt0h(so, mtu_idx, rscale, e); 1015237263Snp cpl->opt0l = calc_opt0l(so, toep->tp_rx_credits); 1016237263Snp cpl->params = 0; 1017237263Snp cpl->opt2 = calc_opt2(cpu_idx); 1018174641Skmacy 1019237263Snp CTR5(KTR_CXGB, "%s: atid %u (%s), toep %p, inp %p", __func__, 1020237263Snp toep->tp_tid, tcpstates[tp->t_state], toep, inp); 1021174641Skmacy 1022237263Snp if (l2t_send(sc, m, e) == 0) 1023237263Snp return (0); 1024178767Skmacy 1025237263Snp undo_offload_socket(so); 1026174641Skmacy 1027237263Snpfailed: 1028237263Snp CTR5(KTR_CXGB, "%s: FAILED, atid %d, toep %p, l2te %p, mbuf %p", 1029237263Snp __func__, atid, toep, e, m); 1030174641Skmacy 1031237263Snp if (atid >= 0) 1032237263Snp free_atid(&td->tid_maps, atid); 1033180583Skmacy 1034237263Snp if (e) 1035237263Snp l2t_release(td->l2t, e); 1036180583Skmacy 1037237263Snp if (toep) 1038237263Snp toepcb_free(toep); 1039174641Skmacy 1040237263Snp m_freem(m); 1041178302Skmacy 1042237263Snp return (ENOMEM); 1043174641Skmacy} 1044174641Skmacy 1045174641Skmacy/* 1046237263Snp * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do not 1047237263Snp * send multiple ABORT_REQs for the same connection and also that we do not try 1048237263Snp * to send a message after the connection has closed. 1049176472Skmacy */ 1050176472Skmacystatic void 1051237263Snpsend_reset(struct toepcb *toep) 1052176472Skmacy{ 1053176472Skmacy 1054237263Snp struct cpl_abort_req *req; 1055237263Snp unsigned int tid = toep->tp_tid; 1056237263Snp struct inpcb *inp = toep->tp_inp; 1057237263Snp struct socket *so = inp->inp_socket; 1058237263Snp struct tcpcb *tp = intotcpcb(inp); 1059237263Snp struct toedev *tod = toep->tp_tod; 1060237263Snp struct adapter *sc = tod->tod_softc; 1061237263Snp struct mbuf *m; 1062176472Skmacy 1063237263Snp INP_INFO_WLOCK_ASSERT(&V_tcbinfo); 1064237263Snp INP_WLOCK_ASSERT(inp); 1065176472Skmacy 1066237263Snp CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep, 1067237263Snp toep->tp_flags); 1068176472Skmacy 1069237263Snp if (toep->tp_flags & TP_ABORT_SHUTDOWN) 1070176472Skmacy return; 1071176472Skmacy 1072237263Snp toep->tp_flags |= (TP_ABORT_RPL_PENDING | TP_ABORT_SHUTDOWN); 1073176472Skmacy 1074237263Snp /* Purge the send queue */ 1075237263Snp sbflush(so_sockbuf_snd(so)); 1076237263Snp purge_wr_queue(toep); 1077176472Skmacy 1078237263Snp m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req); 1079237263Snp if (m == NULL) 1080237263Snp CXGB_UNIMPLEMENTED(); 1081176472Skmacy 1082237263Snp req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); 1083237263Snp req->wr.wrh_lo = htonl(V_WR_TID(tid)); 1084237263Snp OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); 1085237263Snp req->rsvd0 = htonl(tp->snd_nxt); 1086237263Snp req->rsvd1 = !(toep->tp_flags & TP_DATASENT); 1087237263Snp req->cmd = CPL_ABORT_SEND_RST; 1088176472Skmacy 1089237263Snp if (tp->t_state == TCPS_SYN_SENT) 1090237263Snp mbufq_tail(&toep->out_of_order_queue, m); /* defer */ 1091176472Skmacy else 1092237263Snp l2t_send(sc, m, toep->tp_l2t); 1093176472Skmacy} 1094176472Skmacy 1095237263Snpint 1096237263Snpt3_send_rst(struct toedev *tod __unused, struct tcpcb *tp) 1097176472Skmacy{ 1098176472Skmacy 1099237263Snp send_reset(tp->t_toe); 1100176472Skmacy return (0); 1101176472Skmacy} 1102176472Skmacy 1103176472Skmacy/* 1104237263Snp * Handler for RX_DATA CPL messages. 1105174641Skmacy */ 1106237263Snpstatic int 1107237263Snpdo_rx_data(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1108174641Skmacy{ 1109237263Snp struct adapter *sc = qs->adap; 1110237263Snp struct tom_data *td = sc->tom_softc; 1111237263Snp struct cpl_rx_data *hdr = mtod(m, void *); 1112237263Snp unsigned int tid = GET_TID(hdr); 1113237263Snp struct toepcb *toep = lookup_tid(&td->tid_maps, tid); 1114237263Snp struct inpcb *inp = toep->tp_inp; 1115237263Snp struct tcpcb *tp; 1116178302Skmacy struct socket *so; 1117237263Snp struct sockbuf *so_rcv; 1118174641Skmacy 1119237263Snp /* Advance over CPL */ 1120174641Skmacy m_adj(m, sizeof(*hdr)); 1121174641Skmacy 1122237263Snp /* XXX: revisit. This comes from the T4 TOM */ 1123237263Snp if (__predict_false(inp == NULL)) { 1124237263Snp /* 1125237263Snp * do_pass_establish failed and must be attempting to abort the 1126237263Snp * connection. Meanwhile, the T4 has sent us data for such a 1127237263Snp * connection. 1128237263Snp */ 1129176472Skmacy#ifdef notyet 1130237263Snp KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN), 1131237263Snp ("%s: inp NULL and tid isn't being aborted", __func__)); 1132176472Skmacy#endif 1133237263Snp m_freem(m); 1134237263Snp return (0); 1135174641Skmacy } 1136174641Skmacy 1137237263Snp INP_WLOCK(inp); 1138237263Snp if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { 1139237263Snp CTR4(KTR_CXGB, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", 1140237263Snp __func__, tid, m->m_pkthdr.len, inp->inp_flags); 1141237263Snp INP_WUNLOCK(inp); 1142237263Snp m_freem(m); 1143237263Snp return (0); 1144177340Skmacy } 1145174641Skmacy 1146237263Snp if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) 1147237263Snp toep->tp_delack_mode = hdr->dack_mode; 1148174641Skmacy 1149237263Snp tp = intotcpcb(inp); 1150178302Skmacy 1151237263Snp#ifdef INVARIANTS 1152237263Snp if (__predict_false(tp->rcv_nxt != be32toh(hdr->seq))) { 1153237263Snp log(LOG_ERR, 1154237263Snp "%s: unexpected seq# %x for TID %u, rcv_nxt %x\n", 1155237263Snp __func__, be32toh(hdr->seq), toep->tp_tid, tp->rcv_nxt); 1156174641Skmacy } 1157177340Skmacy#endif 1158237263Snp tp->rcv_nxt += m->m_pkthdr.len; 1159237263Snp KASSERT(tp->rcv_wnd >= m->m_pkthdr.len, 1160237263Snp ("%s: negative window size", __func__)); 1161237263Snp tp->rcv_wnd -= m->m_pkthdr.len; 1162237263Snp tp->t_rcvtime = ticks; 1163178302Skmacy 1164237263Snp so = inp->inp_socket; 1165237263Snp so_rcv = &so->so_rcv; 1166237263Snp SOCKBUF_LOCK(so_rcv); 1167174641Skmacy 1168237263Snp if (__predict_false(so_rcv->sb_state & SBS_CANTRCVMORE)) { 1169237263Snp CTR3(KTR_CXGB, "%s: tid %u, excess rx (%d bytes)", 1170237263Snp __func__, tid, m->m_pkthdr.len); 1171237263Snp SOCKBUF_UNLOCK(so_rcv); 1172237263Snp INP_WUNLOCK(inp); 1173174641Skmacy 1174237263Snp INP_INFO_WLOCK(&V_tcbinfo); 1175237263Snp INP_WLOCK(inp); 1176237263Snp tp = tcp_drop(tp, ECONNRESET); 1177237263Snp if (tp) 1178237263Snp INP_WUNLOCK(inp); 1179237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1180174641Skmacy 1181237263Snp m_freem(m); 1182237263Snp return (0); 1183174641Skmacy } 1184174641Skmacy 1185237263Snp /* receive buffer autosize */ 1186237263Snp if (so_rcv->sb_flags & SB_AUTOSIZE && 1187237263Snp V_tcp_do_autorcvbuf && 1188237263Snp so_rcv->sb_hiwat < V_tcp_autorcvbuf_max && 1189237263Snp (m->m_pkthdr.len > (sbspace(so_rcv) / 8 * 7) || tp->rcv_wnd < 32768)) { 1190237263Snp unsigned int hiwat = so_rcv->sb_hiwat; 1191237263Snp unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, 1192237263Snp V_tcp_autorcvbuf_max); 1193174641Skmacy 1194237263Snp if (!sbreserve_locked(so_rcv, newsize, so, NULL)) 1195237263Snp so_rcv->sb_flags &= ~SB_AUTOSIZE; 1196237263Snp else 1197237263Snp toep->tp_rx_credits += newsize - hiwat; 1198174641Skmacy } 1199176472Skmacy 1200237263Snp toep->tp_enqueued += m->m_pkthdr.len; 1201237263Snp sbappendstream_locked(so_rcv, m); 1202237263Snp sorwakeup_locked(so); 1203237263Snp SOCKBUF_UNLOCK_ASSERT(so_rcv); 1204178302Skmacy 1205237263Snp INP_WUNLOCK(inp); 1206174641Skmacy return (0); 1207174641Skmacy} 1208174641Skmacy 1209174641Skmacy/* 1210237263Snp * Handler for PEER_CLOSE CPL messages. 1211174641Skmacy */ 1212176472Skmacystatic int 1213237263Snpdo_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1214176472Skmacy{ 1215237263Snp struct adapter *sc = qs->adap; 1216237263Snp struct tom_data *td = sc->tom_softc; 1217237263Snp const struct cpl_peer_close *hdr = mtod(m, void *); 1218237263Snp unsigned int tid = GET_TID(hdr); 1219237263Snp struct toepcb *toep = lookup_tid(&td->tid_maps, tid); 1220237263Snp struct inpcb *inp = toep->tp_inp; 1221237263Snp struct tcpcb *tp; 1222237263Snp struct socket *so; 1223176472Skmacy 1224237263Snp INP_INFO_WLOCK(&V_tcbinfo); 1225237263Snp INP_WLOCK(inp); 1226237263Snp tp = intotcpcb(inp); 1227176472Skmacy 1228237263Snp CTR5(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, 1229237263Snp tid, tp ? tcpstates[tp->t_state] : "no tp" , toep->tp_flags, inp); 1230176472Skmacy 1231237263Snp if (toep->tp_flags & TP_ABORT_RPL_PENDING) 1232237263Snp goto done; 1233178302Skmacy 1234237263Snp so = inp_inpcbtosocket(inp); 1235178302Skmacy 1236237263Snp socantrcvmore(so); 1237237263Snp tp->rcv_nxt++; 1238176472Skmacy 1239174641Skmacy switch (tp->t_state) { 1240174641Skmacy case TCPS_SYN_RECEIVED: 1241237263Snp tp->t_starttime = ticks; 1242237263Snp /* FALLTHROUGH */ 1243174641Skmacy case TCPS_ESTABLISHED: 1244174641Skmacy tp->t_state = TCPS_CLOSE_WAIT; 1245174641Skmacy break; 1246174641Skmacy case TCPS_FIN_WAIT_1: 1247174641Skmacy tp->t_state = TCPS_CLOSING; 1248174641Skmacy break; 1249174641Skmacy case TCPS_FIN_WAIT_2: 1250237263Snp tcp_twstart(tp); 1251237263Snp INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1252237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1253237263Snp 1254237263Snp INP_WLOCK(inp); 1255237263Snp toepcb_release(toep); /* no more CPLs expected */ 1256237263Snp 1257237263Snp m_freem(m); 1258237263Snp return (0); 1259174641Skmacy default: 1260237263Snp log(LOG_ERR, "%s: TID %u received PEER_CLOSE in bad state %d\n", 1261237263Snp __func__, toep->tp_tid, tp->t_state); 1262174641Skmacy } 1263174641Skmacy 1264237263Snpdone: 1265237263Snp INP_WUNLOCK(inp); 1266237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1267178767Skmacy 1268237263Snp m_freem(m); 1269237263Snp return (0); 1270174641Skmacy} 1271174641Skmacy 1272174641Skmacy/* 1273237263Snp * Handler for CLOSE_CON_RPL CPL messages. peer ACK to our FIN received. 1274174641Skmacy */ 1275174641Skmacystatic int 1276237263Snpdo_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1277174641Skmacy{ 1278237263Snp struct adapter *sc = qs->adap; 1279237263Snp struct tom_data *td = sc->tom_softc; 1280237263Snp const struct cpl_close_con_rpl *rpl = mtod(m, void *); 1281237263Snp unsigned int tid = GET_TID(rpl); 1282237263Snp struct toepcb *toep = lookup_tid(&td->tid_maps, tid); 1283237263Snp struct inpcb *inp = toep->tp_inp; 1284237263Snp struct tcpcb *tp; 1285237263Snp struct socket *so; 1286174641Skmacy 1287237263Snp INP_INFO_WLOCK(&V_tcbinfo); 1288237263Snp INP_WLOCK(inp); 1289237263Snp tp = intotcpcb(inp); 1290174641Skmacy 1291237263Snp CTR4(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid, 1292237263Snp tp ? tcpstates[tp->t_state] : "no tp", toep->tp_flags); 1293174641Skmacy 1294237263Snp if ((toep->tp_flags & TP_ABORT_RPL_PENDING)) 1295237263Snp goto done; 1296237263Snp 1297237263Snp so = inp_inpcbtosocket(inp); 1298174641Skmacy tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */ 1299174641Skmacy 1300174641Skmacy switch (tp->t_state) { 1301237263Snp case TCPS_CLOSING: 1302237263Snp tcp_twstart(tp); 1303237263Snprelease: 1304237263Snp INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1305237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1306174641Skmacy 1307237263Snp INP_WLOCK(inp); 1308237263Snp toepcb_release(toep); /* no more CPLs expected */ 1309237263Snp 1310237263Snp m_freem(m); 1311237263Snp return (0); 1312174641Skmacy case TCPS_LAST_ACK: 1313237263Snp if (tcp_close(tp)) 1314237263Snp INP_WUNLOCK(inp); 1315237263Snp goto release; 1316237263Snp 1317174641Skmacy case TCPS_FIN_WAIT_1: 1318237263Snp if (so->so_rcv.sb_state & SBS_CANTRCVMORE) 1319237263Snp soisdisconnected(so); 1320176472Skmacy tp->t_state = TCPS_FIN_WAIT_2; 1321174641Skmacy break; 1322174641Skmacy default: 1323174641Skmacy log(LOG_ERR, 1324237263Snp "%s: TID %u received CLOSE_CON_RPL in bad state %d\n", 1325237263Snp __func__, toep->tp_tid, tp->t_state); 1326174641Skmacy } 1327178302Skmacy 1328237263Snpdone: 1329237263Snp INP_WUNLOCK(inp); 1330237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1331178302Skmacy 1332176472Skmacy m_freem(m); 1333237263Snp return (0); 1334174641Skmacy} 1335174641Skmacy 1336174641Skmacystatic int 1337237263Snpdo_smt_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1338174641Skmacy{ 1339237263Snp struct cpl_smt_write_rpl *rpl = mtod(m, void *); 1340174641Skmacy 1341237263Snp if (rpl->status != CPL_ERR_NONE) { 1342237263Snp log(LOG_ERR, 1343237263Snp "Unexpected SMT_WRITE_RPL status %u for entry %u\n", 1344237263Snp rpl->status, GET_TID(rpl)); 1345237263Snp } 1346237263Snp 1347237263Snp m_freem(m); 1348174641Skmacy return (0); 1349174641Skmacy} 1350174641Skmacy 1351237263Snpstatic int 1352237263Snpdo_set_tcb_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1353174641Skmacy{ 1354237263Snp struct cpl_set_tcb_rpl *rpl = mtod(m, void *); 1355237263Snp 1356237263Snp if (rpl->status != CPL_ERR_NONE) { 1357237263Snp log(LOG_ERR, "Unexpected SET_TCB_RPL status %u for tid %u\n", 1358237263Snp rpl->status, GET_TID(rpl)); 1359174641Skmacy } 1360174641Skmacy 1361237263Snp m_freem(m); 1362237263Snp return (0); 1363174641Skmacy} 1364174641Skmacy 1365174641Skmacy/* 1366174641Skmacy * Handle an ABORT_RPL_RSS CPL message. 1367174641Skmacy */ 1368174641Skmacystatic int 1369237263Snpdo_abort_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1370174641Skmacy{ 1371237263Snp struct adapter *sc = qs->adap; 1372237263Snp struct tom_data *td = sc->tom_softc; 1373237263Snp const struct cpl_abort_rpl_rss *rpl = mtod(m, void *); 1374237263Snp unsigned int tid = GET_TID(rpl); 1375237263Snp struct toepcb *toep = lookup_tid(&td->tid_maps, tid); 1376237263Snp struct inpcb *inp; 1377237263Snp 1378174641Skmacy /* 1379174641Skmacy * Ignore replies to post-close aborts indicating that the abort was 1380174641Skmacy * requested too late. These connections are terminated when we get 1381174641Skmacy * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss 1382174641Skmacy * arrives the TID is either no longer used or it has been recycled. 1383174641Skmacy */ 1384174641Skmacy if (rpl->status == CPL_ERR_ABORT_FAILED) { 1385237263Snp m_freem(m); 1386174641Skmacy return (0); 1387174641Skmacy } 1388174641Skmacy 1389237263Snp if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY) 1390237263Snp return (do_abort_rpl_synqe(qs, r, m)); 1391174641Skmacy 1392237263Snp CTR4(KTR_CXGB, "%s: tid %d, toep %p, status %d", __func__, tid, toep, 1393237263Snp rpl->status); 1394174641Skmacy 1395237263Snp inp = toep->tp_inp; 1396237263Snp INP_WLOCK(inp); 1397237263Snp 1398237263Snp if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 1399237263Snp if (!(toep->tp_flags & TP_ABORT_RPL_RCVD)) { 1400237263Snp toep->tp_flags |= TP_ABORT_RPL_RCVD; 1401237263Snp INP_WUNLOCK(inp); 1402237263Snp } else { 1403237263Snp toep->tp_flags &= ~TP_ABORT_RPL_RCVD; 1404237263Snp toep->tp_flags &= TP_ABORT_RPL_PENDING; 1405237263Snp toepcb_release(toep); /* no more CPLs expected */ 1406237263Snp } 1407174641Skmacy } 1408174641Skmacy 1409237263Snp m_freem(m); 1410174641Skmacy return (0); 1411174641Skmacy} 1412174641Skmacy 1413174641Skmacy/* 1414237263Snp * Convert the status code of an ABORT_REQ into a FreeBSD error code. 1415174641Skmacy */ 1416174641Skmacystatic int 1417237263Snpabort_status_to_errno(struct tcpcb *tp, int abort_reason) 1418174641Skmacy{ 1419174641Skmacy switch (abort_reason) { 1420174641Skmacy case CPL_ERR_BAD_SYN: 1421174641Skmacy case CPL_ERR_CONN_RESET: 1422174641Skmacy return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); 1423174641Skmacy case CPL_ERR_XMIT_TIMEDOUT: 1424174641Skmacy case CPL_ERR_PERSIST_TIMEDOUT: 1425174641Skmacy case CPL_ERR_FINWAIT2_TIMEDOUT: 1426174641Skmacy case CPL_ERR_KEEPALIVE_TIMEDOUT: 1427174641Skmacy return (ETIMEDOUT); 1428174641Skmacy default: 1429174641Skmacy return (EIO); 1430174641Skmacy } 1431174641Skmacy} 1432174641Skmacy 1433174641Skmacy/* 1434174641Skmacy * Returns whether an ABORT_REQ_RSS message is a negative advice. 1435174641Skmacy */ 1436174641Skmacystatic inline int 1437174641Skmacyis_neg_adv_abort(unsigned int status) 1438174641Skmacy{ 1439174641Skmacy return status == CPL_ERR_RTX_NEG_ADVICE || 1440174641Skmacy status == CPL_ERR_PERSIST_NEG_ADVICE; 1441174641Skmacy} 1442174641Skmacy 1443237263Snpvoid 1444237263Snpsend_abort_rpl(struct toedev *tod, int tid, int qset) 1445174641Skmacy{ 1446237263Snp struct mbuf *reply; 1447237263Snp struct cpl_abort_rpl *rpl; 1448237263Snp struct adapter *sc = tod->tod_softc; 1449174641Skmacy 1450237263Snp reply = M_GETHDR_OFLD(qset, CPL_PRIORITY_DATA, rpl); 1451237263Snp if (!reply) 1452237263Snp CXGB_UNIMPLEMENTED(); 1453174641Skmacy 1454237263Snp rpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); 1455237263Snp rpl->wr.wrh_lo = htonl(V_WR_TID(tid)); 1456237263Snp OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); 1457237263Snp rpl->cmd = CPL_ABORT_NO_RST; 1458174641Skmacy 1459237263Snp t3_offload_tx(sc, reply); 1460174641Skmacy} 1461174641Skmacy 1462174641Skmacy/* 1463237263Snp * Handle an ABORT_REQ_RSS CPL message. If we're waiting for an ABORT_RPL we 1464237263Snp * ignore this request except that we need to reply to it. 1465174641Skmacy */ 1466174641Skmacystatic int 1467237263Snpdo_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1468174641Skmacy{ 1469237263Snp struct adapter *sc = qs->adap; 1470237263Snp struct tom_data *td = sc->tom_softc; 1471237263Snp struct toedev *tod = &td->tod; 1472237263Snp const struct cpl_abort_req_rss *req = mtod(m, void *); 1473237263Snp unsigned int tid = GET_TID(req); 1474237263Snp struct toepcb *toep = lookup_tid(&td->tid_maps, tid); 1475237263Snp struct inpcb *inp; 1476237263Snp struct tcpcb *tp; 1477237263Snp struct socket *so; 1478237263Snp int qset = toep->tp_qset; 1479174641Skmacy 1480237263Snp if (is_neg_adv_abort(req->status)) { 1481237263Snp CTR4(KTR_CXGB, "%s: negative advice %d for tid %u (%x)", 1482237263Snp __func__, req->status, tid, toep->tp_flags); 1483237263Snp m_freem(m); 1484237263Snp return (0); 1485174641Skmacy } 1486174641Skmacy 1487237263Snp if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY) 1488237263Snp return (do_abort_req_synqe(qs, r, m)); 1489178302Skmacy 1490237263Snp inp = toep->tp_inp; 1491237263Snp INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */ 1492237263Snp INP_WLOCK(inp); 1493174641Skmacy 1494237263Snp tp = intotcpcb(inp); 1495237263Snp so = inp->inp_socket; 1496178302Skmacy 1497237263Snp CTR6(KTR_CXGB, "%s: tid %u (%s), toep %p (%x), status %d", 1498237263Snp __func__, tid, tcpstates[tp->t_state], toep, toep->tp_flags, 1499237263Snp req->status); 1500178302Skmacy 1501237263Snp if (!(toep->tp_flags & TP_ABORT_REQ_RCVD)) { 1502174641Skmacy toep->tp_flags |= TP_ABORT_REQ_RCVD; 1503237263Snp toep->tp_flags |= TP_ABORT_SHUTDOWN; 1504237263Snp INP_WUNLOCK(inp); 1505237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1506237263Snp m_freem(m); 1507174641Skmacy return (0); 1508174641Skmacy } 1509237263Snp toep->tp_flags &= ~TP_ABORT_REQ_RCVD; 1510174641Skmacy 1511174641Skmacy /* 1512237263Snp * If we'd sent a reset on this toep, we'll ignore this and clean up in 1513237263Snp * the T3's reply to our reset instead. 1514174641Skmacy */ 1515174641Skmacy if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 1516237263Snp toep->tp_flags |= TP_ABORT_RPL_SENT; 1517237263Snp INP_WUNLOCK(inp); 1518237263Snp } else { 1519237263Snp so_error_set(so, abort_status_to_errno(tp, req->status)); 1520237263Snp tp = tcp_close(tp); 1521237263Snp if (tp == NULL) 1522237263Snp INP_WLOCK(inp); /* re-acquire */ 1523237263Snp toepcb_release(toep); /* no more CPLs expected */ 1524174641Skmacy } 1525237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1526174641Skmacy 1527237263Snp send_abort_rpl(tod, tid, qset); 1528237263Snp m_freem(m); 1529237263Snp return (0); 1530174641Skmacy} 1531174641Skmacy 1532174641Skmacystatic void 1533237263Snpassign_rxopt(struct tcpcb *tp, uint16_t tcpopt) 1534174641Skmacy{ 1535237263Snp struct toepcb *toep = tp->t_toe; 1536237263Snp struct adapter *sc = toep->tp_tod->tod_softc; 1537174641Skmacy 1538237263Snp tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40; 1539174641Skmacy 1540237263Snp if (G_TCPOPT_TSTAMP(tcpopt)) { 1541237263Snp tp->t_flags |= TF_RCVD_TSTMP; 1542237263Snp tp->t_flags |= TF_REQ_TSTMP; /* forcibly set */ 1543237263Snp tp->ts_recent = 0; /* XXX */ 1544237263Snp tp->ts_recent_age = tcp_ts_getticks(); 1545237263Snp tp->t_maxseg -= TCPOLEN_TSTAMP_APPA; 1546174641Skmacy } 1547174641Skmacy 1548237263Snp if (G_TCPOPT_SACK(tcpopt)) 1549237263Snp tp->t_flags |= TF_SACK_PERMIT; 1550237263Snp else 1551237263Snp tp->t_flags &= ~TF_SACK_PERMIT; 1552174641Skmacy 1553237263Snp if (G_TCPOPT_WSCALE_OK(tcpopt)) 1554237263Snp tp->t_flags |= TF_RCVD_SCALE; 1555176472Skmacy 1556237263Snp if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == 1557237263Snp (TF_RCVD_SCALE | TF_REQ_SCALE)) { 1558237263Snp tp->rcv_scale = tp->request_r_scale; 1559237263Snp tp->snd_scale = G_TCPOPT_SND_WSCALE(tcpopt); 1560174641Skmacy } 1561174641Skmacy 1562174641Skmacy} 1563174641Skmacy 1564174641Skmacy/* 1565237263Snp * The ISS and IRS are from after the exchange of SYNs and are off by 1. 1566174641Skmacy */ 1567237263Snpvoid 1568237263Snpmake_established(struct socket *so, uint32_t cpl_iss, uint32_t cpl_irs, 1569237263Snp uint16_t cpl_tcpopt) 1570174641Skmacy{ 1571237263Snp struct inpcb *inp = sotoinpcb(so); 1572237263Snp struct tcpcb *tp = intotcpcb(inp); 1573174641Skmacy struct toepcb *toep = tp->t_toe; 1574237263Snp long bufsize; 1575237263Snp uint32_t iss = be32toh(cpl_iss) - 1; /* true ISS */ 1576237263Snp uint32_t irs = be32toh(cpl_irs) - 1; /* true IRS */ 1577237263Snp uint16_t tcpopt = be16toh(cpl_tcpopt); 1578174641Skmacy 1579237263Snp INP_WLOCK_ASSERT(inp); 1580174641Skmacy 1581237263Snp tp->t_state = TCPS_ESTABLISHED; 1582176472Skmacy tp->t_starttime = ticks; 1583237263Snp TCPSTAT_INC(tcps_connects); 1584174641Skmacy 1585237263Snp CTR4(KTR_CXGB, "%s tid %u, toep %p, inp %p", tcpstates[tp->t_state], 1586237263Snp toep->tp_tid, toep, inp); 1587174641Skmacy 1588237263Snp tp->irs = irs; 1589237263Snp tcp_rcvseqinit(tp); 1590237263Snp tp->rcv_wnd = toep->tp_rx_credits << 10; 1591237263Snp tp->rcv_adv += tp->rcv_wnd; 1592237263Snp tp->last_ack_sent = tp->rcv_nxt; 1593174641Skmacy 1594178302Skmacy /* 1595237263Snp * If we were unable to send all rx credits via opt0, save the remainder 1596237263Snp * in rx_credits so that they can be handed over with the next credit 1597237263Snp * update. 1598178302Skmacy */ 1599237263Snp SOCKBUF_LOCK(&so->so_rcv); 1600237263Snp bufsize = select_rcv_wnd(so); 1601237263Snp SOCKBUF_UNLOCK(&so->so_rcv); 1602237263Snp toep->tp_rx_credits = bufsize - tp->rcv_wnd; 1603178302Skmacy 1604237263Snp tp->iss = iss; 1605237263Snp tcp_sendseqinit(tp); 1606237263Snp tp->snd_una = iss + 1; 1607237263Snp tp->snd_nxt = iss + 1; 1608237263Snp tp->snd_max = iss + 1; 1609178302Skmacy 1610237263Snp assign_rxopt(tp, tcpopt); 1611237263Snp soisconnected(so); 1612174641Skmacy} 1613174641Skmacy 1614174641Skmacy/* 1615174641Skmacy * Fill in the right TID for CPL messages waiting in the out-of-order queue 1616174641Skmacy * and send them to the TOE. 1617174641Skmacy */ 1618174641Skmacystatic void 1619178302Skmacyfixup_and_send_ofo(struct toepcb *toep) 1620174641Skmacy{ 1621174641Skmacy struct mbuf *m; 1622237263Snp struct toedev *tod = toep->tp_tod; 1623237263Snp struct adapter *sc = tod->tod_softc; 1624237263Snp struct inpcb *inp = toep->tp_inp; 1625174641Skmacy unsigned int tid = toep->tp_tid; 1626174641Skmacy 1627237263Snp inp_lock_assert(inp); 1628237263Snp 1629174641Skmacy while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) { 1630237263Snp struct ofld_hdr *oh = mtod(m, void *); 1631174641Skmacy /* 1632174641Skmacy * A variety of messages can be waiting but the fields we'll 1633174641Skmacy * be touching are common to all so any message type will do. 1634174641Skmacy */ 1635237263Snp struct cpl_close_con_req *p = (void *)(oh + 1); 1636174641Skmacy 1637237263Snp p->wr.wrh_lo = htonl(V_WR_TID(tid)); 1638174641Skmacy OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid)); 1639237263Snp t3_offload_tx(sc, m); 1640174641Skmacy } 1641174641Skmacy} 1642174641Skmacy 1643174641Skmacy/* 1644237263Snp * Process a CPL_ACT_ESTABLISH message. 1645174641Skmacy */ 1646237263Snpstatic int 1647237263Snpdo_act_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1648174641Skmacy{ 1649237263Snp struct adapter *sc = qs->adap; 1650237263Snp struct tom_data *td = sc->tom_softc; 1651237263Snp struct cpl_act_establish *req = mtod(m, void *); 1652237263Snp unsigned int tid = GET_TID(req); 1653237263Snp unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); 1654237263Snp struct toepcb *toep = lookup_atid(&td->tid_maps, atid); 1655237263Snp struct inpcb *inp = toep->tp_inp; 1656237263Snp struct tcpcb *tp; 1657237263Snp struct socket *so; 1658174641Skmacy 1659237263Snp CTR3(KTR_CXGB, "%s: atid %u, tid %u", __func__, atid, tid); 1660174641Skmacy 1661237263Snp free_atid(&td->tid_maps, atid); 1662174641Skmacy 1663237263Snp INP_WLOCK(inp); 1664237263Snp tp = intotcpcb(inp); 1665174641Skmacy 1666237263Snp KASSERT(toep->tp_qset == qs->idx, 1667237263Snp ("%s qset mismatch %d %d", __func__, toep->tp_qset, qs->idx)); 1668237263Snp KASSERT(toep->tp_tid == atid, 1669237263Snp ("%s atid mismatch %d %d", __func__, toep->tp_tid, atid)); 1670174641Skmacy 1671237263Snp toep->tp_tid = tid; 1672237263Snp insert_tid(td, toep, tid); 1673237263Snp 1674237263Snp if (inp->inp_flags & INP_DROPPED) { 1675237263Snp /* socket closed by the kernel before hw told us it connected */ 1676237263Snp send_reset(toep); 1677237263Snp goto done; 1678174641Skmacy } 1679178302Skmacy 1680237263Snp KASSERT(tp->t_state == TCPS_SYN_SENT, 1681237263Snp ("TID %u expected TCPS_SYN_SENT, found %d.", tid, tp->t_state)); 1682178302Skmacy 1683237263Snp so = inp->inp_socket; 1684237263Snp make_established(so, req->snd_isn, req->rcv_isn, req->tcp_opt); 1685237263Snp 1686178302Skmacy /* 1687237263Snp * Now that we finally have a TID send any CPL messages that we had to 1688237263Snp * defer for lack of a TID. 1689174641Skmacy */ 1690237263Snp if (mbufq_len(&toep->out_of_order_queue)) 1691237263Snp fixup_and_send_ofo(toep); 1692174641Skmacy 1693237263Snpdone: 1694237263Snp INP_WUNLOCK(inp); 1695237263Snp m_freem(m); 1696174641Skmacy return (0); 1697174641Skmacy} 1698174641Skmacy 1699174641Skmacy/* 1700174641Skmacy * Process an acknowledgment of WR completion. Advance snd_una and send the 1701174641Skmacy * next batch of work requests from the write queue. 1702174641Skmacy */ 1703174641Skmacystatic void 1704174641Skmacywr_ack(struct toepcb *toep, struct mbuf *m) 1705174641Skmacy{ 1706237263Snp struct inpcb *inp = toep->tp_inp; 1707237263Snp struct tcpcb *tp; 1708237263Snp struct cpl_wr_ack *hdr = mtod(m, void *); 1709178302Skmacy struct socket *so; 1710174641Skmacy unsigned int credits = ntohs(hdr->credits); 1711174641Skmacy u32 snd_una = ntohl(hdr->snd_una); 1712174641Skmacy int bytes = 0; 1713178302Skmacy struct sockbuf *snd; 1714237263Snp struct mbuf *p; 1715237263Snp struct ofld_hdr *oh; 1716174641Skmacy 1717237263Snp inp_wlock(inp); 1718237263Snp tp = intotcpcb(inp); 1719237263Snp so = inp->inp_socket; 1720174641Skmacy toep->tp_wr_avail += credits; 1721174641Skmacy if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) 1722174641Skmacy toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; 1723174641Skmacy 1724174641Skmacy while (credits) { 1725237263Snp p = peek_wr(toep); 1726237263Snp 1727174641Skmacy if (__predict_false(!p)) { 1728237263Snp CTR5(KTR_CXGB, "%s: %u extra WR_ACK credits, " 1729237263Snp "tid %u, state %u, wr_avail %u", __func__, credits, 1730237263Snp toep->tp_tid, tp->t_state, toep->tp_wr_avail); 1731237263Snp 1732174641Skmacy log(LOG_ERR, "%u WR_ACK credits for TID %u with " 1733176472Skmacy "nothing pending, state %u wr_avail=%u\n", 1734176472Skmacy credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail); 1735174641Skmacy break; 1736174641Skmacy } 1737176472Skmacy 1738237263Snp oh = mtod(p, struct ofld_hdr *); 1739176472Skmacy 1740237263Snp KASSERT(credits >= G_HDR_NDESC(oh->flags), 1741237263Snp ("%s: partial credits? %d %d", __func__, credits, 1742237263Snp G_HDR_NDESC(oh->flags))); 1743174641Skmacy 1744237263Snp dequeue_wr(toep); 1745237263Snp credits -= G_HDR_NDESC(oh->flags); 1746237263Snp bytes += oh->plen; 1747174641Skmacy 1748237263Snp if (oh->flags & F_HDR_SGL) 1749237263Snp sglist_free(oh->sgl); 1750237263Snp m_freem(p); 1751237263Snp } 1752174641Skmacy 1753237263Snp if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) 1754174641Skmacy goto out_free; 1755174641Skmacy 1756174641Skmacy if (tp->snd_una != snd_una) { 1757174641Skmacy tp->snd_una = snd_una; 1758237263Snp tp->ts_recent_age = tcp_ts_getticks(); 1759174641Skmacy if (tp->snd_una == tp->snd_nxt) 1760174641Skmacy toep->tp_flags &= ~TP_TX_WAIT_IDLE; 1761174641Skmacy } 1762178302Skmacy 1763178302Skmacy snd = so_sockbuf_snd(so); 1764174641Skmacy if (bytes) { 1765237263Snp SOCKBUF_LOCK(snd); 1766178302Skmacy sbdrop_locked(snd, bytes); 1767178302Skmacy so_sowwakeup_locked(so); 1768174641Skmacy } 1769178302Skmacy 1770178302Skmacy if (snd->sb_sndptroff < snd->sb_cc) 1771174641Skmacy t3_push_frames(so, 0); 1772174641Skmacy 1773174641Skmacyout_free: 1774177530Skmacy inp_wunlock(tp->t_inpcb); 1775237263Snp m_freem(m); 1776174641Skmacy} 1777174641Skmacy 1778174641Skmacy/* 1779174641Skmacy * Handler for TX_DATA_ACK CPL messages. 1780174641Skmacy */ 1781174641Skmacystatic int 1782237263Snpdo_wr_ack(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m) 1783174641Skmacy{ 1784237263Snp struct adapter *sc = qs->adap; 1785237263Snp struct tom_data *td = sc->tom_softc; 1786237263Snp struct cpl_wr_ack *hdr = mtod(m, void *); 1787237263Snp unsigned int tid = GET_TID(hdr); 1788237263Snp struct toepcb *toep = lookup_tid(&td->tid_maps, tid); 1789174641Skmacy 1790237263Snp /* XXX bad race */ 1791237263Snp if (toep) 1792237263Snp wr_ack(toep, m); 1793174641Skmacy 1794176472Skmacy return (0); 1795176472Skmacy} 1796176472Skmacy 1797174641Skmacyvoid 1798237263Snpt3_init_cpl_io(struct adapter *sc) 1799176472Skmacy{ 1800237263Snp t3_register_cpl_handler(sc, CPL_ACT_ESTABLISH, do_act_establish); 1801237263Snp t3_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl); 1802237263Snp t3_register_cpl_handler(sc, CPL_RX_URG_NOTIFY, do_rx_urg_notify); 1803237263Snp t3_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data); 1804237263Snp t3_register_cpl_handler(sc, CPL_TX_DMA_ACK, do_wr_ack); 1805237263Snp t3_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close); 1806237263Snp t3_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req); 1807237263Snp t3_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl); 1808237263Snp t3_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl); 1809237263Snp t3_register_cpl_handler(sc, CPL_SMT_WRITE_RPL, do_smt_write_rpl); 1810237263Snp t3_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl); 1811176472Skmacy} 1812176472Skmacy#endif 1813