1237263Snp/*- 2237263Snp * Copyright (c) 2012 Chelsio Communications, Inc. 3237263Snp * All rights reserved. 4237263Snp * Written by: Navdeep Parhar <np@FreeBSD.org> 5237263Snp * 6237263Snp * Redistribution and use in source and binary forms, with or without 7237263Snp * modification, are permitted provided that the following conditions 8237263Snp * are met: 9237263Snp * 1. Redistributions of source code must retain the above copyright 10237263Snp * notice, this list of conditions and the following disclaimer. 11237263Snp * 2. Redistributions in binary form must reproduce the above copyright 12237263Snp * notice, this list of conditions and the following disclaimer in the 13237263Snp * documentation and/or other materials provided with the distribution. 14237263Snp * 15237263Snp * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16237263Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17237263Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18237263Snp * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19237263Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20237263Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21237263Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22237263Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23237263Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24237263Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25237263Snp * SUCH DAMAGE. 26237263Snp */ 27237263Snp 28237263Snp#include <sys/cdefs.h> 29237263Snp__FBSDID("$FreeBSD$"); 30237263Snp 31237263Snp#include "opt_inet.h" 32245468Snp#include "opt_inet6.h" 33237263Snp 34237263Snp#ifdef TCP_OFFLOAD 35237263Snp#include <sys/param.h> 36237263Snp#include <sys/types.h> 37237263Snp#include <sys/kernel.h> 38237263Snp#include <sys/ktr.h> 39237263Snp#include <sys/module.h> 40237263Snp#include <sys/protosw.h> 41237263Snp#include <sys/refcount.h> 42237263Snp#include <sys/domain.h> 43237263Snp#include <sys/fnv_hash.h> 44237263Snp#include <sys/socket.h> 45237263Snp#include <sys/socketvar.h> 46237263Snp#include <net/ethernet.h> 47237263Snp#include <net/if.h> 48237263Snp#include <net/if_types.h> 49237263Snp#include <net/if_vlan_var.h> 50237263Snp#include <net/route.h> 51237263Snp#include <netinet/in.h> 52237263Snp#include <netinet/in_pcb.h> 53237263Snp#include <netinet/ip.h> 54245468Snp#include <netinet/ip6.h> 55245468Snp#include <netinet6/scope6_var.h> 56239544Snp#include <netinet/tcp_timer.h> 57237263Snp#include <netinet/tcp_var.h> 58237263Snp#define TCPSTATES 59237263Snp#include <netinet/tcp_fsm.h> 60237263Snp#include <netinet/toecore.h> 61237263Snp 62237263Snp#include "common/common.h" 63237263Snp#include "common/t4_msg.h" 64237263Snp#include "common/t4_regs.h" 65237263Snp#include "tom/t4_tom_l2t.h" 66237263Snp#include "tom/t4_tom.h" 67237263Snp 68237263Snp/* stid services */ 69245276Snpstatic int alloc_stid(struct adapter *, struct listen_ctx *, int); 70245276Snpstatic struct listen_ctx *lookup_stid(struct adapter *, int); 71245276Snpstatic void free_stid(struct adapter *, struct listen_ctx *); 72237263Snp 73237263Snp/* lctx services */ 74237263Snpstatic struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *, 75237263Snp struct port_info *); 76237263Snpstatic int free_lctx(struct adapter *, struct listen_ctx *); 77237263Snpstatic void hold_lctx(struct listen_ctx *); 78237263Snpstatic void listen_hash_add(struct adapter *, struct listen_ctx *); 79237263Snpstatic struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *); 80237263Snpstatic struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *); 81237263Snpstatic struct inpcb *release_lctx(struct adapter *, struct listen_ctx *); 82237263Snp 83237263Snpstatic inline void save_qids_in_mbuf(struct mbuf *, struct port_info *); 84237263Snpstatic inline void get_qids_from_mbuf(struct mbuf *m, int *, int *); 85237263Snpstatic void send_reset_synqe(struct toedev *, struct synq_entry *); 86237263Snp 87237263Snpstatic int 88245276Snpalloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6) 89237263Snp{ 90237263Snp struct tid_info *t = &sc->tids; 91245276Snp u_int stid, n, f, mask; 92245276Snp struct stid_region *sr = &lctx->stid_region; 93237263Snp 94245276Snp /* 95245276Snp * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in 96245276Snp * the TCAM. The start of the stid region is properly aligned (the chip 97245276Snp * requires each region to be 128-cell aligned). 98245276Snp */ 99245276Snp n = isipv6 ? 2 : 1; 100245276Snp mask = n - 1; 101245276Snp KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0, 102245276Snp ("%s: stid region (%u, %u) not properly aligned. n = %u", 103245276Snp __func__, t->stid_base, t->nstids, n)); 104245276Snp 105237263Snp mtx_lock(&t->stid_lock); 106245276Snp if (n > t->nstids - t->stids_in_use) { 107245276Snp mtx_unlock(&t->stid_lock); 108245276Snp return (-1); 109245276Snp } 110237263Snp 111245276Snp if (t->nstids_free_head >= n) { 112245276Snp /* 113245276Snp * This allocation will definitely succeed because the region 114245276Snp * starts at a good alignment and we just checked we have enough 115245276Snp * stids free. 116245276Snp */ 117245276Snp f = t->nstids_free_head & mask; 118245276Snp t->nstids_free_head -= n + f; 119245276Snp stid = t->nstids_free_head; 120245276Snp TAILQ_INSERT_HEAD(&t->stids, sr, link); 121245276Snp } else { 122245276Snp struct stid_region *s; 123245276Snp 124245276Snp stid = t->nstids_free_head; 125245276Snp TAILQ_FOREACH(s, &t->stids, link) { 126245276Snp stid += s->used + s->free; 127245276Snp f = stid & mask; 128251518Snp if (s->free >= n + f) { 129245276Snp stid -= n + f; 130245276Snp s->free -= n + f; 131245276Snp TAILQ_INSERT_AFTER(&t->stids, s, sr, link); 132245276Snp goto allocated; 133245276Snp } 134245276Snp } 135245276Snp 136245276Snp if (__predict_false(stid != t->nstids)) { 137245276Snp panic("%s: stids TAILQ (%p) corrupt." 138245276Snp " At %d instead of %d at the end of the queue.", 139245276Snp __func__, &t->stids, stid, t->nstids); 140245276Snp } 141245276Snp 142245276Snp mtx_unlock(&t->stid_lock); 143245276Snp return (-1); 144237263Snp } 145245276Snp 146245276Snpallocated: 147245276Snp sr->used = n; 148245276Snp sr->free = f; 149245276Snp t->stids_in_use += n; 150245276Snp t->stid_tab[stid] = lctx; 151237263Snp mtx_unlock(&t->stid_lock); 152245276Snp 153245276Snp KASSERT(((stid + t->stid_base) & mask) == 0, 154245276Snp ("%s: EDOOFUS.", __func__)); 155245276Snp return (stid + t->stid_base); 156237263Snp} 157237263Snp 158245276Snpstatic struct listen_ctx * 159237263Snplookup_stid(struct adapter *sc, int stid) 160237263Snp{ 161237263Snp struct tid_info *t = &sc->tids; 162237263Snp 163245276Snp return (t->stid_tab[stid - t->stid_base]); 164237263Snp} 165237263Snp 166237263Snpstatic void 167245276Snpfree_stid(struct adapter *sc, struct listen_ctx *lctx) 168237263Snp{ 169237263Snp struct tid_info *t = &sc->tids; 170245276Snp struct stid_region *sr = &lctx->stid_region; 171245276Snp struct stid_region *s; 172237263Snp 173245276Snp KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used)); 174245276Snp 175237263Snp mtx_lock(&t->stid_lock); 176245276Snp s = TAILQ_PREV(sr, stid_head, link); 177245276Snp if (s != NULL) 178245276Snp s->free += sr->used + sr->free; 179245276Snp else 180245276Snp t->nstids_free_head += sr->used + sr->free; 181245276Snp KASSERT(t->stids_in_use >= sr->used, 182245276Snp ("%s: stids_in_use (%u) < stids being freed (%u)", __func__, 183245276Snp t->stids_in_use, sr->used)); 184245276Snp t->stids_in_use -= sr->used; 185245276Snp TAILQ_REMOVE(&t->stids, sr, link); 186237263Snp mtx_unlock(&t->stid_lock); 187237263Snp} 188237263Snp 189237263Snpstatic struct listen_ctx * 190237263Snpalloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi) 191237263Snp{ 192237263Snp struct listen_ctx *lctx; 193237263Snp 194237263Snp INP_WLOCK_ASSERT(inp); 195237263Snp 196237263Snp lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO); 197237263Snp if (lctx == NULL) 198237263Snp return (NULL); 199237263Snp 200245468Snp lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6); 201237263Snp if (lctx->stid < 0) { 202237263Snp free(lctx, M_CXGBE); 203237263Snp return (NULL); 204237263Snp } 205237263Snp 206259804Snp if (inp->inp_vflag & INP_IPV6 && 207259804Snp !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) { 208259804Snp struct tom_data *td = sc->tom_softc; 209259804Snp 210259804Snp lctx->ce = hold_lip(td, &inp->in6p_laddr); 211259804Snp if (lctx->ce == NULL) { 212259804Snp free(lctx, M_CXGBE); 213259804Snp return (NULL); 214259804Snp } 215259804Snp } 216259804Snp 217237263Snp lctx->ctrlq = &sc->sge.ctrlq[pi->port_id]; 218237263Snp lctx->ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq]; 219237263Snp refcount_init(&lctx->refcount, 1); 220237263Snp TAILQ_INIT(&lctx->synq); 221237263Snp 222237263Snp lctx->inp = inp; 223237263Snp in_pcbref(inp); 224237263Snp 225237263Snp return (lctx); 226237263Snp} 227237263Snp 228237263Snp/* Don't call this directly, use release_lctx instead */ 229237263Snpstatic int 230237263Snpfree_lctx(struct adapter *sc, struct listen_ctx *lctx) 231237263Snp{ 232237263Snp struct inpcb *inp = lctx->inp; 233259804Snp struct tom_data *td = sc->tom_softc; 234237263Snp 235237263Snp INP_WLOCK_ASSERT(inp); 236237263Snp KASSERT(lctx->refcount == 0, 237237263Snp ("%s: refcount %d", __func__, lctx->refcount)); 238237263Snp KASSERT(TAILQ_EMPTY(&lctx->synq), 239237263Snp ("%s: synq not empty.", __func__)); 240237263Snp KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid)); 241237263Snp 242237263Snp CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p", 243237263Snp __func__, lctx->stid, lctx, lctx->inp); 244237263Snp 245259804Snp if (lctx->ce) 246259804Snp release_lip(td, lctx->ce); 247245276Snp free_stid(sc, lctx); 248237263Snp free(lctx, M_CXGBE); 249237263Snp 250237263Snp return (in_pcbrele_wlocked(inp)); 251237263Snp} 252237263Snp 253237263Snpstatic void 254237263Snphold_lctx(struct listen_ctx *lctx) 255237263Snp{ 256237263Snp 257237263Snp refcount_acquire(&lctx->refcount); 258237263Snp} 259237263Snp 260237263Snpstatic inline uint32_t 261237263Snplisten_hashfn(void *key, u_long mask) 262237263Snp{ 263237263Snp 264237263Snp return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask); 265237263Snp} 266237263Snp 267237263Snp/* 268237263Snp * Add a listen_ctx entry to the listen hash table. 269237263Snp */ 270237263Snpstatic void 271237263Snplisten_hash_add(struct adapter *sc, struct listen_ctx *lctx) 272237263Snp{ 273237263Snp struct tom_data *td = sc->tom_softc; 274237263Snp int bucket = listen_hashfn(lctx->inp, td->listen_mask); 275237263Snp 276237263Snp mtx_lock(&td->lctx_hash_lock); 277237263Snp LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link); 278237263Snp td->lctx_count++; 279237263Snp mtx_unlock(&td->lctx_hash_lock); 280237263Snp} 281237263Snp 282237263Snp/* 283237263Snp * Look for the listening socket's context entry in the hash and return it. 284237263Snp */ 285237263Snpstatic struct listen_ctx * 286237263Snplisten_hash_find(struct adapter *sc, struct inpcb *inp) 287237263Snp{ 288237263Snp struct tom_data *td = sc->tom_softc; 289237263Snp int bucket = listen_hashfn(inp, td->listen_mask); 290237263Snp struct listen_ctx *lctx; 291237263Snp 292237263Snp mtx_lock(&td->lctx_hash_lock); 293237263Snp LIST_FOREACH(lctx, &td->listen_hash[bucket], link) { 294237263Snp if (lctx->inp == inp) 295237263Snp break; 296237263Snp } 297237263Snp mtx_unlock(&td->lctx_hash_lock); 298237263Snp 299237263Snp return (lctx); 300237263Snp} 301237263Snp 302237263Snp/* 303237263Snp * Removes the listen_ctx structure for inp from the hash and returns it. 304237263Snp */ 305237263Snpstatic struct listen_ctx * 306237263Snplisten_hash_del(struct adapter *sc, struct inpcb *inp) 307237263Snp{ 308237263Snp struct tom_data *td = sc->tom_softc; 309237263Snp int bucket = listen_hashfn(inp, td->listen_mask); 310237263Snp struct listen_ctx *lctx, *l; 311237263Snp 312237263Snp mtx_lock(&td->lctx_hash_lock); 313237263Snp LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) { 314237263Snp if (lctx->inp == inp) { 315237263Snp LIST_REMOVE(lctx, link); 316237263Snp td->lctx_count--; 317237263Snp break; 318237263Snp } 319237263Snp } 320237263Snp mtx_unlock(&td->lctx_hash_lock); 321237263Snp 322237263Snp return (lctx); 323237263Snp} 324237263Snp 325237263Snp/* 326237263Snp * Releases a hold on the lctx. Must be called with the listening socket's inp 327237263Snp * locked. The inp may be freed by this function and it returns NULL to 328237263Snp * indicate this. 329237263Snp */ 330237263Snpstatic struct inpcb * 331237263Snprelease_lctx(struct adapter *sc, struct listen_ctx *lctx) 332237263Snp{ 333237263Snp struct inpcb *inp = lctx->inp; 334237263Snp int inp_freed = 0; 335237263Snp 336237263Snp INP_WLOCK_ASSERT(inp); 337237263Snp if (refcount_release(&lctx->refcount)) 338237263Snp inp_freed = free_lctx(sc, lctx); 339237263Snp 340237263Snp return (inp_freed ? NULL : inp); 341237263Snp} 342237263Snp 343237263Snpstatic void 344237263Snpsend_reset_synqe(struct toedev *tod, struct synq_entry *synqe) 345237263Snp{ 346237263Snp struct adapter *sc = tod->tod_softc; 347237263Snp struct mbuf *m = synqe->syn; 348237263Snp struct ifnet *ifp = m->m_pkthdr.rcvif; 349237263Snp struct port_info *pi = ifp->if_softc; 350237263Snp struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; 351241626Snp struct wrqe *wr; 352241626Snp struct fw_flowc_wr *flowc; 353237263Snp struct cpl_abort_req *req; 354237263Snp int txqid, rxqid, flowclen; 355237263Snp struct sge_wrq *ofld_txq; 356237263Snp struct sge_ofld_rxq *ofld_rxq; 357241642Snp const int nparams = 6; 358237263Snp unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; 359237263Snp 360237263Snp INP_WLOCK_ASSERT(synqe->lctx->inp); 361237263Snp 362243680Snp CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s", 363243680Snp __func__, synqe, synqe->flags, synqe->tid, 364239514Snp synqe->flags & TPF_ABORT_SHUTDOWN ? 365237263Snp " (abort already in progress)" : ""); 366239514Snp if (synqe->flags & TPF_ABORT_SHUTDOWN) 367237263Snp return; /* abort already in progress */ 368239514Snp synqe->flags |= TPF_ABORT_SHUTDOWN; 369237263Snp 370237263Snp get_qids_from_mbuf(m, &txqid, &rxqid); 371237263Snp ofld_txq = &sc->sge.ofld_txq[txqid]; 372237263Snp ofld_rxq = &sc->sge.ofld_rxq[rxqid]; 373237263Snp 374237263Snp /* The wrqe will have two WRs - a flowc followed by an abort_req */ 375237263Snp flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); 376237263Snp 377248925Snp wr = alloc_wrqe(roundup2(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq); 378237263Snp if (wr == NULL) { 379237263Snp /* XXX */ 380237263Snp panic("%s: allocation failure.", __func__); 381237263Snp } 382237263Snp flowc = wrtod(wr); 383248925Snp req = (void *)((caddr_t)flowc + roundup2(flowclen, EQ_ESIZE)); 384237263Snp 385237263Snp /* First the flowc ... */ 386237263Snp memset(flowc, 0, wr->wr_len); 387237263Snp flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 388237263Snp V_FW_FLOWC_WR_NPARAMS(nparams)); 389237263Snp flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | 390237263Snp V_FW_WR_FLOWID(synqe->tid)); 391237263Snp flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 392241626Snp flowc->mnemval[0].val = htobe32(pfvf); 393241626Snp flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 394241626Snp flowc->mnemval[1].val = htobe32(pi->tx_chan); 395241626Snp flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 396241626Snp flowc->mnemval[2].val = htobe32(pi->tx_chan); 397241626Snp flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 398241626Snp flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id); 399241642Snp flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF; 400241642Snp flowc->mnemval[4].val = htobe32(512); 401241642Snp flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS; 402241642Snp flowc->mnemval[5].val = htobe32(512); 403239514Snp synqe->flags |= TPF_FLOWC_WR_SENT; 404237263Snp 405237263Snp /* ... then ABORT request */ 406237263Snp INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid); 407237263Snp req->rsvd0 = 0; /* don't have a snd_nxt */ 408237263Snp req->rsvd1 = 1; /* no data sent yet */ 409237263Snp req->cmd = CPL_ABORT_SEND_RST; 410237263Snp 411237263Snp t4_l2t_send(sc, wr, e); 412237263Snp} 413237263Snp 414237263Snpstatic int 415237263Snpcreate_server(struct adapter *sc, struct listen_ctx *lctx) 416237263Snp{ 417237263Snp struct wrqe *wr; 418237263Snp struct cpl_pass_open_req *req; 419245468Snp struct inpcb *inp = lctx->inp; 420237263Snp 421237263Snp wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); 422237263Snp if (wr == NULL) { 423237263Snp log(LOG_ERR, "%s: allocation failure", __func__); 424237263Snp return (ENOMEM); 425237263Snp } 426237263Snp req = wrtod(wr); 427237263Snp 428237263Snp INIT_TP_WR(req, 0); 429237263Snp OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid)); 430245468Snp req->local_port = inp->inp_lport; 431237263Snp req->peer_port = 0; 432245468Snp req->local_ip = inp->inp_laddr.s_addr; 433237263Snp req->peer_ip = 0; 434237263Snp req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); 435237263Snp req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | 436237263Snp F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); 437237263Snp 438237263Snp t4_wrq_tx(sc, wr); 439237263Snp return (0); 440237263Snp} 441237263Snp 442237263Snpstatic int 443245468Snpcreate_server6(struct adapter *sc, struct listen_ctx *lctx) 444245468Snp{ 445245468Snp struct wrqe *wr; 446245468Snp struct cpl_pass_open_req6 *req; 447245468Snp struct inpcb *inp = lctx->inp; 448245468Snp 449245468Snp wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); 450245468Snp if (wr == NULL) { 451245468Snp log(LOG_ERR, "%s: allocation failure", __func__); 452245468Snp return (ENOMEM); 453245468Snp } 454245468Snp req = wrtod(wr); 455245468Snp 456245468Snp INIT_TP_WR(req, 0); 457245468Snp OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid)); 458245468Snp req->local_port = inp->inp_lport; 459245468Snp req->peer_port = 0; 460245468Snp req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; 461245468Snp req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; 462245468Snp req->peer_ip_hi = 0; 463245468Snp req->peer_ip_lo = 0; 464245468Snp req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); 465245468Snp req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | 466245468Snp F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); 467245468Snp 468245468Snp t4_wrq_tx(sc, wr); 469245468Snp return (0); 470245468Snp} 471245468Snp 472245468Snpstatic int 473237263Snpdestroy_server(struct adapter *sc, struct listen_ctx *lctx) 474237263Snp{ 475237263Snp struct wrqe *wr; 476237263Snp struct cpl_close_listsvr_req *req; 477237263Snp 478237263Snp wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); 479237263Snp if (wr == NULL) { 480237263Snp /* XXX */ 481237263Snp panic("%s: allocation failure.", __func__); 482237263Snp } 483237263Snp req = wrtod(wr); 484237263Snp 485237263Snp INIT_TP_WR(req, 0); 486237263Snp OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, 487237263Snp lctx->stid)); 488237263Snp req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id); 489237263Snp req->rsvd = htobe16(0); 490237263Snp 491237263Snp t4_wrq_tx(sc, wr); 492237263Snp return (0); 493237263Snp} 494237263Snp 495237263Snp/* 496237263Snp * Start a listening server by sending a passive open request to HW. 497237263Snp * 498237263Snp * Can't take adapter lock here and access to sc->flags, sc->open_device_map, 499237263Snp * sc->offload_map, if_capenable are all race prone. 500237263Snp */ 501237263Snpint 502237263Snpt4_listen_start(struct toedev *tod, struct tcpcb *tp) 503237263Snp{ 504237263Snp struct adapter *sc = tod->tod_softc; 505237263Snp struct port_info *pi; 506237263Snp struct inpcb *inp = tp->t_inpcb; 507237263Snp struct listen_ctx *lctx; 508245468Snp int i, rc; 509237263Snp 510237263Snp INP_WLOCK_ASSERT(inp); 511237263Snp 512259804Snp /* Don't start a hardware listener for any loopback address. */ 513259804Snp if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr)) 514259804Snp return (0); 515259804Snp if (!(inp->inp_vflag & INP_IPV6) && 516259804Snp IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr))) 517259804Snp return (0); 518237263Snp#if 0 519237263Snp ADAPTER_LOCK(sc); 520237263Snp if (IS_BUSY(sc)) { 521237263Snp log(LOG_ERR, "%s: listen request ignored, %s is busy", 522237263Snp __func__, device_get_nameunit(sc->dev)); 523237263Snp goto done; 524237263Snp } 525237263Snp 526237263Snp KASSERT(sc->flags & TOM_INIT_DONE, 527237263Snp ("%s: TOM not initialized", __func__)); 528237263Snp#endif 529237263Snp 530237263Snp if ((sc->open_device_map & sc->offload_map) == 0) 531237263Snp goto done; /* no port that's UP with IFCAP_TOE enabled */ 532237263Snp 533237263Snp /* 534245468Snp * Find a running port with IFCAP_TOE (4 or 6). We'll use the first 535245468Snp * such port's queues to send the passive open and receive the reply to 536245468Snp * it. 537237263Snp * 538237263Snp * XXX: need a way to mark a port in use by offload. if_cxgbe should 539237263Snp * then reject any attempt to bring down such a port (and maybe reject 540237263Snp * attempts to disable IFCAP_TOE on that port too?). 541237263Snp */ 542237263Snp for_each_port(sc, i) { 543237263Snp if (isset(&sc->open_device_map, i) && 544245468Snp sc->port[i]->ifp->if_capenable & IFCAP_TOE) 545237263Snp break; 546237263Snp } 547237263Snp KASSERT(i < sc->params.nports, 548237263Snp ("%s: no running port with TOE capability enabled.", __func__)); 549237263Snp pi = sc->port[i]; 550237263Snp 551237263Snp if (listen_hash_find(sc, inp) != NULL) 552237263Snp goto done; /* already setup */ 553237263Snp 554237263Snp lctx = alloc_lctx(sc, inp, pi); 555237263Snp if (lctx == NULL) { 556237263Snp log(LOG_ERR, 557237263Snp "%s: listen request ignored, %s couldn't allocate lctx\n", 558237263Snp __func__, device_get_nameunit(sc->dev)); 559237263Snp goto done; 560237263Snp } 561237263Snp listen_hash_add(sc, lctx); 562237263Snp 563245468Snp CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x", 564245468Snp __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp, 565245468Snp inp->inp_vflag); 566237263Snp 567245468Snp if (inp->inp_vflag & INP_IPV6) 568245468Snp rc = create_server6(sc, lctx); 569245468Snp else 570245468Snp rc = create_server(sc, lctx); 571245468Snp if (rc != 0) { 572245468Snp log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n", 573245468Snp __func__, device_get_nameunit(sc->dev), rc); 574237263Snp (void) listen_hash_del(sc, inp); 575237263Snp inp = release_lctx(sc, lctx); 576237263Snp /* can't be freed, host stack has a reference */ 577237263Snp KASSERT(inp != NULL, ("%s: inp freed", __func__)); 578237263Snp goto done; 579237263Snp } 580237263Snp lctx->flags |= LCTX_RPL_PENDING; 581237263Snpdone: 582237263Snp#if 0 583237263Snp ADAPTER_UNLOCK(sc); 584237263Snp#endif 585237263Snp return (0); 586237263Snp} 587237263Snp 588237263Snpint 589237263Snpt4_listen_stop(struct toedev *tod, struct tcpcb *tp) 590237263Snp{ 591237263Snp struct listen_ctx *lctx; 592237263Snp struct adapter *sc = tod->tod_softc; 593237263Snp struct inpcb *inp = tp->t_inpcb; 594237263Snp struct synq_entry *synqe; 595237263Snp 596237263Snp INP_WLOCK_ASSERT(inp); 597237263Snp 598237263Snp lctx = listen_hash_del(sc, inp); 599237263Snp if (lctx == NULL) 600237263Snp return (ENOENT); /* no hardware listener for this inp */ 601237263Snp 602237263Snp CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid, 603237263Snp lctx, lctx->flags); 604237263Snp 605237263Snp /* 606237263Snp * If the reply to the PASS_OPEN is still pending we'll wait for it to 607237263Snp * arrive and clean up when it does. 608237263Snp */ 609237263Snp if (lctx->flags & LCTX_RPL_PENDING) { 610237263Snp KASSERT(TAILQ_EMPTY(&lctx->synq), 611237263Snp ("%s: synq not empty.", __func__)); 612237263Snp return (EINPROGRESS); 613237263Snp } 614237263Snp 615237263Snp /* 616237263Snp * The host stack will abort all the connections on the listening 617237263Snp * socket's so_comp. It doesn't know about the connections on the synq 618237263Snp * so we need to take care of those. 619237263Snp */ 620243680Snp TAILQ_FOREACH(synqe, &lctx->synq, link) { 621243680Snp if (synqe->flags & TPF_SYNQE_HAS_L2TE) 622243680Snp send_reset_synqe(tod, synqe); 623243680Snp } 624237263Snp 625237263Snp destroy_server(sc, lctx); 626237263Snp return (0); 627237263Snp} 628237263Snp 629237263Snpstatic inline void 630237263Snphold_synqe(struct synq_entry *synqe) 631237263Snp{ 632237263Snp 633237263Snp refcount_acquire(&synqe->refcnt); 634237263Snp} 635237263Snp 636237263Snpstatic inline void 637237263Snprelease_synqe(struct synq_entry *synqe) 638237263Snp{ 639237263Snp 640237263Snp if (refcount_release(&synqe->refcnt)) { 641239514Snp int needfree = synqe->flags & TPF_SYNQE_NEEDFREE; 642237263Snp 643237263Snp m_freem(synqe->syn); 644237263Snp if (needfree) 645237263Snp free(synqe, M_CXGBE); 646237263Snp } 647237263Snp} 648237263Snp 649237263Snpvoid 650237263Snpt4_syncache_added(struct toedev *tod __unused, void *arg) 651237263Snp{ 652237263Snp struct synq_entry *synqe = arg; 653237263Snp 654237263Snp hold_synqe(synqe); 655237263Snp} 656237263Snp 657237263Snpvoid 658237263Snpt4_syncache_removed(struct toedev *tod __unused, void *arg) 659237263Snp{ 660237263Snp struct synq_entry *synqe = arg; 661237263Snp 662237263Snp release_synqe(synqe); 663237263Snp} 664237263Snp 665237263Snp/* XXX */ 666237263Snpextern void tcp_dooptions(struct tcpopt *, u_char *, int, int); 667237263Snp 668237263Snpint 669237263Snpt4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m) 670237263Snp{ 671237263Snp struct adapter *sc = tod->tod_softc; 672237263Snp struct synq_entry *synqe = arg; 673237263Snp struct wrqe *wr; 674237263Snp struct l2t_entry *e; 675237263Snp struct tcpopt to; 676237263Snp struct ip *ip = mtod(m, struct ip *); 677245468Snp struct tcphdr *th; 678237263Snp 679237263Snp wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr); 680243110Snp if (wr == NULL) { 681243110Snp m_freem(m); 682237263Snp return (EALREADY); 683243110Snp } 684237263Snp 685245468Snp if (ip->ip_v == IPVERSION) 686245468Snp th = (void *)(ip + 1); 687245468Snp else 688245468Snp th = (void *)((struct ip6_hdr *)ip + 1); 689237263Snp bzero(&to, sizeof(to)); 690237263Snp tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th), 691237263Snp TO_SYN); 692237263Snp 693237263Snp /* save these for later */ 694237263Snp synqe->iss = be32toh(th->th_seq); 695237263Snp synqe->ts = to.to_tsval; 696237263Snp 697252711Snp if (is_t5(sc)) { 698252711Snp struct cpl_t5_pass_accept_rpl *rpl5 = wrtod(wr); 699252711Snp 700252711Snp rpl5->iss = th->th_seq; 701252711Snp } 702252711Snp 703237263Snp e = &sc->l2t->l2tab[synqe->l2e_idx]; 704237263Snp t4_l2t_send(sc, wr, e); 705237263Snp 706237263Snp m_freem(m); /* don't need this any more */ 707237263Snp return (0); 708237263Snp} 709237263Snp 710237263Snpstatic int 711237263Snpdo_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss, 712237263Snp struct mbuf *m) 713237263Snp{ 714237263Snp struct adapter *sc = iq->adapter; 715237263Snp const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1); 716237263Snp int stid = GET_TID(cpl); 717237263Snp unsigned int status = cpl->status; 718237263Snp struct listen_ctx *lctx = lookup_stid(sc, stid); 719237263Snp struct inpcb *inp = lctx->inp; 720237263Snp#ifdef INVARIANTS 721237263Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 722237263Snp#endif 723237263Snp 724237263Snp KASSERT(opcode == CPL_PASS_OPEN_RPL, 725237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 726237263Snp KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 727237263Snp KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); 728237263Snp 729237263Snp INP_WLOCK(inp); 730237263Snp 731237263Snp CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x", 732237263Snp __func__, stid, status, lctx->flags); 733237263Snp 734237263Snp lctx->flags &= ~LCTX_RPL_PENDING; 735237263Snp 736237263Snp if (status != CPL_ERR_NONE) 737245468Snp log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status); 738237263Snp 739237263Snp#ifdef INVARIANTS 740237263Snp /* 741237263Snp * If the inp has been dropped (listening socket closed) then 742237263Snp * listen_stop must have run and taken the inp out of the hash. 743237263Snp */ 744237263Snp if (inp->inp_flags & INP_DROPPED) { 745237263Snp KASSERT(listen_hash_del(sc, inp) == NULL, 746237263Snp ("%s: inp %p still in listen hash", __func__, inp)); 747237263Snp } 748237263Snp#endif 749237263Snp 750237263Snp if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) { 751237263Snp if (release_lctx(sc, lctx) != NULL) 752237263Snp INP_WUNLOCK(inp); 753237263Snp return (status); 754237263Snp } 755237263Snp 756237263Snp /* 757237263Snp * Listening socket stopped listening earlier and now the chip tells us 758237263Snp * it has started the hardware listener. Stop it; the lctx will be 759237263Snp * released in do_close_server_rpl. 760237263Snp */ 761237263Snp if (inp->inp_flags & INP_DROPPED) { 762237263Snp destroy_server(sc, lctx); 763237263Snp INP_WUNLOCK(inp); 764237263Snp return (status); 765237263Snp } 766237263Snp 767237263Snp /* 768237263Snp * Failed to start hardware listener. Take inp out of the hash and 769237263Snp * release our reference on it. An error message has been logged 770237263Snp * already. 771237263Snp */ 772237263Snp if (status != CPL_ERR_NONE) { 773237263Snp listen_hash_del(sc, inp); 774237263Snp if (release_lctx(sc, lctx) != NULL) 775237263Snp INP_WUNLOCK(inp); 776237263Snp return (status); 777237263Snp } 778237263Snp 779237263Snp /* hardware listener open for business */ 780237263Snp 781237263Snp INP_WUNLOCK(inp); 782237263Snp return (status); 783237263Snp} 784237263Snp 785237263Snpstatic int 786237263Snpdo_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss, 787237263Snp struct mbuf *m) 788237263Snp{ 789237263Snp struct adapter *sc = iq->adapter; 790237263Snp const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1); 791237263Snp int stid = GET_TID(cpl); 792237263Snp unsigned int status = cpl->status; 793237263Snp struct listen_ctx *lctx = lookup_stid(sc, stid); 794237263Snp struct inpcb *inp = lctx->inp; 795237263Snp#ifdef INVARIANTS 796237263Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 797237263Snp#endif 798237263Snp 799237263Snp KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL, 800237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 801237263Snp KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 802237263Snp KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); 803237263Snp 804237263Snp CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status); 805237263Snp 806237263Snp if (status != CPL_ERR_NONE) { 807245468Snp log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n", 808237263Snp __func__, status, stid); 809237263Snp return (status); 810237263Snp } 811237263Snp 812237263Snp INP_WLOCK(inp); 813237263Snp inp = release_lctx(sc, lctx); 814237263Snp if (inp != NULL) 815237263Snp INP_WUNLOCK(inp); 816237263Snp 817237263Snp return (status); 818237263Snp} 819237263Snp 820237263Snpstatic void 821237263Snpdone_with_synqe(struct adapter *sc, struct synq_entry *synqe) 822237263Snp{ 823237263Snp struct listen_ctx *lctx = synqe->lctx; 824237263Snp struct inpcb *inp = lctx->inp; 825237263Snp struct port_info *pi = synqe->syn->m_pkthdr.rcvif->if_softc; 826237263Snp struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; 827237263Snp 828237263Snp INP_WLOCK_ASSERT(inp); 829237263Snp 830237263Snp TAILQ_REMOVE(&lctx->synq, synqe, link); 831237263Snp inp = release_lctx(sc, lctx); 832237263Snp if (inp) 833237263Snp INP_WUNLOCK(inp); 834237263Snp remove_tid(sc, synqe->tid); 835237263Snp release_tid(sc, synqe->tid, &sc->sge.ctrlq[pi->port_id]); 836237263Snp t4_l2t_release(e); 837237263Snp release_synqe(synqe); /* removed from synq list */ 838237263Snp} 839237263Snp 840237263Snpint 841237263Snpdo_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss, 842237263Snp struct mbuf *m) 843237263Snp{ 844237263Snp struct adapter *sc = iq->adapter; 845237263Snp const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); 846237263Snp unsigned int tid = GET_TID(cpl); 847237263Snp struct synq_entry *synqe = lookup_tid(sc, tid); 848237263Snp struct listen_ctx *lctx = synqe->lctx; 849237263Snp struct inpcb *inp = lctx->inp; 850237263Snp int txqid; 851237263Snp struct sge_wrq *ofld_txq; 852237263Snp#ifdef INVARIANTS 853237263Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 854237263Snp#endif 855237263Snp 856237263Snp KASSERT(opcode == CPL_ABORT_REQ_RSS, 857237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 858237263Snp KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 859237263Snp KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__)); 860237263Snp 861237263Snp CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", 862237263Snp __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); 863237263Snp 864245935Snp if (negative_advice(cpl->status)) 865237263Snp return (0); /* Ignore negative advice */ 866237263Snp 867237263Snp INP_WLOCK(inp); 868237263Snp 869237263Snp get_qids_from_mbuf(synqe->syn, &txqid, NULL); 870237263Snp ofld_txq = &sc->sge.ofld_txq[txqid]; 871237263Snp 872237263Snp /* 873237263Snp * If we'd initiated an abort earlier the reply to it is responsible for 874237263Snp * cleaning up resources. Otherwise we tear everything down right here 875237263Snp * right now. We owe the T4 a CPL_ABORT_RPL no matter what. 876237263Snp */ 877239514Snp if (synqe->flags & TPF_ABORT_SHUTDOWN) { 878237263Snp INP_WUNLOCK(inp); 879237263Snp goto done; 880237263Snp } 881237263Snp 882237263Snp done_with_synqe(sc, synqe); 883237263Snp /* inp lock released by done_with_synqe */ 884237263Snpdone: 885237263Snp send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); 886237263Snp return (0); 887237263Snp} 888237263Snp 889237263Snpint 890237263Snpdo_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss, 891237263Snp struct mbuf *m) 892237263Snp{ 893237263Snp struct adapter *sc = iq->adapter; 894237263Snp const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); 895237263Snp unsigned int tid = GET_TID(cpl); 896237263Snp struct synq_entry *synqe = lookup_tid(sc, tid); 897237263Snp struct listen_ctx *lctx = synqe->lctx; 898237263Snp struct inpcb *inp = lctx->inp; 899237263Snp#ifdef INVARIANTS 900237263Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 901237263Snp#endif 902237263Snp 903237263Snp KASSERT(opcode == CPL_ABORT_RPL_RSS, 904237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 905237263Snp KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 906237263Snp KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__)); 907237263Snp 908237263Snp CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", 909237263Snp __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); 910237263Snp 911237263Snp INP_WLOCK(inp); 912239514Snp KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 913237263Snp ("%s: wasn't expecting abort reply for synqe %p (0x%x)", 914237263Snp __func__, synqe, synqe->flags)); 915237263Snp 916237263Snp done_with_synqe(sc, synqe); 917237263Snp /* inp lock released by done_with_synqe */ 918237263Snp 919237263Snp return (0); 920237263Snp} 921237263Snp 922237263Snpvoid 923237263Snpt4_offload_socket(struct toedev *tod, void *arg, struct socket *so) 924237263Snp{ 925237263Snp struct adapter *sc = tod->tod_softc; 926237263Snp struct synq_entry *synqe = arg; 927237263Snp#ifdef INVARIANTS 928237263Snp struct inpcb *inp = sotoinpcb(so); 929237263Snp#endif 930237263Snp struct cpl_pass_establish *cpl = mtod(synqe->syn, void *); 931237263Snp struct toepcb *toep = *(struct toepcb **)(cpl + 1); 932237263Snp 933237263Snp INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */ 934237263Snp INP_WLOCK_ASSERT(inp); 935239514Snp KASSERT(synqe->flags & TPF_SYNQE, 936237263Snp ("%s: %p not a synq_entry?", __func__, arg)); 937237263Snp 938237263Snp offload_socket(so, toep); 939237263Snp make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt); 940239514Snp toep->flags |= TPF_CPL_PENDING; 941237263Snp update_tid(sc, synqe->tid, toep); 942239544Snp synqe->flags |= TPF_SYNQE_EXPANDED; 943237263Snp} 944237263Snp 945237263Snpstatic inline void 946237263Snpsave_qids_in_mbuf(struct mbuf *m, struct port_info *pi) 947237263Snp{ 948237263Snp uint32_t txqid, rxqid; 949237263Snp 950237263Snp txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq; 951237263Snp rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq; 952237263Snp 953237263Snp m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff); 954237263Snp} 955237263Snp 956237263Snpstatic inline void 957237263Snpget_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid) 958237263Snp{ 959237263Snp 960237263Snp if (txqid) 961237263Snp *txqid = m->m_pkthdr.flowid >> 16; 962237263Snp if (rxqid) 963237263Snp *rxqid = m->m_pkthdr.flowid & 0xffff; 964237263Snp} 965237263Snp 966237263Snp/* 967237263Snp * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to 968237263Snp * store some state temporarily. 969237263Snp */ 970237263Snpstatic struct synq_entry * 971237263Snpmbuf_to_synqe(struct mbuf *m) 972237263Snp{ 973248925Snp int len = roundup2(sizeof (struct synq_entry), 8); 974237263Snp int tspace = M_TRAILINGSPACE(m); 975237263Snp struct synq_entry *synqe = NULL; 976237263Snp 977237263Snp if (tspace < len) { 978237263Snp synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT); 979237263Snp if (synqe == NULL) 980237263Snp return (NULL); 981239514Snp synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE; 982239514Snp } else { 983245937Snp synqe = (void *)(m->m_data + m->m_len + tspace - len); 984239514Snp synqe->flags = TPF_SYNQE; 985239514Snp } 986237263Snp 987237263Snp return (synqe); 988237263Snp} 989237263Snp 990237263Snpstatic void 991237263Snpt4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to) 992237263Snp{ 993237263Snp bzero(to, sizeof(*to)); 994237263Snp 995237263Snp if (t4opt->mss) { 996237263Snp to->to_flags |= TOF_MSS; 997237263Snp to->to_mss = be16toh(t4opt->mss); 998237263Snp } 999237263Snp 1000237263Snp if (t4opt->wsf) { 1001237263Snp to->to_flags |= TOF_SCALE; 1002237263Snp to->to_wscale = t4opt->wsf; 1003237263Snp } 1004237263Snp 1005237263Snp if (t4opt->tstamp) 1006237263Snp to->to_flags |= TOF_TS; 1007237263Snp 1008237263Snp if (t4opt->sack) 1009237263Snp to->to_flags |= TOF_SACKPERM; 1010237263Snp} 1011237263Snp 1012237263Snp/* 1013237263Snp * Options2 for passive open. 1014237263Snp */ 1015237263Snpstatic uint32_t 1016237263Snpcalc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid, 1017239344Snp const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode) 1018237263Snp{ 1019237263Snp struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid]; 1020249385Snp uint32_t opt2; 1021237263Snp 1022249385Snp opt2 = V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]) | 1023249385Snp F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id); 1024249385Snp 1025237263Snp if (V_tcp_do_rfc1323) { 1026237263Snp if (tcpopt->tstamp) 1027237263Snp opt2 |= F_TSTAMPS_EN; 1028237263Snp if (tcpopt->sack) 1029237263Snp opt2 |= F_SACK_EN; 1030255198Snp if (tcpopt->wsf <= 14) 1031237263Snp opt2 |= F_WND_SCALE_EN; 1032237263Snp } 1033237263Snp 1034237263Snp if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR)) 1035237263Snp opt2 |= F_CCTRL_ECN; 1036237263Snp 1037249385Snp /* RX_COALESCE is always a valid value (0 or M_RX_COALESCE). */ 1038248925Snp if (is_t4(sc)) 1039249385Snp opt2 |= F_RX_COALESCE_VALID; 1040252711Snp else { 1041249385Snp opt2 |= F_T5_OPT_2_VALID; 1042252711Snp opt2 |= F_CONG_CNTRL_VALID; /* OPT_2_ISS really, for T5 */ 1043252711Snp } 1044252728Snp if (sc->tt.rx_coalesce) 1045252728Snp opt2 |= V_RX_COALESCE(M_RX_COALESCE); 1046237263Snp 1047239344Snp#ifdef USE_DDP_RX_FLOW_CONTROL 1048239344Snp if (ulp_mode == ULP_MODE_TCPDDP) 1049239344Snp opt2 |= F_RX_FC_VALID | F_RX_FC_DDP; 1050239344Snp#endif 1051239344Snp 1052237263Snp return htobe32(opt2); 1053237263Snp} 1054237263Snp 1055237263Snpstatic void 1056237263Snppass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc, 1057237263Snp struct tcphdr *th) 1058237263Snp{ 1059237263Snp const struct cpl_pass_accept_req *cpl = mtod(m, const void *); 1060237263Snp const struct ether_header *eh; 1061237263Snp unsigned int hlen = be32toh(cpl->hdr_len); 1062245468Snp uintptr_t l3hdr; 1063237263Snp const struct tcphdr *tcp; 1064237263Snp 1065237263Snp eh = (const void *)(cpl + 1); 1066245468Snp l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen)); 1067245468Snp tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen)); 1068237263Snp 1069237263Snp if (inc) { 1070237263Snp bzero(inc, sizeof(*inc)); 1071237263Snp inc->inc_fport = tcp->th_sport; 1072237263Snp inc->inc_lport = tcp->th_dport; 1073245468Snp if (((struct ip *)l3hdr)->ip_v == IPVERSION) { 1074245468Snp const struct ip *ip = (const void *)l3hdr; 1075245468Snp 1076245468Snp inc->inc_faddr = ip->ip_src; 1077245468Snp inc->inc_laddr = ip->ip_dst; 1078245468Snp } else { 1079245468Snp const struct ip6_hdr *ip6 = (const void *)l3hdr; 1080245468Snp 1081237263Snp inc->inc_flags |= INC_ISIPV6; 1082245468Snp inc->inc6_faddr = ip6->ip6_src; 1083245468Snp inc->inc6_laddr = ip6->ip6_dst; 1084245468Snp } 1085237263Snp } 1086237263Snp 1087237263Snp if (th) { 1088237263Snp bcopy(tcp, th, sizeof(*th)); 1089237263Snp tcp_fields_to_host(th); /* just like tcp_input */ 1090237263Snp } 1091237263Snp} 1092237263Snp 1093245468Snpstatic int 1094245468Snpifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6) 1095245468Snp{ 1096245468Snp struct ifaddr *ifa; 1097245468Snp struct sockaddr_in6 *sin6; 1098245468Snp int found = 0; 1099245468Snp struct in6_addr in6 = *ip6; 1100245468Snp 1101245468Snp /* Just as in ip6_input */ 1102245468Snp if (in6_clearscope(&in6) || in6_clearscope(&in6)) 1103245468Snp return (0); 1104245468Snp in6_setscope(&in6, ifp, NULL); 1105245468Snp 1106245468Snp if_addr_rlock(ifp); 1107245468Snp TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1108245468Snp sin6 = (void *)ifa->ifa_addr; 1109245468Snp if (sin6->sin6_family != AF_INET6) 1110245468Snp continue; 1111245468Snp 1112245468Snp if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) { 1113245468Snp found = 1; 1114245468Snp break; 1115245468Snp } 1116245468Snp } 1117245468Snp if_addr_runlock(ifp); 1118245468Snp 1119245468Snp return (found); 1120245468Snp} 1121245468Snp 1122245468Snpstatic struct l2t_entry * 1123245468Snpget_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp, 1124245468Snp struct in_conninfo *inc) 1125245468Snp{ 1126245468Snp struct rtentry *rt; 1127245468Snp struct l2t_entry *e; 1128245468Snp struct sockaddr_in6 sin6; 1129245468Snp struct sockaddr *dst = (void *)&sin6; 1130245468Snp 1131245468Snp if (inc->inc_flags & INC_ISIPV6) { 1132245468Snp dst->sa_len = sizeof(struct sockaddr_in6); 1133245468Snp dst->sa_family = AF_INET6; 1134245468Snp ((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr; 1135245468Snp 1136245468Snp if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) { 1137245468Snp /* no need for route lookup */ 1138245468Snp e = t4_l2t_get(pi, ifp, dst); 1139245468Snp return (e); 1140245468Snp } 1141245468Snp } else { 1142245468Snp dst->sa_len = sizeof(struct sockaddr_in); 1143245468Snp dst->sa_family = AF_INET; 1144245468Snp ((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr; 1145245468Snp } 1146245468Snp 1147245468Snp rt = rtalloc1(dst, 0, 0); 1148245468Snp if (rt == NULL) 1149245468Snp return (NULL); 1150245468Snp else { 1151245468Snp struct sockaddr *nexthop; 1152245468Snp 1153245468Snp RT_UNLOCK(rt); 1154245468Snp if (rt->rt_ifp != ifp) 1155245468Snp e = NULL; 1156245468Snp else { 1157245468Snp if (rt->rt_flags & RTF_GATEWAY) 1158245468Snp nexthop = rt->rt_gateway; 1159245468Snp else 1160245468Snp nexthop = dst; 1161245468Snp e = t4_l2t_get(pi, ifp, nexthop); 1162245468Snp } 1163245468Snp RTFREE(rt); 1164245468Snp } 1165245468Snp 1166245468Snp return (e); 1167245468Snp} 1168245468Snp 1169245468Snpstatic int 1170245468Snpifnet_has_ip(struct ifnet *ifp, struct in_addr in) 1171245468Snp{ 1172245468Snp struct ifaddr *ifa; 1173245468Snp struct sockaddr_in *sin; 1174245468Snp int found = 0; 1175245468Snp 1176245468Snp if_addr_rlock(ifp); 1177245468Snp TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1178245468Snp sin = (void *)ifa->ifa_addr; 1179245468Snp if (sin->sin_family != AF_INET) 1180245468Snp continue; 1181245468Snp 1182245468Snp if (sin->sin_addr.s_addr == in.s_addr) { 1183245468Snp found = 1; 1184245468Snp break; 1185245468Snp } 1186245468Snp } 1187245468Snp if_addr_runlock(ifp); 1188245468Snp 1189245468Snp return (found); 1190245468Snp} 1191245468Snp 1192237263Snp#define REJECT_PASS_ACCEPT() do { \ 1193237263Snp reject_reason = __LINE__; \ 1194237263Snp goto reject; \ 1195237263Snp} while (0) 1196237263Snp 1197237263Snp/* 1198237263Snp * The context associated with a tid entry via insert_tid could be a synq_entry 1199237263Snp * or a toepcb. The only way CPL handlers can tell is via a bit in these flags. 1200237263Snp */ 1201237263SnpCTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags)); 1202237263Snp 1203237263Snp/* 1204237263Snp * Incoming SYN on a listening socket. 1205237263Snp * 1206237263Snp * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe, 1207237263Snp * etc. 1208237263Snp */ 1209237263Snpstatic int 1210237263Snpdo_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, 1211237263Snp struct mbuf *m) 1212237263Snp{ 1213237263Snp struct adapter *sc = iq->adapter; 1214237263Snp struct toedev *tod; 1215237263Snp const struct cpl_pass_accept_req *cpl = mtod(m, const void *); 1216237263Snp struct cpl_pass_accept_rpl *rpl; 1217237263Snp struct wrqe *wr; 1218237263Snp unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid)); 1219237263Snp unsigned int tid = GET_TID(cpl); 1220237263Snp struct listen_ctx *lctx = lookup_stid(sc, stid); 1221237263Snp struct inpcb *inp; 1222237263Snp struct socket *so; 1223237263Snp struct in_conninfo inc; 1224237263Snp struct tcphdr th; 1225237263Snp struct tcpopt to; 1226237263Snp struct port_info *pi; 1227245468Snp struct ifnet *hw_ifp, *ifp; 1228237263Snp struct l2t_entry *e = NULL; 1229239344Snp int rscale, mtu_idx, rx_credits, rxqid, ulp_mode; 1230237263Snp struct synq_entry *synqe = NULL; 1231237263Snp int reject_reason; 1232237263Snp uint16_t vid; 1233237263Snp#ifdef INVARIANTS 1234237263Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1235237263Snp#endif 1236237263Snp 1237237263Snp KASSERT(opcode == CPL_PASS_ACCEPT_REQ, 1238237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 1239237263Snp KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); 1240237263Snp 1241237263Snp CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid, 1242237263Snp lctx); 1243237263Snp 1244237263Snp pass_accept_req_to_protohdrs(m, &inc, &th); 1245237263Snp t4opt_to_tcpopt(&cpl->tcpopt, &to); 1246237263Snp 1247237263Snp pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))]; 1248245468Snp hw_ifp = pi->ifp; /* the cxgbeX ifnet */ 1249245468Snp m->m_pkthdr.rcvif = hw_ifp; 1250245468Snp tod = TOEDEV(hw_ifp); 1251237263Snp 1252237263Snp /* 1253245468Snp * Figure out if there is a pseudo interface (vlan, lagg, etc.) 1254245468Snp * involved. Don't offload if the SYN had a VLAN tag and the vid 1255245468Snp * doesn't match anything on this interface. 1256245468Snp * 1257245468Snp * XXX: lagg support, lagg + vlan support. 1258237263Snp */ 1259237263Snp vid = EVL_VLANOFTAG(be16toh(cpl->vlan)); 1260237263Snp if (vid != 0xfff) { 1261245468Snp ifp = VLAN_DEVAT(hw_ifp, vid); 1262245468Snp if (ifp == NULL) 1263237263Snp REJECT_PASS_ACCEPT(); 1264245468Snp } else 1265245468Snp ifp = hw_ifp; 1266237263Snp 1267237263Snp /* 1268237263Snp * Don't offload if the peer requested a TCP option that's not known to 1269237263Snp * the silicon. 1270237263Snp */ 1271237263Snp if (cpl->tcpopt.unknown) 1272237263Snp REJECT_PASS_ACCEPT(); 1273237263Snp 1274245468Snp if (inc.inc_flags & INC_ISIPV6) { 1275237263Snp 1276245468Snp /* Don't offload if the ifcap isn't enabled */ 1277245468Snp if ((ifp->if_capenable & IFCAP_TOE6) == 0) 1278245468Snp REJECT_PASS_ACCEPT(); 1279245468Snp 1280245468Snp /* 1281245468Snp * SYN must be directed to an IP6 address on this ifnet. This 1282245468Snp * is more restrictive than in6_localip. 1283245468Snp */ 1284245468Snp if (!ifnet_has_ip6(ifp, &inc.inc6_laddr)) 1285245468Snp REJECT_PASS_ACCEPT(); 1286245468Snp } else { 1287245468Snp 1288245468Snp /* Don't offload if the ifcap isn't enabled */ 1289245468Snp if ((ifp->if_capenable & IFCAP_TOE4) == 0) 1290245468Snp REJECT_PASS_ACCEPT(); 1291245468Snp 1292245468Snp /* 1293245468Snp * SYN must be directed to an IP address on this ifnet. This 1294245468Snp * is more restrictive than in_localip. 1295245468Snp */ 1296245468Snp if (!ifnet_has_ip(ifp, inc.inc_laddr)) 1297245468Snp REJECT_PASS_ACCEPT(); 1298237263Snp } 1299237263Snp 1300245468Snp e = get_l2te_for_nexthop(pi, ifp, &inc); 1301245468Snp if (e == NULL) 1302245468Snp REJECT_PASS_ACCEPT(); 1303245468Snp 1304237263Snp synqe = mbuf_to_synqe(m); 1305237263Snp if (synqe == NULL) 1306237263Snp REJECT_PASS_ACCEPT(); 1307237263Snp 1308252711Snp wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) : 1309252711Snp sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[pi->port_id]); 1310237263Snp if (wr == NULL) 1311237263Snp REJECT_PASS_ACCEPT(); 1312237263Snp rpl = wrtod(wr); 1313237263Snp 1314237263Snp INP_INFO_WLOCK(&V_tcbinfo); /* for 4-tuple check, syncache_add */ 1315237263Snp 1316237263Snp /* Don't offload if the 4-tuple is already in use */ 1317237263Snp if (toe_4tuple_check(&inc, &th, ifp) != 0) { 1318237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1319237263Snp free(wr, M_CXGBE); 1320237263Snp REJECT_PASS_ACCEPT(); 1321237263Snp } 1322237263Snp 1323237263Snp inp = lctx->inp; /* listening socket, not owned by TOE */ 1324237263Snp INP_WLOCK(inp); 1325237263Snp 1326237263Snp /* Don't offload if the listening socket has closed */ 1327237263Snp if (__predict_false(inp->inp_flags & INP_DROPPED)) { 1328237263Snp /* 1329237263Snp * The listening socket has closed. The reply from the TOE to 1330237263Snp * our CPL_CLOSE_LISTSRV_REQ will ultimately release all 1331237263Snp * resources tied to this listen context. 1332237263Snp */ 1333237263Snp INP_WUNLOCK(inp); 1334237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1335237263Snp free(wr, M_CXGBE); 1336237263Snp REJECT_PASS_ACCEPT(); 1337237263Snp } 1338237263Snp so = inp->inp_socket; 1339237263Snp 1340237263Snp mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss)); 1341237263Snp rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0; 1342237263Snp SOCKBUF_LOCK(&so->so_rcv); 1343237263Snp /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ 1344237263Snp rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); 1345237263Snp SOCKBUF_UNLOCK(&so->so_rcv); 1346237263Snp 1347237263Snp save_qids_in_mbuf(m, pi); 1348237263Snp get_qids_from_mbuf(m, NULL, &rxqid); 1349237263Snp 1350252711Snp if (is_t4(sc)) 1351252711Snp INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid); 1352252711Snp else { 1353252711Snp struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl; 1354252711Snp 1355252711Snp INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid); 1356252711Snp } 1357239344Snp if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) { 1358239344Snp ulp_mode = ULP_MODE_TCPDDP; 1359239514Snp synqe->flags |= TPF_SYNQE_TCPDDP; 1360239344Snp } else 1361239344Snp ulp_mode = ULP_MODE_NONE; 1362239344Snp rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode); 1363239344Snp rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode); 1364237263Snp 1365237263Snp synqe->tid = tid; 1366237263Snp synqe->lctx = lctx; 1367237263Snp synqe->syn = m; 1368237263Snp m = NULL; 1369245937Snp refcount_init(&synqe->refcnt, 1); /* 1 means extra hold */ 1370237263Snp synqe->l2e_idx = e->idx; 1371237263Snp synqe->rcv_bufsize = rx_credits; 1372237263Snp atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr); 1373237263Snp 1374237263Snp insert_tid(sc, tid, synqe); 1375237263Snp TAILQ_INSERT_TAIL(&lctx->synq, synqe, link); 1376237263Snp hold_synqe(synqe); /* hold for the duration it's in the synq */ 1377237263Snp hold_lctx(lctx); /* A synqe on the list has a ref on its lctx */ 1378237263Snp 1379237263Snp /* 1380237263Snp * If all goes well t4_syncache_respond will get called during 1381237263Snp * syncache_add. Also note that syncache_add releases both pcbinfo and 1382237263Snp * pcb locks. 1383237263Snp */ 1384237263Snp toe_syncache_add(&inc, &to, &th, inp, tod, synqe); 1385237263Snp INP_UNLOCK_ASSERT(inp); /* ok to assert, we have a ref on the inp */ 1386237263Snp INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); 1387237263Snp 1388237263Snp /* 1389237263Snp * If we replied during syncache_add (synqe->wr has been consumed), 1390237263Snp * good. Otherwise, set it to 0 so that further syncache_respond 1391237263Snp * attempts by the kernel will be ignored. 1392237263Snp */ 1393237263Snp if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) { 1394237263Snp 1395237263Snp /* 1396237263Snp * syncache may or may not have a hold on the synqe, which may 1397237263Snp * or may not be stashed in the original SYN mbuf passed to us. 1398237263Snp * Just copy it over instead of dealing with all possibilities. 1399237263Snp */ 1400243857Sglebius m = m_dup(synqe->syn, M_NOWAIT); 1401237263Snp if (m) 1402245468Snp m->m_pkthdr.rcvif = hw_ifp; 1403237263Snp 1404242666Snp remove_tid(sc, synqe->tid); 1405237263Snp free(wr, M_CXGBE); 1406243680Snp 1407243680Snp /* Yank the synqe out of the lctx synq. */ 1408243680Snp INP_WLOCK(inp); 1409243680Snp TAILQ_REMOVE(&lctx->synq, synqe, link); 1410243680Snp release_synqe(synqe); /* removed from synq list */ 1411243680Snp inp = release_lctx(sc, lctx); 1412243680Snp if (inp) 1413243680Snp INP_WUNLOCK(inp); 1414243680Snp 1415245937Snp release_synqe(synqe); /* extra hold */ 1416237263Snp REJECT_PASS_ACCEPT(); 1417237263Snp } 1418243680Snp 1419237263Snp CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK", 1420237263Snp __func__, stid, tid, lctx, synqe); 1421243680Snp 1422243680Snp INP_WLOCK(inp); 1423243680Snp synqe->flags |= TPF_SYNQE_HAS_L2TE; 1424243680Snp if (__predict_false(inp->inp_flags & INP_DROPPED)) { 1425243680Snp /* 1426243680Snp * Listening socket closed but tod_listen_stop did not abort 1427243680Snp * this tid because there was no L2T entry for the tid at that 1428243680Snp * time. Abort it now. The reply to the abort will clean up. 1429243680Snp */ 1430245937Snp CTR6(KTR_CXGBE, 1431245937Snp "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT", 1432245937Snp __func__, stid, tid, lctx, synqe, synqe->flags); 1433245937Snp if (!(synqe->flags & TPF_SYNQE_EXPANDED)) 1434245937Snp send_reset_synqe(tod, synqe); 1435243680Snp INP_WUNLOCK(inp); 1436243680Snp 1437245937Snp release_synqe(synqe); /* extra hold */ 1438243680Snp return (__LINE__); 1439243680Snp } 1440243680Snp INP_WUNLOCK(inp); 1441243680Snp 1442245937Snp release_synqe(synqe); /* extra hold */ 1443237263Snp return (0); 1444237263Snpreject: 1445237263Snp CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid, 1446237263Snp reject_reason); 1447237263Snp 1448237263Snp if (e) 1449237263Snp t4_l2t_release(e); 1450237263Snp release_tid(sc, tid, lctx->ctrlq); 1451237263Snp 1452237263Snp if (__predict_true(m != NULL)) { 1453237263Snp m_adj(m, sizeof(*cpl)); 1454237263Snp m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | 1455237263Snp CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 1456237263Snp m->m_pkthdr.csum_data = 0xffff; 1457245468Snp hw_ifp->if_input(hw_ifp, m); 1458237263Snp } 1459237263Snp 1460237263Snp return (reject_reason); 1461237263Snp} 1462237263Snp 1463237263Snpstatic void 1464237263Snpsynqe_to_protohdrs(struct synq_entry *synqe, 1465237263Snp const struct cpl_pass_establish *cpl, struct in_conninfo *inc, 1466237263Snp struct tcphdr *th, struct tcpopt *to) 1467237263Snp{ 1468237263Snp uint16_t tcp_opt = be16toh(cpl->tcp_opt); 1469237263Snp 1470237263Snp /* start off with the original SYN */ 1471237263Snp pass_accept_req_to_protohdrs(synqe->syn, inc, th); 1472237263Snp 1473237263Snp /* modify parts to make it look like the ACK to our SYN|ACK */ 1474237263Snp th->th_flags = TH_ACK; 1475237263Snp th->th_ack = synqe->iss + 1; 1476237263Snp th->th_seq = be32toh(cpl->rcv_isn); 1477237263Snp bzero(to, sizeof(*to)); 1478237263Snp if (G_TCPOPT_TSTAMP(tcp_opt)) { 1479237263Snp to->to_flags |= TOF_TS; 1480237263Snp to->to_tsecr = synqe->ts; 1481237263Snp } 1482237263Snp} 1483237263Snp 1484237263Snpstatic int 1485237263Snpdo_pass_establish(struct sge_iq *iq, const struct rss_header *rss, 1486237263Snp struct mbuf *m) 1487237263Snp{ 1488237263Snp struct adapter *sc = iq->adapter; 1489237263Snp struct port_info *pi; 1490237263Snp struct ifnet *ifp; 1491237263Snp const struct cpl_pass_establish *cpl = (const void *)(rss + 1); 1492237263Snp#if defined(KTR) || defined(INVARIANTS) 1493237263Snp unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid)); 1494237263Snp#endif 1495237263Snp unsigned int tid = GET_TID(cpl); 1496237263Snp struct synq_entry *synqe = lookup_tid(sc, tid); 1497237263Snp struct listen_ctx *lctx = synqe->lctx; 1498237263Snp struct inpcb *inp = lctx->inp; 1499237263Snp struct socket *so; 1500237263Snp struct tcphdr th; 1501237263Snp struct tcpopt to; 1502237263Snp struct in_conninfo inc; 1503237263Snp struct toepcb *toep; 1504237263Snp u_int txqid, rxqid; 1505237263Snp#ifdef INVARIANTS 1506237263Snp unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1507237263Snp#endif 1508237263Snp 1509237263Snp KASSERT(opcode == CPL_PASS_ESTABLISH, 1510237263Snp ("%s: unexpected opcode 0x%x", __func__, opcode)); 1511237263Snp KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1512237263Snp KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); 1513239514Snp KASSERT(synqe->flags & TPF_SYNQE, 1514237263Snp ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe)); 1515237263Snp 1516237263Snp INP_INFO_WLOCK(&V_tcbinfo); /* for syncache_expand */ 1517237263Snp INP_WLOCK(inp); 1518237263Snp 1519237263Snp CTR6(KTR_CXGBE, 1520237263Snp "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x", 1521237263Snp __func__, stid, tid, synqe, synqe->flags, inp->inp_flags); 1522237263Snp 1523237263Snp if (__predict_false(inp->inp_flags & INP_DROPPED)) { 1524237263Snp 1525243680Snp if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1526243680Snp KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1527243680Snp ("%s: listen socket closed but tid %u not aborted.", 1528243680Snp __func__, tid)); 1529243680Snp } 1530243680Snp 1531237263Snp INP_WUNLOCK(inp); 1532237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1533237263Snp return (0); 1534237263Snp } 1535237263Snp 1536237263Snp ifp = synqe->syn->m_pkthdr.rcvif; 1537237263Snp pi = ifp->if_softc; 1538237263Snp KASSERT(pi->adapter == sc, 1539237263Snp ("%s: pi %p, sc %p mismatch", __func__, pi, sc)); 1540237263Snp 1541237263Snp get_qids_from_mbuf(synqe->syn, &txqid, &rxqid); 1542237263Snp KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0], 1543237263Snp ("%s: CPL arrived on unexpected rxq. %d %d", __func__, rxqid, 1544237263Snp (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0]))); 1545237263Snp 1546237263Snp toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT); 1547237263Snp if (toep == NULL) { 1548237263Snpreset: 1549243680Snp /* 1550243680Snp * The reply to this abort will perform final cleanup. There is 1551243680Snp * no need to check for HAS_L2TE here. We can be here only if 1552243680Snp * we responded to the PASS_ACCEPT_REQ, and our response had the 1553243680Snp * L2T idx. 1554243680Snp */ 1555237263Snp send_reset_synqe(TOEDEV(ifp), synqe); 1556237263Snp INP_WUNLOCK(inp); 1557237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1558237263Snp return (0); 1559237263Snp } 1560237263Snp toep->tid = tid; 1561237263Snp toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx]; 1562239514Snp if (synqe->flags & TPF_SYNQE_TCPDDP) 1563239344Snp set_tcpddp_ulp_mode(toep); 1564239344Snp else 1565239344Snp toep->ulp_mode = ULP_MODE_NONE; 1566237263Snp /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ 1567237263Snp toep->rx_credits = synqe->rcv_bufsize; 1568237263Snp 1569237263Snp so = inp->inp_socket; 1570237263Snp KASSERT(so != NULL, ("%s: socket is NULL", __func__)); 1571237263Snp 1572237263Snp /* Come up with something that syncache_expand should be ok with. */ 1573237263Snp synqe_to_protohdrs(synqe, cpl, &inc, &th, &to); 1574237263Snp 1575237263Snp /* 1576237263Snp * No more need for anything in the mbuf that carried the 1577237263Snp * CPL_PASS_ACCEPT_REQ. Drop the CPL_PASS_ESTABLISH and toep pointer 1578237263Snp * there. XXX: bad form but I don't want to increase the size of synqe. 1579237263Snp */ 1580237263Snp m = synqe->syn; 1581237263Snp KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len, 1582237263Snp ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len)); 1583237263Snp bcopy(cpl, mtod(m, void *), sizeof(*cpl)); 1584237263Snp *(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep; 1585237263Snp 1586237263Snp if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) { 1587237263Snp free_toepcb(toep); 1588237263Snp goto reset; 1589237263Snp } 1590237263Snp 1591239544Snp /* 1592239544Snp * This is for the unlikely case where the syncache entry that we added 1593239544Snp * has been evicted from the syncache, but the syncache_expand above 1594239544Snp * works because of syncookies. 1595239544Snp * 1596239544Snp * XXX: we've held the tcbinfo lock throughout so there's no risk of 1597239544Snp * anyone accept'ing a connection before we've installed our hooks, but 1598239544Snp * this somewhat defeats the purpose of having a tod_offload_socket :-( 1599239544Snp */ 1600239544Snp if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) { 1601239544Snp struct inpcb *new_inp = sotoinpcb(so); 1602239544Snp 1603239544Snp INP_WLOCK(new_inp); 1604239544Snp tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0); 1605239544Snp t4_offload_socket(TOEDEV(ifp), synqe, so); 1606239544Snp INP_WUNLOCK(new_inp); 1607239544Snp } 1608239544Snp 1609237263Snp /* Done with the synqe */ 1610237263Snp TAILQ_REMOVE(&lctx->synq, synqe, link); 1611237263Snp inp = release_lctx(sc, lctx); 1612237263Snp if (inp != NULL) 1613237263Snp INP_WUNLOCK(inp); 1614237263Snp INP_INFO_WUNLOCK(&V_tcbinfo); 1615237263Snp release_synqe(synqe); 1616237263Snp 1617237263Snp return (0); 1618237263Snp} 1619237263Snp 1620237263Snpvoid 1621237263Snpt4_init_listen_cpl_handlers(struct adapter *sc) 1622237263Snp{ 1623237263Snp 1624237263Snp t4_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl); 1625237263Snp t4_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl); 1626237263Snp t4_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req); 1627237263Snp t4_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish); 1628237263Snp} 1629237263Snp#endif 1630