/**************************************************************************

Copyright (c) 2007, Chelsio Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Neither the name of the Chelsio Corporation nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

/*
 * iWARP connection manager for the Chelsio T3 (iw_cxgb) RNIC driver.
 *
 * Implements the iw_cm provider entry points (connect/accept/reject/listen)
 * on top of offloaded (TOE) TCP sockets: MPA request/reply exchange is done
 * with sosend()/soreceive() on the offloaded socket, and a per-endpoint
 * state machine (states[] below) tracks connection setup and teardown.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/rwlock.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/proc.h>
#include <sys/uio.h>

#include <net/route.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <rdma/ib_verbs.h>
#include <linux/idr.h>
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>

#include <cxgb_include.h>
#include <ulp/tom/cxgb_tom.h>
#include <ulp/tom/cxgb_toepcb.h>
/*
 * NOTE(review): the following three headers are already included above
 * (iw_cxgb_ib_intfc.h, ib_verbs.h, linux/idr.h); the duplicates are
 * harmless (guarded) but could be dropped.
 */
#include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
#include <rdma/ib_verbs.h>
#include <linux/idr.h>

#include <ulp/iw_cxgb/iw_cxgb_wr.h>
#include <ulp/iw_cxgb/iw_cxgb_hal.h>
#include <ulp/iw_cxgb/iw_cxgb_provider.h>
#include <ulp/iw_cxgb/iw_cxgb_cm.h>
#include <ulp/iw_cxgb/iw_cxgb.h>

#ifdef KTR
/* Human-readable names for enum iwch_ep_state, used only in KTR tracing. */
static char *states[] = {
	"idle",
	"listen",
	"connecting",
	"mpa_wait_req",
	"mpa_req_sent",
	"mpa_req_rcvd",
	"mpa_rep_sent",
	"fpdu_mode",
	"aborting",
	"closing",
	"moribund",
	"dead",
	NULL,
};
#endif

SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");

static int ep_timeout_secs = 60;
TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0,
    "CM Endpoint operation timeout in seconds (default=60)");

static int mpa_rev = 1;
TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
    "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");

static int markers_enabled = 0;
TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0,
    "Enable MPA MARKERS (default(0)=disabled)");

static int crc_enabled = 1;
TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0,
    "Enable MPA CRC (default(1)=enabled)");

static int rcv_win = 256 * 1024;
TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0,
    "TCP receive window in bytes (default=256KB)");

static int snd_win = 32 * 1024;
TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win);
SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0,
    "TCP send window in bytes (default=32KB)");

static unsigned int nocong = 0;
TUNABLE_INT("hw.iw_cxgb.nocong", &nocong);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RW, &nocong, 0,
    "Turn off congestion control (default=0)");

static unsigned int cong_flavor = 1;
TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor);
SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RW, &cong_flavor, 0,
    "TCP Congestion control flavor (default=1)");

static void ep_timeout(void *arg);
static void connect_reply_upcall(struct iwch_ep *ep, int status);
static int iwch_so_upcall(struct socket *so, void *arg, int waitflag);

/*
 * Cruft to offload socket upcalls onto thread.
 */
static struct mtx req_lock;
static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
static struct task iw_cxgb_task;
static struct taskqueue *iw_cxgb_taskq;
static void process_req(void *ctx, int pending);

/*
 * (Re)arm the endpoint's MPA/close timer for ep_timeout_secs.  The first
 * arming takes an extra reference on the ep (dropped by ep_timeout() or
 * stop_ep_timer()); a pending timer is drained and restarted without
 * taking another reference.
 */
static void
start_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (callout_pending(&ep->timer)) {
		CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
		callout_deactivate(&ep->timer);
		callout_drain(&ep->timer);
	} else {
		/*
		 * XXX this looks racy
		 */
		get_ep(&ep->com);
		callout_init(&ep->timer, TRUE);
	}
	callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
}

/*
 * Cancel the endpoint timer and drop the reference start_ep_timer() took.
 * If the timer already fired (not pending) we must not drop the reference:
 * ep_timeout() owns it.
 */
static void
stop_ep_timer(struct iwch_ep *ep)
{
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	if (!callout_pending(&ep->timer)) {
		CTR3(KTR_IW_CXGB, "%s timer stopped when its not running!  ep %p state %u\n",
		    __func__, ep, ep->com.state);
		return;
	}
	callout_drain(&ep->timer);
	put_ep(&ep->com);
}

/*
 * Snapshot the offloaded connection's TCP state into the ep: hardware tid,
 * initial send/receive sequence numbers, and the effective MSS (clamped to
 * a minimum of 128).  Returns 0 on success or EINVAL if the connection is
 * not actually offloaded (TF_TOE clear).
 */
static int
set_tcpinfo(struct iwch_ep *ep)
{
	struct socket *so = ep->com.so;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;
	int rc = 0;

	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	if ((tp->t_flags & TF_TOE) == 0) {
		rc = EINVAL;
		printf("%s: connection NOT OFFLOADED!\n", __func__);
		goto done;
	}
	toep = tp->t_toe;

	ep->hwtid = toep->tp_tid;
	ep->snd_seq = tp->snd_nxt;
	ep->rcv_seq = tp->rcv_nxt;
	ep->emss = tp->t_maxseg;
	if (ep->emss < 128)
		ep->emss = 128;
done:
	INP_WUNLOCK(inp);
	return (rc);

}

/* Read the ep state under its lock. */
static enum iwch_ep_state
state_read(struct iwch_ep_common *epc)
{
	enum iwch_ep_state state;

	mtx_lock(&epc->lock);
	state = epc->state;
	mtx_unlock(&epc->lock);
	return state;
}

/* Set the ep state; caller must hold epc->lock. */
static void
__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{
	epc->state = new;
}

/* Set the ep state, taking the lock. */
static void
state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
{

	mtx_lock(&epc->lock);
	CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
	__state_set(epc, new);
	mtx_unlock(&epc->lock);
	return;
}

/*
 * Allocate and zero an endpoint of 'size' bytes (an iwch_ep or
 * iwch_listen_ep, both of which embed iwch_ep_common first), with refcount
 * initialized to 1 and its lock/condvar set up.  Returns NULL on failure
 * when flags is M_NOWAIT.
 */
static void *
alloc_ep(int size, int flags)
{
	struct iwch_ep_common *epc;

	epc = malloc(size, M_DEVBUF, flags);
	if (epc) {
		memset(epc, 0, size);
		refcount_init(&epc->refcount, 1);
		mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
		cv_init(&epc->waitq, "iwch_epc cv");
	}
	CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
	return epc;
}

/*
 * Final free of an endpoint once its refcount drops to zero.  The socket
 * must already be detached and the ep off the upcall request list.
 */
void __free_ep(struct iwch_ep_common *epc)
{
	CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
	KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so));
	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
	free(epc, M_DEVBUF);
}

/*
 * Look up a route to peer_ip.  Caller must RTFREE() the result.
 * NOTE(review): only peer_ip is actually used; local_ip, the ports and tos
 * are ignored (routing here is by destination only).
 */
static struct rtentry *
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
    __be16 peer_port, u8 tos)
{
	struct route iproute;
	struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;

	bzero(&iproute, sizeof iproute);
	dst->sin_family = AF_INET;
	dst->sin_len = sizeof *dst;
	dst->sin_addr.s_addr = peer_ip;

	rtalloc(&iproute);
	return iproute.ro_rt;
}

/*
 * Detach the receive upcall from the ep's socket and either soclose() it
 * (close != 0) or shut down both directions.  Clears epc->so either way;
 * in the shutdown case the socket reference is owned elsewhere (cm_id->so).
 */
static void
close_socket(struct iwch_ep_common *epc, int close)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
	SOCK_LOCK(epc->so);
	soupcall_clear(epc->so, SO_RCV);
	SOCK_UNLOCK(epc->so);
	if (close)
		soclose(epc->so);
	else
		soshutdown(epc->so, SHUT_WR|SHUT_RD);
	epc->so = NULL;
}

/* Half-close: shut down the send side only (sends FIN, keeps rx open). */
static void
shutdown_socket(struct iwch_ep_common *epc)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc,
	    epc->so, states[epc->state]);
	soshutdown(epc->so, SHUT_WR);
}

/*
 * Arrange for the connection to be reset rather than gracefully closed:
 * set SO_LINGER with a zero linger time so the eventual close sends RST.
 */
static void
abort_socket(struct iwch_ep *ep)
{
	struct sockopt sopt;
	int err;
	struct linger l;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	l.l_onoff = 1;
	l.l_linger = 0;

	/* linger_time of 0 forces RST to be sent */
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_LINGER;
	sopt.sopt_val = (caddr_t)&l;
	sopt.sopt_valsize = sizeof l;
	sopt.sopt_td = NULL;
	err = sosetopt(ep->com.so, &sopt);
	if (err)
		printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
}

/*
 * Build and send the MPA start request (active side), including any private
 * data staged in ep->mpa_pkt, then arm the MPA timer and move to
 * MPA_REQ_SENT.  Failures are reported to the ULP via connect_reply_upcall.
 *
 * NOTE(review): m_gethdr(9) is m_gethdr(how, type); passing (mpalen,
 * M_NOWAIT) looks like swapped/incorrect arguments, and a single mbuf may
 * not hold mpalen bytes if it exceeds MHLEN — confirm against the mbuf API.
 */
static void
send_mpa_req(struct iwch_ep *ep)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);

	mpalen = sizeof(*mpa) + ep->plen;
	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
	    (markers_enabled ? MPA_MARKERS : 0);
	mpa->private_data_size = htons(ep->plen);
	mpa->revision = mpa_rev;
	if (ep->plen)
		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);

	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	if (err) {
		m_freem(m);
		connect_reply_upcall(ep, -ENOMEM);
		return;
	}

	start_ep_timer(ep);
	state_set(&ep->com, MPA_REQ_SENT);
	return;
}

/*
 * Send an MPA start reply with the MPA_REJECT flag set (passive side
 * refusing the connection request), carrying optional private data.
 */
static int
send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;
	int err;

	CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = MPA_REJECT;
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);
	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
	PANIC_IF(err);
	return 0;
}

/*
 * Send the accepting MPA start reply (passive side), echoing the CRC
 * setting negotiated into ep->mpa_attr and our local marker preference,
 * then move to MPA_REP_SENT.  Returns the sosend() error, if any.
 */
static int
send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
{
	int mpalen;
	struct mpa_message *mpa;
	struct mbuf *m;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);

	mpalen = sizeof(*mpa) + plen;

	m = m_gethdr(mpalen, M_NOWAIT);
	if (m == NULL) {
		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
		return (-ENOMEM);
	}
	mpa = mtod(m, struct mpa_message *);
	m->m_len = mpalen;
	m->m_pkthdr.len = mpalen;
	memset(mpa, 0, sizeof(*mpa));
	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
	    (markers_enabled ? MPA_MARKERS : 0);
	mpa->revision = mpa_rev;
	mpa->private_data_size = htons(plen);
	if (plen)
		memcpy(mpa->private_data, pdata, plen);

	state_set(&ep->com, MPA_REP_SENT);
	return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
	    ep->com.thread);
}

/*
 * Deliver IW_CM_EVENT_CLOSE to the ULP and drop the cm_id reference;
 * also severs the ep's links to the cm_id and QP.
 */
static void
close_complete_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
		    ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

/*
 * Abortive teardown: force an RST, detach/shutdown the socket, tell the
 * ULP the connection is closed, mark the ep DEAD and drop a reference.
 */
static void
abort_connection(struct iwch_ep *ep)
{
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	state_set(&ep->com, ABORTING);
	abort_socket(ep);
	close_socket(&ep->com, 0);
	close_complete_upcall(ep);
	state_set(&ep->com, DEAD);
	put_ep(&ep->com);
}

/* Deliver IW_CM_EVENT_DISCONNECT (peer sent FIN) to the ULP. */
static void
peer_close_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_DISCONNECT;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d",
		    ep, ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

/*
 * Deliver IW_CM_EVENT_CLOSE with status ECONNRESET (peer aborted) and
 * drop the cm_id reference.
 */
static void
peer_abort_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CLOSE;
	event.status = ECONNRESET;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep,
		    ep->com.cm_id, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

/*
 * Deliver IW_CM_EVENT_CONNECT_REPLY (active-side connect outcome) to the
 * ULP.  For status 0 or ECONNREFUSED the accumulated private data is
 * included.  On negative status the cm_id reference is dropped.
 *
 * NOTE(review): the status < 0 branch dereferences ep->com.cm_id without
 * the NULL check used just above — confirm cm_id cannot be NULL on any
 * caller's error path.
 */
static void
connect_reply_upcall(struct iwch_ep *ep, int status)
{
	struct iw_cm_event event;

	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
	event.status = status;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;

	if ((status == 0) || (status == ECONNREFUSED)) {
		event.private_data_len = ep->plen;
		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	}
	if (ep->com.cm_id) {
		CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep,
		    ep->hwtid, status);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
	if (status < 0) {
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
	}
}

/*
 * Deliver IW_CM_EVENT_CONNECT_REQUEST to the listening endpoint's ULP for
 * a new child ep.  Takes a reference on the child (released when the ULP
 * accepts or rejects) and drops the reference held on the parent.
 */
static void
connect_request_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REQUEST;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;
	event.private_data_len = ep->plen;
	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
	event.provider_data = ep;
	event.so = ep->com.so;
	if (state_read(&ep->parent_ep->com) != DEAD) {
		get_ep(&ep->com);
		ep->parent_ep->com.cm_id->event_handler(
		    ep->parent_ep->com.cm_id,
		    &event);
	}
	put_ep(&ep->parent_ep->com);
}

/* Deliver IW_CM_EVENT_ESTABLISHED to the ULP. */
static void
established_upcall(struct iwch_ep *ep)
{
	struct iw_cm_event event;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_ESTABLISHED;
	if (ep->com.cm_id) {
		CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid);
		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
	}
}

/*
 * Active side: drain the socket and accumulate the peer's MPA start reply
 * into ep->mpa_pkt.  Once the full reply (header + private data) is in,
 * validate it, record the negotiated MPA attributes, snapshot TCP state,
 * move the QP to RTS and report the outcome to the ULP.  Partial data
 * simply re-arms the timer / returns to wait for more.
 */
static void
process_mpa_reply(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	int err;
	struct mbuf *top, *m;
	int flags = MSG_DONTWAIT;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	if (ep->com.so->so_rcv.sb_mb) {
		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
		    __FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
	}

	m = top;
	do {
		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			err = (-EINVAL);
			goto err;
		}

		/*
		 * copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;
		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * if we don't even have the mpa message, then bail.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *)ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		err = EPROTO;
		goto err;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		err = EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		err = EPROTO;
		goto err;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
		err = EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue process when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = ECONNREFUSED;
		goto err;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data. And
	 * the MPA header is valid.
	 */
	CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
	state_set(&ep->com, FPDU_MODE);
	ep->mpa_attr.initiator = 1;
	/*
	 * NOTE(review): '|' binds tighter than '?:', so this is
	 * ((flags & MPA_CRC) | crc_enabled) ? 1 : 0 — CRC ends up enabled
	 * if EITHER side wants it.  Confirm this matches the intended
	 * MPA negotiation (RFC 5044) rather than a precedence slip.
	 */
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		/* NOTE(review): err is 0 here, so the upcall below reports success. */
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	    "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	mask = IWCH_QP_ATTR_NEXT_STATE |
	    IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
	    IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = iwch_modify_qp(ep->com.qp->rhp,
	    ep->com.qp, mask, &attrs, 1);
	if (!err)
		goto out;
err:
	abort_connection(ep);
out:
	connect_reply_upcall(ep, err);
	return;
}

/*
 * Passive side: drain the socket and accumulate the peer's MPA start
 * request into ep->mpa_pkt.  Once complete and valid, record negotiated
 * attributes, snapshot TCP state, move to MPA_REQ_RCVD and hand the
 * connection request up to the listening ULP.  Partial data re-arms the
 * timer and waits; validation failure aborts the connection.
 */
static void
process_mpa_request(struct iwch_ep *ep)
{
	struct mpa_message *mpa;
	u16 plen;
	int flags = MSG_DONTWAIT;
	struct mbuf *top, *m;
	int err;
	struct uio uio;
	int len;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_WAIT)
		return;

	uio.uio_resid = len = 1000000;
	uio.uio_td = ep->com.thread;
	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
	if (err) {
		if (err == EWOULDBLOCK) {
			start_ep_timer(ep);
			return;
		}
		err = -err;
		goto err;
	}

	m = top;
	do {

		/*
		 * If we get more than the supported amount of private data
		 * then we must fail this connection.
		 */
		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
			CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__,
			    ep->mpa_pkt_len + m->m_len);
			goto err;
		}


		/*
		 * Copy the new data into our accumulation buffer.
		 */
		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
		ep->mpa_pkt_len += m->m_len;

		if (!m->m_next)
			m = m->m_nextpkt;
		else
			m = m->m_next;
	} while (m);

	m_freem(top);

	/*
	 * If we don't even have the mpa message, then bail.
	 * We'll continue process when more data arrives.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__,
		    ep->mpa_pkt_len);
		return;
	}
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/*
	 * Validate MPA Header.
	 */
	if (mpa->revision != mpa_rev) {
		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
		goto err;
	}

	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
		goto err;
	}

	/*
	 * If plen does not account for pkt size
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__,
		    ep->mpa_pkt_len);
		goto err;
	}
	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
		start_ep_timer(ep);
		CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__,
		    ep->mpa_pkt_len);
		return;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data.
	 */
	ep->mpa_attr.initiator = 0;
	/* NOTE(review): same '|' vs '?:' precedence question as in process_mpa_reply(). */
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa_rev;
	if (set_tcpinfo(ep)) {
		printf("%s set_tcpinfo error\n", __FUNCTION__);
		goto err;
	}
	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
	    "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

	state_set(&ep->com, MPA_REQ_RCVD);

	/* drive upcall */
	connect_request_upcall(ep);
	return;
err:
	abort_connection(ep);
	return;
}

/*
 * Handle the peer's FIN: advance the state machine (CLOSING -> MORIBUND ->
 * DEAD), moving the QP to CLOSING/IDLE and delivering disconnect/close
 * upcalls as appropriate.  Depending on the state this may also kick off
 * our own disconnect or release the final reference.
 */
static void
process_peer_close(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int disconnect = 1;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REQ_SENT:
		__state_set(&ep->com, CLOSING);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REP_SENT:
		__state_set(&ep->com, CLOSING);
		break;
	case FPDU_MODE:
		start_ep_timer(ep);
		__state_set(&ep->com, CLOSING);
		attrs.next_state = IWCH_QP_STATE_CLOSING;
		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
		    IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		peer_close_upcall(ep);
		break;
	case ABORTING:
		disconnect = 0;
		break;
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		disconnect = 0;
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
			    IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
		}
		close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		disconnect = 0;
		break;
	case DEAD:
		disconnect = 0;
		break;
	default:
		PANIC_IF(1);
	}
	mtx_unlock(&ep->com.lock);
	if (disconnect)
		iwch_ep_disconnect(ep, 0, M_NOWAIT);
	if (release)
		put_ep(&ep->com);
	return;
}

/*
 * Handle a fatal socket error (so_error / RST from the peer): stop timers,
 * move the QP to ERROR, deliver the appropriate upcall for the current
 * state, then (unless already ABORTING/DEAD) close the socket, mark the
 * ep DEAD and drop a reference.
 */
static void
process_conn_error(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int ret;

	mtx_lock(&ep->com.lock);
	CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		stop_ep_timer(ep);
		break;
	case MPA_REQ_SENT:
		stop_ep_timer(ep);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REP_SENT:
		ep->com.rpl_err = ECONNRESET;
		CTR1(KTR_IW_CXGB, "waking up ep %p", ep);
		break;
	case MPA_REQ_RCVD:

		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		break;
	case MORIBUND:
	case CLOSING:
		stop_ep_timer(ep);
		/*FALLTHROUGH*/
	case FPDU_MODE:
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = IWCH_QP_STATE_ERROR;
			ret = iwch_modify_qp(ep->com.qp->rhp,
			    ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
			    &attrs, 1);
			if (ret)
				log(LOG_ERR,
				    "%s - qp <- error failed!\n",
				    __FUNCTION__);
		}
		peer_abort_upcall(ep);
		break;
	case ABORTING:
		break;
	case DEAD:
		mtx_unlock(&ep->com.lock);
		CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__,
		    ep->com.so->so_error);
		return;
	default:
		PANIC_IF(1);
		break;
	}

	if (ep->com.state != ABORTING) {
		close_socket(&ep->com, 0);
		__state_set(&ep->com, DEAD);
		put_ep(&ep->com);
	}
	mtx_unlock(&ep->com.lock);
	return;
}

/*
 * Handle completion of a graceful close on our side: CLOSING -> MORIBUND,
 * or from MORIBUND finish the teardown (QP to IDLE, close the socket —
 * fully for accepted children, shutdown-only otherwise — deliver the
 * close upcall, mark DEAD and release).
 */
static void
process_close_complete(struct iwch_ep *ep)
{
	struct iwch_qp_attributes attrs;
	int release = 0;

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	PANIC_IF(!ep);

	/* The cm_id may be null if we failed to connect */
	mtx_lock(&ep->com.lock);
	switch (ep->com.state) {
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		break;
	case MORIBUND:
		stop_ep_timer(ep);
		if ((ep->com.cm_id) && (ep->com.qp)) {
			attrs.next_state = IWCH_QP_STATE_IDLE;
			iwch_modify_qp(ep->com.qp->rhp,
			    ep->com.qp,
			    IWCH_QP_ATTR_NEXT_STATE,
			    &attrs, 1);
		}
		if (ep->parent_ep)
			close_socket(&ep->com, 1);
		else
			close_socket(&ep->com, 0);
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		break;
	case ABORTING:
		break;
	case DEAD:
	default:
		PANIC_IF(1);
		break;
	}
	mtx_unlock(&ep->com.lock);
	if (release)
		put_ep(&ep->com);
	return;
}

/*
 * T3A does 3 things when a TERM is received:
 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
 * 2) generate an async event on the QP with the TERMINATE opcode
 * 3) post a TERMINATE opcde cqe into the associated CQ.
 *
 * For (1), we save the message in the qp for later consumer consumption.
 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
 * For (3), we toss the CQE in cxio_poll_cq().
 *
 * terminate() handles case (1)...
 */
static int
terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	/* The hw tid lives in bits 8..27 of the second response-desc word. */
	uint32_t hash = *((uint32_t *)r + 1);
	unsigned int tid = ntohl(hash) >> 8 & 0xfffff;
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (state_read(&ep->com) != FPDU_MODE)
		goto done;

	/* Strip the CPL header; what remains is the peer's TERMINATE msg. */
	m_adj(m, sizeof(struct cpl_rdma_terminate));

	CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes",
	    __func__, tid, ep, m->m_len);

	/* Stash the message in the QP for later retrieval by the consumer. */
	m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
	ep->com.qp->attr.terminate_msg_len = m->m_len;
	ep->com.qp->attr.is_terminate_local = 0;

done:
	m_freem(m);
	return (0);
}

/*
 * CPL_RDMA_EC_STATUS handler: a non-zero status means the hardware failed
 * the graceful close, so force the QP to ERROR and abort the connection.
 */
static int
ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
{
	struct adapter *sc = qs->adap;
	struct tom_data *td = sc->tom_softc;
	struct cpl_rdma_ec_status *rep = mtod(m, void *);
	unsigned int tid = GET_TID(rep);
	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
	struct socket *so = toep->tp_inp->inp_socket;
	struct iwch_ep *ep = so->so_rcv.sb_upcallarg;

	if (rep->status) {
		struct iwch_qp_attributes attrs;

		CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__);
		stop_ep_timer(ep);
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
		    ep->com.qp,
		    IWCH_QP_ATTR_NEXT_STATE,
		    &attrs, 1);
		abort_connection(ep);
	}

	m_freem(m);
	return (0);
}

/*
 * Callout handler for the ep timer: an MPA exchange or close did not finish
 * in time.  Moves the ep to ABORTING, pushes the QP to ERROR when one is
 * bound, aborts the connection, and drops the reference taken when the
 * timer was armed.
 */
static void
ep_timeout(void *arg)
{
	struct iwch_ep *ep = (struct iwch_ep *)arg;
	struct iwch_qp_attributes attrs;
	int err = 0;
	int abort = 1;

	mtx_lock(&ep->com.lock);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	switch (ep->com.state) {
	case MPA_REQ_SENT:
		__state_set(&ep->com, ABORTING);
		connect_reply_upcall(ep, -ETIMEDOUT);
		break;
	case MPA_REQ_WAIT:
		__state_set(&ep->com, ABORTING);
		break;
	case CLOSING:
	case MORIBUND:
		if (ep->com.cm_id && ep->com.qp)
			err = 1;
		__state_set(&ep->com, ABORTING);
		break;
	default:
		CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n",
		    __func__, ep, ep->com.state);
		abort = 0;
	}
	mtx_unlock(&ep->com.lock);
	if (err){
		attrs.next_state = IWCH_QP_STATE_ERROR;
		iwch_modify_qp(ep->com.qp->rhp,
		    ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
		    &attrs, 1);
	}
	if (abort)
		abort_connection(ep);
	put_ep(&ep->com);
}

/*
 * iw_cm reject entry point: refuse a pending connection request.  With
 * mpa_rev 0 the connection is simply aborted; otherwise an MPA reject
 * reply is sent and the socket shut down.  Always returns 0 unless the
 * ep is already DEAD.
 *
 * NOTE(review): the send_mpa_reject() result in 'err' is immediately
 * overwritten by the soshutdown() result, and neither is returned.
 */
int
iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
	int err;
	struct iwch_ep *ep = to_ep(cm_id);
	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

	if (state_read(&ep->com) == DEAD) {
		put_ep(&ep->com);
		return (-ECONNRESET);
	}
	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	if (mpa_rev == 0) {
		abort_connection(ep);
	} else {
		err = send_mpa_reject(ep, pdata, pdata_len);
		err = soshutdown(ep->com.so, 3);
	}
	put_ep(&ep->com);
	return 0;
}

/*
 * iw_cm accept entry point: bind the ULP's QP to the ep, validate the
 * requested IRD/ORD against device limits, move the QP to RTS, send the
 * accepting MPA reply, and deliver the ESTABLISHED upcall.  Error paths
 * unwind the cm_id reference taken here.
 */
int
iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err;
	struct iwch_qp_attributes attrs;
	enum iwch_qp_attr_mask mask;
	struct iwch_ep *ep = to_ep(cm_id);
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);

	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
	if (state_read(&ep->com) == DEAD) {
		err = -ECONNRESET;
		goto err;
	}

	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
	PANIC_IF(!qp);

	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
		abort_connection(ep);
		err = -EINVAL;
		goto err;
	}

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = qp;

	ep->com.rpl_err = 0;
	ep->com.rpl_done = 0;
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;
	CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);

	/* bind QP to EP and move to RTS */
	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = IWCH_QP_STATE_RTS;

	/* bind QP and TID with INIT_WR */
	mask = IWCH_QP_ATTR_NEXT_STATE |
	    IWCH_QP_ATTR_LLP_STREAM_HANDLE |
	    IWCH_QP_ATTR_MPA_ATTR |
	    IWCH_QP_ATTR_MAX_IRD |
	    IWCH_QP_ATTR_MAX_ORD;

	err = iwch_modify_qp(ep->com.qp->rhp,
	    ep->com.qp, mask, &attrs, 1);

	if (err)
		goto err1;

	err = send_mpa_reply(ep, conn_param->private_data,
	    conn_param->private_data_len);
	if (err)
		goto err1;
	state_set(&ep->com, FPDU_MODE);
	established_upcall(ep);
	put_ep(&ep->com);
	return 0;
err1:
	ep->com.cm_id = NULL;
	ep->com.qp = NULL;
	cm_id->rem_ref(cm_id);
err:
	put_ep(&ep->com);
	return err;
}

/*
 * Prepare a socket for CM use: install the receive upcall, switch the
 * socket to non-blocking, and enable TCP_NODELAY.  A TCP_NODELAY failure
 * is logged but not fatal; always returns 0.
 */
static int init_sock(struct iwch_ep_common *epc)
{
	int err;
	struct sockopt sopt;
	int on=1;

	SOCK_LOCK(epc->so);
	soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc);
	epc->so->so_state |= SS_NBIO;
	SOCK_UNLOCK(epc->so);
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = IPPROTO_TCP;
	sopt.sopt_name = TCP_NODELAY;
	sopt.sopt_val = (caddr_t)&on;
	sopt.sopt_valsize = sizeof on;
	sopt.sopt_td = NULL;
	err = sosetopt(epc->so, &sopt);
	if (err)
		printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err);

	return 0;
}

/*
 * Return non-zero if the destination address is local to this host
 * (loopback connections cannot be offloaded).  The port is zeroed for the
 * address-only lookup and restored afterwards.
 */
static int
is_loopback_dst(struct iw_cm_id *cm_id)
{
	uint16_t port = cm_id->remote_addr.sin_port;
	int ifa_present;

	cm_id->remote_addr.sin_port = 0;
	ifa_present = ifa_ifwithaddr_check(
	    (struct sockaddr *)&cm_id->remote_addr);
	cm_id->remote_addr.sin_port = port;
	return (ifa_present);
}

/*
 * iw_cm active-connect entry point: allocate an ep, take over the cm_id's
 * socket, verify there is a TOE-capable route to the peer, and start the
 * non-blocking connect.  The MPA exchange continues from the socket upcall
 * once the connect completes.
 *
 * NOTE(review): the "not TOE capable" and "no toedev" failure paths jump
 * to fail2 without setting err, so the caller sees 0 (success) — confirm
 * whether these should set EHOSTUNREACH/ENOSYS.  Also, IFCAP_TOE is a
 * capability bit; testing it against if_flags (rather than if_capenable)
 * looks suspect.
 */
int
iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	int err = 0;
	struct iwch_dev *h = to_iwch_dev(cm_id->device);
	struct iwch_ep *ep;
	struct rtentry *rt;
	struct toedev *tdev;

	if (is_loopback_dst(cm_id)) {
		err = -ENOSYS;
		goto out;
	}

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = (-ENOMEM);
		goto out;
	}
	callout_init(&ep->timer, TRUE);
	ep->plen = conn_param->private_data_len;
	if (ep->plen)
		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
		    conn_param->private_data, ep->plen);
	ep->ird = conn_param->ird;
	ep->ord = conn_param->ord;

	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->com.qp = get_qhp(h, conn_param->qpn);
	ep->com.thread = curthread;
	PANIC_IF(!ep->com.qp);
	CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn,
	    ep->com.qp, cm_id);

	ep->com.so = cm_id->so;
	err = init_sock(&ep->com);
	if (err)
		goto fail2;

	/* find a route */
	rt = find_route(cm_id->local_addr.sin_addr.s_addr,
	    cm_id->remote_addr.sin_addr.s_addr,
	    cm_id->local_addr.sin_port,
	    cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
	if (!rt) {
		printf("%s - cannot find route.\n", __FUNCTION__);
		err = EHOSTUNREACH;
		goto fail2;
	}

	if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) {
		printf("%s - interface not TOE capable.\n", __FUNCTION__);
		RTFREE(rt);
		goto fail2;
	}
	tdev = TOEDEV(rt->rt_ifp);
	if (tdev == NULL) {
		printf("%s - No toedev for interface.\n", __FUNCTION__);
		RTFREE(rt);
		goto fail2;
	}
	RTFREE(rt);

	state_set(&ep->com, CONNECTING);
	ep->com.local_addr = cm_id->local_addr;
	ep->com.remote_addr = cm_id->remote_addr;
	err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
	    ep->com.thread);
	if (!err)
		goto out;
fail2:
	put_ep(&ep->com);
out:
	return err;
}

/*
 * iw_cm listen entry point: allocate a listening ep around the cm_id's
 * socket, install the upcall, and call solisten().  On success the ep is
 * stored in cm_id->provider_data for iwch_destroy_listen().
 */
int
iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
{
	int err = 0;
	struct iwch_listen_ep *ep;

	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
	if (!ep) {
		printf("%s - cannot alloc ep.\n", __FUNCTION__);
		err = ENOMEM;
		goto out;
	}
	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
	cm_id->add_ref(cm_id);
	ep->com.cm_id = cm_id;
	ep->backlog = backlog;
	ep->com.local_addr = cm_id->local_addr;
	ep->com.thread = curthread;
	state_set(&ep->com, LISTEN);

	ep->com.so = cm_id->so;
	err = init_sock(&ep->com);
	if (err)
		goto fail;

	err = solisten(ep->com.so, ep->backlog, ep->com.thread);
	if (!err) {
		cm_id->provider_data = ep;
		goto out;
	}
	close_socket(&ep->com, 0);
fail:
	cm_id->rem_ref(cm_id);
	put_ep(&ep->com);
out:
	return err;
}

/* iw_cm destroy-listen entry point.  (Body continues beyond this view.) */
int
iwch_destroy_listen(struct iw_cm_id *cm_id)
{
	struct iwch_listen_ep *ep = to_listen_ep(cm_id);
        CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);

        state_set(&ep->com, DEAD);
        close_socket(&ep->com, 0);
        cm_id->rem_ref(cm_id);
        put_ep(&ep->com);
        return 0;
}

/*
 * Initiate a local close of an established or in-progress connection.
 * Under com.lock, advance the CM state machine (ABORTING for an abrupt
 * close, CLOSING/MORIBUND for a graceful one) and, after dropping the
 * lock, either abort the connection or shut down the socket so the
 * remaining teardown completes via socket upcalls.  Always returns 0.
 */
int
iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
{
        int close = 0;          /* set when socket action is needed below */

        mtx_lock(&ep->com.lock);

        /*
         * NOTE(review): ep was already dereferenced by mtx_lock() above,
         * so these assertions cannot catch a NULL ep — verify intent.
         */
        PANIC_IF(!ep);
        PANIC_IF(!ep->com.so);

        CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
            ep->com.so, states[ep->com.state], abrupt);

        switch (ep->com.state) {
        case MPA_REQ_WAIT:
        case MPA_REQ_SENT:
        case MPA_REQ_RCVD:
        case MPA_REP_SENT:
        case FPDU_MODE:
                close = 1;
                if (abrupt)
                        ep->com.state = ABORTING;
                else {
                        /* Graceful close: arm the timer to bound teardown. */
                        ep->com.state = CLOSING;
                        start_ep_timer(ep);
                }
                break;
        case CLOSING:
                close = 1;
                if (abrupt) {
                        stop_ep_timer(ep);
                        ep->com.state = ABORTING;
                } else
                        ep->com.state = MORIBUND;
                break;
        case MORIBUND:
        case ABORTING:
        case DEAD:
                /* Teardown already under way; nothing more to do. */
                CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n",
                        __func__, ep, ep->com.state);
                break;
        default:
                panic("unknown state: %d\n", ep->com.state);
                break;
        }

        mtx_unlock(&ep->com.lock);
        if (close) {
                if (abrupt)
                        abort_connection(ep);
                else {
                        if (!ep->parent_ep)
                                __state_set(&ep->com, MORIBUND);
                        shutdown_socket(&ep->com);
                }
        }
        return 0;
}

/*
 * Handle readable data on an endpoint's socket, dispatching on CM state:
 * an MPA reply for an active connect, an MPA request for a passive one.
 * Streaming data in any other state is unexpected and only logged.
 */
static void
process_data(struct iwch_ep *ep)
{
        struct sockaddr_in *local, *remote;

        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);

        switch (state_read(&ep->com)) {
        case MPA_REQ_SENT:
                process_mpa_reply(ep);
                break;
        case MPA_REQ_WAIT:

                /*
                 * XXX
                 * Set local and remote addrs here because when we
                 * dequeue the newly accepted socket, they aren't set
                 * yet in the pcb!
                 */
                in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
                in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
                CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__,
                    inet_ntoa(local->sin_addr),
                    inet_ntoa(remote->sin_addr));
                ep->com.local_addr = *local;
                ep->com.remote_addr = *remote;
                /* in_getsockaddr/in_getpeeraddr allocate from M_SONAME. */
                free(local, M_SONAME);
                free(remote, M_SONAME);
                process_mpa_request(ep);
                break;
        default:
                if (ep->com.so->so_rcv.sb_cc)
                        printf("%s Unexpected streaming data."
                            " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n",
                            __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state,
                            ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb);
                break;
        }
        return;
}

/*
 * Completion of an active connect: if the socket reached ISCONNECTED with
 * no error, start the MPA exchange; otherwise report the (negated) socket
 * error to the consumer and tear the endpoint down.
 */
static void
process_connected(struct iwch_ep *ep)
{
        CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
        if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
                send_mpa_req(ep);
        } else {
                connect_reply_upcall(ep, -ep->com.so->so_error);
                close_socket(&ep->com, 0);
                state_set(&ep->com, DEAD);
                put_ep(&ep->com);
        }
}

/*
 * Pull the first completed connection off a listening socket's accept
 * queue, detach it from the listener, install the receive upcall pointing
 * at child_ep, mark it non-blocking, and complete the accept.  Returns
 * the new socket (referenced via soref()) or NULL if the queue is empty.
 * *remote receives the peer address (M_SONAME; caller frees).
 */
static struct socket *
dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
{
        struct socket *so;

        ACCEPT_LOCK();
        so = TAILQ_FIRST(&head->so_comp);
        if (!so) {
                ACCEPT_UNLOCK();
                return NULL;
        }
        TAILQ_REMOVE(&head->so_comp, so, so_list);
        head->so_qlen--;
        SOCK_LOCK(so);
        so->so_qstate &= ~SQ_COMP;
        so->so_head = NULL;
        soref(so);
        soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep);
        so->so_state |= SS_NBIO;
        /* Sockets on so_comp are expected to be connected and error-free. */
        PANIC_IF(!(so->so_state & SS_ISCONNECTED));
        PANIC_IF(so->so_error);
        SOCK_UNLOCK(so);
        ACCEPT_UNLOCK();
        soaccept(so, (struct sockaddr **)remote);
        return so;
}

/*
 * A new connection arrived on a listening endpoint: allocate a child
 * endpoint, adopt the accepted socket, and start the passive MPA exchange.
 */
static void
process_newconn(struct iwch_ep
*parent_ep) 1571{ 1572 struct socket *child_so; 1573 struct iwch_ep *child_ep; 1574 struct sockaddr_in *remote; 1575 1576 CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so); 1577 child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); 1578 if (!child_ep) { 1579 log(LOG_ERR, "%s - failed to allocate ep entry!\n", 1580 __FUNCTION__); 1581 return; 1582 } 1583 child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); 1584 if (!child_so) { 1585 log(LOG_ERR, "%s - failed to dequeue child socket!\n", 1586 __FUNCTION__); 1587 __free_ep(&child_ep->com); 1588 return; 1589 } 1590 CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, 1591 inet_ntoa(remote->sin_addr), ntohs(remote->sin_port)); 1592 child_ep->com.tdev = parent_ep->com.tdev; 1593 child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family; 1594 child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port; 1595 child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr; 1596 child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len; 1597 child_ep->com.remote_addr.sin_family = remote->sin_family; 1598 child_ep->com.remote_addr.sin_port = remote->sin_port; 1599 child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr; 1600 child_ep->com.remote_addr.sin_len = remote->sin_len; 1601 child_ep->com.so = child_so; 1602 child_ep->com.cm_id = NULL; 1603 child_ep->com.thread = parent_ep->com.thread; 1604 child_ep->parent_ep = parent_ep; 1605 1606 free(remote, M_SONAME); 1607 get_ep(&parent_ep->com); 1608 child_ep->parent_ep = parent_ep; 1609 callout_init(&child_ep->timer, TRUE); 1610 state_set(&child_ep->com, MPA_REQ_WAIT); 1611 start_ep_timer(child_ep); 1612 1613 /* maybe the request has already been queued up on the socket... 
         */
        process_mpa_request(child_ep);
}

/*
 * Socket receive upcall, invoked with socket locks held.  Defers all real
 * work to the taskqueue: take a reference on the endpoint and enqueue it
 * on req_list unless it is already queued (tqe_prev non-NULL).
 */
static int
iwch_so_upcall(struct socket *so, void *arg, int waitflag)
{
        struct iwch_ep *ep = arg;

        CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
        mtx_lock(&req_lock);
        if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
                get_ep(&ep->com);
                TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
                taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task);
        }
        mtx_unlock(&req_lock);
        return (SU_OK);
}

/*
 * Taskqueue-context handler for one endpoint's socket event.  Dispatch
 * order matters: connect completion and new-connection arrival are keyed
 * off the CM state, then socket error, peer close (read shutdown before
 * CLOSING), full disconnect, and finally plain readable data.
 */
static void
process_socket_event(struct iwch_ep *ep)
{
        int state = state_read(&ep->com);
        struct socket *so = ep->com.so;

        CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
        if (state == CONNECTING) {
                process_connected(ep);
                return;
        }

        if (state == LISTEN) {
                process_newconn(ep);
                return;
        }

        /* connection error */
        if (so->so_error) {
                process_conn_error(ep);
                return;
        }

        /* peer close */
        if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
                process_peer_close(ep);
                return;
        }

        /* close complete */
        if (so->so_state & (SS_ISDISCONNECTED)) {
                process_close_complete(ep);
                return;
        }

        /* rx data */
        process_data(ep);
        return;
}

/*
 * Taskqueue handler: drain req_list, processing each queued endpoint.
 * req_lock is dropped around process_socket_event() since handlers may
 * sleep; tqe_prev is cleared so the upcall can re-queue the endpoint.
 */
static void
process_req(void *ctx, int pending)
{
        struct iwch_ep_common *epc;

        CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__);
        mtx_lock(&req_lock);
        while (!TAILQ_EMPTY(&req_list)) {
                epc = TAILQ_FIRST(&req_list);
                TAILQ_REMOVE(&req_list, epc, entry);
                epc->entry.tqe_prev = NULL;
                mtx_unlock(&req_lock);
                if (epc->so)
                        process_socket_event((struct iwch_ep *)epc);
                /* Drop the reference taken by iwch_so_upcall(). */
                put_ep(epc);
                mtx_lock(&req_lock);
        }
        mtx_unlock(&req_lock);
1691} 1692 1693int 1694iwch_cm_init(void) 1695{ 1696 TAILQ_INIT(&req_list); 1697 mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF); 1698 iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT, 1699 taskqueue_thread_enqueue, &iw_cxgb_taskq); 1700 if (iw_cxgb_taskq == NULL) { 1701 printf("failed to allocate iw_cxgb taskqueue\n"); 1702 return (ENOMEM); 1703 } 1704 taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq"); 1705 TASK_INIT(&iw_cxgb_task, 0, process_req, NULL); 1706 return (0); 1707} 1708 1709void 1710iwch_cm_term(void) 1711{ 1712 1713 taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task); 1714 taskqueue_free(iw_cxgb_taskq); 1715} 1716 1717void 1718iwch_cm_init_cpl(struct adapter *sc) 1719{ 1720 1721 t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate); 1722 t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status); 1723} 1724 1725void 1726iwch_cm_term_cpl(struct adapter *sc) 1727{ 1728 1729 t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL); 1730 t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL); 1731} 1732#endif 1733