qp.c revision 309450
/*
 * Copyright (c) 2006-2014 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#if HAVE_CONFIG_H
# include <config.h>
#endif				/* HAVE_CONFIG_H */

#include <assert.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include <netinet/in.h>
#include "libcxgb4.h"

#ifdef STATS
struct c4iw_stats c4iw_stats;
#endif

static void copy_wr_to_sq(struct t4_wq *wq, union t4_wr *wqe, u8 len16)
{
	u64 *src, *dst;

	src = (u64 *)wqe;
	dst = (u64 *)((u8 *)wq->sq.queue + wq->sq.wq_pidx * T4_EQ_ENTRY_SIZE);
	if (t4_sq_onchip(wq)) {
		len16 = align(len16, 4);
		wc_wmb();
	}
	while (len16) {
		*dst++ = *src++;
		if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
			dst = (u64 *)wq->sq.queue;
		*dst++ = *src++;
		if (dst == (u64 *)&wq->sq.queue[wq->sq.size])
			dst = (u64 *)wq->sq.queue;
		len16--;
	}
}

static void copy_wr_to_rq(struct t4_wq *wq, union t4_recv_wr *wqe, u8 len16)
{
	u64 *src, *dst;

	src = (u64 *)wqe;
	dst = (u64 *)((u8 *)wq->rq.queue + wq->rq.wq_pidx * T4_EQ_ENTRY_SIZE);
	while (len16) {
		*dst++ = *src++;
		if (dst >= (u64 *)&wq->rq.queue[wq->rq.size])
			dst = (u64 *)wq->rq.queue;
		*dst++ = *src++;
		if (dst >= (u64 *)&wq->rq.queue[wq->rq.size])
			dst = (u64 *)wq->rq.queue;
		len16--;
	}
}

static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
		      struct ibv_send_wr *wr, int max, u32 *plenp)
{
	u8 *dstp, *srcp;
	u32 plen = 0;
	int i;
	int len;

	dstp = (u8 *)immdp->data;
	for (i = 0; i < wr->num_sge; i++) {
		if ((plen + wr->sg_list[i].length) > max)
			return -EMSGSIZE;
		srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
		plen += wr->sg_list[i].length;
		len = wr->sg_list[i].length;
		memcpy(dstp, srcp, len);
		dstp += len;
		srcp += len;
	}
	len = ROUND_UP(plen + 8, 16) - (plen + 8);
	if (len)
		memset(dstp, 0, len);
	immdp->op = FW_RI_DATA_IMMD;
	immdp->r1 = 0;
	immdp->r2 = 0;
	immdp->immdlen = cpu_to_be32(plen);
	*plenp = plen;
	return 0;
}

static int build_isgl(struct fw_ri_isgl *isglp, struct ibv_sge *sg_list,
		      int num_sge, u32 *plenp)
{
	int i;
	u32 plen = 0;
	__be64 *flitp = (__be64 *)isglp->sge;

	for (i = 0; i < num_sge; i++) {
		if ((plen + sg_list[i].length) < plen)
			return -EMSGSIZE;
		plen += sg_list[i].length;
		*flitp++ = cpu_to_be64(((u64)sg_list[i].lkey << 32) |
				       sg_list[i].length);
		*flitp++ = cpu_to_be64(sg_list[i].addr);
	}
	*flitp = 0;
	isglp->op = FW_RI_DATA_ISGL;
	isglp->r1 = 0;
	isglp->nsge = cpu_to_be16(num_sge);
	isglp->r2 = 0;
	if (plenp)
		*plenp = plen;
	return 0;
}

static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
			   struct ibv_send_wr *wr, u8 *len16)
{
	u32 plen;
	int size;
	int ret;

	if (wr->num_sge > T4_MAX_SEND_SGE)
		return -EINVAL;
	if (wr->send_flags & IBV_SEND_SOLICITED)
		wqe->send.sendop_pkd = cpu_to_be32(
			V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE));
	else
		wqe->send.sendop_pkd = cpu_to_be32(
			V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND));
	wqe->send.stag_inv = 0;
	wqe->send.r3 = 0;
	wqe->send.r4 = 0;

	plen = 0;
	if (wr->num_sge) {
		if (wr->send_flags & IBV_SEND_INLINE) {
			ret = build_immd(sq, wqe->send.u.immd_src, wr,
					 T4_MAX_SEND_INLINE, &plen);
			if (ret)
				return ret;
			size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
			       plen;
		} else {
			ret = build_isgl(wqe->send.u.isgl_src,
					 wr->sg_list, wr->num_sge, &plen);
			if (ret)
				return ret;
			size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
			       wr->num_sge * sizeof(struct fw_ri_sge);
		}
	} else {
		wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
		wqe->send.u.immd_src[0].r1 = 0;
		wqe->send.u.immd_src[0].r2 = 0;
		wqe->send.u.immd_src[0].immdlen = 0;
		size = sizeof wqe->send + sizeof(struct fw_ri_immd);
		plen = 0;
	}
	*len16 = DIV_ROUND_UP(size, 16);
	wqe->send.plen = cpu_to_be32(plen);
	return 0;
}

static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
			    struct ibv_send_wr *wr, u8 *len16)
{
	u32 plen;
	int size;
	int ret;

	if (wr->num_sge > T4_MAX_SEND_SGE)
		return -EINVAL;
	wqe->write.r2 = 0;
	wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
	wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
	if (wr->num_sge) {
		if (wr->send_flags & IBV_SEND_INLINE) {
			ret = build_immd(sq, wqe->write.u.immd_src, wr,
					 T4_MAX_WRITE_INLINE, &plen);
			if (ret)
				return ret;
			size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
			       plen;
		} else {
			ret = build_isgl(wqe->write.u.isgl_src,
					 wr->sg_list, wr->num_sge, &plen);
			if (ret)
				return ret;
			size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
			       wr->num_sge * sizeof(struct fw_ri_sge);
		}
	} else {
		wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
		wqe->write.u.immd_src[0].r1 = 0;
		wqe->write.u.immd_src[0].r2 = 0;
		wqe->write.u.immd_src[0].immdlen = 0;
		size = sizeof wqe->write + sizeof(struct fw_ri_immd);
		plen = 0;
	}
	*len16 = DIV_ROUND_UP(size, 16);
	wqe->write.plen = cpu_to_be32(plen);
	return 0;
}

static int build_rdma_read(union t4_wr *wqe, struct ibv_send_wr *wr, u8 *len16)
{
	if (wr->num_sge > 1)
		return -EINVAL;
	if (wr->num_sge) {
		wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey);
		wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr >> 32));
		wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr);
		wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey);
		wqe->read.plen = cpu_to_be32(wr->sg_list[0].length);
		wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr >> 32));
		wqe->read.to_sink_lo = cpu_to_be32((u32)(wr->sg_list[0].addr));
	} else {
		wqe->read.stag_src = cpu_to_be32(2);
		wqe->read.to_src_hi = 0;
		wqe->read.to_src_lo = 0;
		wqe->read.stag_sink = cpu_to_be32(2);
		wqe->read.plen = 0;
		wqe->read.to_sink_hi = 0;
		wqe->read.to_sink_lo = 0;
	}
	wqe->read.r2 = 0;
	wqe->read.r5 = 0;
	*len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
	return 0;
}

static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
			   struct ibv_recv_wr *wr, u8 *len16)
{
	int ret;

	ret = build_isgl(&wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
	if (ret)
		return ret;
	*len16 = DIV_ROUND_UP(sizeof wqe->recv +
			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
	return 0;
}

void dump_wqe(void *arg)
{
	u64 *p = arg;
	int len16;

	len16 = be64_to_cpu(*p) & 0xff;
	while (len16--) {
		printf("%02x: %016lx ", (u8)(unsigned long)p, be64_to_cpu(*p));
		p++;
		printf("%016lx\n", be64_to_cpu(*p));
		p++;
	}
}

static void ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 idx)
{
	struct ibv_modify_qp cmd;
	struct ibv_qp_attr attr;
	int mask;
	int ret;

	wc_wmb();
	if (qid == qhp->wq.sq.qid) {
		attr.sq_psn = idx;
		mask = IBV_QP_SQ_PSN;
	} else {
		attr.rq_psn = idx;
		mask = IBV_QP_RQ_PSN;
	}
	ret = ibv_cmd_modify_qp(&qhp->ibv_qp, &attr, mask, &cmd, sizeof cmd);
	assert(!ret);
}

int c4iw_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
		   struct ibv_send_wr **bad_wr)
{
	int err = 0;
	u8 len16;
	enum fw_wr_opcodes fw_opcode;
	enum fw_ri_wr_flags fw_flags;
	struct c4iw_qp *qhp;
	union t4_wr *wqe, lwqe;
	u32 num_wrs;
	struct t4_swsqe *swsqe;
	u16 idx = 0;

	qhp = to_c4iw_qp(ibqp);
	pthread_spin_lock(&qhp->lock);
	if (t4_wq_in_error(&qhp->wq)) {
		pthread_spin_unlock(&qhp->lock);
		return -EINVAL;
	}
	num_wrs = t4_sq_avail(&qhp->wq);
	if (num_wrs == 0) {
		pthread_spin_unlock(&qhp->lock);
		return -ENOMEM;
	}
	while (wr) {
		if (num_wrs == 0) {
			err = -ENOMEM;
			*bad_wr = wr;
			break;
		}

		wqe = &lwqe;
		fw_flags = 0;
		if (wr->send_flags & IBV_SEND_SOLICITED)
			fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
		if (wr->send_flags & IBV_SEND_SIGNALED || qhp->sq_sig_all)
			fw_flags |= FW_RI_COMPLETION_FLAG;
		swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
		switch (wr->opcode) {
		case IBV_WR_SEND:
			INC_STAT(send);
			if (wr->send_flags & IBV_SEND_FENCE)
				fw_flags |= FW_RI_READ_FENCE_FLAG;
			fw_opcode = FW_RI_SEND_WR;
			swsqe->opcode = FW_RI_SEND;
			err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
			break;
		case IBV_WR_RDMA_WRITE:
			INC_STAT(write);
			fw_opcode = FW_RI_RDMA_WRITE_WR;
			swsqe->opcode = FW_RI_RDMA_WRITE;
			err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
			break;
		case IBV_WR_RDMA_READ:
			INC_STAT(read);
			fw_opcode = FW_RI_RDMA_READ_WR;
			swsqe->opcode = FW_RI_READ_REQ;
			fw_flags = 0;
			err = build_rdma_read(wqe, wr, &len16);
			if (err)
				break;
			swsqe->read_len = wr->sg_list ?
					  wr->sg_list[0].length : 0;
			if (!qhp->wq.sq.oldest_read)
				qhp->wq.sq.oldest_read = swsqe;
			break;
		default:
			PDBG("%s post of type=%d TBD!\n", __func__,
			     wr->opcode);
			err = -EINVAL;
		}
		if (err) {
			*bad_wr = wr;
			break;
		}
		swsqe->idx = qhp->wq.sq.pidx;
		swsqe->complete = 0;
		swsqe->signaled = (wr->send_flags & IBV_SEND_SIGNALED) ||
				  qhp->sq_sig_all;
		swsqe->flushed = 0;
		swsqe->wr_id = wr->wr_id;

		init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
		PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x\n",
		     __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
		     swsqe->opcode);
		wr = wr->next;
		num_wrs--;
		copy_wr_to_sq(&qhp->wq, wqe, len16);
		t4_sq_produce(&qhp->wq, len16);
		idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
	}

	t4_ring_sq_db(&qhp->wq, idx, dev_is_t5(qhp->rhp),
		      len16, wqe);
	qhp->wq.sq.queue[qhp->wq.sq.size].status.host_wq_pidx = \
			(qhp->wq.sq.wq_pidx);
	pthread_spin_unlock(&qhp->lock);
	return err;
}

int c4iw_post_receive(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
		      struct ibv_recv_wr **bad_wr)
{
	int err = 0;
	struct c4iw_qp *qhp;
	union t4_recv_wr *wqe, lwqe;
	u32 num_wrs;
	u8 len16 = 0;
	u16 idx = 0;

	qhp = to_c4iw_qp(ibqp);
	pthread_spin_lock(&qhp->lock);
	if (t4_wq_in_error(&qhp->wq)) {
		pthread_spin_unlock(&qhp->lock);
		return -EINVAL;
	}
	INC_STAT(recv);
	num_wrs = t4_rq_avail(&qhp->wq);
	if (num_wrs == 0) {
		pthread_spin_unlock(&qhp->lock);
		return -ENOMEM;
	}
	while (wr) {
		if (wr->num_sge > T4_MAX_RECV_SGE) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}
		wqe = &lwqe;
		if (num_wrs)
			err = build_rdma_recv(qhp, wqe, wr, &len16);
		else
			err = -ENOMEM;
		if (err) {
			*bad_wr = wr;
			break;
		}

		qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id;

		wqe->recv.opcode = FW_RI_RECV_WR;
		wqe->recv.r1 = 0;
		wqe->recv.wrid = qhp->wq.rq.pidx;
		wqe->recv.r2[0] = 0;
		wqe->recv.r2[1] = 0;
		wqe->recv.r2[2] = 0;
		wqe->recv.len16 = len16;
		PDBG("%s cookie 0x%llx pidx %u\n", __func__,
		     (unsigned long long)wr->wr_id, qhp->wq.rq.pidx);
		copy_wr_to_rq(&qhp->wq, wqe, len16);
		t4_rq_produce(&qhp->wq, len16);
		idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
		wr = wr->next;
		num_wrs--;
	}

	t4_ring_rq_db(&qhp->wq, idx, dev_is_t5(qhp->rhp),
		      len16, wqe);
	qhp->wq.rq.queue[qhp->wq.rq.size].status.host_wq_pidx = \
			(qhp->wq.rq.wq_pidx);
	pthread_spin_unlock(&qhp->lock);
	return err;
}

static void update_qp_state(struct c4iw_qp *qhp)
{
	struct ibv_query_qp cmd;
	struct ibv_qp_attr attr;
	struct ibv_qp_init_attr iattr;
	int ret;

	ret = ibv_cmd_query_qp(&qhp->ibv_qp, &attr, IBV_QP_STATE, &iattr,
			       &cmd, sizeof cmd);
	assert(!ret);
	if (!ret)
		qhp->ibv_qp.state = attr.qp_state;
}

/*
 * Assumes qhp lock is held.
 */
void c4iw_flush_qp(struct c4iw_qp *qhp)
{
	struct c4iw_cq *rchp, *schp;
	int count;

	if (qhp->wq.flushed)
		return;

	update_qp_state(qhp);

	rchp = to_c4iw_cq(qhp->ibv_qp.recv_cq);
	schp = to_c4iw_cq(qhp->ibv_qp.send_cq);

	PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
	qhp->wq.flushed = 1;
	pthread_spin_unlock(&qhp->lock);

	/* locking hierarchy: cq lock first, then qp lock. */
	pthread_spin_lock(&rchp->lock);
	pthread_spin_lock(&qhp->lock);
	c4iw_flush_hw_cq(rchp);
	c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
	c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
	pthread_spin_unlock(&qhp->lock);
	pthread_spin_unlock(&rchp->lock);

	/* locking hierarchy: cq lock first, then qp lock. */
	pthread_spin_lock(&schp->lock);
	pthread_spin_lock(&qhp->lock);
	if (schp != rchp)
		c4iw_flush_hw_cq(schp);
	c4iw_flush_sq(qhp);
	pthread_spin_unlock(&qhp->lock);
	pthread_spin_unlock(&schp->lock);
	pthread_spin_lock(&qhp->lock);
}

void c4iw_flush_qps(struct c4iw_dev *dev)
{
	int i;

	pthread_spin_lock(&dev->lock);
	for (i = 0; i < dev->max_qp; i++) {
		struct c4iw_qp *qhp = dev->qpid2ptr[i];

		if (qhp) {
			if (!qhp->wq.flushed && t4_wq_in_error(&qhp->wq)) {
				pthread_spin_lock(&qhp->lock);
				c4iw_flush_qp(qhp);
				pthread_spin_unlock(&qhp->lock);
			}
		}
	}
	pthread_spin_unlock(&dev->lock);
}
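
/*
 * Illustrative sketch, not part of revision 309450: how a libibverbs
 * consumer would exercise the inline SEND path above, i.e.
 * ibv_post_send() -> c4iw_post_send() -> build_rdma_send() -> build_immd().
 * The function name and the qp/buf/len/lkey parameters are hypothetical;
 * only the libibverbs structures, flags, and ibv_post_send() itself are
 * real.  Kept under #if 0 so it is never compiled into the library.
 */
#if 0
#include <stdint.h>
#include <infiniband/verbs.h>

static int example_post_inline_send(struct ibv_qp *qp, void *buf,
				    uint32_t len, uint32_t lkey)
{
	struct ibv_sge sge = {
		.addr	= (uintptr_t)buf,
		.length	= len,
		.lkey	= lkey,		/* not consulted for inline data */
	};
	struct ibv_send_wr wr = {
		.wr_id		= 1,
		.sg_list	= &sge,
		.num_sge	= 1,
		.opcode		= IBV_WR_SEND,
		/*
		 * IBV_SEND_INLINE routes the payload through build_immd(),
		 * which copies it into the WQE; the total length must fit
		 * within T4_MAX_SEND_INLINE or the post fails with -EMSGSIZE
		 * and *bad_wr points at this WR.  IBV_SEND_SIGNALED requests
		 * a completion (FW_RI_COMPLETION_FLAG) for this WR.
		 */
		.send_flags	= IBV_SEND_INLINE | IBV_SEND_SIGNALED,
	};
	struct ibv_send_wr *bad_wr;

	return ibv_post_send(qp, &wr, &bad_wr);
}
#endif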