/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
32331769Shselasky#define _GNU_SOURCE
33331769Shselasky#include <config.h>
34219820Sjeff
35219820Sjeff#include <stdio.h>
36219820Sjeff#include <stdlib.h>
37219820Sjeff#include <unistd.h>
38219820Sjeff#include <string.h>
39219820Sjeff#include <sys/types.h>
40219820Sjeff#include <sys/socket.h>
41219820Sjeff#include <sys/time.h>
42219820Sjeff#include <netdb.h>
43331769Shselasky#include <stdlib.h>
44219820Sjeff#include <getopt.h>
45219820Sjeff#include <arpa/inet.h>
46219820Sjeff#include <time.h>
47219820Sjeff
48219820Sjeff#include "pingpong.h"
49219820Sjeff
/*
 * Work-request IDs stamped on every posted receive and send.
 *
 * The two values are distinct single bits on purpose: besides identifying
 * a completion, main() ORs them together in ctx->pending and clears them
 * with "&= ~wr_id" to track which completions are still outstanding.
 */
enum {
	PINGPONG_RECV_WRID = 1 << 0,
	PINGPONG_SEND_WRID = 1 << 1,
};
54219820Sjeff
/*
 * System page size.  Set once in main() from sysconf(_SC_PAGESIZE) and used
 * by pp_init_ctx() as the alignment of the message buffer.
 */
static int page_size;
56219820Sjeff
57219820Sjeffstruct pingpong_context {
58219820Sjeff	struct ibv_context	*context;
59219820Sjeff	struct ibv_comp_channel *channel;
60219820Sjeff	struct ibv_pd		*pd;
61219820Sjeff	struct ibv_mr		*mr;
62219820Sjeff	struct ibv_cq		*cq;
63219820Sjeff	struct ibv_qp		*qp;
64219820Sjeff	struct ibv_ah		*ah;
65219820Sjeff	void			*buf;
66219820Sjeff	int			 size;
67331769Shselasky	int			 send_flags;
68219820Sjeff	int			 rx_depth;
69219820Sjeff	int			 pending;
70219820Sjeff	struct ibv_port_attr     portinfo;
71219820Sjeff};
72219820Sjeff
73219820Sjeffstruct pingpong_dest {
74219820Sjeff	int lid;
75219820Sjeff	int qpn;
76219820Sjeff	int psn;
77219820Sjeff	union ibv_gid gid;
78219820Sjeff};
79219820Sjeff
80219820Sjeffstatic int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
81219820Sjeff			  int sl, struct pingpong_dest *dest, int sgid_idx)
82219820Sjeff{
83219820Sjeff	struct ibv_ah_attr ah_attr = {
84219820Sjeff		.is_global     = 0,
85219820Sjeff		.dlid          = dest->lid,
86219820Sjeff		.sl            = sl,
87219820Sjeff		.src_path_bits = 0,
88219820Sjeff		.port_num      = port
89219820Sjeff	};
90219820Sjeff	struct ibv_qp_attr attr = {
91219820Sjeff		.qp_state		= IBV_QPS_RTR
92219820Sjeff	};
93219820Sjeff
94219820Sjeff	if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE)) {
95219820Sjeff		fprintf(stderr, "Failed to modify QP to RTR\n");
96219820Sjeff		return 1;
97219820Sjeff	}
98219820Sjeff
99219820Sjeff	attr.qp_state	    = IBV_QPS_RTS;
100219820Sjeff	attr.sq_psn	    = my_psn;
101219820Sjeff
102219820Sjeff	if (ibv_modify_qp(ctx->qp, &attr,
103219820Sjeff			  IBV_QP_STATE              |
104219820Sjeff			  IBV_QP_SQ_PSN)) {
105219820Sjeff		fprintf(stderr, "Failed to modify QP to RTS\n");
106219820Sjeff		return 1;
107219820Sjeff	}
108219820Sjeff
109219820Sjeff	if (dest->gid.global.interface_id) {
110219820Sjeff		ah_attr.is_global = 1;
111219820Sjeff		ah_attr.grh.hop_limit = 1;
112219820Sjeff		ah_attr.grh.dgid = dest->gid;
113219820Sjeff		ah_attr.grh.sgid_index = sgid_idx;
114219820Sjeff	}
115219820Sjeff
116219820Sjeff	ctx->ah = ibv_create_ah(ctx->pd, &ah_attr);
117219820Sjeff	if (!ctx->ah) {
118219820Sjeff		fprintf(stderr, "Failed to create AH\n");
119219820Sjeff		return 1;
120219820Sjeff	}
121219820Sjeff
122219820Sjeff	return 0;
123219820Sjeff}
124219820Sjeff
/*
 * Client side of the out-of-band address exchange.
 *
 * Connects a TCP socket to servername:port, sends the local address as
 * "lid:qpn:psn:gid" (hex, fixed width), reads the server's address in the
 * same format and acknowledges it with "done".
 *
 * Returns a malloc()ed pingpong_dest describing the server (the caller
 * frees it), or NULL after printing a diagnostic on any failure.
 */
static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
						 const struct pingpong_dest *my_dest)
{
	struct addrinfo *res = NULL, *t;
	struct addrinfo hints = {
		.ai_family   = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1;
	struct pingpong_dest *rem_dest = NULL;
	unsigned int lid, qpn, psn;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(servername, service, &hints, &res);

	/*
	 * getaddrinfo() reports failure with any non-zero value; the sign of
	 * the EAI_* codes differs between platforms, so do not test "< 0".
	 */
	if (n != 0) {
		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
		free(service);
		return NULL;
	}

	/* Try each resolved address until one accepts the connection. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
		return NULL;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		 my_dest->qpn, my_dest->psn, gid);
	if (write(sockfd, msg, sizeof msg) != sizeof msg) {
		fprintf(stderr, "Couldn't send local address\n");
		goto out;
	}

	if (read(sockfd, msg, sizeof msg) != sizeof msg ||
	    write(sockfd, "done", sizeof "done") != sizeof "done") {
		perror("client read/write");
		fprintf(stderr, "Couldn't read/write remote address\n");
		goto out;
	}

	/* The bytes came from the network: force termination before parsing. */
	msg[sizeof msg - 1] = '\0';

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/* %32s bounds the GID text to the 33-byte buffer. */
	if (sscanf(msg, "%x:%x:%x:%32s", &lid, &qpn, &psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	rem_dest->lid = lid;
	rem_dest->qpn = qpn;
	rem_dest->psn = psn;
	wire_gid_to_gid(gid, &rem_dest->gid);

out:
	close(sockfd);
	return rem_dest;
}
196219820Sjeff
/*
 * Server side of the out-of-band address exchange.
 *
 * Listens on the given TCP port, accepts one connection and reads the
 * client's address ("lid:qpn:psn:gid" in hex).  It then connects the QP to
 * that address with pp_connect_ctx(), sends the local address back and
 * waits for the client's "done" acknowledgement.
 *
 * Returns a malloc()ed pingpong_dest describing the client (the caller
 * frees it), or NULL after printing a diagnostic on any failure.
 */
static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
						 int ib_port, int port, int sl,
						 const struct pingpong_dest *my_dest,
						 int sgid_idx)
{
	struct addrinfo *res = NULL, *t;
	struct addrinfo hints = {
		.ai_flags    = AI_PASSIVE,
		.ai_family   = AF_INET,
		.ai_socktype = SOCK_STREAM
	};
	char *service;
	char msg[sizeof "0000:000000:000000:00000000000000000000000000000000"];
	int n;
	int sockfd = -1, connfd;
	struct pingpong_dest *rem_dest = NULL;
	unsigned int lid, qpn, psn;
	char gid[33];

	if (asprintf(&service, "%d", port) < 0)
		return NULL;

	n = getaddrinfo(NULL, service, &hints, &res);

	/*
	 * getaddrinfo() reports failure with any non-zero value; the sign of
	 * the EAI_* codes differs between platforms, so do not test "< 0".
	 */
	if (n != 0) {
		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
		free(service);
		return NULL;
	}

	/* Bind to the first resolved address that works. */
	for (t = res; t; t = t->ai_next) {
		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
		if (sockfd >= 0) {
			n = 1;

			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);

			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
				break;
			close(sockfd);
			sockfd = -1;
		}
	}

	freeaddrinfo_null(res);
	free(service);

	if (sockfd < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		return NULL;
	}

	if (listen(sockfd, 1) < 0) {
		fprintf(stderr, "Couldn't listen to port %d\n", port);
		close(sockfd);
		return NULL;
	}

	/* Exactly one client is served; the listening socket is not needed afterwards. */
	connfd = accept(sockfd, NULL, NULL);
	close(sockfd);
	if (connfd < 0) {
		fprintf(stderr, "accept() failed\n");
		return NULL;
	}

	n = read(connfd, msg, sizeof msg);
	if (n != sizeof msg) {
		perror("server read");
		fprintf(stderr, "%d/%d: Couldn't read remote address\n", n, (int) sizeof msg);
		goto out;
	}

	/* The bytes came from the network: force termination before parsing. */
	msg[sizeof msg - 1] = '\0';

	rem_dest = malloc(sizeof *rem_dest);
	if (!rem_dest)
		goto out;

	/* %32s bounds the GID text to the 33-byte buffer. */
	if (sscanf(msg, "%x:%x:%x:%32s", &lid, &qpn, &psn, gid) != 4) {
		fprintf(stderr, "Couldn't parse remote address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
	rem_dest->lid = lid;
	rem_dest->qpn = qpn;
	rem_dest->psn = psn;
	wire_gid_to_gid(gid, &rem_dest->gid);

	/* Connect the QP before replying to the client. */
	if (pp_connect_ctx(ctx, ib_port, my_dest->psn, sl, rem_dest,
			   sgid_idx)) {
		fprintf(stderr, "Couldn't connect to remote QP\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}

	gid_to_wire_gid(&my_dest->gid, gid);
	snprintf(msg, sizeof msg, "%04x:%06x:%06x:%s", my_dest->lid,
		 my_dest->qpn, my_dest->psn, gid);
	if (write(connfd, msg, sizeof msg) != sizeof msg ||
	    read(connfd, msg, sizeof msg) != sizeof "done") {
		fprintf(stderr, "Couldn't send/recv local address\n");
		free(rem_dest);
		rem_dest = NULL;
		goto out;
	}
out:
	close(connfd);
	return rem_dest;
}
293219820Sjeff
294219820Sjeffstatic struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
295219820Sjeff					    int rx_depth, int port,
296219820Sjeff					    int use_event)
297219820Sjeff{
298219820Sjeff	struct pingpong_context *ctx;
299219820Sjeff
300219820Sjeff	ctx = malloc(sizeof *ctx);
301219820Sjeff	if (!ctx)
302219820Sjeff		return NULL;
303219820Sjeff
304331769Shselasky	ctx->size       = size;
305331769Shselasky	ctx->send_flags = IBV_SEND_SIGNALED;
306331769Shselasky	ctx->rx_depth   = rx_depth;
307219820Sjeff
308331769Shselasky	ctx->buf = memalign(page_size, size + 40);
309219820Sjeff	if (!ctx->buf) {
310219820Sjeff		fprintf(stderr, "Couldn't allocate work buf.\n");
311331769Shselasky		goto clean_ctx;
312219820Sjeff	}
313219820Sjeff
314331769Shselasky	/* FIXME memset(ctx->buf, 0, size + 40); */
315331769Shselasky	memset(ctx->buf, 0x7b, size + 40);
316219820Sjeff
317219820Sjeff	ctx->context = ibv_open_device(ib_dev);
318219820Sjeff	if (!ctx->context) {
319219820Sjeff		fprintf(stderr, "Couldn't get context for %s\n",
320219820Sjeff			ibv_get_device_name(ib_dev));
321331769Shselasky		goto clean_buffer;
322219820Sjeff	}
323219820Sjeff
324331769Shselasky	{
325331769Shselasky		struct ibv_port_attr port_info = {};
326331769Shselasky		int mtu;
327331769Shselasky
328331769Shselasky		if (ibv_query_port(ctx->context, port, &port_info)) {
329331769Shselasky			fprintf(stderr, "Unable to query port info for port %d\n", port);
330331769Shselasky			goto clean_device;
331331769Shselasky		}
332331769Shselasky		mtu = 1 << (port_info.active_mtu + 7);
333331769Shselasky		if (size > mtu) {
334331769Shselasky			fprintf(stderr, "Requested size larger than port MTU (%d)\n", mtu);
335331769Shselasky			goto clean_device;
336331769Shselasky		}
337331769Shselasky	}
338331769Shselasky
339219820Sjeff	if (use_event) {
340219820Sjeff		ctx->channel = ibv_create_comp_channel(ctx->context);
341219820Sjeff		if (!ctx->channel) {
342219820Sjeff			fprintf(stderr, "Couldn't create completion channel\n");
343331769Shselasky			goto clean_device;
344219820Sjeff		}
345219820Sjeff	} else
346219820Sjeff		ctx->channel = NULL;
347219820Sjeff
348219820Sjeff	ctx->pd = ibv_alloc_pd(ctx->context);
349219820Sjeff	if (!ctx->pd) {
350219820Sjeff		fprintf(stderr, "Couldn't allocate PD\n");
351331769Shselasky		goto clean_comp_channel;
352219820Sjeff	}
353219820Sjeff
354219820Sjeff	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size + 40, IBV_ACCESS_LOCAL_WRITE);
355219820Sjeff	if (!ctx->mr) {
356219820Sjeff		fprintf(stderr, "Couldn't register MR\n");
357331769Shselasky		goto clean_pd;
358219820Sjeff	}
359219820Sjeff
360219820Sjeff	ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL,
361219820Sjeff				ctx->channel, 0);
362219820Sjeff	if (!ctx->cq) {
363219820Sjeff		fprintf(stderr, "Couldn't create CQ\n");
364331769Shselasky		goto clean_mr;
365219820Sjeff	}
366219820Sjeff
367219820Sjeff	{
368331769Shselasky		struct ibv_qp_attr attr;
369331769Shselasky		struct ibv_qp_init_attr init_attr = {
370219820Sjeff			.send_cq = ctx->cq,
371219820Sjeff			.recv_cq = ctx->cq,
372219820Sjeff			.cap     = {
373219820Sjeff				.max_send_wr  = 1,
374219820Sjeff				.max_recv_wr  = rx_depth,
375219820Sjeff				.max_send_sge = 1,
376219820Sjeff				.max_recv_sge = 1
377219820Sjeff			},
378219820Sjeff			.qp_type = IBV_QPT_UD,
379219820Sjeff		};
380219820Sjeff
381331769Shselasky		ctx->qp = ibv_create_qp(ctx->pd, &init_attr);
382219820Sjeff		if (!ctx->qp)  {
383219820Sjeff			fprintf(stderr, "Couldn't create QP\n");
384331769Shselasky			goto clean_cq;
385219820Sjeff		}
386331769Shselasky
387331769Shselasky		ibv_query_qp(ctx->qp, &attr, IBV_QP_CAP, &init_attr);
388331769Shselasky		if (init_attr.cap.max_inline_data >= size) {
389331769Shselasky			ctx->send_flags |= IBV_SEND_INLINE;
390331769Shselasky		}
391219820Sjeff	}
392219820Sjeff
393219820Sjeff	{
394219820Sjeff		struct ibv_qp_attr attr = {
395219820Sjeff			.qp_state        = IBV_QPS_INIT,
396219820Sjeff			.pkey_index      = 0,
397219820Sjeff			.port_num        = port,
398219820Sjeff			.qkey            = 0x11111111
399219820Sjeff		};
400219820Sjeff
401219820Sjeff		if (ibv_modify_qp(ctx->qp, &attr,
402219820Sjeff				  IBV_QP_STATE              |
403219820Sjeff				  IBV_QP_PKEY_INDEX         |
404219820Sjeff				  IBV_QP_PORT               |
405219820Sjeff				  IBV_QP_QKEY)) {
406219820Sjeff			fprintf(stderr, "Failed to modify QP to INIT\n");
407331769Shselasky			goto clean_qp;
408219820Sjeff		}
409219820Sjeff	}
410219820Sjeff
411219820Sjeff	return ctx;
412331769Shselasky
413331769Shselaskyclean_qp:
414331769Shselasky	ibv_destroy_qp(ctx->qp);
415331769Shselasky
416331769Shselaskyclean_cq:
417331769Shselasky	ibv_destroy_cq(ctx->cq);
418331769Shselasky
419331769Shselaskyclean_mr:
420331769Shselasky	ibv_dereg_mr(ctx->mr);
421331769Shselasky
422331769Shselaskyclean_pd:
423331769Shselasky	ibv_dealloc_pd(ctx->pd);
424331769Shselasky
425331769Shselaskyclean_comp_channel:
426331769Shselasky	if (ctx->channel)
427331769Shselasky		ibv_destroy_comp_channel(ctx->channel);
428331769Shselasky
429331769Shselaskyclean_device:
430331769Shselasky	ibv_close_device(ctx->context);
431331769Shselasky
432331769Shselaskyclean_buffer:
433331769Shselasky	free(ctx->buf);
434331769Shselasky
435331769Shselaskyclean_ctx:
436331769Shselasky	free(ctx);
437331769Shselasky
438331769Shselasky	return NULL;
439219820Sjeff}
440219820Sjeff
441331769Shselaskystatic int pp_close_ctx(struct pingpong_context *ctx)
442219820Sjeff{
443219820Sjeff	if (ibv_destroy_qp(ctx->qp)) {
444219820Sjeff		fprintf(stderr, "Couldn't destroy QP\n");
445219820Sjeff		return 1;
446219820Sjeff	}
447219820Sjeff
448219820Sjeff	if (ibv_destroy_cq(ctx->cq)) {
449219820Sjeff		fprintf(stderr, "Couldn't destroy CQ\n");
450219820Sjeff		return 1;
451219820Sjeff	}
452219820Sjeff
453219820Sjeff	if (ibv_dereg_mr(ctx->mr)) {
454219820Sjeff		fprintf(stderr, "Couldn't deregister MR\n");
455219820Sjeff		return 1;
456219820Sjeff	}
457219820Sjeff
458219820Sjeff	if (ibv_destroy_ah(ctx->ah)) {
459219820Sjeff		fprintf(stderr, "Couldn't destroy AH\n");
460219820Sjeff		return 1;
461219820Sjeff	}
462219820Sjeff
463219820Sjeff	if (ibv_dealloc_pd(ctx->pd)) {
464219820Sjeff		fprintf(stderr, "Couldn't deallocate PD\n");
465219820Sjeff		return 1;
466219820Sjeff	}
467219820Sjeff
468219820Sjeff	if (ctx->channel) {
469219820Sjeff		if (ibv_destroy_comp_channel(ctx->channel)) {
470219820Sjeff			fprintf(stderr, "Couldn't destroy completion channel\n");
471219820Sjeff			return 1;
472219820Sjeff		}
473219820Sjeff	}
474219820Sjeff
475219820Sjeff	if (ibv_close_device(ctx->context)) {
476219820Sjeff		fprintf(stderr, "Couldn't release context\n");
477219820Sjeff		return 1;
478219820Sjeff	}
479219820Sjeff
480219820Sjeff	free(ctx->buf);
481219820Sjeff	free(ctx);
482219820Sjeff
483219820Sjeff	return 0;
484219820Sjeff}
485219820Sjeff
486219820Sjeffstatic int pp_post_recv(struct pingpong_context *ctx, int n)
487219820Sjeff{
488219820Sjeff	struct ibv_sge list = {
489219820Sjeff		.addr	= (uintptr_t) ctx->buf,
490219820Sjeff		.length = ctx->size + 40,
491219820Sjeff		.lkey	= ctx->mr->lkey
492219820Sjeff	};
493219820Sjeff	struct ibv_recv_wr wr = {
494219820Sjeff		.wr_id	    = PINGPONG_RECV_WRID,
495219820Sjeff		.sg_list    = &list,
496219820Sjeff		.num_sge    = 1,
497219820Sjeff	};
498219820Sjeff	struct ibv_recv_wr *bad_wr;
499219820Sjeff	int i;
500219820Sjeff
501219820Sjeff	for (i = 0; i < n; ++i)
502219820Sjeff		if (ibv_post_recv(ctx->qp, &wr, &bad_wr))
503219820Sjeff			break;
504219820Sjeff
505219820Sjeff	return i;
506219820Sjeff}
507219820Sjeff
508219820Sjeffstatic int pp_post_send(struct pingpong_context *ctx, uint32_t qpn)
509219820Sjeff{
510219820Sjeff	struct ibv_sge list = {
511219820Sjeff		.addr	= (uintptr_t) ctx->buf + 40,
512219820Sjeff		.length = ctx->size,
513219820Sjeff		.lkey	= ctx->mr->lkey
514219820Sjeff	};
515219820Sjeff	struct ibv_send_wr wr = {
516219820Sjeff		.wr_id	    = PINGPONG_SEND_WRID,
517219820Sjeff		.sg_list    = &list,
518219820Sjeff		.num_sge    = 1,
519219820Sjeff		.opcode     = IBV_WR_SEND,
520331769Shselasky		.send_flags = ctx->send_flags,
521219820Sjeff		.wr         = {
522219820Sjeff			.ud = {
523219820Sjeff				 .ah          = ctx->ah,
524219820Sjeff				 .remote_qpn  = qpn,
525219820Sjeff				 .remote_qkey = 0x11111111
526219820Sjeff			 }
527219820Sjeff		}
528219820Sjeff	};
529219820Sjeff	struct ibv_send_wr *bad_wr;
530219820Sjeff
531219820Sjeff	return ibv_post_send(ctx->qp, &wr, &bad_wr);
532219820Sjeff}
533219820Sjeff
/*
 * Print the command-line synopsis and option summary to stdout.
 * argv0 is the program name shown in the two invocation examples.
 */
static void usage(const char *argv0)
{
	static const char *const option_help[] = {
		"  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n",
		"  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n",
		"  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n",
		"  -s, --size=<size>      size of message to exchange (default 2048)\n",
		"  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n",
		"  -n, --iters=<iters>    number of exchanges (default 1000)\n",
		"  -l, --sl=<SL>          send messages with service level <SL> (default 0)\n",
		"  -e, --events           sleep on CQ events (default poll)\n",
		"  -g, --gid-idx=<gid index> local port gid index\n",
	};
	size_t line;

	printf("Usage:\n"
	       "  %s            start a server and wait for connection\n"
	       "  %s <host>     connect to server at <host>\n"
	       "\n"
	       "Options:\n",
	       argv0, argv0);

	for (line = 0; line < sizeof option_help / sizeof option_help[0]; ++line)
		fputs(option_help[line], stdout);
}
551219820Sjeff
552219820Sjeffint main(int argc, char *argv[])
553219820Sjeff{
554219820Sjeff	struct ibv_device      **dev_list;
555219820Sjeff	struct ibv_device	*ib_dev;
556219820Sjeff	struct pingpong_context *ctx;
557219820Sjeff	struct pingpong_dest     my_dest;
558219820Sjeff	struct pingpong_dest    *rem_dest;
559219820Sjeff	struct timeval           start, end;
560219820Sjeff	char                    *ib_devname = NULL;
561219820Sjeff	char                    *servername = NULL;
562331769Shselasky	unsigned int             port = 18515;
563219820Sjeff	int                      ib_port = 1;
564331769Shselasky	unsigned int             size = 2048;
565331769Shselasky	unsigned int             rx_depth = 500;
566331769Shselasky	unsigned int             iters = 1000;
567219820Sjeff	int                      use_event = 0;
568219820Sjeff	int                      routs;
569219820Sjeff	int                      rcnt, scnt;
570219820Sjeff	int                      num_cq_events = 0;
571219820Sjeff	int                      sl = 0;
572331769Shselasky	int			 gidx = -1;
573219820Sjeff	char			 gid[33];
574219820Sjeff
575219820Sjeff	srand48(getpid() * time(NULL));
576219820Sjeff
577219820Sjeff	while (1) {
578219820Sjeff		int c;
579219820Sjeff
580219820Sjeff		static struct option long_options[] = {
581219820Sjeff			{ .name = "port",     .has_arg = 1, .val = 'p' },
582219820Sjeff			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
583219820Sjeff			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
584219820Sjeff			{ .name = "size",     .has_arg = 1, .val = 's' },
585219820Sjeff			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
586219820Sjeff			{ .name = "iters",    .has_arg = 1, .val = 'n' },
587219820Sjeff			{ .name = "sl",       .has_arg = 1, .val = 'l' },
588219820Sjeff			{ .name = "events",   .has_arg = 0, .val = 'e' },
589219820Sjeff			{ .name = "gid-idx",  .has_arg = 1, .val = 'g' },
590331769Shselasky			{}
591219820Sjeff		};
592219820Sjeff
593331769Shselasky		c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:",
594331769Shselasky							long_options, NULL);
595219820Sjeff		if (c == -1)
596219820Sjeff			break;
597219820Sjeff
598219820Sjeff		switch (c) {
599219820Sjeff		case 'p':
600219820Sjeff			port = strtol(optarg, NULL, 0);
601331769Shselasky			if (port > 65535) {
602219820Sjeff				usage(argv[0]);
603219820Sjeff				return 1;
604219820Sjeff			}
605219820Sjeff			break;
606219820Sjeff
607219820Sjeff		case 'd':
608331769Shselasky			ib_devname = strdupa(optarg);
609219820Sjeff			break;
610219820Sjeff
611219820Sjeff		case 'i':
612219820Sjeff			ib_port = strtol(optarg, NULL, 0);
613331769Shselasky			if (ib_port < 1) {
614219820Sjeff				usage(argv[0]);
615219820Sjeff				return 1;
616219820Sjeff			}
617219820Sjeff			break;
618219820Sjeff
619219820Sjeff		case 's':
620331769Shselasky			size = strtoul(optarg, NULL, 0);
621219820Sjeff			break;
622219820Sjeff
623219820Sjeff		case 'r':
624331769Shselasky			rx_depth = strtoul(optarg, NULL, 0);
625219820Sjeff			break;
626219820Sjeff
627219820Sjeff		case 'n':
628331769Shselasky			iters = strtoul(optarg, NULL, 0);
629219820Sjeff			break;
630219820Sjeff
631219820Sjeff		case 'l':
632219820Sjeff			sl = strtol(optarg, NULL, 0);
633219820Sjeff			break;
634219820Sjeff
635219820Sjeff		case 'e':
636219820Sjeff			++use_event;
637219820Sjeff			break;
638219820Sjeff
639219820Sjeff		case 'g':
640219820Sjeff			gidx = strtol(optarg, NULL, 0);
641219820Sjeff			break;
642219820Sjeff
643219820Sjeff		default:
644219820Sjeff			usage(argv[0]);
645219820Sjeff			return 1;
646219820Sjeff		}
647219820Sjeff	}
648219820Sjeff
649219820Sjeff	if (optind == argc - 1)
650331769Shselasky		servername = strdupa(argv[optind]);
651219820Sjeff	else if (optind < argc) {
652219820Sjeff		usage(argv[0]);
653219820Sjeff		return 1;
654219820Sjeff	}
655219820Sjeff
656219820Sjeff	page_size = sysconf(_SC_PAGESIZE);
657219820Sjeff
658219820Sjeff	dev_list = ibv_get_device_list(NULL);
659219820Sjeff	if (!dev_list) {
660219820Sjeff		perror("Failed to get IB devices list");
661219820Sjeff		return 1;
662219820Sjeff	}
663219820Sjeff
664219820Sjeff	if (!ib_devname) {
665219820Sjeff		ib_dev = *dev_list;
666219820Sjeff		if (!ib_dev) {
667219820Sjeff			fprintf(stderr, "No IB devices found\n");
668219820Sjeff			return 1;
669219820Sjeff		}
670219820Sjeff	} else {
671219820Sjeff		int i;
672219820Sjeff		for (i = 0; dev_list[i]; ++i)
673219820Sjeff			if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname))
674219820Sjeff				break;
675219820Sjeff		ib_dev = dev_list[i];
676219820Sjeff		if (!ib_dev) {
677219820Sjeff			fprintf(stderr, "IB device %s not found\n", ib_devname);
678219820Sjeff			return 1;
679219820Sjeff		}
680219820Sjeff	}
681219820Sjeff
682219820Sjeff	ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event);
683219820Sjeff	if (!ctx)
684219820Sjeff		return 1;
685219820Sjeff
686219820Sjeff	routs = pp_post_recv(ctx, ctx->rx_depth);
687219820Sjeff	if (routs < ctx->rx_depth) {
688219820Sjeff		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
689219820Sjeff		return 1;
690219820Sjeff	}
691219820Sjeff
692219820Sjeff	if (use_event)
693219820Sjeff		if (ibv_req_notify_cq(ctx->cq, 0)) {
694219820Sjeff			fprintf(stderr, "Couldn't request CQ notification\n");
695219820Sjeff			return 1;
696219820Sjeff		}
697219820Sjeff
698219820Sjeff	if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) {
699219820Sjeff		fprintf(stderr, "Couldn't get port info\n");
700219820Sjeff		return 1;
701219820Sjeff	}
702219820Sjeff	my_dest.lid = ctx->portinfo.lid;
703219820Sjeff
704219820Sjeff	my_dest.qpn = ctx->qp->qp_num;
705219820Sjeff	my_dest.psn = lrand48() & 0xffffff;
706219820Sjeff
707219820Sjeff	if (gidx >= 0) {
708219820Sjeff		if (ibv_query_gid(ctx->context, ib_port, gidx, &my_dest.gid)) {
709331769Shselasky			fprintf(stderr, "Could not get local gid for gid index "
710331769Shselasky								"%d\n", gidx);
711219820Sjeff			return 1;
712219820Sjeff		}
713219820Sjeff	} else
714219820Sjeff		memset(&my_dest.gid, 0, sizeof my_dest.gid);
715219820Sjeff
716219820Sjeff	inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid);
717219820Sjeff	printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x: GID %s\n",
718219820Sjeff	       my_dest.lid, my_dest.qpn, my_dest.psn, gid);
719219820Sjeff
720219820Sjeff	if (servername)
721219820Sjeff		rem_dest = pp_client_exch_dest(servername, port, &my_dest);
722219820Sjeff	else
723331769Shselasky		rem_dest = pp_server_exch_dest(ctx, ib_port, port, sl,
724331769Shselasky							&my_dest, gidx);
725219820Sjeff
726219820Sjeff	if (!rem_dest)
727219820Sjeff		return 1;
728219820Sjeff
729219820Sjeff	inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid);
730219820Sjeff	printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x, GID %s\n",
731219820Sjeff	       rem_dest->lid, rem_dest->qpn, rem_dest->psn, gid);
732219820Sjeff
733219820Sjeff	if (servername)
734331769Shselasky		if (pp_connect_ctx(ctx, ib_port, my_dest.psn, sl, rem_dest,
735331769Shselasky									gidx))
736219820Sjeff			return 1;
737219820Sjeff
738219820Sjeff	ctx->pending = PINGPONG_RECV_WRID;
739219820Sjeff
740219820Sjeff	if (servername) {
741219820Sjeff		if (pp_post_send(ctx, rem_dest->qpn)) {
742219820Sjeff			fprintf(stderr, "Couldn't post send\n");
743219820Sjeff			return 1;
744219820Sjeff		}
745219820Sjeff		ctx->pending |= PINGPONG_SEND_WRID;
746219820Sjeff	}
747219820Sjeff
748219820Sjeff	if (gettimeofday(&start, NULL)) {
749219820Sjeff		perror("gettimeofday");
750219820Sjeff		return 1;
751219820Sjeff	}
752219820Sjeff
753219820Sjeff	rcnt = scnt = 0;
754219820Sjeff	while (rcnt < iters || scnt < iters) {
755219820Sjeff		if (use_event) {
756219820Sjeff			struct ibv_cq *ev_cq;
757219820Sjeff			void          *ev_ctx;
758219820Sjeff
759219820Sjeff			if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
760219820Sjeff				fprintf(stderr, "Failed to get cq_event\n");
761219820Sjeff				return 1;
762219820Sjeff			}
763219820Sjeff
764219820Sjeff			++num_cq_events;
765219820Sjeff
766219820Sjeff			if (ev_cq != ctx->cq) {
767219820Sjeff				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
768219820Sjeff				return 1;
769219820Sjeff			}
770219820Sjeff
771219820Sjeff			if (ibv_req_notify_cq(ctx->cq, 0)) {
772219820Sjeff				fprintf(stderr, "Couldn't request CQ notification\n");
773219820Sjeff				return 1;
774219820Sjeff			}
775219820Sjeff		}
776219820Sjeff
777219820Sjeff		{
778219820Sjeff			struct ibv_wc wc[2];
779219820Sjeff			int ne, i;
780219820Sjeff
781219820Sjeff			do {
782219820Sjeff				ne = ibv_poll_cq(ctx->cq, 2, wc);
783219820Sjeff				if (ne < 0) {
784219820Sjeff					fprintf(stderr, "poll CQ failed %d\n", ne);
785219820Sjeff					return 1;
786219820Sjeff				}
787219820Sjeff			} while (!use_event && ne < 1);
788219820Sjeff
789219820Sjeff			for (i = 0; i < ne; ++i) {
790219820Sjeff				if (wc[i].status != IBV_WC_SUCCESS) {
791219820Sjeff					fprintf(stderr, "Failed status %s (%d) for wr_id %d\n",
792219820Sjeff						ibv_wc_status_str(wc[i].status),
793219820Sjeff						wc[i].status, (int) wc[i].wr_id);
794219820Sjeff					return 1;
795219820Sjeff				}
796219820Sjeff
797219820Sjeff				switch ((int) wc[i].wr_id) {
798219820Sjeff				case PINGPONG_SEND_WRID:
799219820Sjeff					++scnt;
800219820Sjeff					break;
801219820Sjeff
802219820Sjeff				case PINGPONG_RECV_WRID:
803219820Sjeff					if (--routs <= 1) {
804219820Sjeff						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
805219820Sjeff						if (routs < ctx->rx_depth) {
806219820Sjeff							fprintf(stderr,
807219820Sjeff								"Couldn't post receive (%d)\n",
808219820Sjeff								routs);
809219820Sjeff							return 1;
810219820Sjeff						}
811219820Sjeff					}
812219820Sjeff
813219820Sjeff					++rcnt;
814219820Sjeff					break;
815219820Sjeff
816219820Sjeff				default:
817219820Sjeff					fprintf(stderr, "Completion for unknown wr_id %d\n",
818219820Sjeff						(int) wc[i].wr_id);
819219820Sjeff					return 1;
820219820Sjeff				}
821219820Sjeff
822219820Sjeff				ctx->pending &= ~(int) wc[i].wr_id;
823219820Sjeff				if (scnt < iters && !ctx->pending) {
824219820Sjeff					if (pp_post_send(ctx, rem_dest->qpn)) {
825219820Sjeff						fprintf(stderr, "Couldn't post send\n");
826219820Sjeff						return 1;
827219820Sjeff					}
828219820Sjeff					ctx->pending = PINGPONG_RECV_WRID |
829219820Sjeff						       PINGPONG_SEND_WRID;
830219820Sjeff				}
831219820Sjeff			}
832219820Sjeff		}
833219820Sjeff	}
834219820Sjeff
835219820Sjeff	if (gettimeofday(&end, NULL)) {
836219820Sjeff		perror("gettimeofday");
837219820Sjeff		return 1;
838219820Sjeff	}
839219820Sjeff
840219820Sjeff	{
841219820Sjeff		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
842219820Sjeff			(end.tv_usec - start.tv_usec);
843219820Sjeff		long long bytes = (long long) size * iters * 2;
844219820Sjeff
845219820Sjeff		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
846219820Sjeff		       bytes, usec / 1000000., bytes * 8. / usec);
847219820Sjeff		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
848219820Sjeff		       iters, usec / 1000000., usec / iters);
849219820Sjeff	}
850219820Sjeff
851219820Sjeff	ibv_ack_cq_events(ctx->cq, num_cq_events);
852219820Sjeff
853219820Sjeff	if (pp_close_ctx(ctx))
854219820Sjeff		return 1;
855219820Sjeff
856219820Sjeff	ibv_free_device_list(dev_list);
857219820Sjeff	free(rem_dest);
858219820Sjeff
859219820Sjeff	return 0;
860219820Sjeff}
861