1/*
2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2005-2007 Intel Corporation.  All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses.  You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 *     Redistribution and use in source and binary forms, with or
12 *     without modification, are permitted provided that the following
13 *     conditions are met:
14 *
15 *      - Redistributions of source code must retain the above
16 *        copyright notice, this list of conditions and the following
17 *        disclaimer.
18 *
19 *      - Redistributions in binary form must reproduce the above
20 *        copyright notice, this list of conditions and the following
21 *        disclaimer in the documentation and/or other materials
22 *        provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#if !defined(RDMA_CMA_H)
35#define RDMA_CMA_H
36
37#include <netinet/in.h>
38#include <sys/socket.h>
39#include <infiniband/verbs.h>
40#include <infiniband/sa.h>
41
42#ifdef __cplusplus
43extern "C" {
44#endif
45
/*
 * Communication events reported through an event channel.
 *
 * When RDMA_CM_EVENT_DEVICE_REMOVAL is received, the user must destroy the
 * RDMA identifier the event refers to and release every resource that was
 * allocated with the device.
 *
 * Values are assigned sequentially starting at 0; they are written out so
 * that a numeric value can be matched to its name at a glance.
 */
enum rdma_cm_event_type {
	RDMA_CM_EVENT_ADDR_RESOLVED	= 0,
	RDMA_CM_EVENT_ADDR_ERROR	= 1,
	RDMA_CM_EVENT_ROUTE_RESOLVED	= 2,
	RDMA_CM_EVENT_ROUTE_ERROR	= 3,
	RDMA_CM_EVENT_CONNECT_REQUEST	= 4,
	RDMA_CM_EVENT_CONNECT_RESPONSE	= 5,
	RDMA_CM_EVENT_CONNECT_ERROR	= 6,
	RDMA_CM_EVENT_UNREACHABLE	= 7,
	RDMA_CM_EVENT_REJECTED		= 8,
	RDMA_CM_EVENT_ESTABLISHED	= 9,
	RDMA_CM_EVENT_DISCONNECTED	= 10,
	RDMA_CM_EVENT_DEVICE_REMOVAL	= 11,
	RDMA_CM_EVENT_MULTICAST_JOIN	= 12,
	RDMA_CM_EVENT_MULTICAST_ERROR	= 13,
	RDMA_CM_EVENT_ADDR_CHANGE	= 14,
	RDMA_CM_EVENT_TIMEWAIT_EXIT	= 15
};
68
/*
 * RDMA port spaces.  One of these is passed to rdma_create_id and is
 * recorded in the ps field of the resulting rdma_cm_id.
 *
 * The last enumerator deliberately has no trailing comma: C89 and C++98
 * reject one, and this header is meant to be usable from C++.
 */
enum rdma_port_space {
	RDMA_PS_IPOIB	= 0x0002,
	RDMA_PS_TCP	= 0x0106,
	RDMA_PS_UDP	= 0x0111
};
74
/*
 * Qkey shared by all UDP QPs and multicast groups that are created through
 * the RDMA CM.
 */
#define RDMA_UDP_QKEY	0x01234567
80
81struct ib_addr {
82	union ibv_gid	sgid;
83	union ibv_gid	dgid;
84	uint16_t	pkey;
85};
86
87struct rdma_addr {
88	struct sockaddr		src_addr;
89	uint8_t			src_pad[sizeof(struct sockaddr_storage) -
90					sizeof(struct sockaddr)];
91	struct sockaddr		dst_addr;
92	uint8_t			dst_pad[sizeof(struct sockaddr_storage) -
93					sizeof(struct sockaddr)];
94	union {
95		struct ib_addr	ibaddr;
96	} addr;
97};
98
99struct rdma_route {
100	struct rdma_addr	 addr;
101	struct ibv_sa_path_rec	*path_rec;
102	int			 num_paths;
103};
104
/*
 * Channel on which communication events are reported.  Each event channel
 * maps to one file descriptor.
 */
struct rdma_event_channel {
	int fd;		/* file descriptor of the channel */
};
108
109struct rdma_cm_id {
110	struct ibv_context	*verbs;
111	struct rdma_event_channel *channel;
112	void			*context;
113	struct ibv_qp		*qp;
114	struct rdma_route	 route;
115	enum rdma_port_space	 ps;
116	uint8_t			 port_num;
117};
118
/*
 * Connection parameters passed to rdma_connect and rdma_accept, and
 * carried in the conn member of rdma_cm_event.param.
 */
struct rdma_conn_param {
	const void	*private_data;		/* user private data */
	uint8_t		 private_data_len;	/* length of private_data */
	uint8_t		 responder_resources;
	uint8_t		 initiator_depth;
	uint8_t		 flow_control;
	uint8_t		 retry_count;		/* not used when accepting */
	uint8_t		 rnr_retry_count;

	/*
	 * The remaining fields are ignored when a QP has been created on
	 * the rdma_cm_id.
	 */
	uint8_t		 srq;
	uint32_t	 qp_num;
};
131
132struct rdma_ud_param {
133	const void *private_data;
134	uint8_t private_data_len;
135	struct ibv_ah_attr ah_attr;
136	uint32_t qp_num;
137	uint32_t qkey;
138};
139
140struct rdma_cm_event {
141	struct rdma_cm_id	*id;
142	struct rdma_cm_id	*listen_id;
143	enum rdma_cm_event_type	 event;
144	int			 status;
145	union {
146		struct rdma_conn_param conn;
147		struct rdma_ud_param   ud;
148	} param;
149};
150
/**
 * rdma_create_event_channel - Create a channel for reporting communication events.
 * Description:
 *   Event channels are the means by which asynchronous events are delivered
 *   to the user.  Every event channel maps to a file descriptor.
 * Notes:
 *   Events are retrieved from a channel with rdma_get_cm_event.  Each
 *   channel created by this call must be destroyed with
 *   rdma_destroy_event_channel.
 * See also:
 *   rdma_get_cm_event, rdma_destroy_event_channel
 */
struct rdma_event_channel *rdma_create_event_channel(void);
164
/**
 * rdma_destroy_event_channel - Destroy an event communication channel.
 * @channel: The communication channel to destroy.
 * Description:
 *   Closes the file descriptor associated with the event channel and
 *   releases all resources that belong to the channel.
 * Notes:
 *   Before calling this function, every rdma_cm_id associated with the
 *   event channel must have been destroyed and every event returned from
 *   the channel must have been acked.
 * See also:
 *   rdma_create_event_channel, rdma_get_cm_event, rdma_ack_cm_event
 */
void rdma_destroy_event_channel(struct rdma_event_channel *channel);
178
/**
 * rdma_create_id - Allocate a communication identifier.
 * @channel: The communication channel on which events for the allocated
 *   rdma_cm_id will be reported.
 * @id: Location where the allocated communication identifier is returned.
 * @context: User specified context associated with the rdma_cm_id.
 * @ps: RDMA port space.
 * Description:
 *   Allocates an identifier used to track communication information.
 * Notes:
 *   An rdma_cm_id plays the same conceptual role for RDMA communication
 *   that a socket plays for network communication.  It differs in two
 *   ways: an rdma_cm_id must be explicitly bound to a specific RDMA device
 *   before communication can take place, and most operations on it are
 *   asynchronous.  Communication events for an rdma_cm_id are delivered
 *   through its associated event channel.  The rdma_cm_id must be released
 *   by calling rdma_destroy_id.
 * See also:
 *   rdma_create_event_channel, rdma_destroy_id, rdma_get_devices,
 *   rdma_bind_addr, rdma_resolve_addr, rdma_connect, rdma_listen
 */
int rdma_create_id(struct rdma_event_channel *channel, struct rdma_cm_id **id,
		   void *context, enum rdma_port_space ps);
203
/**
 * rdma_destroy_id - Release a communication identifier.
 * @id: The communication identifier to destroy.
 * Description:
 *   Cancels any outstanding asynchronous operation on the rdma_cm_id and
 *   destroys it.
 * Notes:
 *   Before calling this routine, users must free any QP associated with
 *   the rdma_cm_id and ack any related events.
 * See also:
 *   rdma_create_id, rdma_destroy_qp, rdma_ack_cm_event
 */
int rdma_destroy_id(struct rdma_cm_id *id);
217
/**
 * rdma_bind_addr - Bind an RDMA identifier to a source address.
 * @id: RDMA identifier.
 * @addr: Local address information.  Wildcard values are permitted.
 * Description:
 *   Associates a source address, which may be wildcarded, with an
 *   rdma_cm_id.  Binding to a specific local address also binds the
 *   rdma_cm_id to a local RDMA device.
 * Notes:
 *   The usual use is on the listening side, ahead of rdma_listen, in order
 *   to bind to a specific port number.  The active side of a connection
 *   may also call it, ahead of rdma_resolve_addr, in order to bind to a
 *   specific address.
 * See also:
 *   rdma_create_id, rdma_listen, rdma_resolve_addr, rdma_create_qp
 */
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
235
/**
 * rdma_resolve_addr - Resolve destination and optional source addresses.
 * @id: RDMA identifier.
 * @src_addr: Source address information.  This parameter may be NULL.
 * @dst_addr: Destination address information.
 * @timeout_ms: Time to wait for resolution to complete.
 * Description:
 *   Resolves the destination address, and the source address if one is
 *   given, from IP addresses to an RDMA address.  On success the specified
 *   rdma_cm_id is bound to a local device.
 * Notes:
 *   The purpose of this call is to map a destination IP address to a
 *   usable RDMA address.  When a source address is supplied, the
 *   rdma_cm_id is bound to it exactly as if rdma_bind_addr had been called.
 *   When no source address is supplied and the rdma_cm_id is not yet bound
 *   to a device, a source address is chosen from the local routing tables
 *   and the rdma_cm_id is bound to that.  In either case the rdma_cm_id is
 *   bound to an RDMA device once the call has completed.  The active side
 *   of a connection typically makes this call before rdma_resolve_route
 *   and rdma_connect.
 * See also:
 *   rdma_create_id, rdma_resolve_route, rdma_connect, rdma_create_qp,
 *   rdma_get_cm_event, rdma_bind_addr
 */
int rdma_resolve_addr(struct rdma_cm_id *id,
		      struct sockaddr *src_addr,
		      struct sockaddr *dst_addr,
		      int timeout_ms);
261
/**
 * rdma_resolve_route - Resolve the route needed to establish a connection.
 * @id: RDMA identifier.
 * @timeout_ms: Time to wait for resolution to complete.
 * Description:
 *   Resolves an RDMA route to the destination address so that a connection
 *   can be established.  The destination address must already have been
 *   resolved through rdma_resolve_addr.
 * Notes:
 *   The client side of a connection calls this after rdma_resolve_addr and
 *   before rdma_connect.
 * See also:
 *   rdma_resolve_addr, rdma_connect, rdma_get_cm_event
 */
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
277
/**
 * rdma_create_qp - Allocate a QP.
 * @id: RDMA identifier.
 * @pd: Protection domain for the QP.
 * @qp_init_attr: Initial QP attributes.
 * Description:
 *   Allocates a QP associated with the specified rdma_cm_id and transitions
 *   it for sending and receiving.
 * Notes:
 *   The rdma_cm_id has to be bound to a local RDMA device before this
 *   function is called, and the protection domain has to belong to that
 *   same device.  The librdmacm automatically moves QPs allocated to an
 *   rdma_cm_id through their states.  Once allocated, the QP is ready for
 *   receives to be posted; an unconnected QP is also ready for sends to be
 *   posted.
 * See also:
 *   rdma_bind_addr, rdma_resolve_addr, rdma_destroy_qp, ibv_create_qp,
 *   ibv_modify_qp
 */
int rdma_create_qp(struct rdma_cm_id *id,
		   struct ibv_pd *pd,
		   struct ibv_qp_init_attr *qp_init_attr);
299
/**
 * rdma_destroy_qp - Deallocate a QP.
 * @id: RDMA identifier.
 * Description:
 *   Destroys the QP that was allocated on the rdma_cm_id.
 * Notes:
 *   Any QP associated with an rdma_cm_id has to be destroyed before the ID
 *   itself is destroyed.
 * See also:
 *   rdma_create_qp, rdma_destroy_id, ibv_destroy_qp
 */
void rdma_destroy_qp(struct rdma_cm_id *id);
312
/**
 * rdma_connect - Initiate an active connection request.
 * @id: RDMA identifier.
 * @conn_param: Connection parameters.
 * Description:
 *   On a connected rdma_cm_id this call starts a connection request to a
 *   remote destination.  On an unconnected rdma_cm_id it starts a lookup
 *   of the remote QP that provides the datagram service.
 * Notes:
 *   A route to the destination address must have been resolved with
 *   rdma_resolve_route before this routine is called.
 * See also:
 *   rdma_resolve_route, rdma_disconnect, rdma_listen, rdma_get_cm_event
 */
int rdma_connect(struct rdma_cm_id *id,
		 struct rdma_conn_param *conn_param);
328
/**
 * rdma_listen - Listen for incoming connection requests.
 * @id: RDMA identifier.
 * @backlog: Backlog of incoming connection requests.
 * Description:
 *   Starts listening for incoming connection requests or datagram service
 *   lookups.  The listen is restricted to the locally bound source address.
 * Notes:
 *   The rdma_cm_id must have been bound to a local address with
 *   rdma_bind_addr before this routine is called.  When the rdma_cm_id is
 *   bound to a specific IP address, the listen is limited to that address
 *   and its associated RDMA device.  When the rdma_cm_id is bound to an
 *   RDMA port number only, the listen takes place across all RDMA devices.
 * See also:
 *   rdma_bind_addr, rdma_connect, rdma_accept, rdma_reject, rdma_get_cm_event
 */
int rdma_listen(struct rdma_cm_id *id, int backlog);
348
/**
 * rdma_accept - Accept a connection request.
 * @id: Connection identifier associated with the request.
 * @conn_param: Information needed to establish the connection.
 * Description:
 *   Called from the listening side to accept a connection or a datagram
 *   service lookup request.
 * Notes:
 *   In contrast to the socket accept routine, rdma_accept is not invoked
 *   on the listening rdma_cm_id.  After rdma_listen, the user instead
 *   waits for a connection request event.  That event hands the user a
 *   newly created rdma_cm_id, comparable to a new socket except that it is
 *   bound to a specific RDMA device, and rdma_accept is called on this new
 *   rdma_cm_id.
 * See also:
 *   rdma_listen, rdma_reject, rdma_get_cm_event
 */
int rdma_accept(struct rdma_cm_id *id,
		struct rdma_conn_param *conn_param);
367
/**
 * rdma_reject - Reject a connection request.
 * @id: Connection identifier associated with the request.
 * @private_data: Optional private data to send with the reject message.
 * @private_data_len: Size of the private_data to send, in bytes.
 * Description:
 *   Called from the listening side to reject a connection or a datagram
 *   service lookup request.
 * Notes:
 *   A user that has received a connection request event may turn the
 *   request down by calling rdma_reject.  The specified data is passed to
 *   the remote side provided the underlying RDMA transport supports
 *   private data in the reject message.
 * See also:
 *   rdma_listen, rdma_accept, rdma_get_cm_event
 */
int rdma_reject(struct rdma_cm_id *id,
		const void *private_data,
		uint8_t private_data_len);
386
/**
 * rdma_notify - Notify the librdmacm of an asynchronous event.
 * @id: RDMA identifier.
 * @event: Asynchronous event.
 * Description:
 *   Informs the librdmacm of an asynchronous event that has occurred on a
 *   QP associated with the rdma_cm_id.
 * Notes:
 *   Asynchronous QP events are delivered to the user's device event
 *   handler; this routine passes communication events on to the librdmacm.
 *   It is not needed in most cases.  However, when connection
 *   establishment is done out of band (such as done through Infiniband),
 *   data may arrive on a QP that is not yet considered connected.  Calling
 *   this routine then forces the connection into the established state,
 *   which covers the rare situation where the connection never forms on
 *   its own.
 *   Events that should be reported to the CM are: IBV_EVENT_COMM_EST.
 * See also:
 *   rdma_connect, rdma_accept, rdma_listen
 */
int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event);
408
/**
 * rdma_disconnect - Disconnect a connection.
 * @id: RDMA identifier.
 * Description:
 *   Tears down the connection and moves any associated QP to the error
 *   state.
 * See also:
 *   rdma_connect, rdma_listen, rdma_accept
 */
int rdma_disconnect(struct rdma_cm_id *id);
419
/**
 * rdma_join_multicast - Join a multicast group.
 * @id: Communication identifier associated with the request.
 * @addr: Multicast address identifying the group to join.
 * @context: User-defined context associated with the join request.
 * Description:
 *   Joins a multicast group and attaches an associated QP to the group.
 * Notes:
 *   The rdma_cm_id has to be bound to an RDMA device, through either
 *   rdma_bind_addr or rdma_resolve_addr, before a multicast group can be
 *   joined.  With rdma_resolve_addr, the local routing tables must be able
 *   to resolve the multicast address to an RDMA device.  To leave the
 *   group and release any multicast resources the user must call
 *   rdma_leave_multicast.  The context is handed back to the user in the
 *   private_data field of the rdma_cm_event.
 * See also:
 *   rdma_leave_multicast, rdma_bind_addr, rdma_resolve_addr, rdma_create_qp
 */
int rdma_join_multicast(struct rdma_cm_id *id,
			struct sockaddr *addr,
			void *context);
440
/**
 * rdma_leave_multicast - Leave a multicast group.
 * @id: Communication identifier associated with the request.
 * @addr: Multicast address identifying the group to leave.
 * Description:
 *   Leaves a multicast group and detaches an associated QP from the group.
 * Notes:
 *   If the group has not been fully joined yet, calling this function
 *   cancels the join operation.  Immediately after leaving a multicast
 *   group, messages received from the group may still be queued for
 *   completion processing, and users should be prepared for that.  All
 *   multicast groups are left automatically when an rdma_cm_id is
 *   destroyed.
 * See also:
 *   rdma_join_multicast, rdma_destroy_qp
 */
int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
457
/**
 * rdma_get_cm_event - Retrieve the next pending communication event.
 * @channel: Event channel to check for events.
 * @event: Allocated information about the next communication event.
 * Description:
 *   Retrieves a communication event.  By default the call blocks until an
 *   event is received when none is pending.
 * Notes:
 *   The default synchronous behavior can be changed by modifying the file
 *   descriptor associated with the given channel.  Every reported event
 *   has to be acknowledged through rdma_ack_cm_event; destroying an
 *   rdma_cm_id blocks until its related events have been acknowledged.
 * See also:
 *   rdma_ack_cm_event, rdma_create_event_channel, rdma_event_str
 */
int rdma_get_cm_event(struct rdma_event_channel *channel, struct rdma_cm_event **event);
476
/**
 * rdma_ack_cm_event - Free a communication event.
 * @event: Event to be released.
 * Description:
 *   Releases an event that was allocated by rdma_get_cm_event.  Every such
 *   event must be released; successful gets and acks should correspond
 *   one-to-one.
 * See also:
 *   rdma_get_cm_event, rdma_destroy_id
 */
int rdma_ack_cm_event(struct rdma_cm_event *event);
488
489static inline uint16_t rdma_get_src_port(struct rdma_cm_id *id)
490{
491	return	id->route.addr.src_addr.sa_family == PF_INET6 ?
492		((struct sockaddr_in6 *) &id->route.addr.src_addr)->sin6_port :
493		((struct sockaddr_in *) &id->route.addr.src_addr)->sin_port;
494}
495
496static inline uint16_t rdma_get_dst_port(struct rdma_cm_id *id)
497{
498	return	id->route.addr.dst_addr.sa_family == PF_INET6 ?
499		((struct sockaddr_in6 *) &id->route.addr.dst_addr)->sin6_port :
500		((struct sockaddr_in *) &id->route.addr.dst_addr)->sin_port;
501}
502
503static inline struct sockaddr *rdma_get_local_addr(struct rdma_cm_id *id)
504{
505	return &id->route.addr.src_addr;
506}
507
508static inline struct sockaddr *rdma_get_peer_addr(struct rdma_cm_id *id)
509{
510	return &id->route.addr.dst_addr;
511}
512
/**
 * rdma_get_devices - Get the list of RDMA devices currently available.
 * @num_devices: If non-NULL, set to the number of devices returned.
 * Description:
 *   Returns a NULL-terminated array of opened RDMA devices.  The routine
 *   lets callers allocate resources on specific RDMA devices so that they
 *   can be shared across multiple rdma_cm_id's.
 * Notes:
 *   Release the returned array with rdma_free_devices.  The devices stay
 *   opened for as long as the librdmacm is loaded.
 * See also:
 *   rdma_free_devices
 */
struct ibv_context **rdma_get_devices(int *num_devices);
527
/**
 * rdma_free_devices - Free the list of devices returned by rdma_get_devices.
 * @list: List of devices returned from rdma_get_devices.
 * Description:
 *   Releases the device array that rdma_get_devices handed out.
 * See also:
 *   rdma_get_devices
 */
void rdma_free_devices(struct ibv_context **list);
537
/**
 * rdma_event_str - Return a string representation of an rdma cm event.
 * @event: Asynchronous event.
 * Description:
 *   Maps an asynchronous event to its string representation.
 * See also:
 *   rdma_get_cm_event
 */
const char *rdma_event_str(enum rdma_cm_event_type event);
547
/* Option levels: values for the level argument of rdma_set_option. */
enum {
	RDMA_OPTION_ID = 0
};
552
/* Option details: values for the optname argument of rdma_set_option. */
enum {
	/* Option value is a uint8_t; see RFC 2474. */
	RDMA_OPTION_ID_TOS = 0
};
557
/**
 * rdma_set_option - Set options for an rdma_cm_id.
 * @id: Communication identifier to set option for.
 * @level: Protocol level of the option to set (e.g. RDMA_OPTION_ID).
 * @optname: Name of the option to set (e.g. RDMA_OPTION_ID_TOS).
 * @optval: Reference to the option data.
 * @optlen: The size of the %optval buffer.
 */
int rdma_set_option(struct rdma_cm_id *id,
		    int level,
		    int optname,
		    void *optval,
		    size_t optlen);
568
/**
 * rdma_migrate_id - Move an rdma_cm_id to a new event channel.
 * @id: Communication identifier to migrate.
 * @channel: New event channel on which events for the rdma_cm_id are
 *   reported.
 */
int rdma_migrate_id(struct rdma_cm_id *id,
		    struct rdma_event_channel *channel);
575
576#ifdef __cplusplus
577}
578#endif
579
580#endif /* RDMA_CMA_H */
581