addr.c revision 329835
1/*
2 * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses.  You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 *     Redistribution and use in source and binary forms, with or
14 *     without modification, are permitted provided that the following
15 *     conditions are met:
16 *
17 *      - Redistributions of source code must retain the above
18 *        copyright notice, this list of conditions and the following
19 *        disclaimer.
20 *
21 *      - Redistributions in binary form must reproduce the above
22 *        copyright notice, this list of conditions and the following
23 *        disclaimer in the documentation and/or other materials
24 *        provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36#include <linux/mutex.h>
37#include <linux/inetdevice.h>
38#include <linux/slab.h>
39#include <linux/workqueue.h>
40#include <linux/module.h>
41#include <linux/notifier.h>
42#include <net/route.h>
43#include <net/netevent.h>
44#include <rdma/ib_addr.h>
45#include <netinet/if_ether.h>
46#include <netinet/toecore.h>
47#include <netinet6/scope6_var.h>
48
49
50MODULE_AUTHOR("Sean Hefty");
51MODULE_DESCRIPTION("IB Address Translation");
52MODULE_LICENSE("Dual BSD/GPL");
53
54struct addr_req {
55	struct list_head list;
56	struct sockaddr_storage src_addr;
57	struct sockaddr_storage dst_addr;
58	struct rdma_dev_addr *addr;
59	struct rdma_addr_client *client;
60	void *context;
61	void (*callback)(int status, struct sockaddr *src_addr,
62			 struct rdma_dev_addr *addr, void *context);
63	unsigned long timeout;
64	int status;
65};
66
67static void process_req(struct work_struct *work);
68
69static DEFINE_MUTEX(lock);
70static LIST_HEAD(req_list);
71static struct delayed_work work;
72static struct workqueue_struct *addr_wq;
73
74void rdma_addr_register_client(struct rdma_addr_client *client)
75{
76	atomic_set(&client->refcount, 1);
77	init_completion(&client->comp);
78}
79EXPORT_SYMBOL(rdma_addr_register_client);
80
81static inline void put_client(struct rdma_addr_client *client)
82{
83	if (atomic_dec_and_test(&client->refcount))
84		complete(&client->comp);
85}
86
87void rdma_addr_unregister_client(struct rdma_addr_client *client)
88{
89	put_client(client);
90	wait_for_completion(&client->comp);
91}
92EXPORT_SYMBOL(rdma_addr_unregister_client);
93
94int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
95		     const unsigned char *dst_dev_addr)
96{
97	if (dev->if_type == IFT_INFINIBAND)
98		dev_addr->dev_type = ARPHRD_INFINIBAND;
99	else if (dev->if_type == IFT_ETHER)
100		dev_addr->dev_type = ARPHRD_ETHER;
101	else
102		dev_addr->dev_type = 0;
103	memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen);
104	memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr),
105	    dev->if_addrlen);
106	if (dst_dev_addr)
107		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen);
108	dev_addr->bound_dev_if = dev->if_index;
109	return 0;
110}
111EXPORT_SYMBOL(rdma_copy_addr);
112
113int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
114{
115	struct net_device *dev;
116	int ret = -EADDRNOTAVAIL;
117
118	if (dev_addr->bound_dev_if) {
119		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
120		if (!dev)
121			return -ENODEV;
122		ret = rdma_copy_addr(dev_addr, dev, NULL);
123		dev_put(dev);
124		return ret;
125	}
126
127	switch (addr->sa_family) {
128#ifdef INET
129	case AF_INET:
130		dev = ip_dev_find(&init_net,
131			((struct sockaddr_in *) addr)->sin_addr.s_addr);
132
133		if (!dev)
134			return ret;
135
136		ret = rdma_copy_addr(dev_addr, dev, NULL);
137		dev_put(dev);
138		break;
139#endif
140
141#if defined(INET6)
142	case AF_INET6:
143		dev = ip6_dev_find(&init_net,
144			((const struct sockaddr_in6 *)addr)->sin6_addr);
145
146		if (!dev)
147			return ret;
148
149		ret = rdma_copy_addr(dev_addr, dev, NULL);
150		dev_put(dev);
151		break;
152#endif
153	default:
154		break;
155	}
156	return ret;
157}
158EXPORT_SYMBOL(rdma_translate_ip);
159
160static void set_timeout(unsigned long time)
161{
162	int delay;	/* under FreeBSD ticks are 32-bit */
163
164	delay = time - jiffies;
165	if (delay <= 0)
166		delay = 1;
167
168	mod_delayed_work(addr_wq, &work, delay);
169}
170
171static void queue_req(struct addr_req *req)
172{
173	struct addr_req *temp_req;
174
175	mutex_lock(&lock);
176	list_for_each_entry_reverse(temp_req, &req_list, list) {
177		if (time_after_eq(req->timeout, temp_req->timeout))
178			break;
179	}
180
181	list_add(&req->list, &temp_req->list);
182
183	if (req_list.next == &req->list)
184		set_timeout(req->timeout);
185	mutex_unlock(&lock);
186}
187
188static int addr_resolve(struct sockaddr *src_in,
189			struct sockaddr *dst_in,
190			struct rdma_dev_addr *addr)
191{
192	struct sockaddr_in *sin;
193	struct sockaddr_in6 *sin6;
194	struct ifaddr *ifa;
195	struct ifnet *ifp;
196	struct rtentry *rte;
197#if defined(INET)
198	struct llentry *lle;
199#endif
200#if defined(INET6)
201	struct sockaddr_in6 dstv6_tmp;
202	uint16_t vlan_id;
203#endif
204	u_char edst[MAX_ADDR_LEN];
205	int multi;
206	int bcast;
207	int is_gw = 0;
208	int error = 0;
209
210	CURVNET_SET_QUIET(&init_net);
211
212	/*
213	 * Determine whether the address is unicast, multicast, or broadcast
214	 * and whether the source interface is valid.
215	 */
216	multi = 0;
217	bcast = 0;
218	sin = NULL;
219	sin6 = NULL;
220	ifp = NULL;
221	rte = NULL;
222	ifa = NULL;
223	memset(edst, 0, sizeof(edst));
224
225	switch (dst_in->sa_family) {
226#ifdef INET
227	case AF_INET:
228		sin = (struct sockaddr_in *)dst_in;
229		if (sin->sin_addr.s_addr == INADDR_BROADCAST)
230			bcast = 1;
231		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
232			multi = 1;
233		sin = (struct sockaddr_in *)src_in;
234		if (sin->sin_addr.s_addr != INADDR_ANY) {
235			ifp = ip_dev_find(&init_net, sin->sin_addr.s_addr);
236			if (ifp == NULL) {
237				error = ENETUNREACH;
238				goto done;
239			}
240			if (bcast || multi)
241				goto mcast;
242		}
243		break;
244#endif
245#ifdef INET6
246	case AF_INET6:
247		/* Make destination socket address writeable */
248		dstv6_tmp = *(struct sockaddr_in6 *)dst_in;
249		dst_in = (struct sockaddr *)&dstv6_tmp;
250		sin6 = (struct sockaddr_in6 *)dst_in;
251		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
252			multi = 1;
253		/*
254		 * Make sure the scope ID gets embedded, else rtalloc1() will
255		 * resolve to the loopback interface.
256		 */
257		sin6->sin6_scope_id = addr->bound_dev_if;
258		sa6_embedscope(sin6, 0);
259
260		sin6 = (struct sockaddr_in6 *)src_in;
261		if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
262			ifp = ip6_dev_find(&init_net, sin6->sin6_addr);
263			if (ifp == NULL) {
264				error = ENETUNREACH;
265				goto done;
266			}
267			if (bcast || multi)
268				goto mcast;
269		}
270		break;
271#endif
272	default:
273		error = EINVAL;
274		goto done;
275	}
276	/*
277	 * Make sure the route exists and has a valid link.
278	 */
279	rte = rtalloc1(dst_in, 1, 0);
280	if (rte == NULL || rte->rt_ifp == NULL ||
281	    RT_LINK_IS_UP(rte->rt_ifp) == 0 ||
282	    rte->rt_ifp == V_loif) {
283		if (rte != NULL) {
284			RTFREE_LOCKED(rte);
285			rte = NULL;
286		}
287		error = EHOSTUNREACH;
288		goto done;
289	}
290	if (rte->rt_flags & RTF_GATEWAY)
291		is_gw = 1;
292	/*
293	 * If it's not multicast or broadcast and the route doesn't match the
294	 * requested interface return unreachable.  Otherwise fetch the
295	 * correct interface pointer and unlock the route.
296	 */
297	if (multi || bcast) {
298		/* rt_ifa holds the route answer source address */
299		ifa = rte->rt_ifa;
300
301		if (ifp == NULL) {
302			ifp = rte->rt_ifp;
303			dev_hold(ifp);
304		}
305		RTFREE_LOCKED(rte);
306		rte = NULL;
307	} else if (ifp != NULL && ifp != rte->rt_ifp) {
308		RTFREE_LOCKED(rte);
309		rte = NULL;
310		error = ENETUNREACH;
311		goto done;
312	} else {
313		/* rt_ifa holds the route answer source address */
314		ifa = rte->rt_ifa;
315
316		if (ifp == NULL) {
317			ifp = rte->rt_ifp;
318			dev_hold(ifp);
319		}
320		RT_UNLOCK(rte);
321	}
322#if defined(INET) || defined(INET6)
323mcast:
324#endif
325	if (bcast) {
326		memcpy(edst, ifp->if_broadcastaddr, ifp->if_addrlen);
327		goto done;
328	} else if (multi) {
329		struct sockaddr *llsa = NULL;
330
331		if (ifp->if_resolvemulti == NULL) {
332			error = EOPNOTSUPP;
333			goto done;
334		}
335		error = ifp->if_resolvemulti(ifp, &llsa, dst_in);
336		if (error == 0) {
337			if (llsa == NULL) {
338				error = EAFNOSUPPORT;
339				goto done;
340			} else {
341				memcpy(edst, LLADDR((struct sockaddr_dl *)llsa),
342				    ifp->if_addrlen);
343				free(llsa, M_IFMADDR);
344			}
345		}
346		goto done;
347	}
348	/*
349	 * Resolve the link local address.
350	 */
351	switch (dst_in->sa_family) {
352#ifdef INET
353	case AF_INET:
354		error = arpresolve(ifp, rte, NULL, is_gw ? rte->rt_gateway : dst_in, edst, &lle);
355		break;
356#endif
357#ifdef INET6
358	case AF_INET6:
359		error = toe_l2_resolve(NULL, ifp, is_gw ? rte->rt_gateway : dst_in, edst, &vlan_id);
360		break;
361#endif
362	default:
363		KASSERT(0, ("rdma_addr_resolve: Unreachable"));
364		error = EINVAL;
365		break;
366	}
367done:
368	if (error == 0)
369		error = -rdma_copy_addr(addr, ifp, edst);
370	if (error == 0)
371		memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
372	if (error == EWOULDBLOCK)
373		error = ENODATA;
374	if (rte != NULL)
375		RTFREE(rte);
376	if (ifp != NULL)
377		dev_put(ifp);
378
379	CURVNET_RESTORE();
380	return -error;
381}
382
383static void process_req(struct work_struct *work)
384{
385	struct addr_req *req, *temp_req;
386	struct sockaddr *src_in, *dst_in;
387	struct list_head done_list;
388
389	INIT_LIST_HEAD(&done_list);
390
391	mutex_lock(&lock);
392	list_for_each_entry_safe(req, temp_req, &req_list, list) {
393		if (req->status == -ENODATA) {
394			src_in = (struct sockaddr *) &req->src_addr;
395			dst_in = (struct sockaddr *) &req->dst_addr;
396			req->status = addr_resolve(src_in, dst_in, req->addr);
397			if (req->status && time_after_eq(jiffies, req->timeout))
398				req->status = -ETIMEDOUT;
399			else if (req->status == -ENODATA)
400				continue;
401		}
402		list_move_tail(&req->list, &done_list);
403	}
404
405	if (!list_empty(&req_list)) {
406		req = list_entry(req_list.next, struct addr_req, list);
407		set_timeout(req->timeout);
408	}
409	mutex_unlock(&lock);
410
411	list_for_each_entry_safe(req, temp_req, &done_list, list) {
412		list_del(&req->list);
413		req->callback(req->status, (struct sockaddr *) &req->src_addr,
414			req->addr, req->context);
415		put_client(req->client);
416		kfree(req);
417	}
418}
419
420int rdma_resolve_ip(struct rdma_addr_client *client,
421		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
422		    struct rdma_dev_addr *addr, int timeout_ms,
423		    void (*callback)(int status, struct sockaddr *src_addr,
424				     struct rdma_dev_addr *addr, void *context),
425		    void *context)
426{
427	struct sockaddr *src_in, *dst_in;
428	struct addr_req *req;
429	int ret = 0;
430
431	req = kzalloc(sizeof *req, GFP_KERNEL);
432	if (!req)
433		return -ENOMEM;
434
435	src_in = (struct sockaddr *) &req->src_addr;
436	dst_in = (struct sockaddr *) &req->dst_addr;
437
438	if (src_addr) {
439		if (src_addr->sa_family != dst_addr->sa_family) {
440			ret = -EINVAL;
441			goto err;
442		}
443
444		memcpy(src_in, src_addr, ip_addr_size(src_addr));
445	} else {
446		src_in->sa_family = dst_addr->sa_family;
447	}
448
449	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
450	req->addr = addr;
451	req->callback = callback;
452	req->context = context;
453	req->client = client;
454	atomic_inc(&client->refcount);
455
456	req->status = addr_resolve(src_in, dst_in, addr);
457	switch (req->status) {
458	case 0:
459		req->timeout = jiffies;
460		queue_req(req);
461		break;
462	case -ENODATA:
463		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
464		queue_req(req);
465		break;
466	default:
467		ret = req->status;
468		atomic_dec(&client->refcount);
469		goto err;
470	}
471	return ret;
472err:
473	kfree(req);
474	return ret;
475}
476EXPORT_SYMBOL(rdma_resolve_ip);
477
478void rdma_addr_cancel(struct rdma_dev_addr *addr)
479{
480	struct addr_req *req, *temp_req;
481
482	mutex_lock(&lock);
483	list_for_each_entry_safe(req, temp_req, &req_list, list) {
484		if (req->addr == addr) {
485			req->status = -ECANCELED;
486			req->timeout = jiffies;
487			list_move(&req->list, &req_list);
488			set_timeout(req->timeout);
489			break;
490		}
491	}
492	mutex_unlock(&lock);
493}
494EXPORT_SYMBOL(rdma_addr_cancel);
495
496static int __init addr_init(void)
497{
498	INIT_DELAYED_WORK(&work, process_req);
499	addr_wq = create_singlethread_workqueue("ib_addr");
500	if (!addr_wq)
501		return -ENOMEM;
502
503	return 0;
504}
505
506static void __exit addr_cleanup(void)
507{
508	destroy_workqueue(addr_wq);
509}
510
/* Register the module entry and exit points. */
module_init(addr_init);
module_exit(addr_cleanup);
513