// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans of and updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     into protocol backends, no attempts to send anything to the network.
     Doing so will result in deadlocks if the backend/driver wants to use
     the neighbour cache.
   - If an entry requires some non-trivial action, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - by their reference count.
   - by the rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
    - the timer
    - the resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow there is dev->hard_header.
   dev->hard_header is assumed to be simplistic and not to make
   callbacks into neighbour tables.
 */
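
/*
 * Illustrative sketch of the rule above (not compiled; the helper name
 * is hypothetical): to do anything non-trivial with an entry found
 * during a bucket scan, pin it with a reference and drop tbl->lock
 * before calling out.
 */
#if 0
static void example_process_entry(struct neigh_table *tbl, struct neighbour *n)
{
	neigh_hold(n);			/* the refcount prevents destruction */
	write_unlock_bh(&tbl->lock);	/* no callbacks under tbl->lock */
	/* ... safe to reach protocol backends / drivers here ... */
	neigh_release(n);
	write_lock_bh(&tbl->lock);	/* re-take before resuming the scan */
}
#endif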

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * The result is a random distribution over the interval
 * (1/2)*base ... (3/2)*base.  This matches the default IPv6 settings
 * and is not overridable, because it is a genuinely reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? get_random_u32_below(base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
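
/*
 * Worked example (illustrative): with base = 30 * HZ,
 * get_random_u32_below(base) yields a value in [0, 30 * HZ) and adding
 * (base >> 1) shifts the window up, giving a result uniform over
 * [15 * HZ, 45 * HZ) -- exactly (1/2)*base ... (3/2)*base as noted above.
 */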

static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
	if (!list_empty(&n->managed_list))
		list_del_init(&n->managed_list);
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}
out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static void neigh_update_managed_list(struct neighbour *n)
{
	bool on_managed_list, add_to_managed;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);
	if (n->dead)
		goto out;

	add_to_managed = n->flags & NTF_MANAGED;
	on_managed_list = !list_empty(&n->managed_list);

	if (!add_to_managed && on_managed_list)
		list_del_init(&n->managed_list);
	else if (add_to_managed && !on_managed_list)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static void neigh_update_flags(struct neighbour *neigh, u32 flags, int *notify,
			       bool *gc_update, bool *managed_update)
{
	u32 ndm_flags, old_flags = neigh->flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return;

	ndm_flags  = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	ndm_flags |= (flags & NEIGH_UPDATE_F_MANAGED) ? NTF_MANAGED : 0;

	if ((old_flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		*notify = 1;
		*gc_update = true;
	}
	if ((old_flags ^ ndm_flags) & NTF_MANAGED) {
		if (ndm_flags & NTF_MANAGED)
			neigh->flags |= NTF_MANAGED;
		else
			neigh->flags &= ~NTF_MANAGED;
		*notify = 1;
		*managed_update = true;
	}
}
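
/*
 * Example of the mapping above (illustrative): an administrative update
 * carrying NEIGH_UPDATE_F_MANAGED on an entry that lacks NTF_MANAGED
 * sets the flag, requests a netlink notification via *notify, and asks
 * the caller to refresh tbl->managed_list via *managed_update; a
 * non-admin update returns early and changes nothing.
 */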

static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) -
			READ_ONCE(tbl->gc_thresh2);
	u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;
	int loop = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (n->nud_state == NUD_NOARP) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    !time_in_range(n->updated, tref, jiffies))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
			if (++loop == 16) {
				if (ktime_get_ns() > tmax)
					goto unlock;
				loop = 0;
			}
		}
	}

	WRITE_ONCE(tbl->last_flush, jiffies);
unlock:
	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	/* Keep a safe distance from the jiffies - LONG_MAX wraparound point
	 * while the timer is running in DELAY/PROBE state, but still report
	 * large times in the past to user space.
	 */
	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);

	neigh_hold(n);
	if (!time_in_range(n->confirmed, mint, jiffies))
		n->confirmed = mint;
	if (time_before(n->used, n->confirmed))
		n->used = n->confirmed;
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_parms_qlen_dec(struct net_device *dev, int family)
{
	struct neigh_parms *p;

	rcu_read_lock();
	p = neigh_get_dev_parms_rcu(dev, family);
	if (p)
		p->qlen--;
	rcu_read_unlock();
}

static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
			       int family)
{
	struct sk_buff_head tmp;
	unsigned long flags;
	struct sk_buff *skb;

	skb_queue_head_init(&tmp);
	spin_lock_irqsave(&list->lock, flags);
	skb = skb_peek(list);
	while (skb != NULL) {
		struct sk_buff *skb_next = skb_peek_next(skb, list);
		struct net_device *dev = skb->dev;

		if (net == NULL || net_eq(dev_net(dev), net)) {
			neigh_parms_qlen_dec(dev, family);
			__skb_unlink(skb, list);
			__skb_queue_tail(&tmp, skb);
		}
		skb = skb_next;
	}
	spin_unlock_irqrestore(&list->lock, flags);

	while ((skb = __skb_dequeue(&tmp))) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   Destruction will be delayed until
				   the last user releases it, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				WRITE_ONCE(n->output, neigh_blackhole);
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);
	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
			   tbl->family);
	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		del_timer_sync(&tbl->proxy_timer);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);

static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u32 flags, bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries, gc_thresh3;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
	if (entries >= gc_thresh3 ||
	    (entries >= READ_ONCE(tbl->gc_thresh2) &&
	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated	  = n->used = now;
	n->nud_state	  = NUD_NONE;
	n->output	  = neigh_blackhole;
	n->flags	  = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms	  = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl		  = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead		  = 1;
	INIT_LIST_HEAD(&n->gc_list);
	INIT_LIST_HEAD(&n->managed_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
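
/*
 * Sizing example (illustrative, assuming 8-byte pointers and 4 KiB
 * pages): the initial shift of 3 gives 8 buckets, a 64-byte kzalloc();
 * a shift of 10 gives 1024 buckets (8 KiB), which takes the
 * __get_free_pages() path and is therefore reported to kmemleak
 * explicitly, since page allocations are not tracked automatically.
 */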

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);

static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
		struct net_device *dev, u32 flags,
		bool exempt_from_gc, bool want_ref)
{
	u32 hash_val, key_len = tbl->key_len;
	struct neighbour *n1, *rc, *n;
	struct neigh_hash_table *nht;
	int error;

	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_ATOMIC);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
	if (n->flags & NTF_MANAGED)
		list_add_tail(&n->managed_list, &n->tbl->managed_list);
	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	bool exempt_from_gc = !!(dev->flags & IFF_LOOPBACK);

	return ___neigh_create(tbl, pkey, dev, 0, exempt_from_gc, want_ref);
}
EXPORT_SYMBOL(__neigh_create);

static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);

	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}
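
/*
 * Example (illustrative): the hash uses only the last four key bytes --
 * for IPv4 that is the whole address, for IPv6 the final 32 bits. The
 * successive xor-folds (by 16, 8, then 4) mix those 32 bits down into
 * the low nibble, so the result always lands in [0, PNEIGH_HASHMASK].
 */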

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	netdev_hold(dev, &n->dev_tracker, GFP_KERNEL);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		netdev_put(dev, &n->dev_tracker);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			netdev_put(n->dev, &n->dev_tracker);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		netdev_put(n->dev, &n->dev_tracker);
		kfree(n);
	}
	return -ENOENT;
}

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	The neighbour must already be out of the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	netdev_put(dev, &neigh->dev_tracker);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->output);
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;

		WRITE_ONCE(tbl->last_rand, jiffies);
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
		goto out;

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed) &&
			    time_is_before_eq_jiffies(n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     !time_in_range_open(jiffies, n->used,
						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;

	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
	        NEIGH_VAR(p, MCAST_PROBES));
}
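
/*
 * Worked example (illustrative): with UCAST_PROBES = 3, APP_PROBES = 0
 * and MCAST_PROBES = 3, an entry resolving in NUD_INCOMPLETE may send
 * up to 6 probes; once the entry is in NUD_PROBE, MCAST_REPROBES is
 * used in place of MCAST_PROBES for the multicast share.
 */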

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a delicate spot. report_unreachable is a very
	   complicated routine; in particular, it can end up hitting
	   this very same neighbour entry!

	   So we tread carefully here to avoid a dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_DELAY);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_STALE);
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			WRITE_ONCE(neigh->nud_state, NUD_PROBE);
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		WRITE_ONCE(neigh->nud_state, NUD_FAILED);
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
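
/*
 * Summary of the transitions implemented above (illustrative):
 *
 *   NUD_REACHABLE --(reachable_time elapsed, recently used)--> NUD_DELAY
 *   NUD_REACHABLE --(reachable_time elapsed, idle)-----------> NUD_STALE
 *   NUD_DELAY     --(confirmation arrived in time)-----------> NUD_REACHABLE
 *   NUD_DELAY     --(DELAY_PROBE_TIME elapsed)---------------> NUD_PROBE
 *   NUD_PROBE / NUD_INCOMPLETE --(probes exhausted)----------> NUD_FAILED
 *
 * Every transition bumps neigh->updated, and most of them also trigger
 * a netlink notification.
 */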

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
		       const bool immediate_ok)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
			neigh->updated = now;
			if (!immediate_ok) {
				next = now + 1;
			} else {
				immediate_probe = true;
				next = now + max(NEIGH_VAR(neigh->parms,
							   RETRANS_TIME),
						 HZ / 100);
			}
			neigh_add_timer(neigh, next);
		} else {
			WRITE_ONCE(neigh->nud_state, NUD_FAILED);
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_FAILED);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		WRITE_ONCE(neigh->nud_state, NUD_DELAY);
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb_reason(buff, SKB_DROP_REASON_NEIGH_QUEUEFULL);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_DEAD);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
		= NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if none is supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows overriding an existing lladdr
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
	NEIGH_UPDATE_F_MANAGED	means that the entry will be auto-refreshed.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows overriding an existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates whether the neighbour is known
				to be a router.

   The caller MUST hold a reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool gc_update = false, managed_update = false;
	int update_isrouter = 0;
	struct net_device *dev;
	int err, notify = 0;
	u8 old;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
	if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
		new = old & ~NUD_PERMANENT;
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		goto out;
	}

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		WRITE_ONCE(neigh->nud_state, new);
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update the confirmed timestamp for the neighbour entry after we
	 * receive an ARP packet, even if it doesn't change the IP to MAC
	 * binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If the entry was valid and the address has not changed,
	   do not change the entry state if the new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update the timestamp only once we know we will make a change to
	 * the neighbour entry. Otherwise we risk moving the locktime window
	 * with noop updates and ignoring relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		WRITE_ONCE(neigh->nud_state, new);
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;

			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			READ_ONCE(n1->output)(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);
	if (((new ^ old) & NUD_PERMANENT) || gc_update)
		neigh_update_gc_list(neigh);
	if (managed_update)
		neigh_update_managed_list(neigh);
	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);
	trace_neigh_update_done(neigh, err);
	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
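
/*
 * Caller-side sketch (illustrative, not compiled): a protocol that has
 * just confirmed a peer's link-layer address might refresh the entry
 * like this; "n" and "ha" are hypothetical locals, and the caller holds
 * a reference as required by the comment above __neigh_update().
 */
#if 0
	err = neigh_update(n, ha, NUD_REACHABLE,
			   NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_WEAK_OVERRIDE,
			   0);
#endif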

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
				      HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);

	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache	*hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */

int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

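		/*
		 * Rebuild the link-layer header if the ha_lock seqlock
		 * shows that neigh->ha was rewritten while we copied it.
		 */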
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */

int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);

static void neigh_managed_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table,
					       managed_work.work);
	struct neighbour *neigh;

	write_lock_bh(&tbl->lock);
	list_for_each_entry(neigh, &tbl->managed_list, managed_list)
		neigh_event_send_probe(neigh, NULL, false);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work,
			   NEIGH_VAR(&tbl->parms, INTERVAL_PROBE_TIME_MS));
	write_unlock_bh(&tbl->lock);
}

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			neigh_parms_qlen_dec(dev, tbl->family);
			__skb_unlink(skb, &tbl->proxy_queue);

			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

static unsigned long neigh_proxy_delay(struct neigh_parms *p)
{
	/* If proxy_delay is zero, do not call get_random_u32_below()
	 * as it is undefined behavior.
	 */
	unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);

	return proxy_delay ?
	       jiffies + get_random_u32_below(proxy_delay) : jiffies;
}
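
/*
 * Example (illustrative): with PROXY_DELAY = HZ the reply is scheduled
 * uniformly within the next second; with PROXY_DELAY = 0 it is
 * scheduled for "now", skipping the RNG call entirely.
 */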

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long sched_next = neigh_proxy_delay(p);

	if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	p->qlen++;
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		p->qlen = 0;
		netdev_hold(dev, &p->dev_tracker, GFP_KERNEL);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			netdev_put(dev, &p->dev_tracker);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	netdev_put(parms->dev, &parms->dev_tracker);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	INIT_LIST_HEAD(&tbl->managed_list);

	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
	tbl->parms.qlen = 0;

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);

	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	INIT_DEFERRABLE_WORK(&tbl->managed_work, neigh_managed_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, 0);

	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;

	rcu_assign_pointer(neigh_tables[index], tbl);
}
EXPORT_SYMBOL(neigh_table_init);
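
/*
 * Usage sketch (illustrative, not compiled): protocols register their
 * tables once at init time; ARP, for instance, does the equivalent of:
 */
#if 0
	neigh_table_init(NEIGH_ARP_TABLE, &arp_tbl);
#endif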

/*
 * Only called from ndisc_cleanup(), which means this is dead code
 * because the IPv6 module can no longer be unloaded.
 */
1839int neigh_table_clear(int index, struct neigh_table *tbl)
1840{
1841	RCU_INIT_POINTER(neigh_tables[index], NULL);
1842	synchronize_rcu();
1843
1844	/* It is not clean... Fix it to unload IPv6 module safely */
1845	cancel_delayed_work_sync(&tbl->managed_work);
1846	cancel_delayed_work_sync(&tbl->gc_work);
1847	del_timer_sync(&tbl->proxy_timer);
1848	pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family);
1849	neigh_ifdown(tbl, NULL);
1850	if (atomic_read(&tbl->entries))
1851		pr_crit("neighbour leakage\n");
1852
1853	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1854		 neigh_hash_free_rcu);
1855	tbl->nht = NULL;
1856
1857	kfree(tbl->phash_buckets);
1858	tbl->phash_buckets = NULL;
1859
1860	remove_proc_entry(tbl->id, init_net.proc_net_stat);
1861
1862	free_percpu(tbl->stats);
1863	tbl->stats = NULL;
1864
1865	return 0;
1866}
1867EXPORT_SYMBOL(neigh_table_clear);
1868
1869static struct neigh_table *neigh_find_table(int family)
1870{
1871	struct neigh_table *tbl = NULL;
1872
1873	switch (family) {
1874	case AF_INET:
1875		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]);
1876		break;
1877	case AF_INET6:
1878		tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]);
1879		break;
1880	}
1881
1882	return tbl;
1883}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
	[NDA_NH_ID]		= { .type = NLA_U32 },
	[NDA_FLAGS_EXT]		= NLA_POLICY_MASK(NLA_U32, NTF_EXT_MASK),
	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
};

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	u32 ndm_flags;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	ndm_flags = ndm->ndm_flags;
	if (tb[NDA_FLAGS_EXT]) {
		u32 ext = nla_get_u32(tb[NDA_FLAGS_EXT]);

		BUILD_BUG_ON(sizeof(neigh->flags) * BITS_PER_BYTE <
			     (sizeof(ndm->ndm_flags) * BITS_PER_BYTE +
			      hweight32(NTF_EXT_MASK)));
		ndm_flags |= (ext << NTF_EXT_SHIFT);
	}
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);
	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		if (ndm_flags & NTF_MANAGED) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag combination");
			goto out;
		}

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool ndm_permanent  = ndm->ndm_state & NUD_PERMANENT;
		bool exempt_from_gc = ndm_permanent ||
				      ndm_flags & NTF_EXT_LEARNED;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}
		if (ndm_permanent && (ndm_flags & NTF_MANAGED)) {
			NL_SET_ERR_MSG(extack, "Invalid NTF_* flag for permanent entry");
			err = -EINVAL;
			goto out;
		}

		neigh = ___neigh_create(tbl, dst, dev,
					ndm_flags &
					(NTF_EXT_LEARNED | NTF_MANAGED),
					exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (protocol)
		neigh->protocol = protocol;
	if (ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
	if (ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;
	if (ndm_flags & NTF_MANAGED)
		flags |= NEIGH_UPDATE_F_MANAGED;
	if (ndm_flags & NTF_USE)
		flags |= NEIGH_UPDATE_F_USE;

	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
			     NETLINK_CB(skb).portid, extack);
	if (!err && ndm_flags & (NTF_USE | NTF_MANAGED)) {
		neigh_event_send(neigh, NULL);
		err = 0;
	}
	neigh_release(neigh);
out:
	return err;
}
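
/*
 * For illustration, a userspace request such as
 *
 *	ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 dev eth0
 *
 * reaches neigh_add() as RTM_NEWNEIGH with NLM_F_CREATE | NLM_F_REPLACE,
 * an NDA_DST of 192.0.2.1 and an NDA_LLADDR attribute (device name and
 * addresses are placeholders).
 */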

static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximate value for the deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_INTERVAL_PROBE_TIME_MS,
			  NEIGH_VAR(parms, INTERVAL_PROBE_TIME_MS), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, READ_ONCE(tbl->gc_interval),
			  NDTA_PAD) ||
	    nla_put_u32(skb, NDTA_THRESH1, READ_ONCE(tbl->gc_thresh1)) ||
	    nla_put_u32(skb, NDTA_THRESH2, READ_ONCE(tbl->gc_thresh2)) ||
	    nla_put_u32(skb, NDTA_THRESH3, READ_ONCE(tbl->gc_thresh3)))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		long flush_delta = now - READ_ONCE(tbl->last_flush);
		long rand_delta = now - READ_ONCE(tbl->last_rand);
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= READ_ONCE(tbl->proxy_queue.qlen),
		};

		rcu_read_lock();
		nht = rcu_dereference(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= READ_ONCE(st->allocs);
			ndst.ndts_destroys		+= READ_ONCE(st->destroys);
			ndst.ndts_hash_grows		+= READ_ONCE(st->hash_grows);
			ndst.ndts_res_failed		+= READ_ONCE(st->res_failed);
			ndst.ndts_lookups		+= READ_ONCE(st->lookups);
			ndst.ndts_hits			+= READ_ONCE(st->hits);
			ndst.ndts_rcv_probes_mcast	+= READ_ONCE(st->rcv_probes_mcast);
			ndst.ndts_rcv_probes_ucast	+= READ_ONCE(st->rcv_probes_ucast);
			ndst.ndts_periodic_gc_runs	+= READ_ONCE(st->periodic_gc_runs);
			ndst.ndts_forced_gc_runs	+= READ_ONCE(st->forced_gc_runs);
			ndst.ndts_table_fulls		+= READ_ONCE(st->table_fulls);
		}

		if (nla_put_64bit(skb, NDTA_STATS, sizeof(ndst), &ndst,
				  NDTA_PAD))
			goto nla_put_failure;
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
	[NDTPA_INTERVAL_PROBE_TIME_MS]	= { .type = NLA_U64, .min = 1 },
};

static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_INTERVAL_PROBE_TIME_MS:
				NEIGH_VAR_SET(p, INTERVAL_PROBE_TIME_MS,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));

	if (tb[NDTA_THRESH2])
		WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));

	if (tb[NDTA_THRESH3])
		WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));

	if (tb[NDTA_GC_INTERVAL])
		WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}
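
/*
 * For illustration, neightbl_set() is what services a request such as
 *
 *	ip ntable change name arp_cache dev eth0 queue 8
 *
 * i.e. an RTM_SETNEIGHTBL message carrying NDTA_NAME plus a nested
 * NDTA_PARMS attribute (here NDTPA_IFINDEX and NDTPA_QUEUE_LEN); the
 * device name is a placeholder.
 */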

static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct ndtmsg *ndtm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
		return -EINVAL;
	}

	ndtm = nlmsg_data(nlh);
	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
		return -EINVAL;
	}

	return 0;
}

static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = rcu_dereference_rtnl(neigh_tables[tidx]);
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}

static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	u32 neigh_flags, neigh_flags_ext;
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	neigh_flags_ext = neigh->flags >> NTF_EXT_SHIFT;
	neigh_flags     = neigh->flags & NTF_OLD_MASK;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh_flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;
	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	u32 neigh_flags, neigh_flags_ext;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	neigh_flags_ext = pn->flags >> NTF_EXT_SHIFT;
	neigh_flags     = pn->flags & NTF_OLD_MASK;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = tbl->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh_flags | NTF_PROXY;
	ndm->ndm_type	 = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state	 = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
		goto nla_put_failure;
	if (neigh_flags_ext && nla_put_u32(skb, NDA_FLAGS_EXT, neigh_flags_ext))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}

static bool neigh_master_filtered(struct net_device *dev, int master_idx)
{
	struct net_device *master;

	if (!master_idx)
		return false;

	master = dev ? netdev_master_upper_dev_get_rcu(dev) : NULL;

	/* 0 already denotes that NDA_MASTER was not passed, so another
	 * otherwise-invalid ifindex value (-1) is used to request entries
	 * with no master.
	 */
	if (master_idx == -1)
		return !!master;

	if (!master || master->ifindex != master_idx)
		return true;

	return false;
}

static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
{
	if (filter_idx && (!dev || dev->ifindex != filter_idx))
		return true;

	return false;
}

struct neigh_dump_filter {
	int master_idx;
	int dev_idx;
};

static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int err = 0, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	nht = rcu_dereference(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					      cb->nlh->nlmsg_seq,
					      RTM_NEWNEIGH, flags);
			if (err < 0)
				goto out;
next:
			idx++;
		}
	}
out:
	cb->args[1] = h;
	cb->args[2] = idx;
	return err;
}

static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int err = 0, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	read_lock_bh(&tbl->lock);

	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					       cb->nlh->nlmsg_seq,
					       RTM_NEWNEIGH, flags, tbl);
			if (err < 0) {
				read_unlock_bh(&tbl->lock);
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return err;
}

static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
				bool strict_check,
				struct neigh_dump_filter *filter,
				struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	int err, i;

	if (strict_check) {
		struct ndmsg *ndm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
			return -EINVAL;
		}

		ndm = nlmsg_data(nlh);
		if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
		    ndm->ndm_state || ndm->ndm_type) {
			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
			return -EINVAL;
		}

		if (ndm->ndm_flags & ~NTF_PROXY) {
			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
			return -EINVAL;
		}

		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
						    tb, NDA_MAX, nda_policy,
						    extack);
	} else {
		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
					     NDA_MAX, nda_policy, extack);
	}
	if (err < 0)
		return err;

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		/* all new attributes should require strict_check */
		switch (i) {
		case NDA_IFINDEX:
			filter->dev_idx = nla_get_u32(tb[i]);
			break;
		case NDA_MASTER:
			filter->master_idx = nla_get_u32(tb[i]);
			break;
		default:
			if (strict_check) {
				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;

	s_t = cb->args[0];

	rcu_read_lock();
	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = rcu_dereference(neigh_tables[t]);

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}
	rcu_read_unlock();

	cb->args[0] = t;
	return err;
}
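
/*
 * For illustration, "ip neigh show dev eth0" and "ip neigh show master
 * br0" both arrive here as RTM_GETNEIGH dump requests; NDA_IFINDEX and
 * NDA_MASTER are folded into the filter above and matching entries are
 * emitted with NLM_F_DUMP_FILTERED set (device names are placeholders).
 */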

static int neigh_valid_get_req(const struct nlmsghdr *nlh,
			       struct neigh_table **tbl,
			       void **dst, int *dev_idx, u8 *ndm_flags,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	struct ndmsg *ndm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
		return -EINVAL;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
	    ndm->ndm_type) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
		return -EINVAL;
	}

	if (ndm->ndm_flags & ~NTF_PROXY) {
		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
					    NDA_MAX, nda_policy, extack);
	if (err < 0)
		return err;

	*ndm_flags = ndm->ndm_flags;
	*dev_idx = ndm->ndm_ifindex;
	*tbl = neigh_find_table(ndm->ndm_family);
	if (*tbl == NULL) {
		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
		return -EAFNOSUPPORT;
	}

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		switch (i) {
		case NDA_DST:
			if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
				NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
				return -EINVAL;
			}
			*dst = nla_data(tb[i]);
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
			return -EINVAL;
		}
	}

	return 0;
}

static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4)  /* NDA_PROBES */
	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int neigh_get_reply(struct net *net, struct neighbour *neigh,
			   u32 pid, u32 seq)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static inline size_t pneigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(4)  /* NDA_FLAGS_EXT */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
			    u32 pid, u32 seq, struct neigh_table *tbl)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct net_device *dev = NULL;
	struct neigh_table *tbl = NULL;
	struct neighbour *neigh;
	void *dst = NULL;
	u8 ndm_flags = 0;
	int dev_idx = 0;
	int err;

	err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
				  extack);
	if (err < 0)
		return err;

	if (dev_idx) {
		dev = __dev_get_by_index(net, dev_idx);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
			return -ENODEV;
		}
	}

	if (!dst) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		return -EINVAL;
	}

	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		pn = pneigh_lookup(tbl, net, dst, dev, 0);
		if (!pn) {
			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
			return -ENOENT;
		}
		return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, tbl);
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "No device specified");
		return -EINVAL;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (!neigh) {
		NL_SET_ERR_MSG(extack, "Neighbour entry not found");
		return -ENOENT;
	}

	err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
			      nlh->nlmsg_seq);

	neigh_release(neigh);

	return err;
}

void neigh_for_each(struct neigh_table *tbl,
		    void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock();
	nht = rcu_dereference(tbl->nht);

	read_lock_bh(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference(n->next))
			cb(n, cookie);
	}
	read_unlock_bh(&tbl->lock);
	rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_for_each);
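
/*
 * Usage sketch (hypothetical caller): counting the entries that sit on
 * one device.  The callback runs under tbl->lock with BH disabled, so
 * it must not sleep:
 *
 *	struct dev_count { struct net_device *dev; int count; };
 *
 *	static void count_on_dev(struct neighbour *n, void *cookie)
 *	{
 *		struct dev_count *c = cookie;
 *
 *		if (n->dev == c->dev)
 *			c->count++;
 *	}
 *
 *	...
 *	struct dev_count c = { .dev = dev };
 *	neigh_for_each(&arp_tbl, count_on_dev, &c);
 */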

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
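
/*
 * Usage sketch (hypothetical caller, already holding tbl->lock as a
 * writer with BH disabled): dropping every FAILED entry in one pass.
 * A non-zero return from the callback unlinks the entry and drops the
 * table's reference:
 *
 *	static int release_if_failed(struct neighbour *n)
 *	{
 *		return n->nud_state & NUD_FAILED;
 *	}
 *
 *	__neigh_for_each_release(tbl, release_if_failed);
 */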

int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;

	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		rcu_read_lock();
		tbl = rcu_dereference(neigh_tables[index]);
		if (!tbl)
			goto out_unlock;
		if (index == NEIGH_ARP_TABLE) {
			u32 key = *((u32 *)addr);

			neigh = __ipv4_neigh_lookup_noref(dev, key);
		} else {
			neigh = __neigh_lookup_noref(tbl, addr, dev);
		}
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock();
			goto out_kfree_skb;
		}
		err = READ_ONCE(neigh->output)(neigh, skb);
out_unlock:
		rcu_read_unlock();
	} else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);
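
/*
 * Usage sketch (hypothetical caller, in the spirit of the tunnel and
 * MPLS users of this helper): transmit an skb towards an IPv4 next hop
 * without keeping a neighbour reference around:
 *
 *	__be32 nh;	(placeholder next-hop address, filled by caller)
 *
 *	err = neigh_xmit(NEIGH_ARP_TABLE, dev, &nh, skb);
 *
 * For NEIGH_LINK_TABLE the address is handed straight to
 * dev_hard_header() and no neighbour entry is involved at all.
 */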

#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}

void *neigh_seq_start(struct seq_file *seq, loff_t *pos,
		      struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(tbl->lock)
	__acquires(rcu)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock();
	state->nht = rcu_dereference(tbl->nht);
	read_lock_bh(&tbl->lock);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(tbl->lock)
	__releases(rcu)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;

	read_unlock_bh(&tbl->lock);
	rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_seq_stop);
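
/*
 * Usage sketch (illustrative, modelled on the ARP /proc code): a
 * protocol wires these helpers into its own seq_operations,
 *
 *	static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
 *	{
 *		return neigh_seq_start(seq, pos, &arp_tbl,
 *				       NEIGH_SEQ_SKIP_NOARP);
 *	}
 *
 * with neigh_seq_next() and neigh_seq_stop() as .next and .stop and a
 * protocol-specific .show ("foo" is a placeholder name).
 */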

/* statistics via seq_file */

static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = pde_data(file_inode(seq->file));
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos - 1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu + 1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = pde_data(file_inode(seq->file));
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu + 1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	(*pos)++;
	return NULL;
}

static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = pde_data(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "entries  allocs   destroys hash_grows lookups  hits     res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x %08lx %08lx %08lx   %08lx %08lx %08lx   "
			"%08lx         %08lx         %08lx         "
			"%08lx       %08lx            %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}

static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
#endif /* CONFIG_PROC_FS */

static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);

#ifdef CONFIG_SYSCTL
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);

static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = SYSCTL_ZERO;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
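
/*
 * Worked example (sizes are illustrative; SKB_TRUESIZE(ETH_FRAME_LEN)
 * depends on the build): writing 3 to the legacy unres_qlen knob stores
 * 3 * SKB_TRUESIZE(ETH_FRAME_LEN) into QUEUE_LEN_BYTES, and a read
 * divides back down, so the packet-based and byte-based views always
 * describe the same underlying limit, modulo rounding.
 */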

static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}

static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}

static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	tmp.extra1 = SYSCTL_ZERO;
	tmp.extra2 = SYSCTL_INT_MAX;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}

static int neigh_proc_dointvec_ms_jiffies_positive(struct ctl_table *ctl, int write,
						   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	int min = msecs_to_jiffies(1);

	tmp.extra1 = &min;
	tmp.extra2 = NULL;

	ret = proc_dointvec_ms_jiffies_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
			size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);

int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);

static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void *buffer, size_t *lenp,
					      loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);

static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}

#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies_positive)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
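
/*
 * For reference, NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit")
 * expands to roughly
 *
 *	[NEIGH_VAR_APP_PROBES] = {
 *		.procname	= "app_solicit",
 *		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_APP_PROBES),
 *		.maxlen		= sizeof(int),
 *		.mode		= 0644,
 *		.proc_handler	= neigh_proc_dointvec_zero_intmax,
 *	},
 *
 * with .data rebased onto a real neigh_parms instance later, in
 * neigh_sysctl_register().
 */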

static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_MS_JIFFIES_POSITIVE_ENTRY(INTERVAL_PROBE_TIME_MS,
						       "interval_probe_time_ms"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
	},
};

int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ];
	char *p_name;
	size_t neigh_vars_size;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	neigh_vars_size = ARRAY_SIZE(t->neigh_vars);
	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		neigh_vars_size = NEIGH_VAR_BASE_REACHABLE_TIME_MS + 1;
	} else {
		struct neigh_table *tbl = p->tbl;

		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* These handlers update p->reachable_time as soon as
		 * base_reachable_time(_ms) is set, so the new interval takes
		 * effect at the next neighbour update instead of waiting for
		 * neigh_periodic_work to recompute it (which can take several
		 * minutes).  Any handler that replaces them should do the
		 * same.
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	switch (neigh_parms_family(p)) {
	case AF_INET:
		p_name = "ipv4";
		break;
	case AF_INET6:
		p_name = "ipv6";
		break;
	default:
		BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		 p_name, dev_name_source);
	t->sysctl_header = register_net_sysctl_sz(neigh_parms_net(p),
						  neigh_path, t->neigh_vars,
						  neigh_vars_size);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
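
/*
 * Usage sketch (hypothetical caller, patterned after the per-device
 * registrations in the ARP and ND code): register the stock handlers
 * for one device's parms,
 *
 *	struct neigh_parms *parms;	(the device's parms, however obtained)
 *
 *	if (neigh_sysctl_register(dev, parms, NULL))
 *		goto fail;
 *
 * Passing a NULL handler keeps neigh_proc_base_reachable_time() wired
 * to the two base_reachable_time knobs.
 */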

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;

		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);

#endif	/* CONFIG_SYSCTL */

static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info,
		      RTNL_FLAG_DUMP_UNLOCKED);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);