1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 *	NET3	IP device support routines.
4 *
5 *	Derived from the IP parts of dev.c 1.0.19
6 * 		Authors:	Ross Biro
7 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
8 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
9 *
10 *	Additional Authors:
11 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
12 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 *	Changes:
15 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
16 *					lists.
17 *		Cyrus Durgin:		updated for kmod
18 *		Matthias Andree:	in devinet_ioctl, compare label and
19 *					address (4.4BSD alias style support),
20 *					fall back to comparing just the label
21 *					if no match found.
22 */
23
24
25#include <linux/uaccess.h>
26#include <linux/bitops.h>
27#include <linux/capability.h>
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/kernel.h>
31#include <linux/sched/signal.h>
32#include <linux/string.h>
33#include <linux/mm.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/in.h>
37#include <linux/errno.h>
38#include <linux/interrupt.h>
39#include <linux/if_addr.h>
40#include <linux/if_ether.h>
41#include <linux/inet.h>
42#include <linux/netdevice.h>
43#include <linux/etherdevice.h>
44#include <linux/skbuff.h>
45#include <linux/init.h>
46#include <linux/notifier.h>
47#include <linux/inetdevice.h>
48#include <linux/igmp.h>
49#include <linux/slab.h>
50#include <linux/hash.h>
51#ifdef CONFIG_SYSCTL
52#include <linux/sysctl.h>
53#endif
54#include <linux/kmod.h>
55#include <linux/netconf.h>
56
57#include <net/arp.h>
58#include <net/ip.h>
59#include <net/route.h>
60#include <net/ip_fib.h>
61#include <net/rtnetlink.h>
62#include <net/net_namespace.h>
63#include <net/addrconf.h>
64
65#define IPV6ONLY_FLAGS	\
66		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
67		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
68		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
69
70static struct ipv4_devconf ipv4_devconf = {
71	.data = {
72		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
77		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
78		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
79	},
80};
81
82static struct ipv4_devconf ipv4_devconf_dflt = {
83	.data = {
84		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
85		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
86		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
87		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
88		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
89		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
90		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
91		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
92	},
93};
94
95#define IPV4_DEVCONF_DFLT(net, attr) \
96	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
97
98static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
99	[IFA_LOCAL]     	= { .type = NLA_U32 },
100	[IFA_ADDRESS]   	= { .type = NLA_U32 },
101	[IFA_BROADCAST] 	= { .type = NLA_U32 },
102	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
103	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
104	[IFA_FLAGS]		= { .type = NLA_U32 },
105	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
106	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
107	[IFA_PROTO]		= { .type = NLA_U8 },
108};
109
110struct inet_fill_args {
111	u32 portid;
112	u32 seq;
113	int event;
114	unsigned int flags;
115	int netnsid;
116	int ifindex;
117};
118
119#define IN4_ADDR_HSIZE_SHIFT	8
120#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)
121
122static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123
124static u32 inet_addr_hash(const struct net *net, __be32 addr)
125{
126	u32 val = (__force u32) addr ^ net_hash_mix(net);
127
128	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
129}
130
131static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
132{
133	u32 hash = inet_addr_hash(net, ifa->ifa_local);
134
135	ASSERT_RTNL();
136	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
137}
138
139static void inet_hash_remove(struct in_ifaddr *ifa)
140{
141	ASSERT_RTNL();
142	hlist_del_init_rcu(&ifa->hash);
143}
144
145/**
146 * __ip_dev_find - find the first device with a given source address.
147 * @net: the net namespace
148 * @addr: the source address
149 * @devref: if true, take a reference on the found device
150 *
151 * If a caller uses devref=false, it should be protected by RCU or RTNL
152 */
153struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154{
155	struct net_device *result = NULL;
156	struct in_ifaddr *ifa;
157
158	rcu_read_lock();
159	ifa = inet_lookup_ifaddr_rcu(net, addr);
160	if (!ifa) {
161		struct flowi4 fl4 = { .daddr = addr };
162		struct fib_result res = { 0 };
163		struct fib_table *local;
164
165		/* Fallback to FIB local table so that communication
166		 * over loopback subnets works.
167		 */
168		local = fib_get_table(net, RT_TABLE_LOCAL);
169		if (local &&
170		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
171		    res.type == RTN_LOCAL)
172			result = FIB_RES_DEV(res);
173	} else {
174		result = ifa->ifa_dev->dev;
175	}
176	if (result && devref)
177		dev_hold(result);
178	rcu_read_unlock();
179	return result;
180}
181EXPORT_SYMBOL(__ip_dev_find);
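/* Illustrative use (not part of this file): a caller that only needs a
 * transient lookup can rely on RCU instead of taking a device reference:
 *
 *	rcu_read_lock();
 *	dev = __ip_dev_find(net, addr, false);
 *	if (dev)
 *		mtu = READ_ONCE(dev->mtu);
 *	rcu_read_unlock();
 *
 * With devref=true the returned device is held and must eventually be
 * released with dev_put().
 */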
182
183/* called under RCU lock */
184struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
185{
186	u32 hash = inet_addr_hash(net, addr);
187	struct in_ifaddr *ifa;
188
189	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190		if (ifa->ifa_local == addr &&
191		    net_eq(dev_net(ifa->ifa_dev->dev), net))
192			return ifa;
193
194	return NULL;
195}
196
197static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
198
199static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
200static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
201static void inet_del_ifa(struct in_device *in_dev,
202			 struct in_ifaddr __rcu **ifap,
203			 int destroy);
204#ifdef CONFIG_SYSCTL
205static int devinet_sysctl_register(struct in_device *idev);
206static void devinet_sysctl_unregister(struct in_device *idev);
207#else
208static int devinet_sysctl_register(struct in_device *idev)
209{
210	return 0;
211}
212static void devinet_sysctl_unregister(struct in_device *idev)
213{
214}
215#endif
216
217/* Locks all the inet devices. */
218
219static struct in_ifaddr *inet_alloc_ifa(void)
220{
221	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
222}
223
224static void inet_rcu_free_ifa(struct rcu_head *head)
225{
226	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
227
228	if (ifa->ifa_dev)
229		in_dev_put(ifa->ifa_dev);
230	kfree(ifa);
231}
232
233static void inet_free_ifa(struct in_ifaddr *ifa)
234{
235	/* Our reference to ifa->ifa_dev must be freed ASAP
236	 * to release the reference to the netdev the same way.
237	 * in_dev_put() -> in_dev_finish_destroy() -> netdev_put()
238	 */
239	call_rcu_hurry(&ifa->rcu_head, inet_rcu_free_ifa);
240}
241
242static void in_dev_free_rcu(struct rcu_head *head)
243{
244	struct in_device *idev = container_of(head, struct in_device, rcu_head);
245
246	kfree(rcu_dereference_protected(idev->mc_hash, 1));
247	kfree(idev);
248}
249
250void in_dev_finish_destroy(struct in_device *idev)
251{
252	struct net_device *dev = idev->dev;
253
254	WARN_ON(idev->ifa_list);
255	WARN_ON(idev->mc_list);
256#ifdef NET_REFCNT_DEBUG
257	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
258#endif
259	netdev_put(dev, &idev->dev_tracker);
260	if (!idev->dead)
261		pr_err("Freeing alive in_device %p\n", idev);
262	else
263		call_rcu(&idev->rcu_head, in_dev_free_rcu);
264}
265EXPORT_SYMBOL(in_dev_finish_destroy);
266
267static struct in_device *inetdev_init(struct net_device *dev)
268{
269	struct in_device *in_dev;
270	int err = -ENOMEM;
271
272	ASSERT_RTNL();
273
274	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
275	if (!in_dev)
276		goto out;
277	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
278			sizeof(in_dev->cnf));
279	in_dev->cnf.sysctl = NULL;
280	in_dev->dev = dev;
281	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
282	if (!in_dev->arp_parms)
283		goto out_kfree;
284	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
285		dev_disable_lro(dev);
286	/* Reference in_dev->dev */
287	netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
288	/* Account for reference dev->ip_ptr (below) */
289	refcount_set(&in_dev->refcnt, 1);
290
291	err = devinet_sysctl_register(in_dev);
292	if (err) {
293		in_dev->dead = 1;
294		neigh_parms_release(&arp_tbl, in_dev->arp_parms);
295		in_dev_put(in_dev);
296		in_dev = NULL;
297		goto out;
298	}
299	ip_mc_init_dev(in_dev);
300	if (dev->flags & IFF_UP)
301		ip_mc_up(in_dev);
302
303	/* we can receive as soon as ip_ptr is set -- do this last */
304	rcu_assign_pointer(dev->ip_ptr, in_dev);
305out:
306	return in_dev ?: ERR_PTR(err);
307out_kfree:
308	kfree(in_dev);
309	in_dev = NULL;
310	goto out;
311}
312
313static void inetdev_destroy(struct in_device *in_dev)
314{
315	struct net_device *dev;
316	struct in_ifaddr *ifa;
317
318	ASSERT_RTNL();
319
320	dev = in_dev->dev;
321
322	in_dev->dead = 1;
323
324	ip_mc_destroy_dev(in_dev);
325
326	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
327		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
328		inet_free_ifa(ifa);
329	}
330
331	RCU_INIT_POINTER(dev->ip_ptr, NULL);
332
333	devinet_sysctl_unregister(in_dev);
334	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
335	arp_ifdown(dev);
336
337	in_dev_put(in_dev);
338}
339
340int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
341{
342	const struct in_ifaddr *ifa;
343
344	rcu_read_lock();
345	in_dev_for_each_ifa_rcu(ifa, in_dev) {
346		if (inet_ifa_match(a, ifa)) {
347			if (!b || inet_ifa_match(b, ifa)) {
348				rcu_read_unlock();
349				return 1;
350			}
351		}
352	}
353	rcu_read_unlock();
354	return 0;
355}
356
357static void __inet_del_ifa(struct in_device *in_dev,
358			   struct in_ifaddr __rcu **ifap,
359			   int destroy, struct nlmsghdr *nlh, u32 portid)
360{
361	struct in_ifaddr *promote = NULL;
362	struct in_ifaddr *ifa, *ifa1;
363	struct in_ifaddr __rcu **last_prim;
364	struct in_ifaddr *prev_prom = NULL;
365	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
366
367	ASSERT_RTNL();
368
369	ifa1 = rtnl_dereference(*ifap);
370	last_prim = ifap;
371	if (in_dev->dead)
372		goto no_promotions;
373
374	/* 1. Deleting the primary ifaddr forces deletion of all secondaries
375	 * unless alias promotion is set.
376	 */
377
378	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
379		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;
380
381		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
382			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
383			    ifa1->ifa_scope <= ifa->ifa_scope)
384				last_prim = &ifa->ifa_next;
385
386			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
387			    ifa1->ifa_mask != ifa->ifa_mask ||
388			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
389				ifap1 = &ifa->ifa_next;
390				prev_prom = ifa;
391				continue;
392			}
393
394			if (!do_promote) {
395				inet_hash_remove(ifa);
396				*ifap1 = ifa->ifa_next;
397
398				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
399				blocking_notifier_call_chain(&inetaddr_chain,
400						NETDEV_DOWN, ifa);
401				inet_free_ifa(ifa);
402			} else {
403				promote = ifa;
404				break;
405			}
406		}
407	}
408
409	/* On promotion, all secondaries from the subnet change
410	 * the primary IP, so we must remove all their routes silently
411	 * and later add them back with the new prefsrc. Do this
412	 * while all addresses are still on the device list.
413	 */
414	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
415		if (ifa1->ifa_mask == ifa->ifa_mask &&
416		    inet_ifa_match(ifa1->ifa_address, ifa))
417			fib_del_ifaddr(ifa, ifa1);
418	}
419
420no_promotions:
421	/* 2. Unlink it */
422
423	*ifap = ifa1->ifa_next;
424	inet_hash_remove(ifa1);
425
426	/* 3. Announce address deletion */
427
428	/* Send message first, then call notifier.
429	   At first sight, the FIB update triggered by the notifier
430	   will refer to an already deleted ifaddr, which could confuse
431	   netlink listeners. It is not true: gated sees that the
432	   route was deleted and, if it still thinks that the ifaddr
433	   is valid, it will try to restore the deleted routes... Grr.
434	   So this order is correct.
435	 */
436	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
437	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
438
439	if (promote) {
440		struct in_ifaddr *next_sec;
441
442		next_sec = rtnl_dereference(promote->ifa_next);
443		if (prev_prom) {
444			struct in_ifaddr *last_sec;
445
446			rcu_assign_pointer(prev_prom->ifa_next, next_sec);
447
448			last_sec = rtnl_dereference(*last_prim);
449			rcu_assign_pointer(promote->ifa_next, last_sec);
450			rcu_assign_pointer(*last_prim, promote);
451		}
452
453		promote->ifa_flags &= ~IFA_F_SECONDARY;
454		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
455		blocking_notifier_call_chain(&inetaddr_chain,
456				NETDEV_UP, promote);
457		for (ifa = next_sec; ifa;
458		     ifa = rtnl_dereference(ifa->ifa_next)) {
459			if (ifa1->ifa_mask != ifa->ifa_mask ||
460			    !inet_ifa_match(ifa1->ifa_address, ifa))
461					continue;
462			fib_add_ifaddr(ifa);
463		}
464
465	}
466	if (destroy)
467		inet_free_ifa(ifa1);
468}
469
470static void inet_del_ifa(struct in_device *in_dev,
471			 struct in_ifaddr __rcu **ifap,
472			 int destroy)
473{
474	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
475}
476
477static void check_lifetime(struct work_struct *work);
478
479static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
480
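/* Link @ifa into its device's address list.  An address whose subnet is
 * already covered by an existing primary is flagged IFA_F_SECONDARY and
 * appended at the tail; a new primary is linked in among the existing
 * primaries, ahead of the secondaries.  The rtmsg/notifier ordering at the
 * end mirrors the deletion path above.
 */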
481static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
482			     u32 portid, struct netlink_ext_ack *extack)
483{
484	struct in_ifaddr __rcu **last_primary, **ifap;
485	struct in_device *in_dev = ifa->ifa_dev;
486	struct in_validator_info ivi;
487	struct in_ifaddr *ifa1;
488	int ret;
489
490	ASSERT_RTNL();
491
492	if (!ifa->ifa_local) {
493		inet_free_ifa(ifa);
494		return 0;
495	}
496
497	ifa->ifa_flags &= ~IFA_F_SECONDARY;
498	last_primary = &in_dev->ifa_list;
499
500	/* Don't set IPv6-only flags on IPv4 addresses */
501	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
502
503	ifap = &in_dev->ifa_list;
504	ifa1 = rtnl_dereference(*ifap);
505
506	while (ifa1) {
507		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
508		    ifa->ifa_scope <= ifa1->ifa_scope)
509			last_primary = &ifa1->ifa_next;
510		if (ifa1->ifa_mask == ifa->ifa_mask &&
511		    inet_ifa_match(ifa1->ifa_address, ifa)) {
512			if (ifa1->ifa_local == ifa->ifa_local) {
513				inet_free_ifa(ifa);
514				return -EEXIST;
515			}
516			if (ifa1->ifa_scope != ifa->ifa_scope) {
517				NL_SET_ERR_MSG(extack, "ipv4: Invalid scope value");
518				inet_free_ifa(ifa);
519				return -EINVAL;
520			}
521			ifa->ifa_flags |= IFA_F_SECONDARY;
522		}
523
524		ifap = &ifa1->ifa_next;
525		ifa1 = rtnl_dereference(*ifap);
526	}
527
528	/* Allow any devices that wish to register ifaddr validators to weigh
529	 * in now, before changes are committed.  The rtnl lock is serializing
530	 * access here, so the state should not change between a validator call
531	 * and a final notify on commit.  This isn't invoked on promotion under
532	 * the assumption that validators are checking the address itself, and
533	 * not the flags.
534	 */
535	ivi.ivi_addr = ifa->ifa_address;
536	ivi.ivi_dev = ifa->ifa_dev;
537	ivi.extack = extack;
538	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
539					   NETDEV_UP, &ivi);
540	ret = notifier_to_errno(ret);
541	if (ret) {
542		inet_free_ifa(ifa);
543		return ret;
544	}
545
546	if (!(ifa->ifa_flags & IFA_F_SECONDARY))
547		ifap = last_primary;
548
549	rcu_assign_pointer(ifa->ifa_next, *ifap);
550	rcu_assign_pointer(*ifap, ifa);
551
552	inet_hash_insert(dev_net(in_dev->dev), ifa);
553
554	cancel_delayed_work(&check_lifetime_work);
555	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
556
557	/* Send message first, then call notifier.
558	   The notifier will trigger a FIB update, so that
559	   netlink listeners will know about the new ifaddr */
560	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
561	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
562
563	return 0;
564}
565
566static int inet_insert_ifa(struct in_ifaddr *ifa)
567{
568	return __inet_insert_ifa(ifa, NULL, 0, NULL);
569}
570
571static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
572{
573	struct in_device *in_dev = __in_dev_get_rtnl(dev);
574
575	ASSERT_RTNL();
576
577	if (!in_dev) {
578		inet_free_ifa(ifa);
579		return -ENOBUFS;
580	}
581	ipv4_devconf_setall(in_dev);
582	neigh_parms_data_state_setall(in_dev->arp_parms);
583	if (ifa->ifa_dev != in_dev) {
584		WARN_ON(ifa->ifa_dev);
585		in_dev_hold(in_dev);
586		ifa->ifa_dev = in_dev;
587	}
588	if (ipv4_is_loopback(ifa->ifa_local))
589		ifa->ifa_scope = RT_SCOPE_HOST;
590	return inet_insert_ifa(ifa);
591}
592
593/* Caller must hold RCU or RTNL:
594 * We don't take a reference on the found in_device
595 */
596struct in_device *inetdev_by_index(struct net *net, int ifindex)
597{
598	struct net_device *dev;
599	struct in_device *in_dev = NULL;
600
601	rcu_read_lock();
602	dev = dev_get_by_index_rcu(net, ifindex);
603	if (dev)
604		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
605	rcu_read_unlock();
606	return in_dev;
607}
608EXPORT_SYMBOL(inetdev_by_index);
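/* Illustrative use (not part of this file): since no reference is taken on
 * the returned in_device, callers outside RTNL typically wrap the lookup
 * in RCU:
 *
 *	rcu_read_lock();
 *	in_dev = inetdev_by_index(net, ifindex);
 *	if (in_dev)
 *		forwarding = IN_DEV_FORWARD(in_dev);
 *	rcu_read_unlock();
 */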
609
610/* Called only under the RTNL semaphore. No locks. */
611
612struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
613				    __be32 mask)
614{
615	struct in_ifaddr *ifa;
616
617	ASSERT_RTNL();
618
619	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
620		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
621			return ifa;
622	}
623	return NULL;
624}
625
626static int ip_mc_autojoin_config(struct net *net, bool join,
627				 const struct in_ifaddr *ifa)
628{
629#if defined(CONFIG_IP_MULTICAST)
630	struct ip_mreqn mreq = {
631		.imr_multiaddr.s_addr = ifa->ifa_address,
632		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
633	};
634	struct sock *sk = net->ipv4.mc_autojoin_sk;
635	int ret;
636
637	ASSERT_RTNL();
638
639	lock_sock(sk);
640	if (join)
641		ret = ip_mc_join_group(sk, &mreq);
642	else
643		ret = ip_mc_leave_group(sk, &mreq);
644	release_sock(sk);
645
646	return ret;
647#else
648	return -EOPNOTSUPP;
649#endif
650}
651
652static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
653			    struct netlink_ext_ack *extack)
654{
655	struct net *net = sock_net(skb->sk);
656	struct in_ifaddr __rcu **ifap;
657	struct nlattr *tb[IFA_MAX+1];
658	struct in_device *in_dev;
659	struct ifaddrmsg *ifm;
660	struct in_ifaddr *ifa;
661	int err;
662
663	ASSERT_RTNL();
664
665	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
666				     ifa_ipv4_policy, extack);
667	if (err < 0)
668		goto errout;
669
670	ifm = nlmsg_data(nlh);
671	in_dev = inetdev_by_index(net, ifm->ifa_index);
672	if (!in_dev) {
673		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
674		err = -ENODEV;
675		goto errout;
676	}
677
678	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
679	     ifap = &ifa->ifa_next) {
680		if (tb[IFA_LOCAL] &&
681		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
682			continue;
683
684		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
685			continue;
686
687		if (tb[IFA_ADDRESS] &&
688		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
689		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
690			continue;
691
692		if (ipv4_is_multicast(ifa->ifa_address))
693			ip_mc_autojoin_config(net, false, ifa);
694		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
695		return 0;
696	}
697
698	NL_SET_ERR_MSG(extack, "ipv4: Address not found");
699	err = -EADDRNOTAVAIL;
700errout:
701	return err;
702}
703
704#define INFINITY_LIFE_TIME	0xFFFFFFFF
705
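/* Expire or deprecate addresses whose lifetimes have run out.  Each hash
 * bucket is handled in two passes: a lockless RCU scan that only decides
 * whether anything needs to change, and, only if so, a pass under RTNL that
 * deletes expired addresses or marks them IFA_F_DEPRECATED.  The work item
 * then re-arms itself for the next closest expiry, clamped to at least
 * ADDRCONF_TIMER_FUZZ_MAX from now.
 */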
706static void check_lifetime(struct work_struct *work)
707{
708	unsigned long now, next, next_sec, next_sched;
709	struct in_ifaddr *ifa;
710	struct hlist_node *n;
711	int i;
712
713	now = jiffies;
714	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
715
716	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
717		bool change_needed = false;
718
719		rcu_read_lock();
720		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
721			unsigned long age, tstamp;
722			u32 preferred_lft;
723			u32 valid_lft;
724			u32 flags;
725
726			flags = READ_ONCE(ifa->ifa_flags);
727			if (flags & IFA_F_PERMANENT)
728				continue;
729
730			preferred_lft = READ_ONCE(ifa->ifa_preferred_lft);
731			valid_lft = READ_ONCE(ifa->ifa_valid_lft);
732			tstamp = READ_ONCE(ifa->ifa_tstamp);
733			/* We try to batch several events at once. */
734			age = (now - tstamp +
735			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
736
737			if (valid_lft != INFINITY_LIFE_TIME &&
738			    age >= valid_lft) {
739				change_needed = true;
740			} else if (preferred_lft ==
741				   INFINITY_LIFE_TIME) {
742				continue;
743			} else if (age >= preferred_lft) {
744				if (time_before(tstamp + valid_lft * HZ, next))
745					next = tstamp + valid_lft * HZ;
746
747				if (!(flags & IFA_F_DEPRECATED))
748					change_needed = true;
749			} else if (time_before(tstamp + preferred_lft * HZ,
750					       next)) {
751				next = tstamp + preferred_lft * HZ;
752			}
753		}
754		rcu_read_unlock();
755		if (!change_needed)
756			continue;
757		rtnl_lock();
758		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
759			unsigned long age;
760
761			if (ifa->ifa_flags & IFA_F_PERMANENT)
762				continue;
763
764			/* We try to batch several events at once. */
765			age = (now - ifa->ifa_tstamp +
766			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
767
768			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
769			    age >= ifa->ifa_valid_lft) {
770				struct in_ifaddr __rcu **ifap;
771				struct in_ifaddr *tmp;
772
773				ifap = &ifa->ifa_dev->ifa_list;
774				tmp = rtnl_dereference(*ifap);
775				while (tmp) {
776					if (tmp == ifa) {
777						inet_del_ifa(ifa->ifa_dev,
778							     ifap, 1);
779						break;
780					}
781					ifap = &tmp->ifa_next;
782					tmp = rtnl_dereference(*ifap);
783				}
784			} else if (ifa->ifa_preferred_lft !=
785				   INFINITY_LIFE_TIME &&
786				   age >= ifa->ifa_preferred_lft &&
787				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
788				ifa->ifa_flags |= IFA_F_DEPRECATED;
789				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
790			}
791		}
792		rtnl_unlock();
793	}
794
795	next_sec = round_jiffies_up(next);
796	next_sched = next;
797
798	/* If rounded timeout is accurate enough, accept it. */
799	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
800		next_sched = next_sec;
801
802	now = jiffies;
803	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
804	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
805		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
806
807	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
808			next_sched - now);
809}
810
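/* Convert the netlink-supplied lifetimes (seconds, INFINITY_LIFE_TIME
 * meaning "forever") into jiffies and update the address flags: an infinite
 * valid lifetime makes the address IFA_F_PERMANENT, and a zero preferred
 * lifetime marks it IFA_F_DEPRECATED.
 */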
811static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
812			     __u32 prefered_lft)
813{
814	unsigned long timeout;
815	u32 flags;
816
817	flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
818
819	timeout = addrconf_timeout_fixup(valid_lft, HZ);
820	if (addrconf_finite_timeout(timeout))
821		WRITE_ONCE(ifa->ifa_valid_lft, timeout);
822	else
823		flags |= IFA_F_PERMANENT;
824
825	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
826	if (addrconf_finite_timeout(timeout)) {
827		if (timeout == 0)
828			flags |= IFA_F_DEPRECATED;
829		WRITE_ONCE(ifa->ifa_preferred_lft, timeout);
830	}
831	WRITE_ONCE(ifa->ifa_flags, flags);
832	WRITE_ONCE(ifa->ifa_tstamp, jiffies);
833	if (!ifa->ifa_cstamp)
834		WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp);
835}
836
837static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
838				       __u32 *pvalid_lft, __u32 *pprefered_lft,
839				       struct netlink_ext_ack *extack)
840{
841	struct nlattr *tb[IFA_MAX+1];
842	struct in_ifaddr *ifa;
843	struct ifaddrmsg *ifm;
844	struct net_device *dev;
845	struct in_device *in_dev;
846	int err;
847
848	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
849				     ifa_ipv4_policy, extack);
850	if (err < 0)
851		goto errout;
852
853	ifm = nlmsg_data(nlh);
854	err = -EINVAL;
855
856	if (ifm->ifa_prefixlen > 32) {
857		NL_SET_ERR_MSG(extack, "ipv4: Invalid prefix length");
858		goto errout;
859	}
860
861	if (!tb[IFA_LOCAL]) {
862		NL_SET_ERR_MSG(extack, "ipv4: Local address is not supplied");
863		goto errout;
864	}
865
866	dev = __dev_get_by_index(net, ifm->ifa_index);
867	err = -ENODEV;
868	if (!dev) {
869		NL_SET_ERR_MSG(extack, "ipv4: Device not found");
870		goto errout;
871	}
872
873	in_dev = __in_dev_get_rtnl(dev);
874	err = -ENOBUFS;
875	if (!in_dev)
876		goto errout;
877
878	ifa = inet_alloc_ifa();
879	if (!ifa)
880		/*
881		 * A potential indev allocation can be left alive; it stays
882		 * assigned to its device and is destroyed with it.
883		 */
884		goto errout;
885
886	ipv4_devconf_setall(in_dev);
887	neigh_parms_data_state_setall(in_dev->arp_parms);
888	in_dev_hold(in_dev);
889
890	if (!tb[IFA_ADDRESS])
891		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
892
893	INIT_HLIST_NODE(&ifa->hash);
894	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
895	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
896	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
897					 ifm->ifa_flags;
898	ifa->ifa_scope = ifm->ifa_scope;
899	ifa->ifa_dev = in_dev;
900
901	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
902	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
903
904	if (tb[IFA_BROADCAST])
905		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
906
907	if (tb[IFA_LABEL])
908		nla_strscpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
909	else
910		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
911
912	if (tb[IFA_RT_PRIORITY])
913		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
914
915	if (tb[IFA_PROTO])
916		ifa->ifa_proto = nla_get_u8(tb[IFA_PROTO]);
917
918	if (tb[IFA_CACHEINFO]) {
919		struct ifa_cacheinfo *ci;
920
921		ci = nla_data(tb[IFA_CACHEINFO]);
922		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
923			NL_SET_ERR_MSG(extack, "ipv4: address lifetime invalid");
924			err = -EINVAL;
925			goto errout_free;
926		}
927		*pvalid_lft = ci->ifa_valid;
928		*pprefered_lft = ci->ifa_prefered;
929	}
930
931	return ifa;
932
933errout_free:
934	inet_free_ifa(ifa);
935errout:
936	return ERR_PTR(err);
937}
938
939static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
940{
941	struct in_device *in_dev = ifa->ifa_dev;
942	struct in_ifaddr *ifa1;
943
944	if (!ifa->ifa_local)
945		return NULL;
946
947	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
948		if (ifa1->ifa_mask == ifa->ifa_mask &&
949		    inet_ifa_match(ifa1->ifa_address, ifa) &&
950		    ifa1->ifa_local == ifa->ifa_local)
951			return ifa1;
952	}
953	return NULL;
954}
955
956static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
957			    struct netlink_ext_ack *extack)
958{
959	struct net *net = sock_net(skb->sk);
960	struct in_ifaddr *ifa;
961	struct in_ifaddr *ifa_existing;
962	__u32 valid_lft = INFINITY_LIFE_TIME;
963	__u32 prefered_lft = INFINITY_LIFE_TIME;
964
965	ASSERT_RTNL();
966
967	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
968	if (IS_ERR(ifa))
969		return PTR_ERR(ifa);
970
971	ifa_existing = find_matching_ifa(ifa);
972	if (!ifa_existing) {
973		/* It would be best to check for !NLM_F_CREATE here but
974		 * userspace already relies on not having to provide this.
975		 */
976		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
977		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
978			int ret = ip_mc_autojoin_config(net, true, ifa);
979
980			if (ret < 0) {
981				NL_SET_ERR_MSG(extack, "ipv4: Multicast auto join failed");
982				inet_free_ifa(ifa);
983				return ret;
984			}
985		}
986		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
987					 extack);
988	} else {
989		u32 new_metric = ifa->ifa_rt_priority;
990		u8 new_proto = ifa->ifa_proto;
991
992		inet_free_ifa(ifa);
993
994		if (nlh->nlmsg_flags & NLM_F_EXCL ||
995		    !(nlh->nlmsg_flags & NLM_F_REPLACE)) {
996			NL_SET_ERR_MSG(extack, "ipv4: Address already assigned");
997			return -EEXIST;
998		}
999		ifa = ifa_existing;
1000
1001		if (ifa->ifa_rt_priority != new_metric) {
1002			fib_modify_prefix_metric(ifa, new_metric);
1003			ifa->ifa_rt_priority = new_metric;
1004		}
1005
1006		ifa->ifa_proto = new_proto;
1007
1008		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1009		cancel_delayed_work(&check_lifetime_work);
1010		queue_delayed_work(system_power_efficient_wq,
1011				&check_lifetime_work, 0);
1012		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
1013	}
1014	return 0;
1015}
1016
1017/*
1018 *	Determine a default network mask, based on the IP address.
1019 */
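/* For example (illustrative): 10.1.2.3 is class A and yields 8, 172.16.0.1
 * is class B and yields 16, 192.0.2.1 is class C and yields 24, while a
 * class D (multicast) address yields -1.
 */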
1020
1021static int inet_abc_len(__be32 addr)
1022{
1023	int rc = -1;	/* Something else, probably a multicast. */
1024
1025	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
1026		rc = 0;
1027	else {
1028		__u32 haddr = ntohl(addr);
1029		if (IN_CLASSA(haddr))
1030			rc = 8;
1031		else if (IN_CLASSB(haddr))
1032			rc = 16;
1033		else if (IN_CLASSC(haddr))
1034			rc = 24;
1035		else if (IN_CLASSE(haddr))
1036			rc = 32;
1037	}
1038
1039	return rc;
1040}
1041
1042
1043int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1044{
1045	struct sockaddr_in sin_orig;
1046	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1047	struct in_ifaddr __rcu **ifap = NULL;
1048	struct in_device *in_dev;
1049	struct in_ifaddr *ifa = NULL;
1050	struct net_device *dev;
1051	char *colon;
1052	int ret = -EFAULT;
1053	int tryaddrmatch = 0;
1054
1055	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1056
1057	/* save original address for comparison */
1058	memcpy(&sin_orig, sin, sizeof(*sin));
1059
1060	colon = strchr(ifr->ifr_name, ':');
1061	if (colon)
1062		*colon = 0;
1063
1064	dev_load(net, ifr->ifr_name);
1065
1066	switch (cmd) {
1067	case SIOCGIFADDR:	/* Get interface address */
1068	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1069	case SIOCGIFDSTADDR:	/* Get the destination address */
1070	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1071		/* Note that these ioctls will not sleep,
1072		   so that we do not impose a lock.
1073		   One day we will be forced to put shlock here (I mean SMP)
1074		 */
1075		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1076		memset(sin, 0, sizeof(*sin));
1077		sin->sin_family = AF_INET;
1078		break;
1079
1080	case SIOCSIFFLAGS:
1081		ret = -EPERM;
1082		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1083			goto out;
1084		break;
1085	case SIOCSIFADDR:	/* Set interface address (and family) */
1086	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1087	case SIOCSIFDSTADDR:	/* Set the destination address */
1088	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1089		ret = -EPERM;
1090		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1091			goto out;
1092		ret = -EINVAL;
1093		if (sin->sin_family != AF_INET)
1094			goto out;
1095		break;
1096	default:
1097		ret = -EINVAL;
1098		goto out;
1099	}
1100
1101	rtnl_lock();
1102
1103	ret = -ENODEV;
1104	dev = __dev_get_by_name(net, ifr->ifr_name);
1105	if (!dev)
1106		goto done;
1107
1108	if (colon)
1109		*colon = ':';
1110
1111	in_dev = __in_dev_get_rtnl(dev);
1112	if (in_dev) {
1113		if (tryaddrmatch) {
1114			/* Matthias Andree */
1115			/* compare label and address (4.4BSD style) */
1116			/* note: we only do this for a limited set of ioctls
1117			   and only if the original address family was AF_INET.
1118			   This is checked above. */
1119
1120			for (ifap = &in_dev->ifa_list;
1121			     (ifa = rtnl_dereference(*ifap)) != NULL;
1122			     ifap = &ifa->ifa_next) {
1123				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1124				    sin_orig.sin_addr.s_addr ==
1125							ifa->ifa_local) {
1126					break; /* found */
1127				}
1128			}
1129		}
1130		/* We didn't get a match; maybe the application is
1131		   4.3BSD-style and passed in junk, so we fall back to
1132		   comparing just the label */
1133		if (!ifa) {
1134			for (ifap = &in_dev->ifa_list;
1135			     (ifa = rtnl_dereference(*ifap)) != NULL;
1136			     ifap = &ifa->ifa_next)
1137				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1138					break;
1139		}
1140	}
1141
1142	ret = -EADDRNOTAVAIL;
1143	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1144		goto done;
1145
1146	switch (cmd) {
1147	case SIOCGIFADDR:	/* Get interface address */
1148		ret = 0;
1149		sin->sin_addr.s_addr = ifa->ifa_local;
1150		break;
1151
1152	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1153		ret = 0;
1154		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1155		break;
1156
1157	case SIOCGIFDSTADDR:	/* Get the destination address */
1158		ret = 0;
1159		sin->sin_addr.s_addr = ifa->ifa_address;
1160		break;
1161
1162	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1163		ret = 0;
1164		sin->sin_addr.s_addr = ifa->ifa_mask;
1165		break;
1166
1167	case SIOCSIFFLAGS:
1168		if (colon) {
1169			ret = -EADDRNOTAVAIL;
1170			if (!ifa)
1171				break;
1172			ret = 0;
1173			if (!(ifr->ifr_flags & IFF_UP))
1174				inet_del_ifa(in_dev, ifap, 1);
1175			break;
1176		}
1177		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1178		break;
1179
1180	case SIOCSIFADDR:	/* Set interface address (and family) */
1181		ret = -EINVAL;
1182		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1183			break;
1184
1185		if (!ifa) {
1186			ret = -ENOBUFS;
1187			ifa = inet_alloc_ifa();
1188			if (!ifa)
1189				break;
1190			INIT_HLIST_NODE(&ifa->hash);
1191			if (colon)
1192				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1193			else
1194				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1195		} else {
1196			ret = 0;
1197			if (ifa->ifa_local == sin->sin_addr.s_addr)
1198				break;
1199			inet_del_ifa(in_dev, ifap, 0);
1200			ifa->ifa_broadcast = 0;
1201			ifa->ifa_scope = 0;
1202		}
1203
1204		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1205
1206		if (!(dev->flags & IFF_POINTOPOINT)) {
1207			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1208			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1209			if ((dev->flags & IFF_BROADCAST) &&
1210			    ifa->ifa_prefixlen < 31)
1211				ifa->ifa_broadcast = ifa->ifa_address |
1212						     ~ifa->ifa_mask;
1213		} else {
1214			ifa->ifa_prefixlen = 32;
1215			ifa->ifa_mask = inet_make_mask(32);
1216		}
1217		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1218		ret = inet_set_ifa(dev, ifa);
1219		break;
1220
1221	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1222		ret = 0;
1223		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1224			inet_del_ifa(in_dev, ifap, 0);
1225			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1226			inet_insert_ifa(ifa);
1227		}
1228		break;
1229
1230	case SIOCSIFDSTADDR:	/* Set the destination address */
1231		ret = 0;
1232		if (ifa->ifa_address == sin->sin_addr.s_addr)
1233			break;
1234		ret = -EINVAL;
1235		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1236			break;
1237		ret = 0;
1238		inet_del_ifa(in_dev, ifap, 0);
1239		ifa->ifa_address = sin->sin_addr.s_addr;
1240		inet_insert_ifa(ifa);
1241		break;
1242
1243	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1244
1245		/*
1246		 *	The mask we set must be legal.
1247		 */
1248		ret = -EINVAL;
1249		if (bad_mask(sin->sin_addr.s_addr, 0))
1250			break;
1251		ret = 0;
1252		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1253			__be32 old_mask = ifa->ifa_mask;
1254			inet_del_ifa(in_dev, ifap, 0);
1255			ifa->ifa_mask = sin->sin_addr.s_addr;
1256			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1257
1258			/* If the current broadcast address was derived
1259			 * from the old netmask, recalculate it for the
1260			 * new one. Otherwise it's a
1261			 * funny address, so don't touch it since
1262			 * the user seems to know what (s)he's doing...
1263			 */
1264			if ((dev->flags & IFF_BROADCAST) &&
1265			    (ifa->ifa_prefixlen < 31) &&
1266			    (ifa->ifa_broadcast ==
1267			     (ifa->ifa_local|~old_mask))) {
1268				ifa->ifa_broadcast = (ifa->ifa_local |
1269						      ~sin->sin_addr.s_addr);
1270			}
1271			inet_insert_ifa(ifa);
1272		}
1273		break;
1274	}
1275done:
1276	rtnl_unlock();
1277out:
1278	return ret;
1279}
1280
1281int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1282{
1283	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1284	const struct in_ifaddr *ifa;
1285	struct ifreq ifr;
1286	int done = 0;
1287
1288	if (WARN_ON(size > sizeof(struct ifreq)))
1289		goto out;
1290
1291	if (!in_dev)
1292		goto out;
1293
1294	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1295		if (!buf) {
1296			done += size;
1297			continue;
1298		}
1299		if (len < size)
1300			break;
1301		memset(&ifr, 0, sizeof(struct ifreq));
1302		strcpy(ifr.ifr_name, ifa->ifa_label);
1303
1304		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1305		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1306								ifa->ifa_local;
1307
1308		if (copy_to_user(buf + done, &ifr, size)) {
1309			done = -EFAULT;
1310			break;
1311		}
1312		len  -= size;
1313		done += size;
1314	}
1315out:
1316	return done;
1317}
1318
1319static __be32 in_dev_select_addr(const struct in_device *in_dev,
1320				 int scope)
1321{
1322	const struct in_ifaddr *ifa;
1323
1324	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1325		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1326			continue;
1327		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1328		    ifa->ifa_scope <= scope)
1329			return ifa->ifa_local;
1330	}
1331
1332	return 0;
1333}
1334
1335__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1336{
1337	const struct in_ifaddr *ifa;
1338	__be32 addr = 0;
1339	unsigned char localnet_scope = RT_SCOPE_HOST;
1340	struct in_device *in_dev;
1341	struct net *net = dev_net(dev);
1342	int master_idx;
1343
1344	rcu_read_lock();
1345	in_dev = __in_dev_get_rcu(dev);
1346	if (!in_dev)
1347		goto no_in_dev;
1348
1349	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1350		localnet_scope = RT_SCOPE_LINK;
1351
1352	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1353		if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY)
1354			continue;
1355		if (min(ifa->ifa_scope, localnet_scope) > scope)
1356			continue;
1357		if (!dst || inet_ifa_match(dst, ifa)) {
1358			addr = ifa->ifa_local;
1359			break;
1360		}
1361		if (!addr)
1362			addr = ifa->ifa_local;
1363	}
1364
1365	if (addr)
1366		goto out_unlock;
1367no_in_dev:
1368	master_idx = l3mdev_master_ifindex_rcu(dev);
1369
1370	/* For VRFs, the VRF device takes the place of the loopback device,
1371	 * with addresses on it being preferred.  Note in such cases the
1372	 * loopback device will be among the devices that fail the master_idx
1373	 * equality check in the loop below.
1374	 */
1375	if (master_idx &&
1376	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1377	    (in_dev = __in_dev_get_rcu(dev))) {
1378		addr = in_dev_select_addr(in_dev, scope);
1379		if (addr)
1380			goto out_unlock;
1381	}
1382
1383	/* Non-loopback addresses on the loopback device should be preferred
1384	   in this case. It is important that lo is the first interface
1385	   in the dev_base list.
1386	 */
1387	for_each_netdev_rcu(net, dev) {
1388		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1389			continue;
1390
1391		in_dev = __in_dev_get_rcu(dev);
1392		if (!in_dev)
1393			continue;
1394
1395		addr = in_dev_select_addr(in_dev, scope);
1396		if (addr)
1397			goto out_unlock;
1398	}
1399out_unlock:
1400	rcu_read_unlock();
1401	return addr;
1402}
1403EXPORT_SYMBOL(inet_select_addr);
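/* Illustrative use (not part of this file): pick a primary source address
 * on @dev suitable for a destination on a directly attached subnet:
 *
 *	__be32 saddr = inet_select_addr(dev, daddr, RT_SCOPE_UNIVERSE);
 *
 * A return value of 0 means no usable primary address of sufficient scope
 * was found on the device nor, via the fallbacks above, on any other one.
 */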
1404
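/* Walk @in_dev looking for evidence that (@local, @dst) is a usable
 * source/destination pair: @addr tracks a candidate local address of
 * acceptable scope, while @same records whether some address matched both
 * the @local and @dst subnet wildcards.  The candidate is returned only
 * once both conditions hold.
 */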
1405static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1406			      __be32 local, int scope)
1407{
1408	unsigned char localnet_scope = RT_SCOPE_HOST;
1409	const struct in_ifaddr *ifa;
1410	__be32 addr = 0;
1411	int same = 0;
1412
1413	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1414		localnet_scope = RT_SCOPE_LINK;
1415
1416	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1417		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1418
1419		if (!addr &&
1420		    (local == ifa->ifa_local || !local) &&
1421		    min_scope <= scope) {
1422			addr = ifa->ifa_local;
1423			if (same)
1424				break;
1425		}
1426		if (!same) {
1427			same = (!local || inet_ifa_match(local, ifa)) &&
1428				(!dst || inet_ifa_match(dst, ifa));
1429			if (same && addr) {
1430				if (local || !dst)
1431					break;
1432				/* Is the selected addr in the dst subnet? */
1433				if (inet_ifa_match(addr, ifa))
1434					break;
1435				/* No, then can we use new local src? */
1436				if (min_scope <= scope) {
1437					addr = ifa->ifa_local;
1438					break;
1439				}
1440				/* search for large dst subnet for addr */
1441				same = 0;
1442			}
1443		}
1444	}
1445
1446	return same ? addr : 0;
1447}
1448
1449/*
1450 * Confirm that local IP address exists using wildcards:
1451 * - net: netns to check, cannot be NULL
1452 * - in_dev: only on this interface, NULL=any interface
1453 * - dst: only in the same subnet as dst, 0=any dst
1454 * - local: address, 0=autoselect the local address
1455 * - scope: maximum allowed scope value for the local address
1456 */
1457__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1458			 __be32 dst, __be32 local, int scope)
1459{
1460	__be32 addr = 0;
1461	struct net_device *dev;
1462
1463	if (in_dev)
1464		return confirm_addr_indev(in_dev, dst, local, scope);
1465
1466	rcu_read_lock();
1467	for_each_netdev_rcu(net, dev) {
1468		in_dev = __in_dev_get_rcu(dev);
1469		if (in_dev) {
1470			addr = confirm_addr_indev(in_dev, dst, local, scope);
1471			if (addr)
1472				break;
1473		}
1474	}
1475	rcu_read_unlock();
1476
1477	return addr;
1478}
1479EXPORT_SYMBOL(inet_confirm_addr);
1480
1481/*
1482 *	Device notifier
1483 */
1484
1485int register_inetaddr_notifier(struct notifier_block *nb)
1486{
1487	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1488}
1489EXPORT_SYMBOL(register_inetaddr_notifier);
1490
1491int unregister_inetaddr_notifier(struct notifier_block *nb)
1492{
1493	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1494}
1495EXPORT_SYMBOL(unregister_inetaddr_notifier);
1496
1497int register_inetaddr_validator_notifier(struct notifier_block *nb)
1498{
1499	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1500}
1501EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1502
1503int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1504{
1505	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1506	    nb);
1507}
1508EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
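/* Illustrative use (not part of this file): a subsystem reacting to IPv4
 * address changes registers a notifier_block; the callback runs in process
 * context and receives the affected in_ifaddr:
 *
 *	static int my_inetaddr_event(struct notifier_block *nb,
 *				     unsigned long event, void *ptr)
 *	{
 *		struct in_ifaddr *ifa = ptr;
 *
 *		if (event == NETDEV_UP)
 *			pr_debug("address added on %s\n", ifa->ifa_label);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_inetaddr_event,
 *	};
 *
 *	register_inetaddr_notifier(&my_nb);
 */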
1509
1510/* Rename ifa_labels for a device name change. Make some effort to preserve
1511 * existing alias numbering and to create unique labels if possible.
1512*/
1513static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1514{
1515	struct in_ifaddr *ifa;
1516	int named = 0;
1517
1518	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1519		char old[IFNAMSIZ], *dot;
1520
1521		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1522		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1523		if (named++ == 0)
1524			goto skip;
1525		dot = strchr(old, ':');
1526		if (!dot) {
1527			sprintf(old, ":%d", named);
1528			dot = old;
1529		}
1530		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1531			strcat(ifa->ifa_label, dot);
1532		else
1533			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1534skip:
1535		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1536	}
1537}
1538
1539static void inetdev_send_gratuitous_arp(struct net_device *dev,
1540					struct in_device *in_dev)
1542{
1543	const struct in_ifaddr *ifa;
1544
1545	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1546		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1547			 ifa->ifa_local, dev,
1548			 ifa->ifa_local, NULL,
1549			 dev->dev_addr, NULL);
1550	}
1551}
1552
1553/* Called only under RTNL semaphore */
1554
1555static int inetdev_event(struct notifier_block *this, unsigned long event,
1556			 void *ptr)
1557{
1558	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1559	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1560
1561	ASSERT_RTNL();
1562
1563	if (!in_dev) {
1564		if (event == NETDEV_REGISTER) {
1565			in_dev = inetdev_init(dev);
1566			if (IS_ERR(in_dev))
1567				return notifier_from_errno(PTR_ERR(in_dev));
1568			if (dev->flags & IFF_LOOPBACK) {
1569				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1570				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1571			}
1572		} else if (event == NETDEV_CHANGEMTU) {
1573			/* Re-enabling IP */
1574			if (inetdev_valid_mtu(dev->mtu))
1575				in_dev = inetdev_init(dev);
1576		}
1577		goto out;
1578	}
1579
1580	switch (event) {
1581	case NETDEV_REGISTER:
1582		pr_debug("%s: bug\n", __func__);
1583		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1584		break;
1585	case NETDEV_UP:
1586		if (!inetdev_valid_mtu(dev->mtu))
1587			break;
1588		if (dev->flags & IFF_LOOPBACK) {
1589			struct in_ifaddr *ifa = inet_alloc_ifa();
1590
1591			if (ifa) {
1592				INIT_HLIST_NODE(&ifa->hash);
1593				ifa->ifa_local =
1594				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1595				ifa->ifa_prefixlen = 8;
1596				ifa->ifa_mask = inet_make_mask(8);
1597				in_dev_hold(in_dev);
1598				ifa->ifa_dev = in_dev;
1599				ifa->ifa_scope = RT_SCOPE_HOST;
1600				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1601				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1602						 INFINITY_LIFE_TIME);
1603				ipv4_devconf_setall(in_dev);
1604				neigh_parms_data_state_setall(in_dev->arp_parms);
1605				inet_insert_ifa(ifa);
1606			}
1607		}
1608		ip_mc_up(in_dev);
1609		fallthrough;
1610	case NETDEV_CHANGEADDR:
1611		if (!IN_DEV_ARP_NOTIFY(in_dev))
1612			break;
1613		fallthrough;
1614	case NETDEV_NOTIFY_PEERS:
1615		/* Send gratuitous ARP to notify of link change */
1616		inetdev_send_gratuitous_arp(dev, in_dev);
1617		break;
1618	case NETDEV_DOWN:
1619		ip_mc_down(in_dev);
1620		break;
1621	case NETDEV_PRE_TYPE_CHANGE:
1622		ip_mc_unmap(in_dev);
1623		break;
1624	case NETDEV_POST_TYPE_CHANGE:
1625		ip_mc_remap(in_dev);
1626		break;
1627	case NETDEV_CHANGEMTU:
1628		if (inetdev_valid_mtu(dev->mtu))
1629			break;
1630		/* disable IP when MTU is not enough */
1631		fallthrough;
1632	case NETDEV_UNREGISTER:
1633		inetdev_destroy(in_dev);
1634		break;
1635	case NETDEV_CHANGENAME:
1636		/* Do not notify about label change; this event is
1637		 * not interesting to applications using netlink.
1638		 */
1639		inetdev_changename(dev, in_dev);
1640
1641		devinet_sysctl_unregister(in_dev);
1642		devinet_sysctl_register(in_dev);
1643		break;
1644	}
1645out:
1646	return NOTIFY_DONE;
1647}
1648
1649static struct notifier_block ip_netdev_notifier = {
1650	.notifier_call = inetdev_event,
1651};
1652
1653static size_t inet_nlmsg_size(void)
1654{
1655	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1656	       + nla_total_size(4) /* IFA_ADDRESS */
1657	       + nla_total_size(4) /* IFA_LOCAL */
1658	       + nla_total_size(4) /* IFA_BROADCAST */
1659	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1660	       + nla_total_size(4)  /* IFA_FLAGS */
1661	       + nla_total_size(1)  /* IFA_PROTO */
1662	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1663	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1664}
1665
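/* Convert a jiffies timestamp into hundredths of a second since boot, the
 * unit used by struct ifa_cacheinfo (cstamp/tstamp) on the wire.
 */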
1666static inline u32 cstamp_delta(unsigned long cstamp)
1667{
1668	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1669}
1670
1671static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1672			 unsigned long tstamp, u32 preferred, u32 valid)
1673{
1674	struct ifa_cacheinfo ci;
1675
1676	ci.cstamp = cstamp_delta(cstamp);
1677	ci.tstamp = cstamp_delta(tstamp);
1678	ci.ifa_prefered = preferred;
1679	ci.ifa_valid = valid;
1680
1681	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1682}
1683
1684static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa,
1685			    struct inet_fill_args *args)
1686{
1687	struct ifaddrmsg *ifm;
1688	struct nlmsghdr  *nlh;
1689	unsigned long tstamp;
1690	u32 preferred, valid;
1691	u32 flags;
1692
1693	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1694			args->flags);
1695	if (!nlh)
1696		return -EMSGSIZE;
1697
1698	ifm = nlmsg_data(nlh);
1699	ifm->ifa_family = AF_INET;
1700	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1701
1702	flags = READ_ONCE(ifa->ifa_flags);
1703	/* Warning: ifm->ifa_flags is a __u8; it holds only 8 bits.
1704	 * The 32-bit value is given in the IFA_FLAGS attribute.
1705	 */
1706	ifm->ifa_flags = (__u8)flags;
1707
1708	ifm->ifa_scope = ifa->ifa_scope;
1709	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1710
1711	if (args->netnsid >= 0 &&
1712	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1713		goto nla_put_failure;
1714
1715	tstamp = READ_ONCE(ifa->ifa_tstamp);
1716	if (!(flags & IFA_F_PERMANENT)) {
1717		preferred = READ_ONCE(ifa->ifa_preferred_lft);
1718		valid = READ_ONCE(ifa->ifa_valid_lft);
1719		if (preferred != INFINITY_LIFE_TIME) {
1720			long tval = (jiffies - tstamp) / HZ;
1721
1722			if (preferred > tval)
1723				preferred -= tval;
1724			else
1725				preferred = 0;
1726			if (valid != INFINITY_LIFE_TIME) {
1727				if (valid > tval)
1728					valid -= tval;
1729				else
1730					valid = 0;
1731			}
1732		}
1733	} else {
1734		preferred = INFINITY_LIFE_TIME;
1735		valid = INFINITY_LIFE_TIME;
1736	}
1737	if ((ifa->ifa_address &&
1738	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1739	    (ifa->ifa_local &&
1740	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1741	    (ifa->ifa_broadcast &&
1742	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1743	    (ifa->ifa_label[0] &&
1744	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1745	    (ifa->ifa_proto &&
1746	     nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) ||
1747	    nla_put_u32(skb, IFA_FLAGS, flags) ||
1748	    (ifa->ifa_rt_priority &&
1749	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1750	    put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp,
1751			  preferred, valid))
1752		goto nla_put_failure;
1753
1754	nlmsg_end(skb, nlh);
1755	return 0;
1756
1757nla_put_failure:
1758	nlmsg_cancel(skb, nlh);
1759	return -EMSGSIZE;
1760}
1761
1762static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1763				      struct inet_fill_args *fillargs,
1764				      struct net **tgt_net, struct sock *sk,
1765				      struct netlink_callback *cb)
1766{
1767	struct netlink_ext_ack *extack = cb->extack;
1768	struct nlattr *tb[IFA_MAX+1];
1769	struct ifaddrmsg *ifm;
1770	int err, i;
1771
1772	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1773		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1774		return -EINVAL;
1775	}
1776
1777	ifm = nlmsg_data(nlh);
1778	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1779		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1780		return -EINVAL;
1781	}
1782
1783	fillargs->ifindex = ifm->ifa_index;
1784	if (fillargs->ifindex) {
1785		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1786		fillargs->flags |= NLM_F_DUMP_FILTERED;
1787	}
1788
1789	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1790					    ifa_ipv4_policy, extack);
1791	if (err < 0)
1792		return err;
1793
1794	for (i = 0; i <= IFA_MAX; ++i) {
1795		if (!tb[i])
1796			continue;
1797
1798		if (i == IFA_TARGET_NETNSID) {
1799			struct net *net;
1800
1801			fillargs->netnsid = nla_get_s32(tb[i]);
1802
1803			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1804			if (IS_ERR(net)) {
1805				fillargs->netnsid = -1;
1806				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1807				return PTR_ERR(net);
1808			}
1809			*tgt_net = net;
1810		} else {
1811			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1812			return -EINVAL;
1813		}
1814	}
1815
1816	return 0;
1817}
1818
1819static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1820			    struct netlink_callback *cb, int *s_ip_idx,
1821			    struct inet_fill_args *fillargs)
1822{
1823	struct in_ifaddr *ifa;
1824	int ip_idx = 0;
1825	int err;
1826
1827	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1828		if (ip_idx < *s_ip_idx) {
1829			ip_idx++;
1830			continue;
1831		}
1832		err = inet_fill_ifaddr(skb, ifa, fillargs);
1833		if (err < 0)
1834			goto done;
1835
1836		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1837		ip_idx++;
1838	}
1839	err = 0;
1840	ip_idx = 0;
1841done:
1842	*s_ip_idx = ip_idx;
1843
1844	return err;
1845}
1846
1847/* Combine dev_addr_genid and dev_base_seq to detect changes.
1848 */
1849static u32 inet_base_seq(const struct net *net)
1850{
1851	u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
1852		  READ_ONCE(net->dev_base_seq);
1853
1854	/* Must not return 0 (see nl_dump_check_consistent()).
1855	 * Choose a value far away from 0.
1856	 */
1857	if (!res)
1858		res = 0x80000000;
1859	return res;
1860}
1861
1862static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1863{
1864	const struct nlmsghdr *nlh = cb->nlh;
1865	struct inet_fill_args fillargs = {
1866		.portid = NETLINK_CB(cb->skb).portid,
1867		.seq = nlh->nlmsg_seq,
1868		.event = RTM_NEWADDR,
1869		.flags = NLM_F_MULTI,
1870		.netnsid = -1,
1871	};
1872	struct net *net = sock_net(skb->sk);
1873	struct net *tgt_net = net;
1874	struct {
1875		unsigned long ifindex;
1876		int ip_idx;
1877	} *ctx = (void *)cb->ctx;
1878	struct in_device *in_dev;
1879	struct net_device *dev;
1880	int err = 0;
1881
1882	rcu_read_lock();
1883	if (cb->strict_check) {
1884		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1885						 skb->sk, cb);
1886		if (err < 0)
1887			goto done;
1888
1889		if (fillargs.ifindex) {
1890			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
1891			if (!dev) {
1892				err = -ENODEV;
1893				goto done;
1894			}
1895			in_dev = __in_dev_get_rcu(dev);
1896			if (!in_dev)
1897				goto done;
1898			err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1899					       &fillargs);
1900			goto done;
1901		}
1902	}
1903
1904	cb->seq = inet_base_seq(tgt_net);
1905
1906	for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
1907		in_dev = __in_dev_get_rcu(dev);
1908		if (!in_dev)
1909			continue;
1910		err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx,
1911				       &fillargs);
1912		if (err < 0)
1913			goto done;
1914	}
1915done:
1916	if (fillargs.netnsid >= 0)
1917		put_net(tgt_net);
1918	rcu_read_unlock();
1919	return err;
1920}
1921
1922static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1923		      u32 portid)
1924{
1925	struct inet_fill_args fillargs = {
1926		.portid = portid,
1927		.seq = nlh ? nlh->nlmsg_seq : 0,
1928		.event = event,
1929		.flags = 0,
1930		.netnsid = -1,
1931	};
1932	struct sk_buff *skb;
1933	int err = -ENOBUFS;
1934	struct net *net;
1935
1936	net = dev_net(ifa->ifa_dev->dev);
1937	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1938	if (!skb)
1939		goto errout;
1940
1941	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1942	if (err < 0) {
1943		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1944		WARN_ON(err == -EMSGSIZE);
1945		kfree_skb(skb);
1946		goto errout;
1947	}
1948	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1949	return;
1950errout:
1951	if (err < 0)
1952		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1953}
1954
1955static size_t inet_get_link_af_size(const struct net_device *dev,
1956				    u32 ext_filter_mask)
1957{
1958	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1959
1960	if (!in_dev)
1961		return 0;
1962
1963	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1964}
1965
1966static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1967			     u32 ext_filter_mask)
1968{
1969	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1970	struct nlattr *nla;
1971	int i;
1972
1973	if (!in_dev)
1974		return -ENODATA;
1975
1976	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1977	if (!nla)
1978		return -EMSGSIZE;
1979
1980	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1981		((u32 *) nla_data(nla))[i] = READ_ONCE(in_dev->cnf.data[i]);
1982
1983	return 0;
1984}
1985
1986static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1987	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1988};
1989
1990static int inet_validate_link_af(const struct net_device *dev,
1991				 const struct nlattr *nla,
1992				 struct netlink_ext_ack *extack)
1993{
1994	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1995	int err, rem;
1996
1997	if (dev && !__in_dev_get_rtnl(dev))
1998		return -EAFNOSUPPORT;
1999
2000	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
2001					  inet_af_policy, extack);
2002	if (err < 0)
2003		return err;
2004
2005	if (tb[IFLA_INET_CONF]) {
2006		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
2007			int cfgid = nla_type(a);
2008
2009			if (nla_len(a) < 4)
2010				return -EINVAL;
2011
2012			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
2013				return -EINVAL;
2014		}
2015	}
2016
2017	return 0;
2018}
2019
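/* Apply a validated IFLA_INET_CONF array to the device, one
 * ipv4_devconf_set() call per nested attribute.  Runs under the RTNL.
 */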
2020static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
2021			    struct netlink_ext_ack *extack)
2022{
2023	struct in_device *in_dev = __in_dev_get_rtnl(dev);
2024	struct nlattr *a, *tb[IFLA_INET_MAX+1];
2025	int rem;
2026
2027	if (!in_dev)
2028		return -EAFNOSUPPORT;
2029
2030	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
2031		return -EINVAL;
2032
2033	if (tb[IFLA_INET_CONF]) {
2034		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
2035			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
2036	}
2037
2038	return 0;
2039}
2040
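/* Worst-case RTM_NEWNETCONF payload for @type; NETCONFA_ALL accounts
 * for every attribute inet_netconf_fill_devconf() can emit.
 */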
2041static int inet_netconf_msgsize_devconf(int type)
2042{
2043	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2044		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2045	bool all = false;
2046
2047	if (type == NETCONFA_ALL)
2048		all = true;
2049
2050	if (all || type == NETCONFA_FORWARDING)
2051		size += nla_total_size(4);
2052	if (all || type == NETCONFA_RP_FILTER)
2053		size += nla_total_size(4);
2054	if (all || type == NETCONFA_MC_FORWARDING)
2055		size += nla_total_size(4);
2056	if (all || type == NETCONFA_BC_FORWARDING)
2057		size += nla_total_size(4);
2058	if (all || type == NETCONFA_PROXY_NEIGH)
2059		size += nla_total_size(4);
2060	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2061		size += nla_total_size(4);
2062
2063	return size;
2064}
2065
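/* Build a single netconf message.  @devconf may be NULL (device gone),
 * in which case only NETCONFA_IFINDEX is included; otherwise the
 * attribute selected by @type, or all of them for NETCONFA_ALL, is
 * appended.
 */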
2066static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2067				     const struct ipv4_devconf *devconf,
2068				     u32 portid, u32 seq, int event,
2069				     unsigned int flags, int type)
2070{
2071	struct nlmsghdr  *nlh;
2072	struct netconfmsg *ncm;
2073	bool all = false;
2074
2075	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2076			flags);
2077	if (!nlh)
2078		return -EMSGSIZE;
2079
2080	if (type == NETCONFA_ALL)
2081		all = true;
2082
2083	ncm = nlmsg_data(nlh);
2084	ncm->ncm_family = AF_INET;
2085
2086	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2087		goto nla_put_failure;
2088
2089	if (!devconf)
2090		goto out;
2091
2092	if ((all || type == NETCONFA_FORWARDING) &&
2093	    nla_put_s32(skb, NETCONFA_FORWARDING,
2094			IPV4_DEVCONF_RO(*devconf, FORWARDING)) < 0)
2095		goto nla_put_failure;
2096	if ((all || type == NETCONFA_RP_FILTER) &&
2097	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2098			IPV4_DEVCONF_RO(*devconf, RP_FILTER)) < 0)
2099		goto nla_put_failure;
2100	if ((all || type == NETCONFA_MC_FORWARDING) &&
2101	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2102			IPV4_DEVCONF_RO(*devconf, MC_FORWARDING)) < 0)
2103		goto nla_put_failure;
2104	if ((all || type == NETCONFA_BC_FORWARDING) &&
2105	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2106			IPV4_DEVCONF_RO(*devconf, BC_FORWARDING)) < 0)
2107		goto nla_put_failure;
2108	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2109	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2110			IPV4_DEVCONF_RO(*devconf, PROXY_ARP)) < 0)
2111		goto nla_put_failure;
2112	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2113	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2114			IPV4_DEVCONF_RO(*devconf,
2115					IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2116		goto nla_put_failure;
2117
2118out:
2119	nlmsg_end(skb, nlh);
2120	return 0;
2121
2122nla_put_failure:
2123	nlmsg_cancel(skb, nlh);
2124	return -EMSGSIZE;
2125}
2126
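/* Broadcast a netconf change on RTNLGRP_IPV4_NETCONF, mirroring
 * rtmsg_ifa(): allocate, fill, notify, and record any failure with
 * rtnl_set_sk_err().
 */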
2127void inet_netconf_notify_devconf(struct net *net, int event, int type,
2128				 int ifindex, struct ipv4_devconf *devconf)
2129{
2130	struct sk_buff *skb;
2131	int err = -ENOBUFS;
2132
2133	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2134	if (!skb)
2135		goto errout;
2136
2137	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2138					event, 0, type);
2139	if (err < 0) {
2140		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2141		WARN_ON(err == -EMSGSIZE);
2142		kfree_skb(skb);
2143		goto errout;
2144	}
2145	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2146	return;
2147errout:
2148	if (err < 0)
2149		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2150}
2151
2152static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2153	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2154	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2155	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2156	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2157	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2158};
2159
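/* Validate an RTM_GETNETCONF request; with strict checking enabled
 * only NETCONFA_IFINDEX is accepted.
 */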
2160static int inet_netconf_valid_get_req(struct sk_buff *skb,
2161				      const struct nlmsghdr *nlh,
2162				      struct nlattr **tb,
2163				      struct netlink_ext_ack *extack)
2164{
2165	int i, err;
2166
2167	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2168		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2169		return -EINVAL;
2170	}
2171
2172	if (!netlink_strict_get_check(skb))
2173		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2174					      tb, NETCONFA_MAX,
2175					      devconf_ipv4_policy, extack);
2176
2177	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2178					    tb, NETCONFA_MAX,
2179					    devconf_ipv4_policy, extack);
2180	if (err)
2181		return err;
2182
2183	for (i = 0; i <= NETCONFA_MAX; i++) {
2184		if (!tb[i])
2185			continue;
2186
2187		switch (i) {
2188		case NETCONFA_IFINDEX:
2189			break;
2190		default:
2191			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2192			return -EINVAL;
2193		}
2194	}
2195
2196	return 0;
2197}
2198
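/* RTM_GETNETCONF doit handler (used e.g. by "ip netconf show").  The
 * reserved NETCONFA_IFINDEX_ALL and NETCONFA_IFINDEX_DEFAULT values
 * select the "all" and "default" templates, any other ifindex a real
 * device; the full configuration is unicast back to the requester.
 */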
2199static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2200				    struct nlmsghdr *nlh,
2201				    struct netlink_ext_ack *extack)
2202{
2203	struct net *net = sock_net(in_skb->sk);
2204	struct nlattr *tb[NETCONFA_MAX + 1];
2205	const struct ipv4_devconf *devconf;
2206	struct in_device *in_dev = NULL;
2207	struct net_device *dev = NULL;
2208	struct sk_buff *skb;
2209	int ifindex;
2210	int err;
2211
2212	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2213	if (err)
2214		return err;
2215
2216	if (!tb[NETCONFA_IFINDEX])
2217		return -EINVAL;
2218
2219	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2220	switch (ifindex) {
2221	case NETCONFA_IFINDEX_ALL:
2222		devconf = net->ipv4.devconf_all;
2223		break;
2224	case NETCONFA_IFINDEX_DEFAULT:
2225		devconf = net->ipv4.devconf_dflt;
2226		break;
2227	default:
2228		err = -ENODEV;
2229		dev = dev_get_by_index(net, ifindex);
2230		if (dev)
2231			in_dev = in_dev_get(dev);
2232		if (!in_dev)
2233			goto errout;
2234		devconf = &in_dev->cnf;
2235		break;
2236	}
2237
2238	err = -ENOBUFS;
2239	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2240	if (!skb)
2241		goto errout;
2242
2243	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2244					NETLINK_CB(in_skb).portid,
2245					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2246					NETCONFA_ALL);
2247	if (err < 0) {
2248		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2249		WARN_ON(err == -EMSGSIZE);
2250		kfree_skb(skb);
2251		goto errout;
2252	}
2253	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2254errout:
2255	if (in_dev)
2256		in_dev_put(in_dev);
2257	dev_put(dev);
2258	return err;
2259}
2260
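/* RTM_GETNETCONF dump handler.  Walks the namespace's devices under
 * RCU using the ifindex cursor kept in cb->ctx, then appends the "all"
 * and "default" entries exactly once, tracked by ctx->all_default.
 */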
2261static int inet_netconf_dump_devconf(struct sk_buff *skb,
2262				     struct netlink_callback *cb)
2263{
2264	const struct nlmsghdr *nlh = cb->nlh;
2265	struct net *net = sock_net(skb->sk);
2266	struct {
2267		unsigned long ifindex;
2268		unsigned int all_default;
2269	} *ctx = (void *)cb->ctx;
2270	const struct in_device *in_dev;
2271	struct net_device *dev;
2272	int err = 0;
2273
2274	if (cb->strict_check) {
2275		struct netlink_ext_ack *extack = cb->extack;
2276		struct netconfmsg *ncm;
2277
2278		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2279			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2280			return -EINVAL;
2281		}
2282
2283		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2284			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2285			return -EINVAL;
2286		}
2287	}
2288
2289	rcu_read_lock();
2290	for_each_netdev_dump(net, dev, ctx->ifindex) {
2291		in_dev = __in_dev_get_rcu(dev);
2292		if (!in_dev)
2293			continue;
2294		err = inet_netconf_fill_devconf(skb, dev->ifindex,
2295						&in_dev->cnf,
2296						NETLINK_CB(cb->skb).portid,
2297						nlh->nlmsg_seq,
2298						RTM_NEWNETCONF, NLM_F_MULTI,
2299						NETCONFA_ALL);
2300		if (err < 0)
2301			goto done;
2302	}
2303	if (ctx->all_default == 0) {
2304		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2305						net->ipv4.devconf_all,
2306						NETLINK_CB(cb->skb).portid,
2307						nlh->nlmsg_seq,
2308						RTM_NEWNETCONF, NLM_F_MULTI,
2309						NETCONFA_ALL);
2310		if (err < 0)
2311			goto done;
2312		ctx->all_default++;
2313	}
2314	if (ctx->all_default == 1) {
2315		err = inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2316						net->ipv4.devconf_dflt,
2317						NETLINK_CB(cb->skb).portid,
2318						nlh->nlmsg_seq,
2319						RTM_NEWNETCONF, NLM_F_MULTI,
2320						NETCONFA_ALL);
2321		if (err < 0)
2322			goto done;
2323		ctx->all_default++;
2324	}
2325done:
2326	rcu_read_unlock();
2327	return err;
2328}
2329
2330#ifdef CONFIG_SYSCTL
2331
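/* Propagate a change of conf/default/<entry> to every device that has
 * not explicitly overridden it (tracked in the cnf.state bitmap).
 */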
2332static void devinet_copy_dflt_conf(struct net *net, int i)
2333{
2334	struct net_device *dev;
2335
2336	rcu_read_lock();
2337	for_each_netdev_rcu(net, dev) {
2338		struct in_device *in_dev;
2339
2340		in_dev = __in_dev_get_rcu(dev);
2341		if (in_dev && !test_bit(i, in_dev->cnf.state))
2342			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2343	}
2344	rcu_read_unlock();
2345}
2346
2347	/* Called with the RTNL held: propagate the "all" forwarding value to
 * every device and send the matching RTM_NEWNETCONF notifications.
 */
2348static void inet_forward_change(struct net *net)
2349{
2350	struct net_device *dev;
2351	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2352
2353	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2354	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2355	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2356				    NETCONFA_FORWARDING,
2357				    NETCONFA_IFINDEX_ALL,
2358				    net->ipv4.devconf_all);
2359	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2360				    NETCONFA_FORWARDING,
2361				    NETCONFA_IFINDEX_DEFAULT,
2362				    net->ipv4.devconf_dflt);
2363
2364	for_each_netdev(net, dev) {
2365		struct in_device *in_dev;
2366
2367		if (on)
2368			dev_disable_lro(dev);
2369
2370		in_dev = __in_dev_get_rtnl(dev);
2371		if (in_dev) {
2372			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2373			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2374						    NETCONFA_FORWARDING,
2375						    dev->ifindex, &in_dev->cnf);
2376		}
2377	}
2378}
2379
2380static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2381{
2382	if (cnf == net->ipv4.devconf_dflt)
2383		return NETCONFA_IFINDEX_DEFAULT;
2384	else if (cnf == net->ipv4.devconf_all)
2385		return NETCONFA_IFINDEX_ALL;
2386	else {
2387		struct in_device *idev
2388			= container_of(cnf, struct in_device, cnf);
2389		return idev->dev->ifindex;
2390	}
2391}
2392
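/* proc handler shared by most entries under /proc/sys/net/ipv4/conf/.
 * On writes it marks the entry as explicitly set, pushes "default"
 * changes to devices that have not overridden it, flushes the route
 * cache where required, and sends RTM_NEWNETCONF notifications for the
 * netconf-visible attributes it handles (rp_filter, proxy_arp,
 * ignore_routes_with_linkdown).
 */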
2393static int devinet_conf_proc(struct ctl_table *ctl, int write,
2394			     void *buffer, size_t *lenp, loff_t *ppos)
2395{
2396	int old_value = *(int *)ctl->data;
2397	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2398	int new_value = *(int *)ctl->data;
2399
2400	if (write) {
2401		struct ipv4_devconf *cnf = ctl->extra1;
2402		struct net *net = ctl->extra2;
2403		int i = (int *)ctl->data - cnf->data;
2404		int ifindex;
2405
2406		set_bit(i, cnf->state);
2407
2408		if (cnf == net->ipv4.devconf_dflt)
2409			devinet_copy_dflt_conf(net, i);
2410		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2411		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2412			if ((new_value == 0) && (old_value != 0))
2413				rt_cache_flush(net);
2414
2415		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2416		    new_value != old_value)
2417			rt_cache_flush(net);
2418
2419		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2420		    new_value != old_value) {
2421			ifindex = devinet_conf_ifindex(net, cnf);
2422			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2423						    NETCONFA_RP_FILTER,
2424						    ifindex, cnf);
2425		}
2426		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2427		    new_value != old_value) {
2428			ifindex = devinet_conf_ifindex(net, cnf);
2429			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2430						    NETCONFA_PROXY_NEIGH,
2431						    ifindex, cnf);
2432		}
2433		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2434		    new_value != old_value) {
2435			ifindex = devinet_conf_ifindex(net, cnf);
2436			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2437						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2438						    ifindex, cnf);
2439		}
2440	}
2441
2442	return ret;
2443}
2444
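/* proc handler for the "forwarding" entries and ip_forward.  Writes
 * require CAP_NET_ADMIN in the namespace's user namespace.  Except for
 * the "default" entry, a change takes the RTNL (restarting the syscall
 * if the lock is contended), notifies netconf listeners and flushes
 * the route cache; toggling "all" fans out via inet_forward_change().
 */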
2445static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2446				  void *buffer, size_t *lenp, loff_t *ppos)
2447{
2448	int *valp = ctl->data;
2449	int val = *valp;
2450	loff_t pos = *ppos;
2451	struct net *net = ctl->extra2;
2452	int ret;
2453
2454	if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
2455		return -EPERM;
2456
2457	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2458
2459	if (write && *valp != val) {
2460		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2461			if (!rtnl_trylock()) {
2462				/* Restore the original values before restarting */
2463				*valp = val;
2464				*ppos = pos;
2465				return restart_syscall();
2466			}
2467			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2468				inet_forward_change(net);
2469			} else {
2470				struct ipv4_devconf *cnf = ctl->extra1;
2471				struct in_device *idev =
2472					container_of(cnf, struct in_device, cnf);
2473				if (*valp)
2474					dev_disable_lro(idev->dev);
2475				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2476							    NETCONFA_FORWARDING,
2477							    idev->dev->ifindex,
2478							    cnf);
2479			}
2480			rtnl_unlock();
2481			rt_cache_flush(net);
2482		} else {
2483			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2484						    NETCONFA_FORWARDING,
2485						    NETCONFA_IFINDEX_DEFAULT,
2486						    net->ipv4.devconf_dflt);
		}
2487	}
2488
2489	return ret;
2490}
2491
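/* Like proc_dointvec(), but flushes the route cache when a write changes
 * the value.
 */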
2492static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2493				void *buffer, size_t *lenp, loff_t *ppos)
2494{
2495	int *valp = ctl->data;
2496	int val = *valp;
2497	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2498	struct net *net = ctl->extra2;
2499
2500	if (write && *valp != val)
2501		rt_cache_flush(net);
2502
2503	return ret;
2504}
2505
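/* Template for the per-interface sysctl tables below.  Entries point
 * into the static ipv4_devconf and are rebased onto the instance they
 * describe in __devinet_sysctl_register().
 */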
2506#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
2507	{ \
2508		.procname	= name, \
2509		.data		= ipv4_devconf.data + \
2510				  IPV4_DEVCONF_ ## attr - 1, \
2511		.maxlen		= sizeof(int), \
2512		.mode		= mval, \
2513		.proc_handler	= proc, \
2514		.extra1		= &ipv4_devconf, \
2515	}
2516
2517#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
2518	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
2519
2520#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
2521	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
2522
2523#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
2524	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
2525
2526#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
2527	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2528
2529static struct devinet_sysctl_table {
2530	struct ctl_table_header *sysctl_header;
2531	struct ctl_table devinet_vars[IPV4_DEVCONF_MAX];
2532} devinet_sysctl = {
2533	.devinet_vars = {
2534		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2535					     devinet_sysctl_forward),
2536		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2537		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2538
2539		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2540		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2541		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2542		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2543		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2544		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2545					"accept_source_route"),
2546		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2547		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2548		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2549		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2550		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2551		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2552		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2553		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2554		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2555		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2556		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2557		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2558		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
2559					"arp_evict_nocarrier"),
2560		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2561		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2562					"force_igmp_version"),
2563		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2564					"igmpv2_unsolicited_report_interval"),
2565		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2566					"igmpv3_unsolicited_report_interval"),
2567		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2568					"ignore_routes_with_linkdown"),
2569		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2570					"drop_gratuitous_arp"),
2571
2572		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2573		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2574		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2575					      "promote_secondaries"),
2576		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2577					      "route_localnet"),
2578		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2579					      "drop_unicast_in_l2_multicast"),
2580	},
2581};
2582
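/* Clone the template table for one ipv4_devconf instance and register
 * it as net/ipv4/conf/<dev_name>, rebasing each entry's ->data pointer
 * from the static ipv4_devconf onto @p.
 */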
2583static int __devinet_sysctl_register(struct net *net, char *dev_name,
2584				     int ifindex, struct ipv4_devconf *p)
2585{
2586	int i;
2587	struct devinet_sysctl_table *t;
2588	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2589
2590	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
2591	if (!t)
2592		goto out;
2593
2594	for (i = 0; i < ARRAY_SIZE(t->devinet_vars); i++) {
2595		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2596		t->devinet_vars[i].extra1 = p;
2597		t->devinet_vars[i].extra2 = net;
2598	}
2599
2600	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2601
2602	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2603	if (!t->sysctl_header)
2604		goto free;
2605
2606	p->sysctl = t;
2607
2608	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2609				    ifindex, p);
2610	return 0;
2611
2612free:
2613	kfree(t);
2614out:
2615	return -ENOMEM;
2616}
2617
2618static void __devinet_sysctl_unregister(struct net *net,
2619					struct ipv4_devconf *cnf, int ifindex)
2620{
2621	struct devinet_sysctl_table *t = cnf->sysctl;
2622
2623	if (t) {
2624		cnf->sysctl = NULL;
2625		unregister_net_sysctl_table(t->sysctl_header);
2626		kfree(t);
2627	}
2628
2629	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2630}
2631
2632static int devinet_sysctl_register(struct in_device *idev)
2633{
2634	int err;
2635
2636	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2637		return -EINVAL;
2638
2639	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2640	if (err)
2641		return err;
2642	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2643					idev->dev->ifindex, &idev->cnf);
2644	if (err)
2645		neigh_sysctl_unregister(idev->arp_parms);
2646	return err;
2647}
2648
2649static void devinet_sysctl_unregister(struct in_device *idev)
2650{
2651	struct net *net = dev_net(idev->dev);
2652
2653	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2654	neigh_sysctl_unregister(idev->arp_parms);
2655}
2656
2657static struct ctl_table ctl_forward_entry[] = {
2658	{
2659		.procname	= "ip_forward",
2660		.data		= &ipv4_devconf.data[
2661					IPV4_DEVCONF_FORWARDING - 1],
2662		.maxlen		= sizeof(int),
2663		.mode		= 0644,
2664		.proc_handler	= devinet_sysctl_forward,
2665		.extra1		= &ipv4_devconf,
2666		.extra2		= &init_net,
2667	},
2668};
2669#endif
2670
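/* Per-namespace setup: allocate the "all" and "default" devconf copies
 * (optionally inherited from init_net or the creating namespace, see
 * net_inherit_devconf()) and, under CONFIG_SYSCTL, register their
 * sysctl trees together with the per-namespace ip_forward entry.
 */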
2671static __net_init int devinet_init_net(struct net *net)
2672{
2673	int err;
2674	struct ipv4_devconf *all, *dflt;
2675#ifdef CONFIG_SYSCTL
2676	struct ctl_table *tbl;
2677	struct ctl_table_header *forw_hdr;
2678#endif
2679
2680	err = -ENOMEM;
2681	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2682	if (!all)
2683		goto err_alloc_all;
2684
2685	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2686	if (!dflt)
2687		goto err_alloc_dflt;
2688
2689#ifdef CONFIG_SYSCTL
2690	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2691	if (!tbl)
2692		goto err_alloc_ctl;
2693
2694	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2695	tbl[0].extra1 = all;
2696	tbl[0].extra2 = net;
2697#endif
2698
2699	if (!net_eq(net, &init_net)) {
2700		switch (net_inherit_devconf()) {
2701		case 3:
2702			/* copy from the current netns */
2703			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
2704			       sizeof(ipv4_devconf));
2705			memcpy(dflt,
2706			       current->nsproxy->net_ns->ipv4.devconf_dflt,
2707			       sizeof(ipv4_devconf_dflt));
2708			break;
2709		case 0:
2710		case 1:
2711			/* copy from init_net */
2712			memcpy(all, init_net.ipv4.devconf_all,
2713			       sizeof(ipv4_devconf));
2714			memcpy(dflt, init_net.ipv4.devconf_dflt,
2715			       sizeof(ipv4_devconf_dflt));
2716			break;
2717		case 2:
2718			/* use compiled values */
2719			break;
2720		}
2721	}
2722
2723#ifdef CONFIG_SYSCTL
2724	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2725	if (err < 0)
2726		goto err_reg_all;
2727
2728	err = __devinet_sysctl_register(net, "default",
2729					NETCONFA_IFINDEX_DEFAULT, dflt);
2730	if (err < 0)
2731		goto err_reg_dflt;
2732
2733	err = -ENOMEM;
2734	forw_hdr = register_net_sysctl_sz(net, "net/ipv4", tbl,
2735					  ARRAY_SIZE(ctl_forward_entry));
2736	if (!forw_hdr)
2737		goto err_reg_ctl;
2738	net->ipv4.forw_hdr = forw_hdr;
2739#endif
2740
2741	net->ipv4.devconf_all = all;
2742	net->ipv4.devconf_dflt = dflt;
2743	return 0;
2744
2745#ifdef CONFIG_SYSCTL
2746err_reg_ctl:
2747	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2748err_reg_dflt:
2749	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2750err_reg_all:
2751	kfree(tbl);
2752err_alloc_ctl:
2753#endif
2754	kfree(dflt);
2755err_alloc_dflt:
2756	kfree(all);
2757err_alloc_all:
2758	return err;
2759}
2760
2761static __net_exit void devinet_exit_net(struct net *net)
2762{
2763#ifdef CONFIG_SYSCTL
2764	const struct ctl_table *tbl;
2765
2766	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2767	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2768	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2769				    NETCONFA_IFINDEX_DEFAULT);
2770	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2771				    NETCONFA_IFINDEX_ALL);
2772	kfree(tbl);
2773#endif
2774	kfree(net->ipv4.devconf_dflt);
2775	kfree(net->ipv4.devconf_all);
2776}
2777
2778static __net_initdata struct pernet_operations devinet_ops = {
2779	.init = devinet_init_net,
2780	.exit = devinet_exit_net,
2781};
2782
2783static struct rtnl_af_ops inet_af_ops __read_mostly = {
2784	.family		  = AF_INET,
2785	.fill_link_af	  = inet_fill_link_af,
2786	.get_link_af_size = inet_get_link_af_size,
2787	.validate_link_af = inet_validate_link_af,
2788	.set_link_af	  = inet_set_link_af,
2789};
2790
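/* Subsystem init: seed the address hash table, register the pernet
 * operations and netdevice notifier, start the address lifetime worker,
 * and hook up the AF_INET rtnetlink address and netconf handlers.
 */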
2791void __init devinet_init(void)
2792{
2793	int i;
2794
2795	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2796		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2797
2798	register_pernet_subsys(&devinet_ops);
2799	register_netdevice_notifier(&ip_netdev_notifier);
2800
2801	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2802
2803	rtnl_af_register(&inet_af_ops);
2804
2805	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2806	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2807	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
2808		      RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
2809	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2810		      inet_netconf_dump_devconf,
2811		      RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
2812}
2813