1/*	$OpenBSD: mpls_input.c,v 1.79 2023/05/13 13:35:18 bluhm Exp $	*/
2
3/*
4 * Copyright (c) 2008 Claudio Jeker <claudio@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/param.h>
20#include <sys/mbuf.h>
21#include <sys/systm.h>
22#include <sys/socket.h>
23
24#include <net/if.h>
25#include <net/if_var.h>
26#include <net/if_types.h>
27#include <net/netisr.h>
28#include <net/route.h>
29
30#include <netinet/in.h>
31#include <netinet/ip.h>
32#include <netinet/ip_var.h>
33#include <netinet/ip_icmp.h>
34
35#ifdef INET6
36#include <netinet/ip6.h>
37#endif /* INET6 */
38
39#include <netmpls/mpls.h>
40
41#ifdef MPLS_DEBUG
42#define MPLS_LABEL_GET(l)	((ntohl((l) & MPLS_LABEL_MASK)) >> MPLS_LABEL_OFFSET)
43#define MPLS_TTL_GET(l)		(ntohl((l) & MPLS_TTL_MASK))
44#endif
45
46struct mbuf	*mpls_do_error(struct mbuf *, int, int, int);
47void		 mpls_input_local(struct rtentry *, struct mbuf *);
48
49void
50mpls_input(struct ifnet *ifp, struct mbuf *m)
51{
52	struct sockaddr_mpls *smpls;
53	struct sockaddr_mpls sa_mpls;
54	struct shim_hdr	*shim;
55	struct rtentry *rt;
56	struct rt_mpls *rt_mpls;
57	uint8_t ttl;
58	int hasbos;
59
60	if (!ISSET(ifp->if_xflags, IFXF_MPLS)) {
61		m_freem(m);
62		return;
63	}
64
65	/* drop all broadcast and multicast packets */
66	if (m->m_flags & (M_BCAST | M_MCAST)) {
67		m_freem(m);
68		return;
69	}
70
71	if (m->m_len < sizeof(*shim)) {
72		m = m_pullup(m, sizeof(*shim));
73		if (m == NULL)
74			return;
75	}
76
77	shim = mtod(m, struct shim_hdr *);
78#ifdef MPLS_DEBUG
79	printf("mpls_input: iface %s label=%d, ttl=%d BoS %d\n",
80	    ifp->if_xname, MPLS_LABEL_GET(shim->shim_label),
81	    MPLS_TTL_GET(shim->shim_label),
82	    MPLS_BOS_ISSET(shim->shim_label));
83#endif
84
85	/* check and decrement TTL */
86	ttl = ntohl(shim->shim_label & MPLS_TTL_MASK);
87	if (ttl <= 1) {
88		/* TTL exceeded */
89		m = mpls_do_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0);
90		if (m == NULL)
91			return;
92
93		shim = mtod(m, struct shim_hdr *);
94		ttl = ntohl(shim->shim_label & MPLS_TTL_MASK);
95	} else
96		ttl--;
97	hasbos = MPLS_BOS_ISSET(shim->shim_label);
98
99	bzero(&sa_mpls, sizeof(sa_mpls));
100	smpls = &sa_mpls;
101	smpls->smpls_family = AF_MPLS;
102	smpls->smpls_len = sizeof(*smpls);
103	smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
104
105	if (ntohl(smpls->smpls_label) < MPLS_LABEL_RESERVED_MAX) {
106		m = mpls_shim_pop(m);
107		if (m == NULL)
108			return;
109		if (!hasbos) {
110			/*
111			 * RFC 4182 relaxes the position of the
112			 * explicit NULL labels. They no longer need
113			 * to be at the beginning of the stack.
114			 * In this case the label is ignored and the decision
115			 * is made based on the lower one.
116			 */
117			shim = mtod(m, struct shim_hdr *);
118			smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
119			hasbos = MPLS_BOS_ISSET(shim->shim_label);
120		} else {
121			switch (ntohl(smpls->smpls_label)) {
122			case MPLS_LABEL_IPV4NULL:
123do_v4:
124				if (mpls_mapttl_ip) {
125					m = mpls_ip_adjttl(m, ttl);
126					if (m == NULL)
127						return;
128				}
129				ipv4_input(ifp, m);
130				return;
131#ifdef INET6
132			case MPLS_LABEL_IPV6NULL:
133do_v6:
134				if (mpls_mapttl_ip6) {
135					m = mpls_ip6_adjttl(m, ttl);
136					if (m == NULL)
137						return;
138				}
139				ipv6_input(ifp, m);
140				return;
141#endif	/* INET6 */
142			case MPLS_LABEL_IMPLNULL:
143				if (m->m_len < sizeof(u_char) &&
144				    (m = m_pullup(m, sizeof(u_char))) == NULL)
145					return;
146				switch (*mtod(m, u_char *) >> 4) {
147				case IPVERSION:
148					goto do_v4;
149#ifdef INET6
150				case IPV6_VERSION >> 4:
151					goto do_v6;
152#endif
153				default:
154					m_freem(m);
155					return;
156				}
157			default:
158				/* Other cases are not handled for now */
159				m_freem(m);
160				return;
161			}
162		}
163	}
164
165	ifp = NULL;
166
167	rt = rtalloc(smplstosa(smpls), RT_RESOLVE, m->m_pkthdr.ph_rtableid);
168	if (!rtisvalid(rt)) {
169		/* no entry for this label */
170#ifdef MPLS_DEBUG
171		printf("MPLS_DEBUG: label not found\n");
172#endif
173		m_freem(m);
174		goto done;
175	}
176
177	rt_mpls = (struct rt_mpls *)rt->rt_llinfo;
178	if (rt_mpls == NULL || (rt->rt_flags & RTF_MPLS) == 0) {
179#ifdef MPLS_DEBUG
180		printf("MPLS_DEBUG: no MPLS information attached\n");
181#endif
182		m_freem(m);
183		goto done;
184	}
185
186	switch (rt_mpls->mpls_operation) {
187	case MPLS_OP_POP:
188		if (ISSET(rt->rt_flags, RTF_LOCAL)) {
189			mpls_input_local(rt, m);
190			goto done;
191		}
192
193		m = mpls_shim_pop(m);
194		if (m == NULL)
195			goto done;
196		if (!hasbos)
197			/* just forward to gw */
198			break;
199
200		/* last label popped so decide where to push it to */
201		ifp = if_get(rt->rt_ifidx);
202		if (ifp == NULL) {
203			m_freem(m);
204			goto done;
205		}
206
207		KASSERT(rt->rt_gateway);
208
209		switch(rt->rt_gateway->sa_family) {
210		case AF_INET:
211			if ((m = mpls_ip_adjttl(m, ttl)) == NULL)
212				goto done;
213			break;
214#ifdef INET6
215		case AF_INET6:
216			if ((m = mpls_ip6_adjttl(m, ttl)) == NULL)
217				goto done;
218			break;
219#endif
220		case AF_LINK:
221			break;
222		default:
223			m_freem(m);
224			goto done;
225		}
226
227		/* shortcut sending out the packet */
228		if (!ISSET(ifp->if_xflags, IFXF_MPLS))
229			(*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
230		else
231			(*ifp->if_ll_output)(ifp, m, rt->rt_gateway, rt);
232		goto done;
233	case MPLS_OP_PUSH:
234		/* this does not make much sense but it does not hurt */
235		m = mpls_shim_push(m, rt_mpls);
236		break;
237	case MPLS_OP_SWAP:
238		m = mpls_shim_swap(m, rt_mpls);
239		break;
240	default:
241		m_freem(m);
242		goto done;
243	}
244
245	if (m == NULL)
246		goto done;
247
248	/* refetch label and write back TTL */
249	shim = mtod(m, struct shim_hdr *);
250	shim->shim_label = (shim->shim_label & ~MPLS_TTL_MASK) | htonl(ttl);
251
252	ifp = if_get(rt->rt_ifidx);
253	if (ifp == NULL) {
254		m_freem(m);
255		goto done;
256	}
257#ifdef MPLS_DEBUG
258	printf("MPLS: sending on %s outlabel %x dst af %d in %d out %d\n",
259    	    ifp->if_xname, ntohl(shim->shim_label), smpls->smpls_family,
260	    MPLS_LABEL_GET(smpls->smpls_label),
261	    MPLS_LABEL_GET(rt_mpls->mpls_label));
262#endif
263
264	/* Output iface is not MPLS-enabled */
265	if (!ISSET(ifp->if_xflags, IFXF_MPLS)) {
266#ifdef MPLS_DEBUG
267		printf("MPLS_DEBUG: interface %s not mpls enabled\n",
268		    ifp->if_xname);
269#endif
270		m_freem(m);
271		goto done;
272	}
273
274	(*ifp->if_ll_output)(ifp, m, smplstosa(smpls), rt);
275done:
276	if_put(ifp);
277	rtfree(rt);
278}
279
280void
281mpls_input_local(struct rtentry *rt, struct mbuf *m)
282{
283	struct ifnet *ifp;
284
285	ifp = if_get(rt->rt_ifidx);
286	if (ifp == NULL) {
287		m_freem(m);
288		return;
289	}
290
291	/* shortcut sending out the packet */
292	if (!ISSET(ifp->if_xflags, IFXF_MPLS))
293		(*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
294	else
295		(*ifp->if_ll_output)(ifp, m, rt->rt_gateway, rt);
296
297	if_put(ifp);
298}
299
300struct mbuf *
301mpls_ip_adjttl(struct mbuf *m, u_int8_t ttl)
302{
303	struct ip *ip;
304	uint16_t old, new;
305	uint32_t x;
306
307	if (m->m_len < sizeof(*ip)) {
308		m = m_pullup(m, sizeof(*ip));
309		if (m == NULL)
310			return (NULL);
311	}
312	ip = mtod(m, struct ip *);
313
314	old = htons(ip->ip_ttl << 8);
315	new = htons(ttl << 8);
316	x = ip->ip_sum + old - new;
317
318	ip->ip_ttl = ttl;
319	/* see pf_cksum_fixup() */
320	ip->ip_sum = (x) + (x >> 16);
321
322	return (m);
323}
324
325#ifdef INET6
326struct mbuf *
327mpls_ip6_adjttl(struct mbuf *m, u_int8_t ttl)
328{
329	struct ip6_hdr *ip6;
330
331	if (m->m_len < sizeof(*ip6)) {
332		m = m_pullup(m, sizeof(*ip6));
333		if (m == NULL)
334			return (NULL);
335	}
336	ip6 = mtod(m, struct ip6_hdr *);
337
338	ip6->ip6_hlim = ttl;
339
340	return (m);
341}
342#endif	/* INET6 */
343
344struct mbuf *
345mpls_do_error(struct mbuf *m, int type, int code, int destmtu)
346{
347	struct shim_hdr stack[MPLS_INKERNEL_LOOP_MAX];
348	struct sockaddr_mpls sa_mpls;
349	struct sockaddr_mpls *smpls;
350	struct rtentry *rt = NULL;
351	struct shim_hdr *shim;
352	struct in_ifaddr *ia;
353	struct icmp *icp;
354	struct ip *ip;
355	int nstk, error;
356
357	for (nstk = 0; nstk < MPLS_INKERNEL_LOOP_MAX; nstk++) {
358		if (m->m_len < sizeof(*shim) &&
359		    (m = m_pullup(m, sizeof(*shim))) == NULL)
360			return (NULL);
361		stack[nstk] = *mtod(m, struct shim_hdr *);
362		m_adj(m, sizeof(*shim));
363		if (MPLS_BOS_ISSET(stack[nstk].shim_label))
364			break;
365	}
366	shim = &stack[0];
367
368	if (m->m_len < sizeof(u_char) &&
369	    (m = m_pullup(m, sizeof(u_char))) == NULL)
370		return (NULL);
371	switch (*mtod(m, u_char *) >> 4) {
372	case IPVERSION:
373		if (m->m_len < sizeof(*ip) &&
374		    (m = m_pullup(m, sizeof(*ip))) == NULL)
375			return (NULL);
376		m = icmp_do_error(m, type, code, 0, destmtu);
377		if (m == NULL)
378			return (NULL);
379
380		if (icmp_do_exthdr(m, ICMP_EXT_MPLS, 1, stack,
381		    (nstk + 1) * sizeof(*shim)))
382			return (NULL);
383
384		/* set ip_src to something usable, based on the MPLS label */
385		bzero(&sa_mpls, sizeof(sa_mpls));
386		smpls = &sa_mpls;
387		smpls->smpls_family = AF_MPLS;
388		smpls->smpls_len = sizeof(*smpls);
389		smpls->smpls_label = shim->shim_label & MPLS_LABEL_MASK;
390
391		rt = rtalloc(smplstosa(smpls), RT_RESOLVE, 0);
392		if (!rtisvalid(rt)) {
393			rtfree(rt);
394			/* no entry for this label */
395			m_freem(m);
396			return (NULL);
397		}
398		if (rt->rt_ifa->ifa_addr->sa_family == AF_INET)
399			ia = ifatoia(rt->rt_ifa);
400		else {
401			/* XXX this needs fixing, if the MPLS is on an IP
402			 * less interface we need to find some other IP to
403			 * use as source.
404			 */
405			rtfree(rt);
406			m_freem(m);
407			return (NULL);
408		}
409		/* It is safe to dereference ``ia'' iff ``rt'' is valid. */
410		error = icmp_reflect(m, NULL, ia);
411		rtfree(rt);
412		if (error)
413			return (NULL);
414
415		ip = mtod(m, struct ip *);
416		/* stuff to fix up which is normally done in ip_output */
417		ip->ip_v = IPVERSION;
418		ip->ip_id = htons(ip_randomid());
419		in_hdr_cksum_out(m, NULL);
420
421		/* stolen from icmp_send() */
422		icp = (struct icmp *)(mtod(m, caddr_t) + sizeof(*ip));
423		icp->icmp_cksum = 0;
424		icp->icmp_cksum = in4_cksum(m, 0, sizeof(*ip),
425		    ntohs(ip->ip_len) - sizeof(*ip));
426
427		break;
428#ifdef INET6
429	case IPV6_VERSION >> 4:
430#endif
431	default:
432		m_freem(m);
433		return (NULL);
434	}
435
436	/* add mpls stack back to new packet */
437	M_PREPEND(m, (nstk + 1) * sizeof(*shim), M_NOWAIT);
438	if (m == NULL)
439		return (NULL);
440	m_copyback(m, 0, (nstk + 1) * sizeof(*shim), stack, M_NOWAIT);
441
442	/* change TTL to default */
443	shim = mtod(m, struct shim_hdr *);
444	shim->shim_label =
445	    (shim->shim_label & ~MPLS_TTL_MASK) | htonl(mpls_defttl);
446
447	return (m);
448}
449