/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2006, Myricom Inc.
 * Copyright (c) 2008, Intel Corporation.
 * Copyright (c) 2016-2021 Mellanox Technologies.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
32
33#ifndef _TCP_LRO_H_
34#define _TCP_LRO_H_
35
36#include <sys/time.h>
37
#ifndef TCP_LRO_ENTRIES
/*
 * Default number of LRO entries per RX queue.  The surrounding #ifndef
 * keeps any value that was already defined before this point.
 */
#define	TCP_LRO_ENTRIES	8
#endif
42
/*
 * Flags for an ACK entry used for compression.
 * The bottom 8 bits hold the th_flags.
 * LRO itself adds only the TSTMP flags, to
 * indicate whether either type of timestamp
 * is filled in, and the HAS_TSTMP flag, to
 * indicate that the TCP timestamp option
 * is valid.
 *
 * The other 5 flag bits are for processing
 * by a stack.
 */
#define TSTMP_LRO		0x0100
#define TSTMP_HDWR		0x0200
#define HAS_TSTMP		0x0400
59
/* Forward declaration of the struct tag only; it is not defined in this header. */
struct inpcb;
61
62union lro_address {
63	u_long raw[1];
64	struct {
65		uint16_t lro_type;	/* internal */
66#define	LRO_TYPE_NONE     0
67#define	LRO_TYPE_IPV4_TCP 1
68#define	LRO_TYPE_IPV6_TCP 2
69#define	LRO_TYPE_IPV4_UDP 3
70#define	LRO_TYPE_IPV6_UDP 4
71		uint16_t vlan_id;	/* VLAN identifier */
72		uint16_t s_port;	/* source TCP/UDP port */
73		uint16_t d_port;	/* destination TCP/UDP port */
74		uint32_t vxlan_vni;	/* VXLAN virtual network identifier */
75		union {
76#ifdef INET
77			struct in_addr v4;
78#endif
79#ifdef INET6
80			struct in6_addr v6;
81#endif
82		} s_addr;	/* source IPv4/IPv6 address */
83		union {
84#ifdef INET
85			struct in_addr v4;
86#endif
87#ifdef INET6
88			struct in6_addr v6;
89#endif
90		} d_addr;	/* destination IPv4/IPv6 address */
91	};
92} __aligned(sizeof(u_long));
93
/* Number of unsigned longs that make up one union lro_address. */
#define	LRO_RAW_ADDRESS_MAX \
    (sizeof(union lro_address) / sizeof(u_long))
96
97/* Optimize address comparison by comparing one unsigned long at a time: */
98
99static inline bool
100lro_address_compare(const union lro_address *pa, const union lro_address *pb)
101{
102	if (pa->lro_type == LRO_TYPE_NONE && pb->lro_type == LRO_TYPE_NONE) {
103		return (true);
104	} else for (unsigned i = 0; i < LRO_RAW_ADDRESS_MAX; i++) {
105		if (pa->raw[i] != pb->raw[i])
106			return (false);
107	}
108	return (true);
109}
110
111struct lro_parser {
112	union lro_address data;
113	union {
114		uint8_t *l3;
115		struct ip *ip4;
116		struct ip6_hdr *ip6;
117	};
118	union {
119		uint8_t *l4;
120		struct tcphdr *tcp;
121		struct udphdr *udp;
122	};
123	uint16_t total_hdr_len;
124};
125
126/* This structure is zeroed frequently, try to keep it small. */
127struct lro_entry {
128	LIST_ENTRY(lro_entry)	next;
129	LIST_ENTRY(lro_entry)	hash_next;
130	struct mbuf		*m_head;
131	struct mbuf		*m_tail;
132	struct mbuf		*m_last_mbuf;
133	struct lro_parser	outer;
134	struct lro_parser	inner;
135	uint32_t		next_seq;	/* tcp_seq */
136	uint32_t		ack_seq;	/* tcp_seq */
137	uint32_t		tsval;
138	uint32_t		tsecr;
139	uint16_t		compressed;
140	uint16_t		uncompressed;
141	uint16_t		window;
142	uint16_t		timestamp;	/* flag, not a TCP hdr field. */
143	struct bintime		alloc_time;	/* time when entry was allocated */
144};
145
/* Declares struct lro_head, the head of a list of struct lro_entry. */
LIST_HEAD(lro_head, lro_entry);
147
/*
 * Pairs an mbuf pointer with a 64-bit value.  struct lro_ctrl points at
 * these records through its lro_mbuf_data member.
 * NOTE(review): going by the name, "seq" is presumably the sort key --
 * confirm against the implementation.
 */
struct lro_mbuf_sort {
	uint64_t seq;
	struct mbuf *mb;
};
152
153/* NB: This is part of driver structs. */
154struct lro_ctrl {
155	struct ifnet	*ifp;
156	struct lro_mbuf_sort *lro_mbuf_data;
157	struct bintime	lro_last_queue_time;	/* last time data was queued */
158	uint64_t	lro_queued;
159	uint64_t	lro_flushed;
160	uint64_t	lro_bad_csum;
161	unsigned	lro_cnt;
162	unsigned	lro_mbuf_count;
163	unsigned	lro_mbuf_max;
164	unsigned short	lro_ackcnt_lim;		/* max # of aggregated ACKs */
165	unsigned 	lro_length_lim;		/* max len of aggregated data */
166
167	u_long		lro_hashsz;
168	struct lro_head	*lro_hash;
169	struct lro_head	lro_active;
170	struct lro_head	lro_free;
171};
172
/*
 * One ACK entry.  The explicit trailing pad brings the fields up to a
 * 32 byte boundary.
 */
struct tcp_ackent {
	uint64_t timestamp;	/* hardware or software timestamp, valid if TSTMP_LRO or TSTMP_HDWR set */
	uint32_t seq;		/* th_seq value */
	uint32_t ack;		/* th_ack value */
	uint32_t ts_value;	/* If ts option value, valid if HAS_TSTMP is set */
	uint32_t ts_echo;	/* If ts option echo, valid if HAS_TSTMP is set */
	uint16_t win;		/* TCP window */
	uint16_t flags;		/* Flags to say if TS is present and type of timestamp and th_flags */
	uint8_t  codepoint;	/* IP level codepoint including ECN bits */
	uint8_t  ack_val_set;	/* Classification of ack used by the stack */
	uint8_t  pad[2];	/* To 32 byte boundary */
};
185
/* We use two M_PROTO flags on the mbuf */
#define M_ACKCMP	M_PROTO4   /* Indicates LRO is sending in an ACK-compression mbuf */
#define M_LRO_EHDRSTRP	M_PROTO6   /* Indicates that LRO has stripped the Ethernet header */

#define	TCP_LRO_LENGTH_MAX	(65535 - 255)	/* safe value with room for outer headers */
#define	TCP_LRO_ACKCNT_MAX	65535		/* unlimited */
192
/*
 * LRO entry points.  Only the declarations live in this header; the
 * definitions are elsewhere.
 */
int tcp_lro_init(struct lro_ctrl *);
int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned);
void tcp_lro_free(struct lro_ctrl *);
void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *);
void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
void tcp_lro_flush_all(struct lro_ctrl *);
int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *);
void tcp_lro_reg_mbufq(void);
void tcp_lro_dereg_mbufq(void);
203
/*
 * LRO status codes.
 * NOTE(review): presumably returned by tcp_lro_rx() -- confirm against
 * the implementation.  Negative values are parenthesized so they expand
 * safely inside larger expressions.
 */
#define	TCP_LRO_NO_ENTRIES	(-2)
#define	TCP_LRO_CANNOT		(-1)
#define	TCP_LRO_NOT_SUPPORTED	1
207
208#endif /* _TCP_LRO_H_ */
209