/*-
 * Copyright (c) 2016-2018 Netflix, Inc.
 * Copyright (c) 2016-2021 Mellanox Technologies.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/ethernet.h>
#include <net/bpf.h>
#include <net/vnet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/infiniband.h>
#include <net/if_lagg.h>
#include <net/pfil.h>

#include <netinet/in.h>
#include <netinet/in_kdtrace.h>
#include <netinet/ip6.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_pcb.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_hpts.h>
#include <netinet/tcp_log_buf.h>

static void
build_ack_entry(struct tcp_ackent *ae, struct tcphdr *th, struct mbuf *m,
    uint32_t *ts_ptr, uint16_t iptos)
{
	/*
	 * Given a TCP ACK, summarize it down into the small TCP ACK
	 * entry.
	 */
	ae->timestamp = m->m_pkthdr.rcv_tstmp;
	ae->flags = 0;
	if (m->m_flags & M_TSTMP_LRO)
		ae->flags |= TSTMP_LRO;
	else if (m->m_flags & M_TSTMP)
		ae->flags |= TSTMP_HDWR;
	ae->seq = th->th_seq;
	ae->ack = th->th_ack;
	ae->flags |= tcp_get_flags(th);
	if (ts_ptr != NULL) {
		ae->ts_value = ntohl(ts_ptr[1]);
		ae->ts_echo = ntohl(ts_ptr[2]);
		ae->flags |= HAS_TSTMP;
	}
	ae->win = th->th_win;
	ae->codepoint = iptos;
}

static inline bool
tcp_lro_ack_valid(struct mbuf *m, struct tcphdr *th, uint32_t **ppts, bool *other_opts)
{
	/*
	 * This function returns two bits of valuable information:
	 * a) Whether the segment can be ACK-compressed. We can
	 *    compress it if it carries no options or just a
	 *    timestamp option, and of course the th_flags must
	 *    be correct as well.
	 * b) Whether other options, such as SACK, are present. This
	 *    is used to determine if we want to wake up or not.
	 */
	bool ret = true;

	switch (th->th_off << 2) {
	case (sizeof(*th) + TCPOLEN_TSTAMP_APPA):
		*ppts = (uint32_t *)(th + 1);
		/* Check if we have only one timestamp option. */
		if (**ppts == TCP_LRO_TS_OPTION)
			*other_opts = false;
		else {
			*other_opts = true;
			ret = false;
		}
		break;
	case (sizeof(*th)):
		/* No options. */
		*ppts = NULL;
		*other_opts = false;
		break;
	default:
		*ppts = NULL;
		*other_opts = true;
		ret = false;
		break;
	}
	/* For ACKCMP we only accept ACK, PUSH, ECE and CWR. */
	if ((tcp_get_flags(th) & ~(TH_ACK | TH_PUSH | TH_ECE | TH_CWR)) != 0)
		ret = false;
	/* If it carries data we cannot compress it. */
	if (m->m_pkthdr.lro_tcp_d_len)
		ret = false;

	/* ACK flag must be set. */
	if (!(tcp_get_flags(th) & TH_ACK))
		ret = false;
	return (ret);
}

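/*
 * Ask the TCP stack whether it wants an early wakeup for this connection
 * even though only compressible ACKs have been queued so far.
 */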
static bool
tcp_lro_check_wake_status(struct tcpcb *tp)
{

	if (tp->t_fb->tfb_early_wake_check != NULL)
		return ((tp->t_fb->tfb_early_wake_check)(tp));
	return (false);
}

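/*
 * Emit a black box log record describing this LRO event when BB logging
 * is enabled on the connection.
 */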
static void
tcp_lro_log(struct tcpcb *tp, const struct lro_ctrl *lc,
    const struct lro_entry *le, const struct mbuf *m,
    int frm, int32_t tcp_data_len, uint32_t th_seq,
    uint32_t th_ack, uint16_t th_win)
{
	if (tcp_bblogging_on(tp)) {
		union tcp_log_stackspecific log;
		struct timeval tv, btv;
		uint32_t cts;

		cts = tcp_get_usecs(&tv);
		memset(&log, 0, sizeof(union tcp_log_stackspecific));
		log.u_bbr.flex8 = frm;
		log.u_bbr.flex1 = tcp_data_len;
		if (m)
			log.u_bbr.flex2 = m->m_pkthdr.len;
		else
			log.u_bbr.flex2 = 0;
		if (le->m_head) {
			log.u_bbr.flex3 = le->m_head->m_pkthdr.lro_nsegs;
			log.u_bbr.flex4 = le->m_head->m_pkthdr.lro_tcp_d_len;
			log.u_bbr.flex5 = le->m_head->m_pkthdr.len;
			log.u_bbr.delRate = le->m_head->m_flags;
			log.u_bbr.rttProp = le->m_head->m_pkthdr.rcv_tstmp;
		}
		log.u_bbr.inflight = th_seq;
		log.u_bbr.delivered = th_ack;
		log.u_bbr.timeStamp = cts;
		log.u_bbr.epoch = le->next_seq;
		log.u_bbr.lt_epoch = le->ack_seq;
		log.u_bbr.pacing_gain = th_win;
		log.u_bbr.cwnd_gain = le->window;
		log.u_bbr.lost = curcpu;
		log.u_bbr.cur_del_rate = (uintptr_t)m;
		log.u_bbr.bw_inuse = (uintptr_t)le->m_head;
		bintime2timeval(&lc->lro_last_queue_time, &btv);
		log.u_bbr.flex6 = tcp_tv_to_usectick(&btv);
		log.u_bbr.flex7 = le->compressed;
		log.u_bbr.pacing_gain = le->uncompressed;
		if (in_epoch(net_epoch_preempt))
			log.u_bbr.inhpts = 1;
		else
			log.u_bbr.inhpts = 0;
		TCP_LOG_EVENTP(tp, NULL, &tptosocket(tp)->so_rcv,
		    &tptosocket(tp)->so_snd,
		    TCP_LOG_LRO, 0, 0, &log, false, &tv);
	}
}

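/*
 * Return an mbuf that can hold another compressed ACK entry: either the
 * last M_ACKCMP mbuf already on the connection's input queue (when
 * appending to it is allowed and it has room), or a freshly allocated
 * one.  *new_m is set to 1 when a new mbuf was allocated, 0 otherwise.
 */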
static struct mbuf *
tcp_lro_get_last_if_ackcmp(struct lro_ctrl *lc, struct lro_entry *le,
    struct tcpcb *tp, int32_t *new_m, bool can_append_old_cmp)
{
	struct mbuf *m;

	/* Look at the last mbuf if any in queue */
	if (can_append_old_cmp) {
		m = STAILQ_LAST(&tp->t_inqueue, mbuf, m_stailqpkt);
		if (m != NULL && (m->m_flags & M_ACKCMP) != 0) {
			if (M_TRAILINGSPACE(m) >= sizeof(struct tcp_ackent)) {
				tcp_lro_log(tp, lc, le, NULL, 23, 0, 0, 0, 0);
				*new_m = 0;
				counter_u64_add(tcp_extra_mbuf, 1);
				return (m);
			} else {
				/* Mark we ran out of space */
				tp->t_flags2 |= TF2_MBUF_L_ACKS;
			}
		}
	}
	/* Decide mbuf size. */
	tcp_lro_log(tp, lc, le, NULL, 21, 0, 0, 0, 0);
	if (tp->t_flags2 & TF2_MBUF_L_ACKS)
		m = m_getcl(M_NOWAIT, MT_DATA, M_ACKCMP | M_PKTHDR);
	else
		m = m_gethdr(M_NOWAIT, MT_DATA);

	if (__predict_false(m == NULL)) {
		counter_u64_add(tcp_would_have_but, 1);
		return (NULL);
	}
	counter_u64_add(tcp_comp_total, 1);
	m->m_pkthdr.rcvif = lc->ifp;
	m->m_flags |= M_ACKCMP;
	*new_m = 1;
	return (m);
}

/*
 * Do the BPF tap for either ACK_CMP packets or MBUF QUEUE type packets
 * and strip everything but the IPv4/IPv6 header.
 */
static bool
do_bpf_strip_and_compress(struct tcpcb *tp, struct lro_ctrl *lc,
    struct lro_entry *le, struct mbuf **pp, struct mbuf **cmp,
    struct mbuf **mv_to, bool *should_wake, bool bpf_req, bool lagg_bpf_req,
    struct ifnet *lagg_ifp, bool can_append_old_cmp)
{
	union {
		void *ptr;
		struct ip *ip4;
		struct ip6_hdr *ip6;
	} l3;
	struct mbuf *m;
	struct mbuf *nm;
	struct tcphdr *th;
	struct tcp_ackent *ack_ent;
	uint32_t *ts_ptr;
	int32_t n_mbuf;
	bool other_opts, can_compress;
	uint8_t lro_type;
	uint16_t iptos;
	int tcp_hdr_offset;
	int idx;

	/* Get current mbuf. */
	m = *pp;

	/* Let BPF see the packet. */
	if (__predict_false(bpf_req))
		ETHER_BPF_MTAP(lc->ifp, m);

	if (__predict_false(lagg_bpf_req))
		ETHER_BPF_MTAP(lagg_ifp, m);

	tcp_hdr_offset = m->m_pkthdr.lro_tcp_h_off;
	lro_type = le->inner.data.lro_type;
	switch (lro_type) {
	case LRO_TYPE_NONE:
		lro_type = le->outer.data.lro_type;
		switch (lro_type) {
		case LRO_TYPE_IPV4_TCP:
			tcp_hdr_offset -= sizeof(*le->outer.ip4);
			m->m_pkthdr.lro_etype = ETHERTYPE_IP;
			IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
			    le->outer.ip4, NULL);
			break;
		case LRO_TYPE_IPV6_TCP:
			tcp_hdr_offset -= sizeof(*le->outer.ip6);
			m->m_pkthdr.lro_etype = ETHERTYPE_IPV6;
			IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
			    NULL, le->outer.ip6);
			break;
		default:
			goto compressed;
		}
		break;
	case LRO_TYPE_IPV4_TCP:
		switch (le->outer.data.lro_type) {
		case LRO_TYPE_IPV4_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
			    le->outer.ip4, NULL);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip4, NULL,
			    le->outer.udp);
			break;
		case LRO_TYPE_IPV6_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
			    NULL, le->outer.ip6);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip6, NULL,
			    le->outer.udp);
			break;
		default:
			__assert_unreachable();
			break;
		}
		tcp_hdr_offset -= sizeof(*le->outer.ip4);
		m->m_pkthdr.lro_etype = ETHERTYPE_IP;
		IP_PROBE(receive, NULL, NULL, le->inner.ip4, NULL,
		    le->inner.ip4, NULL);
		break;
	case LRO_TYPE_IPV6_TCP:
		switch (le->outer.data.lro_type) {
		case LRO_TYPE_IPV4_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip4, lc->ifp,
			    le->outer.ip4, NULL);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip4, NULL,
			    le->outer.udp);
			break;
		case LRO_TYPE_IPV6_UDP:
			IP_PROBE(receive, NULL, NULL, le->outer.ip6, lc->ifp,
			    NULL, le->outer.ip6);
			UDP_PROBE(receive, NULL, NULL, le->outer.ip6, NULL,
			    le->outer.udp);
			break;
		default:
			__assert_unreachable();
			break;
		}
		tcp_hdr_offset -= sizeof(*le->outer.ip6);
		m->m_pkthdr.lro_etype = ETHERTYPE_IPV6;
		IP_PROBE(receive, NULL, NULL, le->inner.ip6, NULL, NULL,
		    le->inner.ip6);
		break;
	default:
		goto compressed;
	}

	MPASS(tcp_hdr_offset >= 0);

	m_adj(m, tcp_hdr_offset);
	m->m_flags |= M_LRO_EHDRSTRP;
	m->m_flags &= ~M_ACKCMP;
	m->m_pkthdr.lro_tcp_h_off -= tcp_hdr_offset;

	th = tcp_lro_get_th(m);

	th->th_sum = 0;		/* TCP checksum is valid. */
	tcp_fields_to_host(th);
	TCP_PROBE5(receive, NULL, tp, m, tp, th);

	/* Check if the ACK can be compressed. */
	can_compress = tcp_lro_ack_valid(m, th, &ts_ptr, &other_opts);

	/* Now let's look at the should-wake state. */
	if ((other_opts == true) &&
	    ((tp->t_flags2 & TF2_DONT_SACK_QUEUE) == 0)) {
		/*
		 * If there are other options (SACK?) and the
		 * TCP endpoint has not expressly told us it does
		 * not care about SACKs, then we should wake up.
		 */
		*should_wake = true;
	} else if (*should_wake == false) {
		/* If we are not waking up already, check for a wakeup override. */
		*should_wake = tcp_lro_check_wake_status(tp);
	}
	/* Is the ACK compressible? */
	if (can_compress == false)
		goto done;
	/* Does the TCP endpoint support ACK compression? */
	if ((tp->t_flags2 & TF2_MBUF_ACKCMP) == 0)
		goto done;

	/* Let's get the TOS/traffic class field. */
	l3.ptr = mtod(m, void *);
	switch (lro_type) {
	case LRO_TYPE_IPV4_TCP:
		iptos = l3.ip4->ip_tos;
		break;
	case LRO_TYPE_IPV6_TCP:
		iptos = IPV6_TRAFFIC_CLASS(l3.ip6);
		break;
	default:
		iptos = 0;	/* Keep compiler happy. */
		break;
	}
	/* Now let's get space if we don't have some already. */
	if (*cmp == NULL) {
new_one:
		nm = tcp_lro_get_last_if_ackcmp(lc, le, tp, &n_mbuf,
		    can_append_old_cmp);
		if (__predict_false(nm == NULL))
			goto done;
		*cmp = nm;
		if (n_mbuf) {
			/*
			 * Link the new cmp ACK mbuf into our in-order place;
			 * first set its next pointer to where we are.
			 */
			nm->m_nextpkt = m;
			(*pp) = nm;
			/*
			 * Set it up so mv_to is advanced to our
			 * compressed ack. This way the caller can
			 * advance pp to the right place.
			 */
			*mv_to = nm;
			/*
			 * Advance it here locally as well.
			 */
			pp = &nm->m_nextpkt;
		}
	} else {
		/* We already have one we are working on. */
		nm = *cmp;
		if (M_TRAILINGSPACE(nm) < sizeof(struct tcp_ackent)) {
			/* We ran out of space. */
			tp->t_flags2 |= TF2_MBUF_L_ACKS;
			goto new_one;
		}
	}
	MPASS(M_TRAILINGSPACE(nm) >= sizeof(struct tcp_ackent));
	counter_u64_add(tcp_inp_lro_compressed, 1);
	le->compressed++;
	/* We can append to the one on the tail. */
	ack_ent = mtod(nm, struct tcp_ackent *);
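	/* Entries are packed back-to-back; the next free slot follows the current data. */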
	idx = (nm->m_len / sizeof(struct tcp_ackent));
	build_ack_entry(&ack_ent[idx], th, m, ts_ptr, iptos);

	/* Bump the size of both pkt-hdr and len */
	nm->m_len += sizeof(struct tcp_ackent);
	nm->m_pkthdr.len += sizeof(struct tcp_ackent);
compressed:
	/* Advance to next mbuf before freeing. */
	*pp = m->m_nextpkt;
	m->m_nextpkt = NULL;
	m_freem(m);
	return (true);
done:
	counter_u64_add(tcp_uncomp_total, 1);
	le->uncompressed++;
	return (false);
}

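/*
 * Move all mbufs collected in the LRO entry onto the tcpcb's input queue.
 */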
static void
tcp_queue_pkts(struct tcpcb *tp, struct lro_entry *le)
{

	INP_WLOCK_ASSERT(tptoinpcb(tp));

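	/*
	 * Wrap the LRO entry's packet chain in a temporary STAILQ head so
	 * the whole chain can be concatenated onto t_inqueue in one step.
	 */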
	STAILQ_HEAD(, mbuf) q = { le->m_head,
	    &STAILQ_NEXT(le->m_last_mbuf, m_stailqpkt) };
	STAILQ_CONCAT(&tp->t_inqueue, &q);
	le->m_head = NULL;
	le->m_last_mbuf = NULL;
}

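/*
 * Look up the connection matching the parsed addresses and ports and
 * return its tcpcb write-locked, or NULL if no matching pcb exists.
 */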
static struct tcpcb *
tcp_lro_lookup(struct ifnet *ifp, struct lro_parser *pa)
{
	struct inpcb *inp;

	CURVNET_ASSERT_SET();
	switch (pa->data.lro_type) {
#ifdef INET6
	case LRO_TYPE_IPV6_TCP:
		inp = in6_pcblookup(&V_tcbinfo,
		    &pa->data.s_addr.v6,
		    pa->data.s_port,
		    &pa->data.d_addr.v6,
		    pa->data.d_port,
		    INPLOOKUP_WLOCKPCB,
		    ifp);
		break;
#endif
#ifdef INET
	case LRO_TYPE_IPV4_TCP:
		inp = in_pcblookup(&V_tcbinfo,
		    pa->data.s_addr.v4,
		    pa->data.s_port,
		    pa->data.d_addr.v4,
		    pa->data.d_port,
		    INPLOOKUP_WLOCKPCB,
		    ifp);
		break;
#endif
	default:
		return (NULL);
	}

	return (intotcpcb(inp));
}

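/*
 * Flush a single LRO entry directly to the owning TCP connection: tap BPF,
 * strip the packets down to their IP/TCP headers, compress pure ACKs into
 * M_ACKCMP mbufs where possible, and append everything to the connection's
 * input queue, waking the stack when required.
 */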
static int
_tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
{
	struct tcpcb *tp;
	struct mbuf **pp, *cmp, *mv_to;
	struct ifnet *lagg_ifp;
	bool bpf_req, lagg_bpf_req, should_wake, can_append_old_cmp;

	/* Check if the packet doesn't belong to our network interface. */
	if ((tcplro_stacks_wanting_mbufq == 0) ||
	    (le->outer.data.vlan_id != 0) ||
	    (le->inner.data.lro_type != LRO_TYPE_NONE))
		return (TCP_LRO_CANNOT);

#ifdef INET6
	/*
	 * Be proactive about an unspecified IPv6 source address. As
	 * we use all-zero to indicate an unbound/unconnected pcb,
	 * an unspecified IPv6 address can be used to confuse us.
	 *
	 * Note that packets with an unspecified IPv6 destination are
	 * already dropped in ip6_input.
	 */
	if (__predict_false(le->outer.data.lro_type == LRO_TYPE_IPV6_TCP &&
	    IN6_IS_ADDR_UNSPECIFIED(&le->outer.data.s_addr.v6)))
		return (TCP_LRO_CANNOT);

	if (__predict_false(le->inner.data.lro_type == LRO_TYPE_IPV6_TCP &&
	    IN6_IS_ADDR_UNSPECIFIED(&le->inner.data.s_addr.v6)))
		return (TCP_LRO_CANNOT);
#endif

	CURVNET_SET(lc->ifp->if_vnet);
	/*
	 * Ensure that there are no packet filter hooks which would normally
	 * be triggered in ether_demux(), ip_input(), or ip6_input().
	 */
	if (
#ifdef INET
	    PFIL_HOOKED_IN(V_inet_pfil_head) ||
#endif
#ifdef INET6
	    PFIL_HOOKED_IN(V_inet6_pfil_head) ||
#endif
	    PFIL_HOOKED_IN(V_link_pfil_head)) {
		CURVNET_RESTORE();
		return (TCP_LRO_CANNOT);
	}

	/* Look up the inp, if any.  Returns a locked TCP inpcb. */
	tp = tcp_lro_lookup(lc->ifp,
	    (le->inner.data.lro_type == LRO_TYPE_NONE) ? &le->outer : &le->inner);
	CURVNET_RESTORE();
	if (tp == NULL)
		return (TCP_LRO_CANNOT);

	counter_u64_add(tcp_inp_lro_locks_taken, 1);

	/* Check if the inp is dead, Jim. */
	if (tp->t_state == TCPS_TIME_WAIT) {
		INP_WUNLOCK(tptoinpcb(tp));
		return (TCP_LRO_CANNOT);
	}
	if (tp->t_lro_cpu == HPTS_CPU_NONE && lc->lro_cpu_is_set == 1)
		tp->t_lro_cpu = lc->lro_last_cpu;
	/* Check if the transport doesn't support the needed optimizations. */
	if ((tp->t_flags2 & (TF2_SUPPORTS_MBUFQ | TF2_MBUF_ACKCMP)) == 0) {
		INP_WUNLOCK(tptoinpcb(tp));
		return (TCP_LRO_CANNOT);
	}

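	/* A wakeup is needed unless the stack has marked its queue ready. */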
	if (tp->t_flags2 & TF2_MBUF_QUEUE_READY)
		should_wake = false;
	else
		should_wake = true;
	/* Check if packets should be tapped to BPF. */
	bpf_req = bpf_peers_present(lc->ifp->if_bpf);
	lagg_bpf_req = false;
	lagg_ifp = NULL;
	if (lc->ifp->if_type == IFT_IEEE8023ADLAG ||
	    lc->ifp->if_type == IFT_INFINIBANDLAG) {
		struct lagg_port *lp = lc->ifp->if_lagg;
		struct lagg_softc *sc = lp->lp_softc;

		lagg_ifp = sc->sc_ifp;
		if (lagg_ifp != NULL)
			lagg_bpf_req = bpf_peers_present(lagg_ifp->if_bpf);
	}

	/* Strip and compress all the incoming packets. */
	can_append_old_cmp = true;
	cmp = NULL;
	for (pp = &le->m_head; *pp != NULL; ) {
		mv_to = NULL;
		if (do_bpf_strip_and_compress(tp, lc, le, pp, &cmp, &mv_to,
		    &should_wake, bpf_req, lagg_bpf_req, lagg_ifp,
		    can_append_old_cmp) == false) {
			/* Advance to next mbuf. */
			pp = &(*pp)->m_nextpkt;
			/*
			 * Once we have appended we can't look in the pending
			 * inbound packets for a compressed ack to append to.
			 */
			can_append_old_cmp = false;
			/*
			 * Once we append we also need to stop adding to any
			 * compressed ack we were remembering. A new cmp
			 * ack will be required.
			 */
			cmp = NULL;
			tcp_lro_log(tp, lc, le, NULL, 25, 0, 0, 0, 0);
		} else if (mv_to != NULL) {
			/* We are asked to move pp up */
			pp = &mv_to->m_nextpkt;
			tcp_lro_log(tp, lc, le, NULL, 24, 0, 0, 0, 0);
		} else
			tcp_lro_log(tp, lc, le, NULL, 26, 0, 0, 0, 0);
	}
	/* Update "m_last_mbuf", if any. */
	if (pp == &le->m_head)
		le->m_last_mbuf = *pp;
	else
		le->m_last_mbuf = __containerof(pp, struct mbuf, m_nextpkt);

	/* Check if any data mbufs are left. */
	if (le->m_head != NULL) {
		counter_u64_add(tcp_inp_lro_direct_queue, 1);
		tcp_lro_log(tp, lc, le, NULL, 22, 1, tp->t_flags2, 0, 1);
		tcp_queue_pkts(tp, le);
	}
	if (should_wake) {
		/* Wakeup */
		counter_u64_add(tcp_inp_lro_wokeup_queue, 1);
		if ((*tp->t_fb->tfb_do_queued_segments)(tp, 0))
			/* TCP cb gone and unlocked. */
			return (0);
	}
	INP_WUNLOCK(tptoinpcb(tp));

	return (0);	/* Success. */
}

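/* Register the HPTS-aware flush handler with the generic LRO code. */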
void
tcp_lro_hpts_init(void)
{
	tcp_lro_flush_tcphpts = _tcp_lro_flush_tcphpts;
}

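/* Unregister the flush handler so LRO stops using the HPTS path. */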
void
tcp_lro_hpts_uninit(void)
{
	atomic_store_ptr(&tcp_lro_flush_tcphpts, NULL);
}
