dn_sched_wf2q.c revision 301772
1/*
2 * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
3 * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
4 * All rights reserved
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28/*
29 * $FreeBSD: stable/10/sys/netpfil/ipfw/dn_sched_wf2q.c 301772 2016-06-10 00:00:25Z truckman $
30 */
31
32#ifdef _KERNEL
33#include <sys/malloc.h>
34#include <sys/socket.h>
35#include <sys/socketvar.h>
36#include <sys/kernel.h>
37#include <sys/mbuf.h>
38#include <sys/module.h>
39#include <net/if.h>	/* IFNAMSIZ */
40#include <netinet/in.h>
41#include <netinet/ip_var.h>		/* ipfw_rule_ref */
42#include <netinet/ip_fw.h>	/* flow_id */
43#include <netinet/ip_dummynet.h>
44#include <netpfil/ipfw/dn_heap.h>
45#include <netpfil/ipfw/ip_dn_private.h>
46#ifdef NEW_AQM
47#include <netpfil/ipfw/dn_aqm.h>
48#endif
49#include <netpfil/ipfw/dn_sched.h>
50#else
51#include <dn_test.h>
52#endif
53
#ifndef MAX64
/*
 * Maximum of two 64-bit keys. The comparison is done on the difference
 * reinterpreted as a signed 64-bit value, so y wins whenever (y - x),
 * seen as int64_t, is positive.
 *
 * The whole expansion is parenthesized so the macro can be embedded in
 * a larger expression (e.g. MAX64(a, b) - c). Each argument may be
 * evaluated more than once: do not pass expressions with side effects.
 */
#define MAX64(x,y)  ((((int64_t)((y) - (x))) > 0) ? (y) : (x))
#endif
57
58/*
59 * timestamps are computed on 64 bit using fixed point arithmetic.
60 * LMAX_BITS, WMAX_BITS are the max number of bits for the packet len
61 * and sum of weights, respectively. FRAC_BITS is the number of
62 * fractional bits. We want FRAC_BITS >> WMAX_BITS to avoid too large
63 * errors when computing the inverse, FRAC_BITS < 32 so we can do 1/w
64 * using an unsigned 32-bit division, and to avoid wraparounds we need
65 * LMAX_BITS + WMAX_BITS + FRAC_BITS << 64
66 * As an example
67 * FRAC_BITS = 26, LMAX_BITS=14, WMAX_BITS = 19
68 */
#ifndef FRAC_BITS
#define FRAC_BITS    28 /* shift for fixed point arithmetic */
#endif
/*
 * ONE_FP has its own guard: overriding FRAC_BITS from outside must not
 * leave ONE_FP undefined.
 */
#ifndef ONE_FP
#define	ONE_FP	(1UL << FRAC_BITS)	/* 1.0 in fixed point */
#endif
73
74/*
75 * Private information for the scheduler instance:
76 * sch_heap (key is Finish time) returns the next queue to serve
77 * ne_heap (key is Start time) stores not-eligible queues
78 * idle_heap (key=start/finish time) stores idle flows. It must
79 *	support extract-from-middle.
80 * A flow is only in 1 of the three heaps.
81 * XXX todo: use a more efficient data structure, e.g. a tree sorted
82 * by F with min_subtree(S) in each node
83 */
84struct wf2qp_si {
85    struct dn_heap sch_heap;	/* top extract - key Finish  time */
86    struct dn_heap ne_heap;	/* top extract - key Start   time */
87    struct dn_heap idle_heap;	/* random extract - key Start=Finish time */
88    uint64_t V;			/* virtual time */
89    uint32_t inv_wsum;		/* inverse of sum of weights */
90    uint32_t wsum;		/* sum of weights */
91};
92
93struct wf2qp_queue {
94    struct dn_queue _q;
95    uint64_t S, F;		/* start time, finish time */
96    uint32_t inv_w;		/* ONE_FP / weight */
97    int32_t heap_pos;		/* position (index) of struct in heap */
98};
99
100/*
101 * This file implements a WF2Q+ scheduler as it has been in dummynet
102 * since 2000.
103 * The scheduler supports per-flow queues and has O(log N) complexity.
104 *
105 * WF2Q+ needs to drain entries from the idle heap so that we
106 * can keep the sum of weights up to date. We can do it whenever
107 * we get a chance, or periodically, or following some other
108 * strategy. The function idle_check() drains at most N elements
109 * from the idle heap.
110 */
111static void
112idle_check(struct wf2qp_si *si, int n, int force)
113{
114    struct dn_heap *h = &si->idle_heap;
115    while (n-- > 0 && h->elements > 0 &&
116		(force || DN_KEY_LT(HEAP_TOP(h)->key, si->V))) {
117	struct dn_queue *q = HEAP_TOP(h)->object;
118        struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
119
120        heap_extract(h, NULL);
121        /* XXX to let the flowset delete the queue we should
122	 * mark it as 'unused' by the scheduler.
123	 */
124        alg_fq->S = alg_fq->F + 1; /* Mark timestamp as invalid. */
125        si->wsum -= q->fs->fs.par[0];	/* adjust sum of weights */
126	if (si->wsum > 0)
127		si->inv_wsum = ONE_FP/si->wsum;
128    }
129}
130
131static int
132wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
133{
134    struct dn_fsk *fs = q->fs;
135    struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
136    struct wf2qp_queue *alg_fq;
137    uint64_t len = m->m_pkthdr.len;
138
139    if (m != q->mq.head) {
140	if (dn_enqueue(q, m, 0)) /* packet was dropped */
141	    return 1;
142	if (m != q->mq.head)	/* queue was already busy */
143	    return 0;
144    }
145
146    /* If reach this point, queue q was idle */
147    alg_fq = (struct wf2qp_queue *)q;
148
149    if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
150        /* F<S means timestamps are invalid ->brand new queue. */
151        alg_fq->S = si->V;		/* init start time */
152        si->wsum += fs->fs.par[0];	/* add weight of new queue. */
153	si->inv_wsum = ONE_FP/si->wsum;
154    } else { /* if it was idle then it was in the idle heap */
155        heap_extract(&si->idle_heap, q);
156        alg_fq->S = MAX64(alg_fq->F, si->V);	/* compute new S */
157    }
158    alg_fq->F = alg_fq->S + len * alg_fq->inv_w;
159
160    /* if nothing is backlogged, make sure this flow is eligible */
161    if (si->ne_heap.elements == 0 && si->sch_heap.elements == 0)
162        si->V = MAX64(alg_fq->S, si->V);
163
164    /*
165     * Look at eligibility. A flow is not eligibile if S>V (when
166     * this happens, it means that there is some other flow already
167     * scheduled for the same pipe, so the sch_heap cannot be
168     * empty). If the flow is not eligible we just store it in the
169     * ne_heap. Otherwise, we store in the sch_heap.
170     * Note that for all flows in sch_heap (SCH), S_i <= V,
171     * and for all flows in ne_heap (NEH), S_i > V.
172     * So when we need to compute max(V, min(S_i)) forall i in
173     * SCH+NEH, we only need to look into NEH.
174     */
175    if (DN_KEY_LT(si->V, alg_fq->S)) {
176        /* S>V means flow Not eligible. */
177        if (si->sch_heap.elements == 0)
178            D("++ ouch! not eligible but empty scheduler!");
179        heap_insert(&si->ne_heap, alg_fq->S, q);
180    } else {
181        heap_insert(&si->sch_heap, alg_fq->F, q);
182    }
183    return 0;
184}
185
186/* XXX invariant: sch > 0 || V >= min(S in neh) */
187static struct mbuf *
188wf2qp_dequeue(struct dn_sch_inst *_si)
189{
190	/* Access scheduler instance private data */
191	struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
192	struct mbuf *m;
193	struct dn_queue *q;
194	struct dn_heap *sch = &si->sch_heap;
195	struct dn_heap *neh = &si->ne_heap;
196	struct wf2qp_queue *alg_fq;
197
198	if (sch->elements == 0 && neh->elements == 0) {
199		/* we have nothing to do. We could kill the idle heap
200		 * altogether and reset V
201		 */
202		idle_check(si, 0x7fffffff, 1);
203		si->V = 0;
204		si->wsum = 0;	/* should be set already */
205		return NULL;	/* quick return if nothing to do */
206	}
207	idle_check(si, 1, 0);	/* drain something from the idle heap */
208
209	/* make sure at least one element is eligible, bumping V
210	 * and moving entries that have become eligible.
211	 * We need to repeat the first part twice, before and
212	 * after extracting the candidate, or enqueue() will
213	 * find the data structure in a wrong state.
214	 */
215  m = NULL;
216  for(;;) {
217	/*
218	 * Compute V = max(V, min(S_i)). Remember that all elements
219	 * in sch have by definition S_i <= V so if sch is not empty,
220	 * V is surely the max and we must not update it. Conversely,
221	 * if sch is empty we only need to look at neh.
222	 * We don't need to move the queues, as it will be done at the
223	 * next enqueue
224	 */
225	if (sch->elements == 0 && neh->elements > 0) {
226		si->V = MAX64(si->V, HEAP_TOP(neh)->key);
227	}
228	while (neh->elements > 0 &&
229		    DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) {
230		q = HEAP_TOP(neh)->object;
231		alg_fq = (struct wf2qp_queue *)q;
232		heap_extract(neh, NULL);
233		heap_insert(sch, alg_fq->F, q);
234	}
235	if (m) /* pkt found in previous iteration */
236		break;
237	/* ok we have at least one eligible pkt */
238	q = HEAP_TOP(sch)->object;
239	alg_fq = (struct wf2qp_queue *)q;
240	m = dn_dequeue(q);
241	heap_extract(sch, NULL); /* Remove queue from heap. */
242	si->V += (uint64_t)(m->m_pkthdr.len) * si->inv_wsum;
243	alg_fq->S = alg_fq->F;  /* Update start time. */
244	if (q->mq.head == 0) {	/* not backlogged any more. */
245		heap_insert(&si->idle_heap, alg_fq->F, q);
246	} else {			/* Still backlogged. */
247		/* Update F, store in neh or sch */
248		uint64_t len = q->mq.head->m_pkthdr.len;
249		alg_fq->F += len * alg_fq->inv_w;
250		if (DN_KEY_LEQ(alg_fq->S, si->V)) {
251			heap_insert(sch, alg_fq->F, q);
252		} else {
253			heap_insert(neh, alg_fq->S, q);
254		}
255	}
256    }
257	return m;
258}
259
260static int
261wf2qp_new_sched(struct dn_sch_inst *_si)
262{
263	struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
264	int ofs = offsetof(struct wf2qp_queue, heap_pos);
265
266	/* all heaps support extract from middle */
267	if (heap_init(&si->idle_heap, 16, ofs) ||
268	    heap_init(&si->sch_heap, 16, ofs) ||
269	    heap_init(&si->ne_heap, 16, ofs)) {
270		heap_free(&si->ne_heap);
271		heap_free(&si->sch_heap);
272		heap_free(&si->idle_heap);
273		return ENOMEM;
274	}
275	return 0;
276}
277
278static int
279wf2qp_free_sched(struct dn_sch_inst *_si)
280{
281	struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
282
283	heap_free(&si->sch_heap);
284	heap_free(&si->ne_heap);
285	heap_free(&si->idle_heap);
286
287	return 0;
288}
289
290static int
291wf2qp_new_fsk(struct dn_fsk *fs)
292{
293	ipdn_bound_var(&fs->fs.par[0], 1,
294		1, 100, "WF2Q+ weight");
295	return 0;
296}
297
298static int
299wf2qp_new_queue(struct dn_queue *_q)
300{
301	struct wf2qp_queue *q = (struct wf2qp_queue *)_q;
302
303	_q->ni.oid.subtype = DN_SCHED_WF2QP;
304	q->F = 0;	/* not strictly necessary */
305	q->S = q->F + 1;    /* mark timestamp as invalid. */
306        q->inv_w = ONE_FP / _q->fs->fs.par[0];
307	if (_q->mq.head != NULL) {
308		wf2qp_enqueue(_q->_si, _q, _q->mq.head);
309	}
310	return 0;
311}
312
313/*
314 * Called when the infrastructure removes a queue (e.g. flowset
315 * is reconfigured). Nothing to do if we did not 'own' the queue,
316 * otherwise remove it from the right heap and adjust the sum
317 * of weights.
318 */
319static int
320wf2qp_free_queue(struct dn_queue *q)
321{
322	struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
323	struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1);
324
325	if (alg_fq->S >= alg_fq->F + 1)
326		return 0;	/* nothing to do, not in any heap */
327	si->wsum -= q->fs->fs.par[0];
328	if (si->wsum > 0)
329		si->inv_wsum = ONE_FP/si->wsum;
330
331	/* extract from the heap. XXX TODO we may need to adjust V
332	 * to make sure the invariants hold.
333	 */
334	if (q->mq.head == NULL) {
335		heap_extract(&si->idle_heap, q);
336	} else if (DN_KEY_LT(si->V, alg_fq->S)) {
337		heap_extract(&si->ne_heap, q);
338	} else {
339		heap_extract(&si->sch_heap, q);
340	}
341	return 0;
342}
343
344/*
345 * WF2Q+ scheduler descriptor
346 * contains the type of the scheduler, the name, the size of the
347 * structures and function pointers.
348 */
349static struct dn_alg wf2qp_desc = {
350	_SI( .type = ) DN_SCHED_WF2QP,
351	_SI( .name = ) "WF2Q+",
352	_SI( .flags = ) DN_MULTIQUEUE,
353
354	/* we need extra space in the si and the queue */
355	_SI( .schk_datalen = ) 0,
356	_SI( .si_datalen = ) sizeof(struct wf2qp_si),
357	_SI( .q_datalen = ) sizeof(struct wf2qp_queue) -
358				sizeof(struct dn_queue),
359
360	_SI( .enqueue = ) wf2qp_enqueue,
361	_SI( .dequeue = ) wf2qp_dequeue,
362
363	_SI( .config = )  NULL,
364	_SI( .destroy = )  NULL,
365	_SI( .new_sched = ) wf2qp_new_sched,
366	_SI( .free_sched = ) wf2qp_free_sched,
367
368	_SI( .new_fsk = ) wf2qp_new_fsk,
369	_SI( .free_fsk = )  NULL,
370
371	_SI( .new_queue = ) wf2qp_new_queue,
372	_SI( .free_queue = ) wf2qp_free_queue,
373#ifdef NEW_AQM
374	_SI( .getconfig = )  NULL,
375#endif
376
377};
378
379
380DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc);
381