/*-
 * Copyright (c) 2007-2009 Kip Macy <kmacy@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */

#ifndef	_SYS_BUF_RING_H_
#define	_SYS_BUF_RING_H_

#include <machine/cpu.h>

#if defined(INVARIANTS) && !defined(DEBUG_BUFRING)
#define DEBUG_BUFRING 1
#endif

#ifdef DEBUG_BUFRING
#include <sys/lock.h>
#include <sys/mutex.h>
#endif

struct buf_ring {
	volatile uint32_t	br_prod_head;
	volatile uint32_t	br_prod_tail;
	int              	br_prod_size;
	int              	br_prod_mask;
	uint64_t		br_drops;
	volatile uint32_t	br_cons_head __aligned(CACHE_LINE_SIZE);
	volatile uint32_t	br_cons_tail;
	int		 	br_cons_size;
	int              	br_cons_mask;
#ifdef DEBUG_BUFRING
	struct mtx		*br_lock;
#endif
	void			*br_ring[0] __aligned(CACHE_LINE_SIZE);
};
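
/*
 * The producer fields and the consumer fields sit on separate cache
 * lines (via the __aligned directives above) so that enqueueing and
 * dequeueing CPUs do not false-share.  Indices wrap by masking with
 * br_prod_mask/br_cons_mask (size - 1), so the ring size must be a
 * power of two.
 *
 * Illustrative allocation sketch (the softc and its field names are
 * hypothetical, not part of this header):
 *
 *	struct my_softc {
 *		struct mtx	sc_tx_mtx;	-- single-consumer lock
 *		struct buf_ring	*sc_br;
 *	};
 *
 *	sc->sc_br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &sc->sc_tx_mtx);
 *	...
 *	buf_ring_free(sc->sc_br, M_DEVBUF);
 */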

/*
 * multi-producer safe lock-free ring buffer enqueue
 */
static __inline int
buf_ring_enqueue(struct buf_ring *br, void *buf)
{
	uint32_t prod_head, prod_next, cons_tail;
#ifdef DEBUG_BUFRING
	int i;
	for (i = br->br_cons_head; i != br->br_prod_head;
	     i = ((i + 1) & br->br_cons_mask))
		if (br->br_ring[i] == buf)
			panic("buf=%p already enqueued at %d prod=%d cons=%d",
			    buf, i, br->br_prod_tail, br->br_cons_tail);
#endif
	critical_enter();
	do {
		prod_head = br->br_prod_head;
		prod_next = (prod_head + 1) & br->br_prod_mask;
		cons_tail = br->br_cons_tail;

		if (prod_next == cons_tail) {
			rmb();
			if (prod_head == br->br_prod_head &&
			    cons_tail == br->br_cons_tail) {
				br->br_drops++;
				critical_exit();
				return (ENOBUFS);
			}
			continue;
		}
	} while (!atomic_cmpset_acq_int(&br->br_prod_head, prod_head, prod_next));
#ifdef DEBUG_BUFRING
	if (br->br_ring[prod_head] != NULL)
		panic("dangling value in enqueue");
#endif
	br->br_ring[prod_head] = buf;

	/*
	 * If there are other enqueues in progress
	 * that preceded us, we need to wait for them
	 * to complete
	 */
	while (br->br_prod_tail != prod_head)
		cpu_spinwait();
	atomic_store_rel_int(&br->br_prod_tail, prod_next);
	critical_exit();
	return (0);
}
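
/*
 * Illustrative sketch (hypothetical driver code, not part of this
 * header): any number of threads may call buf_ring_enqueue()
 * concurrently.  On ENOBUFS the ring was full, br_drops has been
 * bumped, and the caller still owns the buffer.
 *
 *	if (buf_ring_enqueue(sc->sc_br, m) == ENOBUFS) {
 *		m_freem(m);	-- ring full; dispose of the mbuf
 *		return (ENOBUFS);
 *	}
 */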

/*
 * multi-consumer safe dequeue
 */
static __inline void *
buf_ring_dequeue_mc(struct buf_ring *br)
{
	uint32_t cons_head, cons_next;
	void *buf;

	critical_enter();
	do {
		cons_head = br->br_cons_head;
		cons_next = (cons_head + 1) & br->br_cons_mask;

		if (cons_head == br->br_prod_tail) {
			critical_exit();
			return (NULL);
		}
	} while (!atomic_cmpset_acq_int(&br->br_cons_head, cons_head, cons_next));

	buf = br->br_ring[cons_head];
#ifdef DEBUG_BUFRING
	br->br_ring[cons_head] = NULL;
#endif
	/*
	 * If there are other dequeues in progress
	 * that preceded us, we need to wait for them
	 * to complete
	 */
	while (br->br_cons_tail != cons_head)
		cpu_spinwait();

	atomic_store_rel_int(&br->br_cons_tail, cons_next);
	critical_exit();

	return (buf);
}
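
/*
 * Illustrative sketch (hypothetical, not part of this header): several
 * worker threads may drain the ring concurrently with
 * buf_ring_dequeue_mc(); each buffer is handed to exactly one caller.
 *
 *	while ((m = buf_ring_dequeue_mc(sc->sc_br)) != NULL)
 *		process_pkt(sc, m);	-- process_pkt() is a placeholder
 */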

/*
 * single-consumer dequeue
 * use where dequeue is protected by a lock
 * e.g. a network driver's tx queue lock
 */
static __inline void *
buf_ring_dequeue_sc(struct buf_ring *br)
{
	uint32_t cons_head, cons_next, cons_next_next;
	uint32_t prod_tail;
	void *buf;

	cons_head = br->br_cons_head;
	prod_tail = br->br_prod_tail;

	cons_next = (cons_head + 1) & br->br_cons_mask;
	cons_next_next = (cons_head + 2) & br->br_cons_mask;

	if (cons_head == prod_tail)
		return (NULL);

#ifdef PREFETCH_DEFINED
	if (cons_next != prod_tail) {
		prefetch(br->br_ring[cons_next]);
		if (cons_next_next != prod_tail)
			prefetch(br->br_ring[cons_next_next]);
	}
#endif
	br->br_cons_head = cons_next;
	buf = br->br_ring[cons_head];

#ifdef DEBUG_BUFRING
	br->br_ring[cons_head] = NULL;
	if (!mtx_owned(br->br_lock))
		panic("lock not held on single consumer dequeue");
	if (br->br_cons_tail != cons_head)
		panic("inconsistent list cons_tail=%d cons_head=%d",
		    br->br_cons_tail, cons_head);
#endif
	br->br_cons_tail = cons_next;
	return (buf);
}
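
/*
 * Illustrative sketch (hypothetical, not part of this header): the
 * single-consumer variant relies on the caller's lock for exclusion,
 * typically the tx queue lock mentioned above.
 *
 *	mtx_lock(&sc->sc_tx_mtx);
 *	while ((m = buf_ring_dequeue_sc(sc->sc_br)) != NULL)
 *		(void)xmit_pkt(sc, m);	-- xmit_pkt() is a placeholder
 *	mtx_unlock(&sc->sc_tx_mtx);
 */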

/*
 * single-consumer advance after a peek
 * use where it is protected by a lock
 * e.g. a network driver's tx queue lock
 */
static __inline void
buf_ring_advance_sc(struct buf_ring *br)
{
	uint32_t cons_head, cons_next;
	uint32_t prod_tail;

	cons_head = br->br_cons_head;
	prod_tail = br->br_prod_tail;

	cons_next = (cons_head + 1) & br->br_cons_mask;
	if (cons_head == prod_tail)
		return;
	br->br_cons_head = cons_next;
#ifdef DEBUG_BUFRING
	br->br_ring[cons_head] = NULL;
#endif
	br->br_cons_tail = cons_next;
}

/*
 * Used to return a buffer (most likely already there)
 * to the top of the ring. The caller should *not*
 * have used any dequeue to pull it out of the ring
 * but instead should have used the peek() function.
 * This is normally used where the transmit queue
 * of a driver is full, and an mbuf must be returned.
 * Most likely what's in the ring buffer is what
 * is being put back (since it was not removed), but
 * sometimes the lower transmit function may have
 * done a pullup or other function that will have
 * changed it. As an optimization we always put it
 * back (since jhb says the store is probably cheaper);
 * if we have to do a multi-queue version we will need
 * the compare and an atomic.
 */
static __inline void
buf_ring_putback_sc(struct buf_ring *br, void *new)
{
	KASSERT(br->br_cons_head != br->br_prod_tail,
	    ("Buf-Ring has none in putback"));
	br->br_ring[br->br_cons_head] = new;
}
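
/*
 * Illustrative sketch of the peek/advance/putback pattern described
 * above (hypothetical, not part of this header; xmit_pkt() is a
 * placeholder that may replace the mbuf, e.g. after a pullup).  The
 * buffer leaves the ring only once the transmit attempt has succeeded.
 *
 *	mtx_lock(&sc->sc_tx_mtx);
 *	while ((m = buf_ring_peek(sc->sc_br)) != NULL) {
 *		if (xmit_pkt(sc, &m) != 0) {
 *			buf_ring_putback_sc(sc->sc_br, m);
 *			break;	-- hardware full; retry later
 *		}
 *		buf_ring_advance_sc(sc->sc_br);
 *	}
 *	mtx_unlock(&sc->sc_tx_mtx);
 */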

/*
 * return a pointer to the first entry in the ring
 * without modifying it, or NULL if the ring is empty
 * race-prone if not protected by a lock
 */
static __inline void *
buf_ring_peek(struct buf_ring *br)
{

#ifdef DEBUG_BUFRING
	if ((br->br_lock != NULL) && !mtx_owned(br->br_lock))
		panic("lock not held on single consumer dequeue");
#endif
	/*
	 * I believe it is safe to not have a memory barrier here: we
	 * control cons, and tail is at worst a lagging indicator, so
	 * the worst case is that we return NULL immediately after a
	 * buffer has been enqueued.
	 */
	if (br->br_cons_head == br->br_prod_tail)
		return (NULL);

	return (br->br_ring[br->br_cons_head]);
}

static __inline void *
buf_ring_peek_clear_sc(struct buf_ring *br)
{
#ifdef DEBUG_BUFRING
	void *ret;

	if (!mtx_owned(br->br_lock))
		panic("lock not held on single consumer dequeue");
#endif
	/*
	 * I believe it is safe to not have a memory barrier here: we
	 * control cons, and tail is at worst a lagging indicator, so
	 * the worst case is that we return NULL immediately after a
	 * buffer has been enqueued.
	 */
	if (br->br_cons_head == br->br_prod_tail)
		return (NULL);

#ifdef DEBUG_BUFRING
	/*
	 * Single consumer, i.e. cons_head will not move while we are
	 * running, so atomic_swap_ptr() is not necessary here.
	 */
	ret = br->br_ring[br->br_cons_head];
	br->br_ring[br->br_cons_head] = NULL;
	return (ret);
#else
	return (br->br_ring[br->br_cons_head]);
#endif
}
static __inline int
buf_ring_full(struct buf_ring *br)
{

	return (((br->br_prod_head + 1) & br->br_prod_mask) == br->br_cons_tail);
}

static __inline int
buf_ring_empty(struct buf_ring *br)
{

	return (br->br_cons_head == br->br_prod_tail);
}

static __inline int
buf_ring_count(struct buf_ring *br)
{

	return ((br->br_prod_size + br->br_prod_tail - br->br_cons_tail)
	    & br->br_prod_mask);
}
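
/*
 * Worked example for buf_ring_count(): in an 8-entry ring (mask 7) with
 * br_cons_tail == 6 and br_prod_tail == 2, the occupied slots are 6, 7,
 * 0 and 1, and (8 + 2 - 6) & 7 == 4 as expected.
 */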

struct buf_ring *buf_ring_alloc(int count, struct malloc_type *type, int flags,
    struct mtx *);
void buf_ring_free(struct buf_ring *br, struct malloc_type *type);

#endif