/*-
 * Copyright (c) 2007-2009 Kip Macy <kmacy@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25185162Skmacy * 26185162Skmacy * $FreeBSD$ 27185162Skmacy * 28207673Sjoel */ 29185162Skmacy 30185162Skmacy#ifndef _SYS_BUF_RING_H_ 31185162Skmacy#define _SYS_BUF_RING_H_ 32185162Skmacy 33185162Skmacy#include <machine/cpu.h> 34185162Skmacy 35185162Skmacy#if defined(INVARIANTS) && !defined(DEBUG_BUFRING) 36185162Skmacy#define DEBUG_BUFRING 1 37185162Skmacy#endif 38185162Skmacy 39185162Skmacy#ifdef DEBUG_BUFRING 40185162Skmacy#include <sys/lock.h> 41185162Skmacy#include <sys/mutex.h> 42185162Skmacy#endif 43185162Skmacy 44185162Skmacystruct buf_ring { 45185162Skmacy volatile uint32_t br_prod_head; 46185162Skmacy volatile uint32_t br_prod_tail; 47185162Skmacy int br_prod_size; 48185162Skmacy int br_prod_mask; 49186207Skmacy uint64_t br_drops; 50244774Sattilio volatile uint32_t br_cons_head __aligned(CACHE_LINE_SIZE); 51185162Skmacy volatile uint32_t br_cons_tail; 52185162Skmacy int br_cons_size; 53185162Skmacy int br_cons_mask; 54185162Skmacy#ifdef DEBUG_BUFRING 55185162Skmacy struct mtx *br_lock; 56185162Skmacy#endif 57244774Sattilio void *br_ring[0] __aligned(CACHE_LINE_SIZE); 58185162Skmacy}; 59185162Skmacy 60191899Skmacy/* 61191899Skmacy * multi-producer safe lock-free ring buffer enqueue 62191899Skmacy * 63191899Skmacy */ 64185162Skmacystatic __inline int 65241037Sglebiusbuf_ring_enqueue(struct buf_ring *br, void *buf) 66185162Skmacy{ 67274646Sjpaetzel uint32_t prod_head, prod_next, cons_tail; 68185162Skmacy#ifdef DEBUG_BUFRING 69185162Skmacy int i; 70185162Skmacy for (i = br->br_cons_head; i != br->br_prod_head; 71185162Skmacy i = ((i + 1) & br->br_cons_mask)) 72185162Skmacy if(br->br_ring[i] == buf) 73185162Skmacy panic("buf=%p already enqueue at %d prod=%d cons=%d", 74185162Skmacy buf, i, br->br_prod_tail, br->br_cons_tail); 75185162Skmacy#endif 76185162Skmacy critical_enter(); 77185162Skmacy do { 78185162Skmacy prod_head = br->br_prod_head; 79274646Sjpaetzel prod_next = (prod_head + 1) & br->br_prod_mask; 80185162Skmacy cons_tail = br->br_cons_tail; 
81185162Skmacy 82185162Skmacy if (prod_next == cons_tail) { 83274646Sjpaetzel rmb(); 84274646Sjpaetzel if (prod_head == br->br_prod_head && 85274646Sjpaetzel cons_tail == br->br_cons_tail) { 86274646Sjpaetzel br->br_drops++; 87274646Sjpaetzel critical_exit(); 88274646Sjpaetzel return (ENOBUFS); 89274646Sjpaetzel } 90274646Sjpaetzel continue; 91185162Skmacy } 92274646Sjpaetzel } while (!atomic_cmpset_acq_int(&br->br_prod_head, prod_head, prod_next)); 93185162Skmacy#ifdef DEBUG_BUFRING 94185162Skmacy if (br->br_ring[prod_head] != NULL) 95185162Skmacy panic("dangling value in enqueue"); 96185162Skmacy#endif 97185162Skmacy br->br_ring[prod_head] = buf; 98185162Skmacy 99185162Skmacy /* 100185162Skmacy * If there are other enqueues in progress 101300060Spfg * that preceded us, we need to wait for them 102185162Skmacy * to complete 103185162Skmacy */ 104244774Sattilio while (br->br_prod_tail != prod_head) 105185162Skmacy cpu_spinwait(); 106274646Sjpaetzel atomic_store_rel_int(&br->br_prod_tail, prod_next); 107185162Skmacy critical_exit(); 108185162Skmacy return (0); 109185162Skmacy} 110185162Skmacy 111185162Skmacy/* 112185162Skmacy * multi-consumer safe dequeue 113185162Skmacy * 114185162Skmacy */ 115185162Skmacystatic __inline void * 116185162Skmacybuf_ring_dequeue_mc(struct buf_ring *br) 117185162Skmacy{ 118185162Skmacy uint32_t cons_head, cons_next; 119185162Skmacy void *buf; 120185162Skmacy 121185162Skmacy critical_enter(); 122185162Skmacy do { 123185162Skmacy cons_head = br->br_cons_head; 124274646Sjpaetzel cons_next = (cons_head + 1) & br->br_cons_mask; 125185162Skmacy 126274646Sjpaetzel if (cons_head == br->br_prod_tail) { 127185162Skmacy critical_exit(); 128185162Skmacy return (NULL); 129185162Skmacy } 130274646Sjpaetzel } while (!atomic_cmpset_acq_int(&br->br_cons_head, cons_head, cons_next)); 131185162Skmacy 132185162Skmacy buf = br->br_ring[cons_head]; 133185162Skmacy#ifdef DEBUG_BUFRING 134185162Skmacy br->br_ring[cons_head] = NULL; 135185162Skmacy#endif 
136244774Sattilio /* 137185162Skmacy * If there are other dequeues in progress 138300060Spfg * that preceded us, we need to wait for them 139185162Skmacy * to complete 140185162Skmacy */ 141244774Sattilio while (br->br_cons_tail != cons_head) 142185162Skmacy cpu_spinwait(); 143185162Skmacy 144274646Sjpaetzel atomic_store_rel_int(&br->br_cons_tail, cons_next); 145185162Skmacy critical_exit(); 146185162Skmacy 147185162Skmacy return (buf); 148185162Skmacy} 149185162Skmacy 150185162Skmacy/* 151191899Skmacy * single-consumer dequeue 152191899Skmacy * use where dequeue is protected by a lock 153191899Skmacy * e.g. a network driver's tx queue lock 154185162Skmacy */ 155185162Skmacystatic __inline void * 156185162Skmacybuf_ring_dequeue_sc(struct buf_ring *br) 157185162Skmacy{ 158193848Skmacy uint32_t cons_head, cons_next, cons_next_next; 159185162Skmacy uint32_t prod_tail; 160185162Skmacy void *buf; 161185162Skmacy 162185162Skmacy cons_head = br->br_cons_head; 163185162Skmacy prod_tail = br->br_prod_tail; 164185162Skmacy 165185162Skmacy cons_next = (cons_head + 1) & br->br_cons_mask; 166193848Skmacy cons_next_next = (cons_head + 2) & br->br_cons_mask; 167193848Skmacy 168193848Skmacy if (cons_head == prod_tail) 169185162Skmacy return (NULL); 170193848Skmacy 171193848Skmacy#ifdef PREFETCH_DEFINED 172193848Skmacy if (cons_next != prod_tail) { 173193848Skmacy prefetch(br->br_ring[cons_next]); 174193848Skmacy if (cons_next_next != prod_tail) 175193848Skmacy prefetch(br->br_ring[cons_next_next]); 176185162Skmacy } 177193848Skmacy#endif 178185162Skmacy br->br_cons_head = cons_next; 179185162Skmacy buf = br->br_ring[cons_head]; 180193848Skmacy 181185162Skmacy#ifdef DEBUG_BUFRING 182185162Skmacy br->br_ring[cons_head] = NULL; 183185162Skmacy if (!mtx_owned(br->br_lock)) 184185162Skmacy panic("lock not held on single consumer dequeue"); 185185162Skmacy if (br->br_cons_tail != cons_head) 186185162Skmacy panic("inconsistent list cons_tail=%d cons_head=%d", 187185162Skmacy 
br->br_cons_tail, cons_head); 188185162Skmacy#endif 189244774Sattilio br->br_cons_tail = cons_next; 190185162Skmacy return (buf); 191185162Skmacy} 192185162Skmacy 193191899Skmacy/* 194246482Srrs * single-consumer advance after a peek 195246482Srrs * use where it is protected by a lock 196246482Srrs * e.g. a network driver's tx queue lock 197246482Srrs */ 198246482Srrsstatic __inline void 199246482Srrsbuf_ring_advance_sc(struct buf_ring *br) 200246482Srrs{ 201246482Srrs uint32_t cons_head, cons_next; 202246482Srrs uint32_t prod_tail; 203246482Srrs 204246482Srrs cons_head = br->br_cons_head; 205246482Srrs prod_tail = br->br_prod_tail; 206246482Srrs 207246482Srrs cons_next = (cons_head + 1) & br->br_cons_mask; 208246482Srrs if (cons_head == prod_tail) 209246482Srrs return; 210246482Srrs br->br_cons_head = cons_next; 211246482Srrs#ifdef DEBUG_BUFRING 212246482Srrs br->br_ring[cons_head] = NULL; 213246482Srrs#endif 214246482Srrs br->br_cons_tail = cons_next; 215246482Srrs} 216246482Srrs 217246482Srrs/* 218246482Srrs * Used to return a buffer (most likely already there) 219246482Srrs * to the top od the ring. The caller should *not* 220246482Srrs * have used any dequeue to pull it out of the ring 221246482Srrs * but instead should have used the peek() function. 222246482Srrs * This is normally used where the transmit queue 223246482Srrs * of a driver is full, and an mubf must be returned. 224246482Srrs * Most likely whats in the ring-buffer is what 225246482Srrs * is being put back (since it was not removed), but 226246482Srrs * sometimes the lower transmit function may have 227246482Srrs * done a pullup or other function that will have 228246482Srrs * changed it. As an optimzation we always put it 229246482Srrs * back (since jhb says the store is probably cheaper), 230246482Srrs * if we have to do a multi-queue version we will need 231246482Srrs * the compare and an atomic. 
232246482Srrs */ 233246482Srrsstatic __inline void 234246482Srrsbuf_ring_putback_sc(struct buf_ring *br, void *new) 235246482Srrs{ 236246482Srrs KASSERT(br->br_cons_head != br->br_prod_tail, 237246482Srrs ("Buf-Ring has none in putback")) ; 238246482Srrs br->br_ring[br->br_cons_head] = new; 239246482Srrs} 240246482Srrs 241246482Srrs/* 242191899Skmacy * return a pointer to the first entry in the ring 243191899Skmacy * without modifying it, or NULL if the ring is empty 244191899Skmacy * race-prone if not protected by a lock 245191899Skmacy */ 246185162Skmacystatic __inline void * 247185162Skmacybuf_ring_peek(struct buf_ring *br) 248185162Skmacy{ 249185162Skmacy 250185162Skmacy#ifdef DEBUG_BUFRING 251185162Skmacy if ((br->br_lock != NULL) && !mtx_owned(br->br_lock)) 252185162Skmacy panic("lock not held on single consumer dequeue"); 253185162Skmacy#endif 254193848Skmacy /* 255193848Skmacy * I believe it is safe to not have a memory barrier 256193848Skmacy * here because we control cons and tail is worst case 257193848Skmacy * a lagging indicator so we worst case we might 258193848Skmacy * return NULL immediately after a buffer has been enqueued 259193848Skmacy */ 260185193Skmacy if (br->br_cons_head == br->br_prod_tail) 261185193Skmacy return (NULL); 262185193Skmacy 263185193Skmacy return (br->br_ring[br->br_cons_head]); 264185162Skmacy} 265185162Skmacy 266301913Ssephestatic __inline void * 267301913Ssephebuf_ring_peek_clear_sc(struct buf_ring *br) 268301913Ssephe{ 269301913Ssephe#ifdef DEBUG_BUFRING 270301913Ssephe void *ret; 271301913Ssephe 272301913Ssephe if (!mtx_owned(br->br_lock)) 273301913Ssephe panic("lock not held on single consumer dequeue"); 274301913Ssephe#endif 275301913Ssephe /* 276301913Ssephe * I believe it is safe to not have a memory barrier 277301913Ssephe * here because we control cons and tail is worst case 278301913Ssephe * a lagging indicator so we worst case we might 279301913Ssephe * return NULL immediately after a buffer has been enqueued 
280301913Ssephe */ 281301913Ssephe if (br->br_cons_head == br->br_prod_tail) 282301913Ssephe return (NULL); 283301913Ssephe 284301913Ssephe#ifdef DEBUG_BUFRING 285301913Ssephe /* 286301913Ssephe * Single consumer, i.e. cons_head will not move while we are 287301913Ssephe * running, so atomic_swap_ptr() is not necessary here. 288301913Ssephe */ 289301913Ssephe ret = br->br_ring[br->br_cons_head]; 290301913Ssephe br->br_ring[br->br_cons_head] = NULL; 291301913Ssephe return (ret); 292301913Ssephe#else 293301913Ssephe return (br->br_ring[br->br_cons_head]); 294301913Ssephe#endif 295301913Ssephe} 296301913Ssephe 297185162Skmacystatic __inline int 298185162Skmacybuf_ring_full(struct buf_ring *br) 299185162Skmacy{ 300185162Skmacy 301185162Skmacy return (((br->br_prod_head + 1) & br->br_prod_mask) == br->br_cons_tail); 302185162Skmacy} 303185162Skmacy 304185162Skmacystatic __inline int 305185162Skmacybuf_ring_empty(struct buf_ring *br) 306185162Skmacy{ 307185162Skmacy 308185162Skmacy return (br->br_cons_head == br->br_prod_tail); 309185162Skmacy} 310185162Skmacy 311185162Skmacystatic __inline int 312185162Skmacybuf_ring_count(struct buf_ring *br) 313185162Skmacy{ 314185162Skmacy 315185162Skmacy return ((br->br_prod_size + br->br_prod_tail - br->br_cons_tail) 316185162Skmacy & br->br_prod_mask); 317185162Skmacy} 318185162Skmacy 319185162Skmacystruct buf_ring *buf_ring_alloc(int count, struct malloc_type *type, int flags, 320185162Skmacy struct mtx *); 321185162Skmacyvoid buf_ring_free(struct buf_ring *br, struct malloc_type *type); 322185162Skmacy 323185162Skmacy 324185162Skmacy 325185162Skmacy#endif 326