/*-
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29#include <sys/cdefs.h> 30__FBSDID("$FreeBSD$"); 31 32#include <sys/param.h> 33#include <sys/lock.h> 34#include <sys/mutex.h> 35#include <sys/sysctl.h> 36 37#include <dev/hyperv/vmbus/vmbus_reg.h> 38#include <dev/hyperv/vmbus/vmbus_brvar.h> 39 40/* Amount of space available for write */ 41#define VMBUS_BR_WAVAIL(r, w, z) \ 42 (((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w))) 43 44/* Increase bufing index */ 45#define VMBUS_BR_IDXINC(idx, inc, sz) (((idx) + (inc)) % (sz)) 46 47static int 48vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS) 49{ 50 const struct vmbus_br *br = arg1; 51 uint32_t rindex, windex, imask, ravail, wavail; 52 char state[256]; 53 54 rindex = br->vbr_rindex; 55 windex = br->vbr_windex; 56 imask = br->vbr_imask; 57 wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize); 58 ravail = br->vbr_dsize - wavail; 59 60 snprintf(state, sizeof(state), 61 "rindex:%u windex:%u imask:%u ravail:%u wavail:%u", 62 rindex, windex, imask, ravail, wavail); 63 return sysctl_handle_string(oidp, state, sizeof(state), req); 64} 65 66/* 67 * Binary bufring states. 
68 */ 69static int 70vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS) 71{ 72#define BR_STATE_RIDX 0 73#define BR_STATE_WIDX 1 74#define BR_STATE_IMSK 2 75#define BR_STATE_RSPC 3 76#define BR_STATE_WSPC 4 77#define BR_STATE_MAX 5 78 79 const struct vmbus_br *br = arg1; 80 uint32_t rindex, windex, wavail, state[BR_STATE_MAX]; 81 82 rindex = br->vbr_rindex; 83 windex = br->vbr_windex; 84 wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize); 85 86 state[BR_STATE_RIDX] = rindex; 87 state[BR_STATE_WIDX] = windex; 88 state[BR_STATE_IMSK] = br->vbr_imask; 89 state[BR_STATE_WSPC] = wavail; 90 state[BR_STATE_RSPC] = br->vbr_dsize - wavail; 91 92 return sysctl_handle_opaque(oidp, state, sizeof(state), req); 93} 94 95void 96vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, struct sysctl_oid *br_tree, 97 struct vmbus_br *br, const char *name) 98{ 99 struct sysctl_oid *tree; 100 char desc[64]; 101 102 tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(br_tree), OID_AUTO, 103 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 104 if (tree == NULL) 105 return; 106 107 snprintf(desc, sizeof(desc), "%s state", name); 108 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state", 109 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 110 br, 0, vmbus_br_sysctl_state, "A", desc); 111 112 snprintf(desc, sizeof(desc), "%s binary state", name); 113 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state_bin", 114 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, 115 br, 0, vmbus_br_sysctl_state_bin, "IU", desc); 116} 117 118void 119vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr) 120{ 121 rbr->rxbr_imask = 1; 122 mb(); 123} 124 125static __inline uint32_t 126vmbus_rxbr_avail(const struct vmbus_rxbr *rbr) 127{ 128 uint32_t rindex, windex; 129 130 /* Get snapshot */ 131 rindex = rbr->rxbr_rindex; 132 windex = rbr->rxbr_windex; 133 134 return (rbr->rxbr_dsize - 135 VMBUS_BR_WAVAIL(rindex, windex, rbr->rxbr_dsize)); 136} 137 138uint32_t 139vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr) 140{ 141 
rbr->rxbr_imask = 0; 142 mb(); 143 144 /* 145 * Now check to see if the ring buffer is still empty. 146 * If it is not, we raced and we need to process new 147 * incoming channel packets. 148 */ 149 return vmbus_rxbr_avail(rbr); 150} 151 152static void 153vmbus_br_setup(struct vmbus_br *br, void *buf, int blen) 154{ 155 br->vbr = buf; 156 br->vbr_dsize = blen - sizeof(struct vmbus_bufring); 157} 158 159void 160vmbus_rxbr_init(struct vmbus_rxbr *rbr) 161{ 162 mtx_init(&rbr->rxbr_lock, "vmbus_rxbr", NULL, MTX_SPIN); 163} 164 165void 166vmbus_rxbr_deinit(struct vmbus_rxbr *rbr) 167{ 168 mtx_destroy(&rbr->rxbr_lock); 169} 170 171void 172vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen) 173{ 174 vmbus_br_setup(&rbr->rxbr, buf, blen); 175} 176 177void 178vmbus_txbr_init(struct vmbus_txbr *tbr) 179{ 180 mtx_init(&tbr->txbr_lock, "vmbus_txbr", NULL, MTX_SPIN); 181} 182 183void 184vmbus_txbr_deinit(struct vmbus_txbr *tbr) 185{ 186 mtx_destroy(&tbr->txbr_lock); 187} 188 189void 190vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen) 191{ 192 vmbus_br_setup(&tbr->txbr, buf, blen); 193} 194 195/* 196 * When we write to the ring buffer, check if the host needs to be 197 * signaled. 198 * 199 * The contract: 200 * - The host guarantees that while it is draining the TX bufring, 201 * it will set the br_imask to indicate it does not need to be 202 * interrupted when new data are added. 203 * - The host guarantees that it will completely drain the TX bufring 204 * before exiting the read loop. Further, once the TX bufring is 205 * empty, it will clear the br_imask and re-check to see if new 206 * data have arrived. 
207 */ 208static __inline boolean_t 209vmbus_txbr_need_signal(const struct vmbus_txbr *tbr, uint32_t old_windex) 210{ 211 mb(); 212 if (tbr->txbr_imask) 213 return (FALSE); 214 215 /* XXX only compiler fence is needed */ 216 /* Read memory barrier */ 217 rmb(); 218 219 /* 220 * This is the only case we need to signal when the 221 * ring transitions from being empty to non-empty. 222 */ 223 if (old_windex == tbr->txbr_rindex) 224 return (TRUE); 225 226 return (FALSE); 227} 228 229static __inline uint32_t 230vmbus_txbr_avail(const struct vmbus_txbr *tbr) 231{ 232 uint32_t rindex, windex; 233 234 /* Get snapshot */ 235 rindex = tbr->txbr_rindex; 236 windex = tbr->txbr_windex; 237 238 return VMBUS_BR_WAVAIL(rindex, windex, tbr->txbr_dsize); 239} 240 241static __inline uint32_t 242vmbus_txbr_copyto(const struct vmbus_txbr *tbr, uint32_t windex, 243 const void *src0, uint32_t cplen) 244{ 245 const uint8_t *src = src0; 246 uint8_t *br_data = tbr->txbr_data; 247 uint32_t br_dsize = tbr->txbr_dsize; 248 249 if (cplen > br_dsize - windex) { 250 uint32_t fraglen = br_dsize - windex; 251 252 /* Wrap-around detected */ 253 memcpy(br_data + windex, src, fraglen); 254 memcpy(br_data, src + fraglen, cplen - fraglen); 255 } else { 256 memcpy(br_data + windex, src, cplen); 257 } 258 return VMBUS_BR_IDXINC(windex, cplen, br_dsize); 259} 260 261/* 262 * Write scattered channel packet to TX bufring. 263 * 264 * The offset of this channel packet is written as a 64bits value 265 * immediately after this channel packet. 266 */ 267int 268vmbus_txbr_write(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen, 269 boolean_t *need_sig) 270{ 271 uint32_t old_windex, windex, total; 272 uint64_t save_windex; 273 int i; 274 275 total = 0; 276 for (i = 0; i < iovlen; i++) 277 total += iov[i].iov_len; 278 total += sizeof(save_windex); 279 280 mtx_lock_spin(&tbr->txbr_lock); 281 282 /* 283 * NOTE: 284 * If this write is going to make br_windex same as br_rindex, 285 * i.e. 
the available space for write is same as the write size, 286 * we can't do it then, since br_windex == br_rindex means that 287 * the bufring is empty. 288 */ 289 if (vmbus_txbr_avail(tbr) <= total) { 290 mtx_unlock_spin(&tbr->txbr_lock); 291 return (EAGAIN); 292 } 293 294 /* Save br_windex for later use */ 295 old_windex = tbr->txbr_windex; 296 297 /* 298 * Copy the scattered channel packet to the TX bufring. 299 */ 300 windex = old_windex; 301 for (i = 0; i < iovlen; i++) { 302 windex = vmbus_txbr_copyto(tbr, windex, 303 iov[i].iov_base, iov[i].iov_len); 304 } 305 306 /* 307 * Set the offset of the current channel packet. 308 */ 309 save_windex = ((uint64_t)old_windex) << 32; 310 windex = vmbus_txbr_copyto(tbr, windex, &save_windex, 311 sizeof(save_windex)); 312 313 /* 314 * XXX only compiler fence is needed. 315 * Full memory barrier before upding the write index. 316 */ 317 mb(); 318 319 /* 320 * Update the write index _after_ the channel packet 321 * is copied. 322 */ 323 tbr->txbr_windex = windex; 324 325 mtx_unlock_spin(&tbr->txbr_lock); 326 327 *need_sig = vmbus_txbr_need_signal(tbr, old_windex); 328 329 return (0); 330} 331 332static __inline uint32_t 333vmbus_rxbr_copyfrom(const struct vmbus_rxbr *rbr, uint32_t rindex, 334 void *dst0, int cplen) 335{ 336 uint8_t *dst = dst0; 337 const uint8_t *br_data = rbr->rxbr_data; 338 uint32_t br_dsize = rbr->rxbr_dsize; 339 340 if (cplen > br_dsize - rindex) { 341 uint32_t fraglen = br_dsize - rindex; 342 343 /* Wrap-around detected. */ 344 memcpy(dst, br_data + rindex, fraglen); 345 memcpy(dst + fraglen, br_data, cplen - fraglen); 346 } else { 347 memcpy(dst, br_data + rindex, cplen); 348 } 349 return VMBUS_BR_IDXINC(rindex, cplen, br_dsize); 350} 351 352int 353vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen) 354{ 355 mtx_lock_spin(&rbr->rxbr_lock); 356 357 /* 358 * The requested data and the 64bits channel packet 359 * offset should be there at least. 
360 */ 361 if (vmbus_rxbr_avail(rbr) < dlen + sizeof(uint64_t)) { 362 mtx_unlock_spin(&rbr->rxbr_lock); 363 return (EAGAIN); 364 } 365 vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen); 366 367 mtx_unlock_spin(&rbr->rxbr_lock); 368 369 return (0); 370} 371 372/* 373 * NOTE: 374 * We assume (dlen + skip) == sizeof(channel packet). 375 */ 376int 377vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen, uint32_t skip) 378{ 379 uint32_t rindex, br_dsize = rbr->rxbr_dsize; 380 381 KASSERT(dlen + skip > 0, ("invalid dlen %d, offset %u", dlen, skip)); 382 383 mtx_lock_spin(&rbr->rxbr_lock); 384 385 if (vmbus_rxbr_avail(rbr) < dlen + skip + sizeof(uint64_t)) { 386 mtx_unlock_spin(&rbr->rxbr_lock); 387 return (EAGAIN); 388 } 389 390 /* 391 * Copy channel packet from RX bufring. 392 */ 393 rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, br_dsize); 394 rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen); 395 396 /* 397 * Discard this channel packet's 64bits offset, which is useless to us. 398 */ 399 rindex = VMBUS_BR_IDXINC(rindex, sizeof(uint64_t), br_dsize); 400 401 /* 402 * XXX only compiler fence is needed. 403 * Make sure all reads are done before we update the read index since 404 * the writer may start writing to the read area once the read index 405 * is updated. 406 */ 407 wmb(); 408 409 /* 410 * Update the read index _after_ the channel packet is fetched. 411 */ 412 rbr->rxbr_rindex = rindex; 413 414 mtx_unlock_spin(&rbr->rxbr_lock); 415 416 return (0); 417} 418