vmbus_br.c revision 307114
/*-
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/lock.h>
34#include <sys/mutex.h>
35#include <sys/sysctl.h>
36
37#include <dev/hyperv/vmbus/vmbus_reg.h>
38#include <dev/hyperv/vmbus/vmbus_brvar.h>
39
/*
 * Amount of space available for write, given a snapshot of the read
 * index (r), write index (w) and ring data size (z).  Note that when
 * w == r the ring is by convention empty, so the full size is reported
 * as writable; callers must never fill the ring completely (see the
 * NOTE in vmbus_txbr_write()).
 */
#define	VMBUS_BR_WAVAIL(r, w, z)	\
	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))

/* Increase bufring index, wrapping around the ring data size. */
#define VMBUS_BR_IDXINC(idx, inc, sz)	(((idx) + (inc)) % (sz))
46
47static int
48vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS)
49{
50	const struct vmbus_br *br = arg1;
51	uint32_t rindex, windex, imask, ravail, wavail;
52	char state[256];
53
54	rindex = br->vbr_rindex;
55	windex = br->vbr_windex;
56	imask = br->vbr_imask;
57	wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize);
58	ravail = br->vbr_dsize - wavail;
59
60	snprintf(state, sizeof(state),
61	    "rindex:%u windex:%u imask:%u ravail:%u wavail:%u",
62	    rindex, windex, imask, ravail, wavail);
63	return sysctl_handle_string(oidp, state, sizeof(state), req);
64}
65
66/*
67 * Binary bufring states.
68 */
69static int
70vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS)
71{
72#define BR_STATE_RIDX	0
73#define BR_STATE_WIDX	1
74#define BR_STATE_IMSK	2
75#define BR_STATE_RSPC	3
76#define BR_STATE_WSPC	4
77#define BR_STATE_MAX	5
78
79	const struct vmbus_br *br = arg1;
80	uint32_t rindex, windex, wavail, state[BR_STATE_MAX];
81
82	rindex = br->vbr_rindex;
83	windex = br->vbr_windex;
84	wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize);
85
86	state[BR_STATE_RIDX] = rindex;
87	state[BR_STATE_WIDX] = windex;
88	state[BR_STATE_IMSK] = br->vbr_imask;
89	state[BR_STATE_WSPC] = wavail;
90	state[BR_STATE_RSPC] = br->vbr_dsize - wavail;
91
92	return sysctl_handle_opaque(oidp, state, sizeof(state), req);
93}
94
95void
96vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, struct sysctl_oid *br_tree,
97    struct vmbus_br *br, const char *name)
98{
99	struct sysctl_oid *tree;
100	char desc[64];
101
102	tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(br_tree), OID_AUTO,
103	    name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
104	if (tree == NULL)
105		return;
106
107	snprintf(desc, sizeof(desc), "%s state", name);
108	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state",
109	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
110	    br, 0, vmbus_br_sysctl_state, "A", desc);
111
112	snprintf(desc, sizeof(desc), "%s binary state", name);
113	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "state_bin",
114	    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
115	    br, 0, vmbus_br_sysctl_state_bin, "IU", desc);
116}
117
/*
 * Mask interrupts on the RX bufring by setting its imask flag.
 * The memory barrier ensures the store is globally visible before the
 * caller proceeds.
 */
void
vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr)
{
	rbr->rxbr_imask = 1;
	mb();
}
124
125static __inline uint32_t
126vmbus_rxbr_avail(const struct vmbus_rxbr *rbr)
127{
128	uint32_t rindex, windex;
129
130	/* Get snapshot */
131	rindex = rbr->rxbr_rindex;
132	windex = rbr->rxbr_windex;
133
134	return (rbr->rxbr_dsize -
135	    VMBUS_BR_WAVAIL(rindex, windex, rbr->rxbr_dsize));
136}
137
/*
 * Unmask interrupts on the RX bufring and return the number of bytes
 * still readable, so the caller can detect a race with the host.
 * The barrier orders the imask clear before the re-check of the ring.
 */
uint32_t
vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr)
{
	rbr->rxbr_imask = 0;
	mb();

	/*
	 * Now check to see if the ring buffer is still empty.
	 * If it is not, we raced and we need to process new
	 * incoming channel packets.
	 */
	return vmbus_rxbr_avail(rbr);
}
151
/*
 * Point a generic bufring at the shared memory buffer buf of blen
 * bytes; the usable data size excludes the struct vmbus_bufring
 * header at the front of the buffer.
 */
static void
vmbus_br_setup(struct vmbus_br *br, void *buf, int blen)
{
	br->vbr = buf;
	br->vbr_dsize = blen - sizeof(struct vmbus_bufring);
}
158
/* Initialize the RX bufring's spin lock. */
void
vmbus_rxbr_init(struct vmbus_rxbr *rbr)
{
	mtx_init(&rbr->rxbr_lock, "vmbus_rxbr", NULL, MTX_SPIN);
}
164
/* Destroy the RX bufring's spin lock. */
void
vmbus_rxbr_deinit(struct vmbus_rxbr *rbr)
{
	mtx_destroy(&rbr->rxbr_lock);
}
170
/* Attach the RX bufring to its shared memory buffer. */
void
vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen)
{
	vmbus_br_setup(&rbr->rxbr, buf, blen);
}
176
/* Initialize the TX bufring's spin lock. */
void
vmbus_txbr_init(struct vmbus_txbr *tbr)
{
	mtx_init(&tbr->txbr_lock, "vmbus_txbr", NULL, MTX_SPIN);
}
182
/* Destroy the TX bufring's spin lock. */
void
vmbus_txbr_deinit(struct vmbus_txbr *tbr)
{
	mtx_destroy(&tbr->txbr_lock);
}
188
/* Attach the TX bufring to its shared memory buffer. */
void
vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen)
{
	vmbus_br_setup(&tbr->txbr, buf, blen);
}
194
/*
 * When we write to the ring buffer, check if the host needs to be
 * signaled.
 *
 * The contract:
 * - The host guarantees that while it is draining the TX bufring,
 *   it will set the br_imask to indicate it does not need to be
 *   interrupted when new data are added.
 * - The host guarantees that it will completely drain the TX bufring
 *   before exiting the read loop.  Further, once the TX bufring is
 *   empty, it will clear the br_imask and re-check to see if new
 *   data have arrived.
 *
 * old_windex is the write index before the caller's write; TRUE is
 * returned only when that write took the ring from empty to non-empty
 * and the host has not masked interrupts.
 */
static __inline boolean_t
vmbus_txbr_need_signal(const struct vmbus_txbr *tbr, uint32_t old_windex)
{
	/* Order the caller's windex update before reading imask. */
	mb();
	if (tbr->txbr_imask)
		return (FALSE);

	/* XXX only compiler fence is needed */
	/* Read memory barrier */
	rmb();

	/*
	 * This is the only case we need to signal when the
	 * ring transitions from being empty to non-empty.
	 */
	if (old_windex == tbr->txbr_rindex)
		return (TRUE);

	return (FALSE);
}
228
229static __inline uint32_t
230vmbus_txbr_avail(const struct vmbus_txbr *tbr)
231{
232	uint32_t rindex, windex;
233
234	/* Get snapshot */
235	rindex = tbr->txbr_rindex;
236	windex = tbr->txbr_windex;
237
238	return VMBUS_BR_WAVAIL(rindex, windex, tbr->txbr_dsize);
239}
240
241static __inline uint32_t
242vmbus_txbr_copyto(const struct vmbus_txbr *tbr, uint32_t windex,
243    const void *src0, uint32_t cplen)
244{
245	const uint8_t *src = src0;
246	uint8_t *br_data = tbr->txbr_data;
247	uint32_t br_dsize = tbr->txbr_dsize;
248
249	if (cplen > br_dsize - windex) {
250		uint32_t fraglen = br_dsize - windex;
251
252		/* Wrap-around detected */
253		memcpy(br_data + windex, src, fraglen);
254		memcpy(br_data, src + fraglen, cplen - fraglen);
255	} else {
256		memcpy(br_data + windex, src, cplen);
257	}
258	return VMBUS_BR_IDXINC(windex, cplen, br_dsize);
259}
260
/*
 * Write scattered channel packet to TX bufring.
 *
 * The offset of this channel packet is written as a 64bits value
 * immediately after this channel packet.
 *
 * Returns 0 on success, or EAGAIN if the ring does not have enough
 * free space.  On success, *need_sig tells the caller whether the
 * host must be signaled about the new data.
 */
int
vmbus_txbr_write(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen,
    boolean_t *need_sig)
{
	uint32_t old_windex, windex, total;
	uint64_t save_windex;
	int i;

	/*
	 * Total bytes to copy: all iovec segments plus the trailing
	 * 64bits packet offset.
	 */
	total = 0;
	for (i = 0; i < iovlen; i++)
		total += iov[i].iov_len;
	total += sizeof(save_windex);

	mtx_lock_spin(&tbr->txbr_lock);

	/*
	 * NOTE:
	 * If this write is going to make br_windex same as br_rindex,
	 * i.e. the available space for write is same as the write size,
	 * we can't do it then, since br_windex == br_rindex means that
	 * the bufring is empty.
	 */
	if (vmbus_txbr_avail(tbr) <= total) {
		mtx_unlock_spin(&tbr->txbr_lock);
		return (EAGAIN);
	}

	/* Save br_windex for later use */
	old_windex = tbr->txbr_windex;

	/*
	 * Copy the scattered channel packet to the TX bufring.
	 */
	windex = old_windex;
	for (i = 0; i < iovlen; i++) {
		windex = vmbus_txbr_copyto(tbr, windex,
		    iov[i].iov_base, iov[i].iov_len);
	}

	/*
	 * Set the offset of the current channel packet.
	 */
	save_windex = ((uint64_t)old_windex) << 32;
	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
	    sizeof(save_windex));

	/*
	 * XXX only compiler fence is needed.
	 * Full memory barrier before updating the write index.
	 */
	mb();

	/*
	 * Update the write index _after_ the channel packet
	 * is copied.
	 */
	tbr->txbr_windex = windex;

	mtx_unlock_spin(&tbr->txbr_lock);

	/* Decide outside the lock whether the host must be signaled. */
	*need_sig = vmbus_txbr_need_signal(tbr, old_windex);

	return (0);
}
331
332static __inline uint32_t
333vmbus_rxbr_copyfrom(const struct vmbus_rxbr *rbr, uint32_t rindex,
334    void *dst0, int cplen)
335{
336	uint8_t *dst = dst0;
337	const uint8_t *br_data = rbr->rxbr_data;
338	uint32_t br_dsize = rbr->rxbr_dsize;
339
340	if (cplen > br_dsize - rindex) {
341		uint32_t fraglen = br_dsize - rindex;
342
343		/* Wrap-around detected. */
344		memcpy(dst, br_data + rindex, fraglen);
345		memcpy(dst + fraglen, br_data, cplen - fraglen);
346	} else {
347		memcpy(dst, br_data + rindex, cplen);
348	}
349	return VMBUS_BR_IDXINC(rindex, cplen, br_dsize);
350}
351
/*
 * Copy dlen bytes from the head of the RX bufring into data without
 * advancing the read index.  Returns 0 on success, or EAGAIN if the
 * ring does not yet hold a full request.
 */
int
vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen)
{
	mtx_lock_spin(&rbr->rxbr_lock);

	/*
	 * The requested data and the 64bits channel packet
	 * offset should be there at least.
	 */
	if (vmbus_rxbr_avail(rbr) < dlen + sizeof(uint64_t)) {
		mtx_unlock_spin(&rbr->rxbr_lock);
		return (EAGAIN);
	}
	vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);

	mtx_unlock_spin(&rbr->rxbr_lock);

	return (0);
}
371
/*
 * Consume one channel packet from the RX bufring: skip the first
 * `skip` bytes, copy `dlen` bytes into data, then discard the trailing
 * 64bits packet offset and advance the read index past the whole
 * packet.  Returns 0 on success, or EAGAIN if the full packet is not
 * yet available.
 *
 * NOTE:
 * We assume (dlen + skip) == sizeof(channel packet).
 */
int
vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen, uint32_t skip)
{
	uint32_t rindex, br_dsize = rbr->rxbr_dsize;

	KASSERT(dlen + skip > 0, ("invalid dlen %d, offset %u", dlen, skip));

	mtx_lock_spin(&rbr->rxbr_lock);

	/* The packet body, skipped header and 64bits offset must all fit. */
	if (vmbus_rxbr_avail(rbr) < dlen + skip + sizeof(uint64_t)) {
		mtx_unlock_spin(&rbr->rxbr_lock);
		return (EAGAIN);
	}

	/*
	 * Copy channel packet from RX bufring.
	 */
	rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, br_dsize);
	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);

	/*
	 * Discard this channel packet's 64bits offset, which is useless to us.
	 */
	rindex = VMBUS_BR_IDXINC(rindex, sizeof(uint64_t), br_dsize);

	/*
	 * XXX only compiler fence is needed.
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 *
	 * NOTE(review): the comment above wants prior *reads* ordered before
	 * the rindex store, but wmb() only orders stores — presumably safe on
	 * strongly-ordered x86; verify if this is ever built for other
	 * architectures.
	 */
	wmb();

	/*
	 * Update the read index _after_ the channel packet is fetched.
	 */
	rbr->rxbr_rindex = rindex;

	mtx_unlock_spin(&rbr->rxbr_lock);

	return (0);
}
418