1/*-
2 * Copyright (c) 2009-2012 Microsoft Corp.
3 * Copyright (c) 2010-2012 Citrix Inc.
4 * Copyright (c) 2012 NetApp Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/**
30 * HyperV vmbus network VSC (virtual services client) module
31 *
32 */
33
34
35#include <sys/param.h>
36#include <sys/kernel.h>
37#include <sys/socket.h>
38#include <sys/lock.h>
39#include <net/if.h>
40#include <net/if_arp.h>
41#include <machine/bus.h>
42#include <machine/atomic.h>
43
44#include <dev/hyperv/include/hyperv.h>
45#include "hv_net_vsc.h"
46#include "hv_rndis.h"
47#include "hv_rndis_filter.h"
48
49
50/*
51 * Forward declarations
52 */
53static void hv_nv_on_channel_callback(void *context);
54static int  hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
55static int  hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
56static int  hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
57static int  hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
58static int  hv_nv_connect_to_vsp(struct hv_device *device);
59static void hv_nv_on_send_completion(struct hv_device *device,
60				     hv_vm_packet_descriptor *pkt);
61static void hv_nv_on_receive(struct hv_device *device,
62			     hv_vm_packet_descriptor *pkt);
63static void hv_nv_send_receive_completion(struct hv_device *device,
64					  uint64_t tid);
65
66
67/*
68 *
69 */
70static inline netvsc_dev *
71hv_nv_alloc_net_device(struct hv_device *device)
72{
73	netvsc_dev *net_dev;
74	hn_softc_t *sc = device_get_softc(device->device);
75
76	net_dev = malloc(sizeof(netvsc_dev), M_DEVBUF, M_NOWAIT | M_ZERO);
77	if (net_dev == NULL) {
78		return (NULL);
79	}
80
81	net_dev->dev = device;
82	net_dev->destroy = FALSE;
83	sc->net_dev = net_dev;
84
85	return (net_dev);
86}
87
88/*
89 *
90 */
91static inline netvsc_dev *
92hv_nv_get_outbound_net_device(struct hv_device *device)
93{
94	hn_softc_t *sc = device_get_softc(device->device);
95	netvsc_dev *net_dev = sc->net_dev;;
96
97	if ((net_dev != NULL) && net_dev->destroy) {
98		return (NULL);
99	}
100
101	return (net_dev);
102}
103
104/*
105 *
106 */
107static inline netvsc_dev *
108hv_nv_get_inbound_net_device(struct hv_device *device)
109{
110	hn_softc_t *sc = device_get_softc(device->device);
111	netvsc_dev *net_dev = sc->net_dev;;
112
113	if (net_dev == NULL) {
114		return (net_dev);
115	}
116	/*
117	 * When the device is being destroyed; we only
118	 * permit incoming packets if and only if there
119	 * are outstanding sends.
120	 */
121	if (net_dev->destroy && net_dev->num_outstanding_sends == 0) {
122		return (NULL);
123	}
124
125	return (net_dev);
126}
127
128/*
129 * Net VSC initialize receive buffer with net VSP
130 *
131 * Net VSP:  Network virtual services client, also known as the
132 *     Hyper-V extensible switch and the synthetic data path.
133 */
134static int
135hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device)
136{
137	netvsc_dev *net_dev;
138	nvsp_msg *init_pkt;
139	int ret = 0;
140
141	net_dev = hv_nv_get_outbound_net_device(device);
142	if (!net_dev) {
143		return (ENODEV);
144	}
145
146	net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_DEVBUF,
147	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
148	if (net_dev->rx_buf == NULL) {
149		ret = ENOMEM;
150		goto cleanup;
151	}
152
153	/*
154	 * Establish the GPADL handle for this buffer on this channel.
155	 * Note:  This call uses the vmbus connection rather than the
156	 * channel to establish the gpadl handle.
157	 * GPADL:  Guest physical address descriptor list.
158	 */
159	ret = hv_vmbus_channel_establish_gpadl(
160		device->channel, net_dev->rx_buf,
161		net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle);
162	if (ret != 0) {
163		goto cleanup;
164	}
165
166	/* sema_wait(&ext->channel_init_sema); KYS CHECK */
167
168	/* Notify the NetVsp of the gpadl handle */
169	init_pkt = &net_dev->channel_init_packet;
170
171	memset(init_pkt, 0, sizeof(nvsp_msg));
172
173	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf;
174	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
175	    net_dev->rx_buf_gpadl_handle;
176	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
177	    NETVSC_RECEIVE_BUFFER_ID;
178
179	/* Send the gpadl notification request */
180
181	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
182	    sizeof(nvsp_msg), (uint64_t)init_pkt,
183	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
184	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
185	if (ret != 0) {
186		goto cleanup;
187	}
188
189	sema_wait(&net_dev->channel_init_sema);
190
191	/* Check the response */
192	if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status
193	    != nvsp_status_success) {
194		ret = EINVAL;
195		goto cleanup;
196	}
197
198	net_dev->rx_section_count =
199	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections;
200
201	net_dev->rx_sections = malloc(net_dev->rx_section_count *
202	    sizeof(nvsp_1_rx_buf_section), M_DEVBUF, M_NOWAIT);
203	if (net_dev->rx_sections == NULL) {
204		ret = EINVAL;
205		goto cleanup;
206	}
207	memcpy(net_dev->rx_sections,
208	    init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections,
209	    net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section));
210
211
212	/*
213	 * For first release, there should only be 1 section that represents
214	 * the entire receive buffer
215	 */
216	if (net_dev->rx_section_count != 1
217	    || net_dev->rx_sections->offset != 0) {
218		ret = EINVAL;
219		goto cleanup;
220	}
221
222	goto exit;
223
224cleanup:
225	hv_nv_destroy_rx_buffer(net_dev);
226
227exit:
228	return (ret);
229}
230
231/*
232 * Net VSC initialize send buffer with net VSP
233 */
234static int
235hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device)
236{
237	netvsc_dev *net_dev;
238	nvsp_msg *init_pkt;
239	int ret = 0;
240
241	net_dev = hv_nv_get_outbound_net_device(device);
242	if (!net_dev) {
243		return (ENODEV);
244	}
245
246	net_dev->send_buf  = contigmalloc(net_dev->send_buf_size, M_DEVBUF,
247	    M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
248	if (net_dev->send_buf == NULL) {
249		ret = ENOMEM;
250		goto cleanup;
251	}
252
253	/*
254	 * Establish the gpadl handle for this buffer on this channel.
255	 * Note:  This call uses the vmbus connection rather than the
256	 * channel to establish the gpadl handle.
257	 */
258	ret = hv_vmbus_channel_establish_gpadl(device->channel,
259	    net_dev->send_buf, net_dev->send_buf_size,
260	    &net_dev->send_buf_gpadl_handle);
261	if (ret != 0) {
262		goto cleanup;
263	}
264
265	/* Notify the NetVsp of the gpadl handle */
266
267	init_pkt = &net_dev->channel_init_packet;
268
269	memset(init_pkt, 0, sizeof(nvsp_msg));
270
271	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf;
272	init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle =
273	    net_dev->send_buf_gpadl_handle;
274	init_pkt->msgs.vers_1_msgs.send_rx_buf.id =
275	    NETVSC_SEND_BUFFER_ID;
276
277	/* Send the gpadl notification request */
278
279	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
280	    sizeof(nvsp_msg), (uint64_t)init_pkt,
281	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
282	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
283	if (ret != 0) {
284		goto cleanup;
285	}
286
287	sema_wait(&net_dev->channel_init_sema);
288
289	/* Check the response */
290	if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status
291	    != nvsp_status_success) {
292		ret = EINVAL;
293		goto cleanup;
294	}
295
296	net_dev->send_section_size =
297	    init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size;
298
299	goto exit;
300
301cleanup:
302	hv_nv_destroy_send_buffer(net_dev);
303
304exit:
305	return (ret);
306}
307
308/*
309 * Net VSC destroy receive buffer
310 */
311static int
312hv_nv_destroy_rx_buffer(netvsc_dev *net_dev)
313{
314	nvsp_msg *revoke_pkt;
315	int ret = 0;
316
317	/*
318	 * If we got a section count, it means we received a
319	 * send_rx_buf_complete msg
320	 * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore,
321	 * we need to send a revoke msg here
322	 */
323	if (net_dev->rx_section_count) {
324		/* Send the revoke receive buffer */
325		revoke_pkt = &net_dev->revoke_packet;
326		memset(revoke_pkt, 0, sizeof(nvsp_msg));
327
328		revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf;
329		revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id =
330		    NETVSC_RECEIVE_BUFFER_ID;
331
332		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
333		    revoke_pkt, sizeof(nvsp_msg),
334		    (uint64_t)revoke_pkt,
335		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
336
337		/*
338		 * If we failed here, we might as well return and have a leak
339		 * rather than continue and a bugchk
340		 */
341		if (ret != 0) {
342			return (ret);
343		}
344	}
345
346	/* Tear down the gpadl on the vsp end */
347	if (net_dev->rx_buf_gpadl_handle) {
348		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
349		    net_dev->rx_buf_gpadl_handle);
350		/*
351		 * If we failed here, we might as well return and have a leak
352		 * rather than continue and a bugchk
353		 */
354		if (ret != 0) {
355			return (ret);
356		}
357		net_dev->rx_buf_gpadl_handle = 0;
358	}
359
360	if (net_dev->rx_buf) {
361		/* Free up the receive buffer */
362		contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_DEVBUF);
363		net_dev->rx_buf = NULL;
364	}
365
366	if (net_dev->rx_sections) {
367		free(net_dev->rx_sections, M_DEVBUF);
368		net_dev->rx_sections = NULL;
369		net_dev->rx_section_count = 0;
370	}
371
372	return (ret);
373}
374
375/*
376 * Net VSC destroy send buffer
377 */
378static int
379hv_nv_destroy_send_buffer(netvsc_dev *net_dev)
380{
381	nvsp_msg *revoke_pkt;
382	int ret = 0;
383
384	/*
385	 * If we got a section count, it means we received a
386	 * send_rx_buf_complete msg
387	 * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore,
388	 * we need to send a revoke msg here
389	 */
390	if (net_dev->send_section_size) {
391		/* Send the revoke send buffer */
392		revoke_pkt = &net_dev->revoke_packet;
393		memset(revoke_pkt, 0, sizeof(nvsp_msg));
394
395		revoke_pkt->hdr.msg_type =
396		    nvsp_msg_1_type_revoke_send_buf;
397		revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id =
398		    NETVSC_SEND_BUFFER_ID;
399
400		ret = hv_vmbus_channel_send_packet(net_dev->dev->channel,
401		    revoke_pkt, sizeof(nvsp_msg),
402		    (uint64_t)revoke_pkt,
403		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
404		/*
405		 * If we failed here, we might as well return and have a leak
406		 * rather than continue and a bugchk
407		 */
408		if (ret != 0) {
409			return (ret);
410		}
411	}
412
413	/* Tear down the gpadl on the vsp end */
414	if (net_dev->send_buf_gpadl_handle) {
415		ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel,
416		    net_dev->send_buf_gpadl_handle);
417
418		/*
419		 * If we failed here, we might as well return and have a leak
420		 * rather than continue and a bugchk
421		 */
422		if (ret != 0) {
423			return (ret);
424		}
425		net_dev->send_buf_gpadl_handle = 0;
426	}
427
428	if (net_dev->send_buf) {
429		/* Free up the receive buffer */
430		contigfree(net_dev->send_buf, net_dev->send_buf_size, M_DEVBUF);
431		net_dev->send_buf = NULL;
432	}
433
434	return (ret);
435}
436
437
438/*
439 * Attempt to negotiate the caller-specified NVSP version
440 *
441 * For NVSP v2, Server 2008 R2 does not set
442 * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers
443 * to the negotiated version, so we cannot rely on that.
444 */
445static int
446hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev,
447			      uint32_t nvsp_ver)
448{
449	nvsp_msg *init_pkt;
450	int ret;
451
452	init_pkt = &net_dev->channel_init_packet;
453	memset(init_pkt, 0, sizeof(nvsp_msg));
454	init_pkt->hdr.msg_type = nvsp_msg_type_init;
455
456	/*
457	 * Specify parameter as the only acceptable protocol version
458	 */
459	init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver;
460	init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver;
461
462	/* Send the init request */
463	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
464	    sizeof(nvsp_msg), (uint64_t)init_pkt,
465	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
466	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
467	if (ret != 0)
468		return (-1);
469
470	sema_wait(&net_dev->channel_init_sema);
471
472	if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success)
473		return (EINVAL);
474
475	return (0);
476}
477
478/*
479 * Send NDIS version 2 config packet containing MTU.
480 *
481 * Not valid for NDIS version 1.
482 */
483static int
484hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu)
485{
486	netvsc_dev *net_dev;
487	nvsp_msg *init_pkt;
488	int ret;
489
490	net_dev = hv_nv_get_outbound_net_device(device);
491	if (!net_dev)
492		return (-ENODEV);
493
494	/*
495	 * Set up configuration packet, write MTU
496	 * Indicate we are capable of handling VLAN tags
497	 */
498	init_pkt = &net_dev->channel_init_packet;
499	memset(init_pkt, 0, sizeof(nvsp_msg));
500	init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config;
501	init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu;
502	init_pkt->
503		msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q
504		= 1;
505
506	/* Send the configuration packet */
507	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
508	    sizeof(nvsp_msg), (uint64_t)init_pkt,
509	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
510	if (ret != 0)
511		return (-EINVAL);
512
513	return (0);
514}
515
516/*
517 * Net VSC connect to VSP
518 */
519static int
520hv_nv_connect_to_vsp(struct hv_device *device)
521{
522	netvsc_dev *net_dev;
523	nvsp_msg *init_pkt;
524	uint32_t nvsp_vers;
525	uint32_t ndis_version;
526	int ret = 0;
527	device_t dev = device->device;
528	hn_softc_t *sc = device_get_softc(dev);
529	struct ifnet *ifp = sc->arpcom.ac_ifp;
530
531	net_dev = hv_nv_get_outbound_net_device(device);
532	if (!net_dev) {
533		return (ENODEV);
534	}
535
536	/*
537	 * Negotiate the NVSP version.  Try NVSP v2 first.
538	 */
539	nvsp_vers = NVSP_PROTOCOL_VERSION_2;
540	ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
541	if (ret != 0) {
542		/* NVSP v2 failed, try NVSP v1 */
543		nvsp_vers = NVSP_PROTOCOL_VERSION_1;
544		ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers);
545		if (ret != 0) {
546			/* NVSP v1 failed, return bad status */
547			return (ret);
548		}
549	}
550	net_dev->nvsp_version = nvsp_vers;
551
552	/*
553	 * Set the MTU if supported by this NVSP protocol version
554	 * This needs to be right after the NVSP init message per Haiyang
555	 */
556	if (nvsp_vers >= NVSP_PROTOCOL_VERSION_2)
557		ret = hv_nv_send_ndis_config(device, ifp->if_mtu);
558
559	/*
560	 * Send the NDIS version
561	 */
562	init_pkt = &net_dev->channel_init_packet;
563
564	memset(init_pkt, 0, sizeof(nvsp_msg));
565
566	/*
567	 * Updated to version 5.1, minimum, for VLAN per Haiyang
568	 */
569	ndis_version = NDIS_VERSION;
570
571	init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers;
572	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers =
573	    (ndis_version & 0xFFFF0000) >> 16;
574	init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers =
575	    ndis_version & 0xFFFF;
576
577	/* Send the init request */
578
579	ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
580	    sizeof(nvsp_msg), (uint64_t)init_pkt,
581	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
582	if (ret != 0) {
583		goto cleanup;
584	}
585	/*
586	 * TODO:  BUGBUG - We have to wait for the above msg since the netvsp
587	 * uses KMCL which acknowledges packet (completion packet)
588	 * since our Vmbus always set the
589	 * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag
590	 */
591	/* sema_wait(&NetVscChannel->channel_init_sema); */
592
593	/* Post the big receive buffer to NetVSP */
594	ret = hv_nv_init_rx_buffer_with_net_vsp(device);
595	if (ret == 0)
596		ret = hv_nv_init_send_buffer_with_net_vsp(device);
597
598cleanup:
599	return (ret);
600}
601
602/*
603 * Net VSC disconnect from VSP
604 */
605static void
606hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
607{
608	hv_nv_destroy_rx_buffer(net_dev);
609	hv_nv_destroy_send_buffer(net_dev);
610}
611
612/*
613 * Net VSC on device add
614 *
615 * Callback when the device belonging to this driver is added
616 */
617netvsc_dev *
618hv_nv_on_device_add(struct hv_device *device, void *additional_info)
619{
620	netvsc_dev *net_dev;
621	netvsc_packet *packet;
622	netvsc_packet *next_packet;
623	int i, ret = 0;
624
625	net_dev = hv_nv_alloc_net_device(device);
626	if (!net_dev)
627		goto cleanup;
628
629	/* Initialize the NetVSC channel extension */
630	net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
631	mtx_init(&net_dev->rx_pkt_list_lock, "HV-RPL", NULL,
632	    MTX_SPIN | MTX_RECURSE);
633
634	net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
635
636	/* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */
637	STAILQ_INIT(&net_dev->myrx_packet_list);
638
639	/*
640	 * malloc a sufficient number of netvsc_packet buffers to hold
641	 * a packet list.  Add them to the netvsc device packet queue.
642	 */
643	for (i=0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
644		packet = malloc(sizeof(netvsc_packet) +
645		    (NETVSC_RECEIVE_SG_COUNT * sizeof(hv_vmbus_page_buffer)),
646		    M_DEVBUF, M_NOWAIT | M_ZERO);
647		if (!packet) {
648			break;
649		}
650		STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet,
651		    mylist_entry);
652	}
653
654	sema_init(&net_dev->channel_init_sema, 0, "netdev_sema");
655
656	/*
657	 * Open the channel
658	 */
659	ret = hv_vmbus_channel_open(device->channel,
660	    NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE,
661	    NULL, 0, hv_nv_on_channel_callback, device);
662	if (ret != 0)
663		goto cleanup;
664
665	/*
666	 * Connect with the NetVsp
667	 */
668	ret = hv_nv_connect_to_vsp(device);
669	if (ret != 0)
670		goto close;
671
672	return (net_dev);
673
674close:
675	/* Now, we can close the channel safely */
676
677	hv_vmbus_channel_close(device->channel);
678
679cleanup:
680	/*
681	 * Free the packet buffers on the netvsc device packet queue.
682	 * Release other resources.
683	 */
684	if (net_dev) {
685		sema_destroy(&net_dev->channel_init_sema);
686
687		packet = STAILQ_FIRST(&net_dev->myrx_packet_list);
688		while (packet != NULL) {
689			next_packet = STAILQ_NEXT(packet, mylist_entry);
690			free(packet, M_DEVBUF);
691			packet = next_packet;
692		}
693		/* Reset the list to initial state */
694		STAILQ_INIT(&net_dev->myrx_packet_list);
695
696		mtx_destroy(&net_dev->rx_pkt_list_lock);
697
698		free(net_dev, M_DEVBUF);
699	}
700
701	return (NULL);
702}
703
704/*
705 * Net VSC on device remove
706 */
707int
708hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
709{
710	netvsc_packet *net_vsc_pkt;
711	netvsc_packet *next_net_vsc_pkt;
712	hn_softc_t *sc = device_get_softc(device->device);
713	netvsc_dev *net_dev = sc->net_dev;;
714
715	/* Stop outbound traffic ie sends and receives completions */
716	mtx_lock(&device->channel->inbound_lock);
717	net_dev->destroy = TRUE;
718	mtx_unlock(&device->channel->inbound_lock);
719
720	/* Wait for all send completions */
721	while (net_dev->num_outstanding_sends) {
722		DELAY(100);
723	}
724
725	hv_nv_disconnect_from_vsp(net_dev);
726
727	/* At this point, no one should be accessing net_dev except in here */
728
729	/* Now, we can close the channel safely */
730
731	if (!destroy_channel) {
732		device->channel->state =
733		    HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE;
734	}
735
736	hv_vmbus_channel_close(device->channel);
737
738	/* Release all resources */
739	net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
740	while (net_vsc_pkt != NULL) {
741		next_net_vsc_pkt = STAILQ_NEXT(net_vsc_pkt, mylist_entry);
742		free(net_vsc_pkt, M_DEVBUF);
743		net_vsc_pkt = next_net_vsc_pkt;
744	}
745
746	/* Reset the list to initial state */
747	STAILQ_INIT(&net_dev->myrx_packet_list);
748
749	mtx_destroy(&net_dev->rx_pkt_list_lock);
750	sema_destroy(&net_dev->channel_init_sema);
751	free(net_dev, M_DEVBUF);
752
753	return (0);
754}
755
756/*
757 * Net VSC on send completion
758 */
759static void
760hv_nv_on_send_completion(struct hv_device *device, hv_vm_packet_descriptor *pkt)
761{
762	netvsc_dev *net_dev;
763	nvsp_msg *nvsp_msg_pkt;
764	netvsc_packet *net_vsc_pkt;
765
766	net_dev = hv_nv_get_inbound_net_device(device);
767	if (!net_dev) {
768		return;
769	}
770
771	nvsp_msg_pkt =
772	    (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
773
774	if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete
775		|| nvsp_msg_pkt->hdr.msg_type
776			== nvsp_msg_1_type_send_rx_buf_complete
777		|| nvsp_msg_pkt->hdr.msg_type
778			== nvsp_msg_1_type_send_send_buf_complete) {
779		/* Copy the response back */
780		memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
781		    sizeof(nvsp_msg));
782		sema_post(&net_dev->channel_init_sema);
783	} else if (nvsp_msg_pkt->hdr.msg_type ==
784				   nvsp_msg_1_type_send_rndis_pkt_complete) {
785		/* Get the send context */
786		net_vsc_pkt =
787		    (netvsc_packet *)(unsigned long)pkt->transaction_id;
788
789		/* Notify the layer above us */
790		net_vsc_pkt->compl.send.on_send_completion(
791		    net_vsc_pkt->compl.send.send_completion_context);
792
793		atomic_subtract_int(&net_dev->num_outstanding_sends, 1);
794	}
795}
796
797/*
798 * Net VSC on send
799 * Sends a packet on the specified Hyper-V device.
800 * Returns 0 on success, non-zero on failure.
801 */
802int
803hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
804{
805	netvsc_dev *net_dev;
806	nvsp_msg send_msg;
807	int ret;
808
809	net_dev = hv_nv_get_outbound_net_device(device);
810	if (!net_dev)
811		return (ENODEV);
812
813	send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt;
814	if (pkt->is_data_pkt) {
815		/* 0 is RMC_DATA */
816		send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0;
817	} else {
818		/* 1 is RMC_CONTROL */
819		send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1;
820	}
821
822	/* Not using send buffer section */
823	send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx =
824	    0xFFFFFFFF;
825	send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = 0;
826
827	if (pkt->page_buf_count) {
828		ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel,
829		    pkt->page_buffers, pkt->page_buf_count,
830		    &send_msg, sizeof(nvsp_msg), (uint64_t)pkt);
831	} else {
832		ret = hv_vmbus_channel_send_packet(device->channel,
833		    &send_msg, sizeof(nvsp_msg), (uint64_t)pkt,
834		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
835		    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
836	}
837
838	/* Record outstanding send only if send_packet() succeeded */
839	if (ret == 0)
840		atomic_add_int(&net_dev->num_outstanding_sends, 1);
841
842	return (ret);
843}
844
845/*
846 * Net VSC on receive
847 *
848 * In the FreeBSD Hyper-V virtual world, this function deals exclusively
849 * with virtual addresses.
850 */
851static void
852hv_nv_on_receive(struct hv_device *device, hv_vm_packet_descriptor *pkt)
853{
854	netvsc_dev *net_dev;
855	hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
856	nvsp_msg *nvsp_msg_pkt;
857	netvsc_packet *net_vsc_pkt = NULL;
858	unsigned long start;
859	xfer_page_packet *xfer_page_pkt = NULL;
860	STAILQ_HEAD(PKT_LIST, netvsc_packet_) mylist_head =
861	    STAILQ_HEAD_INITIALIZER(mylist_head);
862	int count = 0;
863	int i = 0;
864
865	net_dev = hv_nv_get_inbound_net_device(device);
866	if (!net_dev)
867		return;
868
869	/*
870	 * All inbound packets other than send completion should be
871	 * xfer page packet.
872	 */
873	if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES)
874		return;
875
876	nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt
877		+ (pkt->data_offset8 << 3));
878
879	/* Make sure this is a valid nvsp packet */
880	if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt)
881		return;
882
883	vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt;
884
885	if (vm_xfer_page_pkt->transfer_page_set_id
886		!= NETVSC_RECEIVE_BUFFER_ID) {
887		return;
888	}
889
890	STAILQ_INIT(&mylist_head);
891
892	/*
893	 * Grab free packets (range count + 1) to represent this xfer page
894	 * packet.  +1 to represent the xfer page packet itself.  We grab it
895	 * here so that we know exactly how many we can fulfill.
896	 */
897	mtx_lock_spin(&net_dev->rx_pkt_list_lock);
898	while (!STAILQ_EMPTY(&net_dev->myrx_packet_list)) {
899		net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list);
900		STAILQ_REMOVE_HEAD(&net_dev->myrx_packet_list, mylist_entry);
901
902		STAILQ_INSERT_TAIL(&mylist_head, net_vsc_pkt, mylist_entry);
903
904		if (++count == vm_xfer_page_pkt->range_count + 1)
905			break;
906	}
907
908	mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
909
910	/*
911	 * We need at least 2 netvsc pkts (1 to represent the xfer page
912	 * and at least 1 for the range) i.e. we can handle some of the
913	 * xfer page packet ranges...
914	 */
915	if (count < 2) {
916		/* Return netvsc packet to the freelist */
917		mtx_lock_spin(&net_dev->rx_pkt_list_lock);
918		for (i=count; i != 0; i--) {
919			net_vsc_pkt = STAILQ_FIRST(&mylist_head);
920			STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
921
922			STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
923			    net_vsc_pkt, mylist_entry);
924		}
925		mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
926
927		hv_nv_send_receive_completion(device,
928		    vm_xfer_page_pkt->d.transaction_id);
929
930		return;
931	}
932
933	/* Take the first packet in the list */
934	xfer_page_pkt = (xfer_page_packet *)STAILQ_FIRST(&mylist_head);
935	STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
936
937	/* This is how many data packets we can supply */
938	xfer_page_pkt->count = count - 1;
939
940	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
941	for (i=0; i < (count - 1); i++) {
942		net_vsc_pkt = STAILQ_FIRST(&mylist_head);
943		STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry);
944
945		/*
946		 * Initialize the netvsc packet
947		 */
948		net_vsc_pkt->xfer_page_pkt = xfer_page_pkt;
949		net_vsc_pkt->compl.rx.rx_completion_context = net_vsc_pkt;
950		net_vsc_pkt->device = device;
951		/* Save this so that we can send it back */
952		net_vsc_pkt->compl.rx.rx_completion_tid =
953		    vm_xfer_page_pkt->d.transaction_id;
954
955		net_vsc_pkt->tot_data_buf_len =
956		    vm_xfer_page_pkt->ranges[i].byte_count;
957		net_vsc_pkt->page_buf_count = 1;
958
959		net_vsc_pkt->page_buffers[0].length =
960		    vm_xfer_page_pkt->ranges[i].byte_count;
961
962		/* The virtual address of the packet in the receive buffer */
963		start = ((unsigned long)net_dev->rx_buf +
964		    vm_xfer_page_pkt->ranges[i].byte_offset);
965		start = ((unsigned long)start) & ~(PAGE_SIZE - 1);
966
967		/* Page number of the virtual page containing packet start */
968		net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;
969
970		/* Calculate the page relative offset */
971		net_vsc_pkt->page_buffers[0].offset =
972		    vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);
973
974		/*
975		 * In this implementation, we are dealing with virtual
976		 * addresses exclusively.  Since we aren't using physical
977		 * addresses at all, we don't care if a packet crosses a
978		 * page boundary.  For this reason, the original code to
979		 * check for and handle page crossings has been removed.
980		 */
981
982		/*
983		 * Pass it to the upper layer.  The receive completion call
984		 * has been moved into this function.
985		 */
986		hv_rf_on_receive(device, net_vsc_pkt);
987
988		/*
989		 * Moved completion call back here so that all received
990		 * messages (not just data messages) will trigger a response
991		 * message back to the host.
992		 */
993		hv_nv_on_receive_completion(net_vsc_pkt);
994	}
995}
996
997/*
998 * Net VSC send receive completion
999 */
1000static void
1001hv_nv_send_receive_completion(struct hv_device *device, uint64_t tid)
1002{
1003	nvsp_msg rx_comp_msg;
1004	int retries = 0;
1005	int ret = 0;
1006
1007	rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
1008
1009	/* Pass in the status */
1010	rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status =
1011	    nvsp_status_success;
1012
1013retry_send_cmplt:
1014	/* Send the completion */
1015	ret = hv_vmbus_channel_send_packet(device->channel, &rx_comp_msg,
1016	    sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
1017	if (ret == 0) {
1018		/* success */
1019		/* no-op */
1020	} else if (ret == EAGAIN) {
1021		/* no more room... wait a bit and attempt to retry 3 times */
1022		retries++;
1023
1024		if (retries < 4) {
1025			DELAY(100);
1026			goto retry_send_cmplt;
1027		}
1028	}
1029}
1030
1031/*
1032 * Net VSC on receive completion
1033 *
1034 * Send a receive completion packet to RNDIS device (ie NetVsp)
1035 */
1036void
1037hv_nv_on_receive_completion(void *context)
1038{
1039	netvsc_packet *packet = (netvsc_packet *)context;
1040	struct hv_device *device = (struct hv_device *)packet->device;
1041	netvsc_dev    *net_dev;
1042	uint64_t       tid = 0;
1043	boolean_t send_rx_completion = FALSE;
1044
1045	/*
1046	 * Even though it seems logical to do a hv_nv_get_outbound_net_device()
1047	 * here to send out receive completion, we are using
1048	 * hv_nv_get_inbound_net_device() since we may have disabled
1049	 * outbound traffic already.
1050	 */
1051	net_dev = hv_nv_get_inbound_net_device(device);
1052	if (net_dev == NULL)
1053		return;
1054
1055	/* Overloading use of the lock. */
1056	mtx_lock_spin(&net_dev->rx_pkt_list_lock);
1057
1058	packet->xfer_page_pkt->count--;
1059
1060	/*
1061	 * Last one in the line that represent 1 xfer page packet.
1062	 * Return the xfer page packet itself to the free list.
1063	 */
1064	if (packet->xfer_page_pkt->count == 0) {
1065		send_rx_completion = TRUE;
1066		tid = packet->compl.rx.rx_completion_tid;
1067		STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list,
1068		    (netvsc_packet *)(packet->xfer_page_pkt), mylist_entry);
1069	}
1070
1071	/* Put the packet back on the free list */
1072	STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet, mylist_entry);
1073	mtx_unlock_spin(&net_dev->rx_pkt_list_lock);
1074
1075	/* Send a receive completion for the xfer page packet */
1076	if (send_rx_completion)
1077		hv_nv_send_receive_completion(device, tid);
1078}
1079
1080/*
1081 * Net VSC on channel callback
1082 */
1083static void
1084hv_nv_on_channel_callback(void *context)
1085{
1086	/* Fixme:  Magic number */
1087	const int net_pkt_size = 2048;
1088	struct hv_device *device = (struct hv_device *)context;
1089	netvsc_dev *net_dev;
1090	uint32_t bytes_rxed;
1091	uint64_t request_id;
1092	uint8_t  *packet;
1093	hv_vm_packet_descriptor *desc;
1094	uint8_t *buffer;
1095	int     bufferlen = net_pkt_size;
1096	int     ret = 0;
1097
1098	packet = malloc(net_pkt_size * sizeof(uint8_t), M_DEVBUF, M_NOWAIT);
1099	if (!packet)
1100		return;
1101
1102	buffer = packet;
1103
1104	net_dev = hv_nv_get_inbound_net_device(device);
1105	if (net_dev == NULL)
1106		goto out;
1107
1108	do {
1109		ret = hv_vmbus_channel_recv_packet_raw(device->channel,
1110		    buffer, bufferlen, &bytes_rxed, &request_id);
1111		if (ret == 0) {
1112			if (bytes_rxed > 0) {
1113				desc = (hv_vm_packet_descriptor *)buffer;
1114				switch (desc->type) {
1115				case HV_VMBUS_PACKET_TYPE_COMPLETION:
1116					hv_nv_on_send_completion(device, desc);
1117					break;
1118				case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
1119					hv_nv_on_receive(device, desc);
1120					break;
1121				default:
1122					break;
1123				}
1124			} else {
1125				break;
1126			}
1127		} else if (ret == ENOBUFS) {
1128			/* Handle large packet */
1129			free(buffer, M_DEVBUF);
1130			buffer = malloc(bytes_rxed, M_DEVBUF, M_NOWAIT);
1131			if (buffer == NULL) {
1132				break;
1133			}
1134			bufferlen = bytes_rxed;
1135		}
1136	} while (1);
1137
1138out:
1139	free(buffer, M_DEVBUF);
1140}
1141
1142