vsw.c revision 11878:ac93462db6d7
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/types.h>
28#include <sys/errno.h>
29#include <sys/debug.h>
30#include <sys/time.h>
31#include <sys/sysmacros.h>
32#include <sys/systm.h>
33#include <sys/user.h>
34#include <sys/stropts.h>
35#include <sys/stream.h>
36#include <sys/strlog.h>
37#include <sys/strsubr.h>
38#include <sys/cmn_err.h>
39#include <sys/cpu.h>
40#include <sys/kmem.h>
41#include <sys/conf.h>
42#include <sys/ddi.h>
43#include <sys/sunddi.h>
44#include <sys/ksynch.h>
45#include <sys/stat.h>
46#include <sys/kstat.h>
47#include <sys/vtrace.h>
48#include <sys/strsun.h>
49#include <sys/dlpi.h>
50#include <sys/ethernet.h>
51#include <net/if.h>
52#include <sys/varargs.h>
53#include <sys/machsystm.h>
54#include <sys/modctl.h>
55#include <sys/modhash.h>
56#include <sys/mac_provider.h>
57#include <sys/mac_ether.h>
58#include <sys/taskq.h>
59#include <sys/note.h>
60#include <sys/mach_descrip.h>
61#include <sys/mac_provider.h>
62#include <sys/mdeg.h>
63#include <sys/ldc.h>
64#include <sys/vsw_fdb.h>
65#include <sys/vsw.h>
66#include <sys/vio_mailbox.h>
67#include <sys/vnet_mailbox.h>
68#include <sys/vnet_common.h>
69#include <sys/vio_util.h>
70#include <sys/sdt.h>
71#include <sys/atomic.h>
72#include <sys/callb.h>
73#include <sys/vlan.h>
74
75/*
76 * Function prototypes.
77 */
78static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
79static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
80static	int vsw_unattach(vsw_t *vswp);
81static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
82static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
83void vsw_destroy_rxpools(void *);
84
85/* MDEG routines */
86static	int vsw_mdeg_register(vsw_t *vswp);
87static	void vsw_mdeg_unregister(vsw_t *vswp);
88static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
89static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
90static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
91static	int vsw_read_mdprops(vsw_t *vswp);
92static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
93	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
94	uint16_t *nvidsp, uint16_t *default_idp);
95static	void vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp,
96	mde_cookie_t node, uint64_t *bw);
97static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
98	md_t *mdp, mde_cookie_t *node);
99static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
100	mde_cookie_t node);
101static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
102	uint32_t *mtu);
103static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
104static	void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
105	boolean_t *pls);
106static	void vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
107	uint64_t *bw);
108static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
109static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
110static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
111	vsw_vlanid_t *vids2, int nvids);
112
113/* Mac driver related routines */
114static int vsw_mac_register(vsw_t *);
115static int vsw_mac_unregister(vsw_t *);
116static int vsw_m_stat(void *, uint_t, uint64_t *);
117static void vsw_m_stop(void *arg);
118static int vsw_m_start(void *arg);
119static int vsw_m_unicst(void *arg, const uint8_t *);
120static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
121static int vsw_m_promisc(void *arg, boolean_t);
122static mblk_t *vsw_m_tx(void *arg, mblk_t *);
123void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
124void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
125    mblk_t *mp, vsw_macrx_flags_t flags);
126void vsw_physlink_state_update(vsw_t *vswp);
127
128/*
129 * Functions imported from other files.
130 */
131extern void vsw_setup_switching_thread(void *arg);
132extern int vsw_setup_switching_start(vsw_t *vswp);
133extern void vsw_setup_switching_stop(vsw_t *vswp);
134extern int vsw_setup_switching(vsw_t *);
135extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
136    vsw_port_t *port, mac_resource_handle_t mrh);
137extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
138extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
139extern void vsw_del_mcst_vsw(vsw_t *);
140extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
141extern void vsw_detach_ports(vsw_t *vswp);
142extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
143extern int vsw_port_detach(vsw_t *vswp, int p_instance);
144static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
145	md_t *prev_mdp, mde_cookie_t prev_mdex);
146extern	int vsw_port_attach(vsw_port_t *port);
147extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
148extern int vsw_mac_open(vsw_t *vswp);
149extern void vsw_mac_close(vsw_t *vswp);
150extern void vsw_mac_cleanup_ports(vsw_t *vswp);
151extern void vsw_unset_addrs(vsw_t *vswp);
152extern void vsw_setup_switching_post_process(vsw_t *vswp);
153extern void vsw_create_vlans(void *arg, int type);
154extern void vsw_destroy_vlans(void *arg, int type);
155extern void vsw_vlan_add_ids(void *arg, int type);
156extern void vsw_vlan_remove_ids(void *arg, int type);
157extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
158extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
159	mblk_t **npt);
160extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
161extern void vsw_hio_cleanup(vsw_t *vswp);
162extern void vsw_hio_start_ports(vsw_t *vswp);
163extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
164extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
165extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
166extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
167    vsw_vlanid_t *new_vids, int new_nvids);
168extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
169extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
170extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
171    uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
172extern void vsw_reset_ports(vsw_t *vswp);
173extern void vsw_port_reset(vsw_port_t *portp);
174extern void vsw_physlink_update_ports(vsw_t *vswp);
175extern void vsw_update_bandwidth(vsw_t *vswp, vsw_port_t *port, int type,
176    uint64_t maxbw);
177
178/*
179 * Internal tunables.
180 */
181int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
182int	vsw_wretries = 100;		/* # of write attempts */
183int	vsw_desc_delay = 0;		/* delay in us */
184int	vsw_read_attempts = 5;		/* # of reads of descriptor */
185int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
186int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
187					/* 300*3 = 900sec(15min) of max tmout */
188int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
189int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
190int	vsw_ldc_retries = 5;		/* # of ldc_close() retries */
191int	vsw_ldc_delay = 1000;		/* 1 ms delay for ldc_close() */
192boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
193boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
194int	vsw_rxpool_cleanup_delay = 100000;	/* 100ms */
195
196
197uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
198uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
199uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */
200
201/* delay in usec to wait for all references on a fdb entry to be dropped */
202uint32_t vsw_fdbe_refcnt_delay = 10;
203
204/*
205 * Default vlan id. This is only used internally when the "default-vlan-id"
206 * property is not present in the MD device node. Therefore, this should not be
207 * used as a tunable; if this value is changed, the corresponding variable
208 * should be updated to the same value in all vnets connected to this vsw.
209 */
210uint16_t	vsw_default_vlan_id = 1;
211
212/*
213 * Workaround for a version handshake bug in obp's vnet.
214 * If vsw initiates version negotiation starting from the highest version,
215 * obp sends a nack and terminates version handshake. To workaround
216 * this, we do not initiate version handshake when the channel comes up.
217 * Instead, we wait for the peer to send its version info msg and go through
218 * the version protocol exchange. If we successfully negotiate a version,
219 * before sending the ack, we send our version info msg to the peer
220 * using the <major,minor> version that we are about to ack.
221 */
222boolean_t vsw_obp_ver_proto_workaround = B_TRUE;
223
224/*
225 * In the absence of "priority-ether-types" property in MD, the following
226 * internal tunable can be set to specify a single priority ethertype.
227 */
228uint64_t vsw_pri_eth_type = 0;
229
230/*
231 * Number of transmit priority buffers that are preallocated per device.
232 * This number is chosen to be a small value to throttle transmission
233 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
234 */
235uint32_t vsw_pri_tx_nmblks = 64;
236
237/*
238 * Number of RARP packets sent to announce macaddr to the physical switch,
239 * after vsw's physical device is changed dynamically or after a guest (client
240 * vnet) is live migrated in.
241 */
242uint32_t vsw_publish_macaddr_count = 3;
243
244boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
245int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanp */
246int vsw_hio_cleanup_delay = 10000;	/* 10ms */
247
248/* Number of transmit descriptors -  must be power of 2 */
249uint32_t vsw_ntxds = VSW_RING_NUM_EL;
250
251/*
252 * Max number of mblks received in one receive operation.
253 */
254uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);
255
256/*
257 * Internal tunables for receive buffer pools, that is,  the size and number of
258 * mblks for each pool. At least 3 sizes must be specified if these are used.
259 * The sizes must be specified in increasing order. Non-zero value of the first
260 * size will be used as a hint to use these values instead of the algorithm
261 * that determines the sizes based on MTU.
262 */
263uint32_t vsw_mblk_size1 = 0;
264uint32_t vsw_mblk_size2 = 0;
265uint32_t vsw_mblk_size3 = 0;
266uint32_t vsw_mblk_size4 = 0;
267uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
268uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
269uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
270uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */
271
272/*
273 * Set this to non-zero to enable additional internal receive buffer pools
274 * based on the MTU of the device for better performance at the cost of more
275 * memory consumption. This is turned off by default, to use allocb(9F) for
276 * receive buffer allocations of sizes > 2K.
277 */
278boolean_t vsw_jumbo_rxpools = B_FALSE;
279
280/*
281 * vsw_max_tx_qcount is the maximum # of packets that can be queued
282 * before the tx worker thread begins processing the queue. Its value
283 * is chosen to be 4x the default length of tx descriptor ring.
284 */
285uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;
286
287/*
288 * MAC callbacks
289 */
290static	mac_callbacks_t	vsw_m_callbacks = {
291	0,
292	vsw_m_stat,
293	vsw_m_start,
294	vsw_m_stop,
295	vsw_m_promisc,
296	vsw_m_multicst,
297	vsw_m_unicst,
298	vsw_m_tx
299};
300
301static	struct	cb_ops	vsw_cb_ops = {
302	nulldev,			/* cb_open */
303	nulldev,			/* cb_close */
304	nodev,				/* cb_strategy */
305	nodev,				/* cb_print */
306	nodev,				/* cb_dump */
307	nodev,				/* cb_read */
308	nodev,				/* cb_write */
309	nodev,				/* cb_ioctl */
310	nodev,				/* cb_devmap */
311	nodev,				/* cb_mmap */
312	nodev,				/* cb_segmap */
313	nochpoll,			/* cb_chpoll */
314	ddi_prop_op,			/* cb_prop_op */
315	NULL,				/* cb_stream */
316	D_MP,				/* cb_flag */
317	CB_REV,				/* rev */
318	nodev,				/* int (*cb_aread)() */
319	nodev				/* int (*cb_awrite)() */
320};
321
322static	struct	dev_ops	vsw_ops = {
323	DEVO_REV,		/* devo_rev */
324	0,			/* devo_refcnt */
325	NULL,			/* devo_getinfo */
326	nulldev,		/* devo_identify */
327	nulldev,		/* devo_probe */
328	vsw_attach,		/* devo_attach */
329	vsw_detach,		/* devo_detach */
330	nodev,			/* devo_reset */
331	&vsw_cb_ops,		/* devo_cb_ops */
332	(struct bus_ops *)NULL,	/* devo_bus_ops */
333	ddi_power		/* devo_power */
334};
335
336extern	struct	mod_ops	mod_driverops;
337static struct modldrv vswmodldrv = {
338	&mod_driverops,
339	"sun4v Virtual Switch",
340	&vsw_ops,
341};
342
/*
 * Acquire / release all three per-channel locks of an ldc.
 *
 * The locks are taken in the order ldc_cblock, ldc_rxlock, ldc_txlock and
 * dropped in the reverse order. Each macro expands to a single statement
 * (do { } while (0)), so it stays correct when used as the body of an
 * unbraced if/else or loop; invoke it with a trailing semicolon.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	do {						\
		mutex_enter(&((ldcp)->ldc_cblock));	\
		mutex_enter(&((ldcp)->ldc_rxlock));	\
		mutex_enter(&((ldcp)->ldc_txlock));	\
	_NOTE(CONSTCOND)				\
	} while (0)
#define	LDC_EXIT_LOCK(ldcp)	\
	do {						\
		mutex_exit(&((ldcp)->ldc_txlock));	\
		mutex_exit(&((ldcp)->ldc_rxlock));	\
		mutex_exit(&((ldcp)->ldc_cblock));	\
	_NOTE(CONSTCOND)				\
	} while (0)
351
352/* Driver soft state ptr  */
353static void	*vsw_state;
354
355/*
356 * Linked list of "vsw_t" structures - one per instance.
357 */
358vsw_t		*vsw_head = NULL;
359krwlock_t	vsw_rw;
360
/*
 * MD property names.
 */

/* Node identification */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";

/* Physical device and switching mode */
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";

/* MAC addresses */
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";

/* Channels and node ids */
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";

/* Priority ethertypes */
static char pri_types_propname[] = "priority-ether-types";

/* VLAN ids, for the vsw device itself and for its ports */
static char vsw_pvid_propname[] = "port-vlan-id";
static char vsw_vid_propname[] = "vlan-id";
static char vsw_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";

/* HybridIO, MTU and link properties */
static char hybrid_propname[] = "hybrid";
static char vsw_mtu_propname[] = "mtu";
static char vsw_linkprop_propname[] = "linkprop";

/* Bandwidth limit; the vsw and port variants use the same property name */
static char vsw_maxbw_propname[] = "maxbw";
static char port_maxbw_propname[] = "maxbw";
385
386/*
387 * Matching criteria passed to the MDEG to register interest
388 * in changes to 'virtual-device-port' nodes identified by their
389 * 'id' property.
390 */
391static md_prop_match_t vport_prop_match[] = {
392	{ MDET_PROP_VAL,    "id"   },
393	{ MDET_LIST_END,    NULL    }
394};
395
396static mdeg_node_match_t vport_match = { "virtual-device-port",
397						vport_prop_match };
398
399/*
400 * Matching criteria passed to the MDEG to register interest
401 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
402 * by their 'name' and 'cfg-handle' properties.
403 */
404static md_prop_match_t vdev_prop_match[] = {
405	{ MDET_PROP_STR,    "name"   },
406	{ MDET_PROP_VAL,    "cfg-handle" },
407	{ MDET_LIST_END,    NULL    }
408};
409
410static mdeg_node_match_t vdev_match = { "virtual-device",
411						vdev_prop_match };
412
413
414/*
415 * Specification of an MD node passed to the MDEG to filter any
416 * 'vport' nodes that do not belong to the specified node. This
417 * template is copied for each vsw instance and filled in with
418 * the appropriate 'cfg-handle' value before being passed to the MDEG.
419 */
420static mdeg_prop_spec_t vsw_prop_template[] = {
421	{ MDET_PROP_STR,    "name",		vsw_propname },
422	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
423	{ MDET_LIST_END,    NULL,		NULL	}
424};
425
426#define	VSW_SET_MDEG_PROP_INST(specp, val)	(specp)[1].ps_val = (val);
427
428#ifdef	DEBUG
/*
 * Debug message mask. Each bit enables one class of messages; set to 0x1f
 * to enable all msgs or 0x0 to turn all off.
 *
 *	0x01:	Function entry/exit tracing
 *	0x02:	Internal function messages
 *	0x04:	Verbose internal messages
 *	0x08:	Warning messages
 *	0x10:	Error messages
 */
int vswdbg = 0x0;
443
444void
445vswdebug(vsw_t *vswp, const char *fmt, ...)
446{
447	char buf[512];
448	va_list ap;
449
450	va_start(ap, fmt);
451	(void) vsprintf(buf, fmt, ap);
452	va_end(ap);
453
454	if (vswp == NULL)
455		cmn_err(CE_CONT, "%s\n", buf);
456	else
457		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
458}
459
460#endif	/* DEBUG */
461
462static struct modlinkage modlinkage = {
463	MODREV_1,
464	&vswmodldrv,
465	NULL
466};
467
468int
469_init(void)
470{
471	int status;
472
473	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
474
475	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
476	if (status != 0) {
477		return (status);
478	}
479
480	mac_init_ops(&vsw_ops, DRV_NAME);
481	status = mod_install(&modlinkage);
482	if (status != 0) {
483		ddi_soft_state_fini(&vsw_state);
484	}
485	return (status);
486}
487
488int
489_fini(void)
490{
491	int status;
492
493	status = mod_remove(&modlinkage);
494	if (status != 0)
495		return (status);
496	mac_fini_ops(&vsw_ops);
497	ddi_soft_state_fini(&vsw_state);
498
499	rw_destroy(&vsw_rw);
500
501	return (status);
502}
503
504int
505_info(struct modinfo *modinfop)
506{
507	return (mod_info(&modlinkage, modinfop));
508}
509
510static int
511vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
512{
513	vsw_t			*vswp;
514	int			instance;
515	char			hashname[MAXNAMELEN];
516	char			qname[TASKQ_NAMELEN];
517	vsw_attach_progress_t	progress = PROG_init;
518	int			rv;
519
520	switch (cmd) {
521	case DDI_ATTACH:
522		break;
523	case DDI_RESUME:
524		/* nothing to do for this non-device */
525		return (DDI_SUCCESS);
526	case DDI_PM_RESUME:
527	default:
528		return (DDI_FAILURE);
529	}
530
531	instance = ddi_get_instance(dip);
532	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
533		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
534		return (DDI_FAILURE);
535	}
536	vswp = ddi_get_soft_state(vsw_state, instance);
537
538	if (vswp == NULL) {
539		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
540		goto vsw_attach_fail;
541	}
542
543	vswp->dip = dip;
544	vswp->instance = instance;
545	vswp->phys_link_state = LINK_STATE_UNKNOWN;
546	ddi_set_driver_private(dip, (caddr_t)vswp);
547
548	mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL);
549	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
550	mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL);
551	cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL);
552	rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL);
553	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
554	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
555	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
556
557	progress |= PROG_locks;
558
559	rv = vsw_read_mdprops(vswp);
560	if (rv != 0)
561		goto vsw_attach_fail;
562
563	progress |= PROG_readmd;
564
565	/* setup the unicast forwarding database  */
566	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
567	    vswp->instance);
568	D2(vswp, "creating unicast hash table (%s)...", hashname);
569	vswp->fdb_nchains = vsw_fdb_nchains;
570	vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains,
571	    mod_hash_null_valdtor, sizeof (void *));
572	vsw_create_vlans((void *)vswp, VSW_LOCALDEV);
573	progress |= PROG_fdb;
574
575	/* setup the multicast fowarding database */
576	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
577	    vswp->instance);
578	D2(vswp, "creating multicast hash table %s)...", hashname);
579	vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains,
580	    mod_hash_null_valdtor, sizeof (void *));
581
582	progress |= PROG_mfdb;
583
584	/*
585	 * Create the taskq which will process all the VIO
586	 * control messages.
587	 */
588	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
589	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
590	    TASKQ_DEFAULTPRI, 0)) == NULL) {
591		cmn_err(CE_WARN, "!vsw%d: Unable to create task queue",
592		    vswp->instance);
593		goto vsw_attach_fail;
594	}
595
596	progress |= PROG_taskq;
597
598	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_rxp_taskq%d",
599	    vswp->instance);
600	if ((vswp->rxp_taskq = ddi_taskq_create(vswp->dip, qname, 1,
601	    TASKQ_DEFAULTPRI, 0)) == NULL) {
602		cmn_err(CE_WARN, "!vsw%d: Unable to create rxp task queue",
603		    vswp->instance);
604		goto vsw_attach_fail;
605	}
606
607	progress |= PROG_rxp_taskq;
608
609	/* prevent auto-detaching */
610	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
611	    DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
612		cmn_err(CE_NOTE, "!Unable to set \"%s\" property for "
613		    "instance %u", DDI_NO_AUTODETACH, instance);
614	}
615
616	/*
617	 * The null switching function is set to avoid panic until
618	 * switch mode is setup.
619	 */
620	vswp->vsw_switch_frame = vsw_switch_frame_nop;
621
622	/*
623	 * Setup the required switching mode, based on the mdprops that we read
624	 * earlier. We start a thread to do this, to avoid calling mac_open()
625	 * directly from attach().
626	 */
627	rv = vsw_setup_switching_start(vswp);
628	if (rv != 0) {
629		goto vsw_attach_fail;
630	}
631
632	progress |= PROG_swmode;
633
634	/* Register with mac layer as a provider */
635	rv = vsw_mac_register(vswp);
636	if (rv != 0)
637		goto vsw_attach_fail;
638
639	progress |= PROG_macreg;
640
641	/*
642	 * Now we have everything setup, register an interest in
643	 * specific MD nodes.
644	 *
645	 * The callback is invoked in 2 cases, firstly if upon mdeg
646	 * registration there are existing nodes which match our specified
647	 * criteria, and secondly if the MD is changed (and again, there
648	 * are nodes which we are interested in present within it. Note
649	 * that our callback will be invoked even if our specified nodes
650	 * have not actually changed).
651	 *
652	 */
653	rv = vsw_mdeg_register(vswp);
654	if (rv != 0)
655		goto vsw_attach_fail;
656
657	progress |= PROG_mdreg;
658
659	vswp->attach_progress = progress;
660
661	WRITE_ENTER(&vsw_rw);
662	vswp->next = vsw_head;
663	vsw_head = vswp;
664	RW_EXIT(&vsw_rw);
665
666	ddi_report_dev(vswp->dip);
667	return (DDI_SUCCESS);
668
669vsw_attach_fail:
670	DERR(NULL, "vsw_attach: failed");
671
672	vswp->attach_progress = progress;
673	(void) vsw_unattach(vswp);
674	ddi_soft_state_free(vsw_state, instance);
675	return (DDI_FAILURE);
676}
677
678static int
679vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
680{
681	vsw_t			**vswpp, *vswp;
682	int 			instance;
683
684	instance = ddi_get_instance(dip);
685	vswp = ddi_get_soft_state(vsw_state, instance);
686
687	if (vswp == NULL) {
688		return (DDI_FAILURE);
689	}
690
691	switch (cmd) {
692	case DDI_DETACH:
693		break;
694	case DDI_SUSPEND:
695	case DDI_PM_SUSPEND:
696	default:
697		return (DDI_FAILURE);
698	}
699
700	D2(vswp, "detaching instance %d", instance);
701
702	if (vsw_unattach(vswp) != 0) {
703		return (DDI_FAILURE);
704	}
705
706	ddi_remove_minor_node(dip, NULL);
707
708	WRITE_ENTER(&vsw_rw);
709	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
710		if (*vswpp == vswp) {
711			*vswpp = vswp->next;
712			break;
713		}
714	}
715	RW_EXIT(&vsw_rw);
716
717	ddi_soft_state_free(vsw_state, instance);
718
719	return (DDI_SUCCESS);
720}
721
722/*
723 * Common routine to handle vsw_attach() failure and vsw_detach(). Note that
724 * the only reason this function could fail is if mac_unregister() fails.
725 * Otherwise, this function must ensure that all resources are freed and return
726 * success.
727 */
728static int
729vsw_unattach(vsw_t *vswp)
730{
731	vsw_attach_progress_t	progress;
732
733	progress = vswp->attach_progress;
734
735	/*
736	 * Unregister from the gldv3 subsystem. This can fail, in particular
737	 * if there are still any open references to this mac device; in which
738	 * case we just return failure without continuing to detach further.
739	 */
740	if (progress & PROG_macreg) {
741		if (vsw_mac_unregister(vswp) != 0) {
742			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
743			    "MAC layer", vswp->instance);
744			return (1);
745		}
746		progress &= ~PROG_macreg;
747	}
748
749	/*
750	 * Now that we have unregistered from gldv3, we must finish all other
751	 * steps and successfully return from this function; otherwise we will
752	 * end up leaving the device in a broken/unusable state.
753	 *
754	 * If we have registered with mdeg, unregister now to stop further
755	 * callbacks to this vsw device and/or its ports. Then, detach any
756	 * existing ports.
757	 */
758	if (progress & PROG_mdreg) {
759		vsw_mdeg_unregister(vswp);
760		vsw_detach_ports(vswp);
761		progress &= ~PROG_mdreg;
762	}
763
764	/*
765	 * If we have started a thread to setup the switching mode, stop it, if
766	 * it is still running. If it has finished setting up the switching
767	 * mode, then we need to clean up some additional things if we are
768	 * running in L2 mode: first free up any hybrid resources; then stop
769	 * and close the underlying physical device. Note that we would have
770	 * already released all per mac_client resources (ucast, mcast addrs,
771	 * hio-shares etc) as all the ports are detached and if the vsw device
772	 * itself was in use as an interface, it has been unplumbed (otherwise
773	 * mac_unregister() above would fail).
774	 */
775	if (progress & PROG_swmode) {
776
777		vsw_setup_switching_stop(vswp);
778
779		if (vswp->hio_capable == B_TRUE) {
780			vsw_hio_cleanup(vswp);
781			vswp->hio_capable = B_FALSE;
782		}
783
784		mutex_enter(&vswp->mac_lock);
785		vsw_mac_close(vswp);
786		mutex_exit(&vswp->mac_lock);
787
788		progress &= ~PROG_swmode;
789	}
790
791	/*
792	 * We now destroy the taskq used to clean up rx mblk pools that
793	 * couldn't be destroyed when the ports/channels were detached.
794	 * We implicitly wait for those tasks to complete in
795	 * ddi_taskq_destroy().
796	 */
797	if (progress & PROG_rxp_taskq) {
798		ddi_taskq_destroy(vswp->rxp_taskq);
799		progress &= ~PROG_rxp_taskq;
800	}
801
802	/*
803	 * By now any pending tasks have finished and the underlying
804	 * ldc's have been destroyed, so its safe to delete the control
805	 * message taskq.
806	 */
807	if (progress & PROG_taskq) {
808		ddi_taskq_destroy(vswp->taskq_p);
809		progress &= ~PROG_taskq;
810	}
811
812	/* Destroy the multicast hash table */
813	if (progress & PROG_mfdb) {
814		mod_hash_destroy_hash(vswp->mfdb);
815		progress &= ~PROG_mfdb;
816	}
817
818	/* Destroy the vlan hash table and fdb */
819	if (progress & PROG_fdb) {
820		vsw_destroy_vlans(vswp, VSW_LOCALDEV);
821		mod_hash_destroy_hash(vswp->fdb_hashp);
822		progress &= ~PROG_fdb;
823	}
824
825	if (progress & PROG_readmd) {
826		if (VSW_PRI_ETH_DEFINED(vswp)) {
827			kmem_free(vswp->pri_types,
828			    sizeof (uint16_t) * vswp->pri_num_types);
829			(void) vio_destroy_mblks(vswp->pri_tx_vmp);
830		}
831		progress &= ~PROG_readmd;
832	}
833
834	if (progress & PROG_locks) {
835		rw_destroy(&vswp->plist.lockrw);
836		rw_destroy(&vswp->mfdbrw);
837		rw_destroy(&vswp->if_lockrw);
838		rw_destroy(&vswp->maccl_rwlock);
839		cv_destroy(&vswp->sw_thr_cv);
840		mutex_destroy(&vswp->sw_thr_lock);
841		mutex_destroy(&vswp->mca_lock);
842		mutex_destroy(&vswp->mac_lock);
843		progress &= ~PROG_locks;
844	}
845
846	vswp->attach_progress = progress;
847
848	return (0);
849}
850
851void
852vsw_destroy_rxpools(void *arg)
853{
854	vio_mblk_pool_t	*poolp = (vio_mblk_pool_t *)arg;
855	vio_mblk_pool_t	*npoolp;
856
857	while (poolp != NULL) {
858		npoolp =  poolp->nextp;
859		while (vio_destroy_mblks(poolp) != 0) {
860			drv_usecwait(vsw_rxpool_cleanup_delay);
861		}
862		poolp = npoolp;
863	}
864}
865
866/*
867 * Get the value of the "vsw-phys-dev" property in the specified
868 * node. This property is the name of the physical device that
869 * the virtual switch will use to talk to the outside world.
870 *
871 * Note it is valid for this property to be NULL (but the property
872 * itself must exist). Callers of this routine should verify that
873 * the value returned is what they expected (i.e. either NULL or non NULL).
874 *
875 * On success returns value of the property in region pointed to by
876 * the 'name' argument, and with return value of 0. Otherwise returns 1.
877 */
878static int
879vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
880{
881	int		len = 0;
882	int		instance;
883	char		*physname = NULL;
884	char		*dev;
885	const char	*dev_name;
886	char		myname[MAXNAMELEN];
887
888	dev_name = ddi_driver_name(vswp->dip);
889	instance = ddi_get_instance(vswp->dip);
890	(void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance);
891
892	if (md_get_prop_data(mdp, node, physdev_propname,
893	    (uint8_t **)(&physname), &len) != 0) {
894		cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
895		    "device(s) from MD", vswp->instance);
896		return (1);
897	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
898		cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
899		    vswp->instance, physname);
900		return (1);
901	} else if (strcmp(myname, physname) == 0) {
902		/*
903		 * Prevent the vswitch from opening itself as the
904		 * network device.
905		 */
906		cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name",
907		    vswp->instance, physname);
908		return (1);
909	} else {
910		(void) strncpy(name, physname, strlen(physname) + 1);
911		D2(vswp, "%s: using first device specified (%s)",
912		    __func__, physname);
913	}
914
915#ifdef DEBUG
916	/*
917	 * As a temporary measure to aid testing we check to see if there
918	 * is a vsw.conf file present. If there is we use the value of the
919	 * vsw_physname property in the file as the name of the physical
920	 * device, overriding the value from the MD.
921	 *
922	 * There may be multiple devices listed, but for the moment
923	 * we just use the first one.
924	 */
925	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
926	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
927		if ((strlen(dev) + 1) > LIFNAMSIZ) {
928			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
929			    vswp->instance, dev);
930			ddi_prop_free(dev);
931			return (1);
932		} else {
933			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
934			    "config file", vswp->instance, dev);
935
936			(void) strncpy(name, dev, strlen(dev) + 1);
937		}
938
939		ddi_prop_free(dev);
940	}
941#endif
942
943	return (0);
944}
945
946/*
947 * Read the 'vsw-switch-mode' property from the specified MD node.
948 *
949 * Returns 0 on success, otherwise returns 1.
950 */
951static int
952vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
953{
954	int		len = 0;
955	char		*smode = NULL;
956	char		*curr_mode = NULL;
957
958	D1(vswp, "%s: enter", __func__);
959
960	/*
961	 * Get the switch-mode property. The modes are listed in
962	 * decreasing order of preference, i.e. prefered mode is
963	 * first item in list.
964	 */
965	len = 0;
966	if (md_get_prop_data(mdp, node, smode_propname,
967	    (uint8_t **)(&smode), &len) != 0) {
968		/*
969		 * Unable to get switch-mode property from MD, nothing
970		 * more we can do.
971		 */
972		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
973		    " from the MD", vswp->instance);
974		return (1);
975	}
976
977	curr_mode = smode;
978	/*
979	 * Modes of operation:
980	 * 'switched'	 - layer 2 switching, underlying HW in
981	 *			programmed mode.
982	 * 'promiscuous' - layer 2 switching, underlying HW in
983	 *			promiscuous mode.
984	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
985	 *			in non-promiscuous mode.
986	 */
987	while (curr_mode < (smode + len)) {
988		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
989		if (strcmp(curr_mode, "switched") == 0) {
990			*mode = VSW_LAYER2;
991		} else if (strcmp(curr_mode, "promiscuous") == 0) {
992			*mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
993		} else if (strcmp(curr_mode, "routed") == 0) {
994			*mode = VSW_LAYER3;
995		} else {
996			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
997			    "setting to default switched mode",
998			    vswp->instance, curr_mode);
999			*mode = VSW_LAYER2;
1000		}
1001		curr_mode += strlen(curr_mode) + 1;
1002	}
1003
1004	D2(vswp, "%s: %d mode", __func__, *mode);
1005
1006	D1(vswp, "%s: exit", __func__);
1007
1008	return (0);
1009}
1010
1011/*
1012 * Register with the MAC layer as a network device, so we
1013 * can be plumbed if necessary.
1014 */
1015static int
1016vsw_mac_register(vsw_t *vswp)
1017{
1018	mac_register_t	*macp;
1019	int		rv;
1020
1021	D1(vswp, "%s: enter", __func__);
1022
1023	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1024		return (EINVAL);
1025	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1026	macp->m_driver = vswp;
1027	macp->m_dip = vswp->dip;
1028	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1029	macp->m_callbacks = &vsw_m_callbacks;
1030	macp->m_min_sdu = 0;
1031	macp->m_max_sdu = vswp->mtu;
1032	macp->m_margin = VLAN_TAGSZ;
1033	rv = mac_register(macp, &vswp->if_mh);
1034	mac_free(macp);
1035	if (rv != 0) {
1036		/*
1037		 * Treat this as a non-fatal error as we may be
1038		 * able to operate in some other mode.
1039		 */
1040		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
1041		    "a provider with MAC layer", vswp->instance);
1042		return (rv);
1043	}
1044
1045	vswp->if_state |= VSW_IF_REG;
1046
1047	D1(vswp, "%s: exit", __func__);
1048
1049	return (rv);
1050}
1051
1052static int
1053vsw_mac_unregister(vsw_t *vswp)
1054{
1055	int		rv = 0;
1056
1057	D1(vswp, "%s: enter", __func__);
1058
1059	WRITE_ENTER(&vswp->if_lockrw);
1060
1061	if (vswp->if_state & VSW_IF_REG) {
1062		rv = mac_unregister(vswp->if_mh);
1063		if (rv != 0) {
1064			DWARN(vswp, "%s: unable to unregister from MAC "
1065			    "framework", __func__);
1066
1067			RW_EXIT(&vswp->if_lockrw);
1068			D1(vswp, "%s: fail exit", __func__);
1069			return (rv);
1070		}
1071
1072		/* mark i/f as down and unregistered */
1073		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
1074	}
1075	RW_EXIT(&vswp->if_lockrw);
1076
1077	D1(vswp, "%s: exit", __func__);
1078
1079	return (rv);
1080}
1081
1082static int
1083vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
1084{
1085	vsw_t			*vswp = (vsw_t *)arg;
1086
1087	D1(vswp, "%s: enter", __func__);
1088
1089	mutex_enter(&vswp->mac_lock);
1090	if (vswp->mh == NULL) {
1091		mutex_exit(&vswp->mac_lock);
1092		return (EINVAL);
1093	}
1094
1095	/* return stats from underlying device */
1096	*val = mac_stat_get(vswp->mh, stat);
1097
1098	mutex_exit(&vswp->mac_lock);
1099
1100	return (0);
1101}
1102
1103static void
1104vsw_m_stop(void *arg)
1105{
1106	vsw_t	*vswp = (vsw_t *)arg;
1107
1108	D1(vswp, "%s: enter", __func__);
1109
1110	WRITE_ENTER(&vswp->if_lockrw);
1111	vswp->if_state &= ~VSW_IF_UP;
1112	RW_EXIT(&vswp->if_lockrw);
1113
1114	/* Cleanup and close the mac client */
1115	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);
1116
1117	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1118}
1119
1120static int
1121vsw_m_start(void *arg)
1122{
1123	int		rv;
1124	vsw_t		*vswp = (vsw_t *)arg;
1125
1126	D1(vswp, "%s: enter", __func__);
1127
1128	WRITE_ENTER(&vswp->if_lockrw);
1129
1130	vswp->if_state |= VSW_IF_UP;
1131
1132	if (vswp->switching_setup_done == B_FALSE) {
1133		/*
1134		 * If the switching mode has not been setup yet, just
1135		 * return. The unicast address will be programmed
1136		 * after the physical device is successfully setup by the
1137		 * timeout handler.
1138		 */
1139		RW_EXIT(&vswp->if_lockrw);
1140		return (0);
1141	}
1142
1143	/* if in layer2 mode, program unicast address. */
1144	if (vswp->mh != NULL) {
1145		/* Init a mac client and program addresses */
1146		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
1147		if (rv != 0) {
1148			cmn_err(CE_NOTE,
1149			    "!vsw%d: failed to program interface "
1150			    "unicast address\n", vswp->instance);
1151		}
1152	}
1153
1154	RW_EXIT(&vswp->if_lockrw);
1155
1156	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
1157	return (0);
1158}
1159
1160/*
1161 * Change the local interface address.
1162 *
1163 * Note: we don't support this entry point. The local
1164 * mac address of the switch can only be changed via its
1165 * MD node properties.
1166 */
1167static int
1168vsw_m_unicst(void *arg, const uint8_t *macaddr)
1169{
1170	_NOTE(ARGUNUSED(arg, macaddr))
1171
1172	return (DDI_FAILURE);
1173}
1174
1175static int
1176vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
1177{
1178	vsw_t		*vswp = (vsw_t *)arg;
1179	mcst_addr_t	*mcst_p = NULL;
1180	uint64_t	addr = 0x0;
1181	int		i, ret = 0;
1182
1183	D1(vswp, "%s: enter", __func__);
1184
1185	/*
1186	 * Convert address into form that can be used
1187	 * as hash table key.
1188	 */
1189	for (i = 0; i < ETHERADDRL; i++) {
1190		addr = (addr << 8) | mca[i];
1191	}
1192
1193	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
1194
1195	if (add) {
1196		D2(vswp, "%s: adding multicast", __func__);
1197		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1198			/*
1199			 * Update the list of multicast addresses
1200			 * contained within the vsw_t structure to
1201			 * include this new one.
1202			 */
1203			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
1204			if (mcst_p == NULL) {
1205				DERR(vswp, "%s unable to alloc mem", __func__);
1206				(void) vsw_del_mcst(vswp,
1207				    VSW_LOCALDEV, addr, NULL);
1208				return (1);
1209			}
1210			mcst_p->addr = addr;
1211			ether_copy(mca, &mcst_p->mca);
1212
1213			/*
1214			 * Call into the underlying driver to program the
1215			 * address into HW.
1216			 */
1217			ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
1218			    VSW_LOCALDEV);
1219			if (ret != 0) {
1220				(void) vsw_del_mcst(vswp,
1221				    VSW_LOCALDEV, addr, NULL);
1222				kmem_free(mcst_p, sizeof (*mcst_p));
1223				return (ret);
1224			}
1225
1226			mutex_enter(&vswp->mca_lock);
1227			mcst_p->nextp = vswp->mcap;
1228			vswp->mcap = mcst_p;
1229			mutex_exit(&vswp->mca_lock);
1230		} else {
1231			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
1232			    "address", vswp->instance);
1233		}
1234		return (ret);
1235	}
1236
1237	D2(vswp, "%s: removing multicast", __func__);
1238	/*
1239	 * Remove the address from the hash table..
1240	 */
1241	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
1242
1243		/*
1244		 * ..and then from the list maintained in the
1245		 * vsw_t structure.
1246		 */
1247		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
1248		ASSERT(mcst_p != NULL);
1249
1250		vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
1251		kmem_free(mcst_p, sizeof (*mcst_p));
1252	}
1253
1254	D1(vswp, "%s: exit", __func__);
1255
1256	return (0);
1257}
1258
1259static int
1260vsw_m_promisc(void *arg, boolean_t on)
1261{
1262	vsw_t		*vswp = (vsw_t *)arg;
1263
1264	D1(vswp, "%s: enter", __func__);
1265
1266	WRITE_ENTER(&vswp->if_lockrw);
1267	if (on)
1268		vswp->if_state |= VSW_IF_PROMISC;
1269	else
1270		vswp->if_state &= ~VSW_IF_PROMISC;
1271	RW_EXIT(&vswp->if_lockrw);
1272
1273	D1(vswp, "%s: exit", __func__);
1274
1275	return (0);
1276}
1277
1278static mblk_t *
1279vsw_m_tx(void *arg, mblk_t *mp)
1280{
1281	vsw_t		*vswp = (vsw_t *)arg;
1282
1283	D1(vswp, "%s: enter", __func__);
1284
1285	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);
1286
1287	if (mp == NULL) {
1288		return (NULL);
1289	}
1290
1291	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
1292
1293	D1(vswp, "%s: exit", __func__);
1294
1295	return (NULL);
1296}
1297
1298/*
1299 * Register for machine description (MD) updates.
1300 *
1301 * Returns 0 on success, 1 on failure.
1302 */
1303static int
1304vsw_mdeg_register(vsw_t *vswp)
1305{
1306	mdeg_prop_spec_t	*pspecp;
1307	mdeg_node_spec_t	*inst_specp;
1308	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
1309	size_t			templatesz;
1310	int			rv;
1311
1312	D1(vswp, "%s: enter", __func__);
1313
1314	/*
1315	 * Allocate and initialize a per-instance copy
1316	 * of the global property spec array that will
1317	 * uniquely identify this vsw instance.
1318	 */
1319	templatesz = sizeof (vsw_prop_template);
1320	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
1321
1322	bcopy(vsw_prop_template, pspecp, templatesz);
1323
1324	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);
1325
1326	/* initialize the complete prop spec structure */
1327	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
1328	inst_specp->namep = "virtual-device";
1329	inst_specp->specp = pspecp;
1330
1331	D2(vswp, "%s: instance %d registering with mdeg", __func__,
1332	    vswp->regprop);
1333	/*
1334	 * Register an interest in 'virtual-device' nodes with a
1335	 * 'name' property of 'virtual-network-switch'
1336	 */
1337	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
1338	    (void *)vswp, &mdeg_hdl);
1339	if (rv != MDEG_SUCCESS) {
1340		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
1341		    __func__, rv);
1342		goto mdeg_reg_fail;
1343	}
1344
1345	/*
1346	 * Register an interest in 'vsw-port' nodes.
1347	 */
1348	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
1349	    (void *)vswp, &mdeg_port_hdl);
1350	if (rv != MDEG_SUCCESS) {
1351		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
1352		(void) mdeg_unregister(mdeg_hdl);
1353		goto mdeg_reg_fail;
1354	}
1355
1356	/* save off data that will be needed later */
1357	vswp->inst_spec = inst_specp;
1358	vswp->mdeg_hdl = mdeg_hdl;
1359	vswp->mdeg_port_hdl = mdeg_port_hdl;
1360
1361	D1(vswp, "%s: exit", __func__);
1362	return (0);
1363
1364mdeg_reg_fail:
1365	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
1366	    vswp->instance);
1367	kmem_free(pspecp, templatesz);
1368	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
1369
1370	vswp->mdeg_hdl = NULL;
1371	vswp->mdeg_port_hdl = NULL;
1372
1373	return (1);
1374}
1375
1376static void
1377vsw_mdeg_unregister(vsw_t *vswp)
1378{
1379	D1(vswp, "vsw_mdeg_unregister: enter");
1380
1381	if (vswp->mdeg_hdl != NULL)
1382		(void) mdeg_unregister(vswp->mdeg_hdl);
1383
1384	if (vswp->mdeg_port_hdl != NULL)
1385		(void) mdeg_unregister(vswp->mdeg_port_hdl);
1386
1387	if (vswp->inst_spec != NULL) {
1388		if (vswp->inst_spec->specp != NULL) {
1389			(void) kmem_free(vswp->inst_spec->specp,
1390			    sizeof (vsw_prop_template));
1391			vswp->inst_spec->specp = NULL;
1392		}
1393
1394		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
1395		vswp->inst_spec = NULL;
1396	}
1397
1398	D1(vswp, "vsw_mdeg_unregister: exit");
1399}
1400
1401/*
1402 * Mdeg callback invoked for the vsw node itself.
1403 */
1404static int
1405vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1406{
1407	vsw_t		*vswp;
1408	md_t		*mdp;
1409	mde_cookie_t	node;
1410	uint64_t	inst;
1411	char		*node_name = NULL;
1412
1413	if (resp == NULL)
1414		return (MDEG_FAILURE);
1415
1416	vswp = (vsw_t *)cb_argp;
1417
1418	D1(vswp, "%s: added %d : removed %d : curr matched %d"
1419	    " : prev matched %d", __func__, resp->added.nelem,
1420	    resp->removed.nelem, resp->match_curr.nelem,
1421	    resp->match_prev.nelem);
1422
1423	/*
1424	 * We get an initial callback for this node as 'added'
1425	 * after registering with mdeg. Note that we would have
1426	 * already gathered information about this vsw node by
1427	 * walking MD earlier during attach (in vsw_read_mdprops()).
1428	 * So, there is a window where the properties of this
1429	 * node might have changed when we get this initial 'added'
1430	 * callback. We handle this as if an update occured
1431	 * and invoke the same function which handles updates to
1432	 * the properties of this vsw-node if any.
1433	 *
1434	 * A non-zero 'match' value indicates that the MD has been
1435	 * updated and that a virtual-network-switch node is
1436	 * present which may or may not have been updated. It is
1437	 * up to the clients to examine their own nodes and
1438	 * determine if they have changed.
1439	 */
1440	if (resp->added.nelem != 0) {
1441
1442		if (resp->added.nelem != 1) {
1443			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
1444			    "invalid: %d\n", vswp->instance, resp->added.nelem);
1445			return (MDEG_FAILURE);
1446		}
1447
1448		mdp = resp->added.mdp;
1449		node = resp->added.mdep[0];
1450
1451	} else if (resp->match_curr.nelem != 0) {
1452
1453		if (resp->match_curr.nelem != 1) {
1454			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
1455			    "invalid: %d\n", vswp->instance,
1456			    resp->match_curr.nelem);
1457			return (MDEG_FAILURE);
1458		}
1459
1460		mdp = resp->match_curr.mdp;
1461		node = resp->match_curr.mdep[0];
1462
1463	} else {
1464		return (MDEG_FAILURE);
1465	}
1466
1467	/* Validate name and instance */
1468	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
1469		DERR(vswp, "%s: unable to get node name\n",  __func__);
1470		return (MDEG_FAILURE);
1471	}
1472
1473	/* is this a virtual-network-switch? */
1474	if (strcmp(node_name, vsw_propname) != 0) {
1475		DERR(vswp, "%s: Invalid node name: %s\n",
1476		    __func__, node_name);
1477		return (MDEG_FAILURE);
1478	}
1479
1480	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
1481		DERR(vswp, "%s: prop(cfg-handle) not found\n",
1482		    __func__);
1483		return (MDEG_FAILURE);
1484	}
1485
1486	/* is this the right instance of vsw? */
1487	if (inst != vswp->regprop) {
1488		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
1489		    __func__, inst);
1490		return (MDEG_FAILURE);
1491	}
1492
1493	vsw_update_md_prop(vswp, mdp, node);
1494
1495	return (MDEG_SUCCESS);
1496}
1497
1498/*
1499 * Mdeg callback invoked for changes to the vsw-port nodes
1500 * under the vsw node.
1501 */
1502static int
1503vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
1504{
1505	vsw_t		*vswp;
1506	int		idx;
1507	md_t		*mdp;
1508	mde_cookie_t	node;
1509	uint64_t	inst;
1510	int		rv;
1511
1512	if ((resp == NULL) || (cb_argp == NULL))
1513		return (MDEG_FAILURE);
1514
1515	vswp = (vsw_t *)cb_argp;
1516
1517	D2(vswp, "%s: added %d : removed %d : curr matched %d"
1518	    " : prev matched %d", __func__, resp->added.nelem,
1519	    resp->removed.nelem, resp->match_curr.nelem,
1520	    resp->match_prev.nelem);
1521
1522	/* process added ports */
1523	for (idx = 0; idx < resp->added.nelem; idx++) {
1524		mdp = resp->added.mdp;
1525		node = resp->added.mdep[idx];
1526
1527		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
1528
1529		if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) {
1530			cmn_err(CE_WARN, "!vsw%d: Unable to add new port "
1531			    "(0x%lx), err=%d", vswp->instance, node, rv);
1532		}
1533	}
1534
1535	/* process removed ports */
1536	for (idx = 0; idx < resp->removed.nelem; idx++) {
1537		mdp = resp->removed.mdp;
1538		node = resp->removed.mdep[idx];
1539
1540		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
1541			DERR(vswp, "%s: prop(%s) not found in port(%d)",
1542			    __func__, id_propname, idx);
1543			continue;
1544		}
1545
1546		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
1547
1548		if (vsw_port_detach(vswp, inst) != 0) {
1549			cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld",
1550			    vswp->instance, inst);
1551		}
1552	}
1553
1554	for (idx = 0; idx < resp->match_curr.nelem; idx++) {
1555		(void) vsw_port_update(vswp, resp->match_curr.mdp,
1556		    resp->match_curr.mdep[idx],
1557		    resp->match_prev.mdp,
1558		    resp->match_prev.mdep[idx]);
1559	}
1560
1561	D1(vswp, "%s: exit", __func__);
1562
1563	return (MDEG_SUCCESS);
1564}
1565
1566/*
1567 * Scan the machine description for this instance of vsw
1568 * and read its properties. Called only from vsw_attach().
1569 * Returns: 0 on success, 1 on failure.
1570 */
1571static int
1572vsw_read_mdprops(vsw_t *vswp)
1573{
1574	md_t		*mdp = NULL;
1575	mde_cookie_t	rootnode;
1576	mde_cookie_t	*listp = NULL;
1577	uint64_t	inst;
1578	uint64_t	cfgh;
1579	char		*name;
1580	int		rv = 1;
1581	int		num_nodes = 0;
1582	int		num_devs = 0;
1583	int		listsz = 0;
1584	int		i;
1585
1586	/*
1587	 * In each 'virtual-device' node in the MD there is a
1588	 * 'cfg-handle' property which is the MD's concept of
1589	 * an instance number (this may be completely different from
1590	 * the device drivers instance #). OBP reads that value and
1591	 * stores it in the 'reg' property of the appropriate node in
1592	 * the device tree. We first read this reg property and use this
1593	 * to compare against the 'cfg-handle' property of vsw nodes
1594	 * in MD to get to this specific vsw instance and then read
1595	 * other properties that we are interested in.
1596	 * We also cache the value of 'reg' property and use it later
1597	 * to register callbacks with mdeg (see vsw_mdeg_register())
1598	 */
1599	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
1600	    DDI_PROP_DONTPASS, reg_propname, -1);
1601	if (inst == -1) {
1602		cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from "
1603		    "OBP device tree", vswp->instance, reg_propname);
1604		return (rv);
1605	}
1606
1607	vswp->regprop = inst;
1608
1609	if ((mdp = md_get_handle()) == NULL) {
1610		DWARN(vswp, "%s: cannot init MD\n", __func__);
1611		return (rv);
1612	}
1613
1614	num_nodes = md_node_count(mdp);
1615	ASSERT(num_nodes > 0);
1616
1617	listsz = num_nodes * sizeof (mde_cookie_t);
1618	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);
1619
1620	rootnode = md_root_node(mdp);
1621
1622	/* search for all "virtual_device" nodes */
1623	num_devs = md_scan_dag(mdp, rootnode,
1624	    md_find_name(mdp, vdev_propname),
1625	    md_find_name(mdp, "fwd"), listp);
1626	if (num_devs <= 0) {
1627		DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs);
1628		goto vsw_readmd_exit;
1629	}
1630
1631	/*
1632	 * Now loop through the list of virtual-devices looking for
1633	 * devices with name "virtual-network-switch" and for each
1634	 * such device compare its instance with what we have from
1635	 * the 'reg' property to find the right node in MD and then
1636	 * read all its properties.
1637	 */
1638	for (i = 0; i < num_devs; i++) {
1639
1640		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
1641			DWARN(vswp, "%s: name property not found\n",
1642			    __func__);
1643			goto vsw_readmd_exit;
1644		}
1645
1646		/* is this a virtual-network-switch? */
1647		if (strcmp(name, vsw_propname) != 0)
1648			continue;
1649
1650		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
1651			DWARN(vswp, "%s: cfg-handle property not found\n",
1652			    __func__);
1653			goto vsw_readmd_exit;
1654		}
1655
1656		/* is this the required instance of vsw? */
1657		if (inst != cfgh)
1658			continue;
1659
1660		/* now read all properties of this vsw instance */
1661		rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]);
1662		break;
1663	}
1664
1665vsw_readmd_exit:
1666
1667	kmem_free(listp, listsz);
1668	(void) md_fini_handle(mdp);
1669	return (rv);
1670}
1671
1672/*
1673 * Read the initial start-of-day values from the specified MD node.
1674 */
1675static int
1676vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1677{
1678	uint64_t	macaddr = 0;
1679
1680	D1(vswp, "%s: enter", __func__);
1681
1682	if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) {
1683		return (1);
1684	}
1685
1686	/* mac address for vswitch device itself */
1687	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
1688		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
1689		    vswp->instance);
1690		return (1);
1691	}
1692
1693	vsw_save_lmacaddr(vswp, macaddr);
1694
1695	if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) {
1696		DWARN(vswp, "%s: Unable to read %s property from MD, "
1697		    "defaulting to 'switched' mode",
1698		    __func__, smode_propname);
1699
1700		vswp->smode = VSW_LAYER2;
1701	}
1702
1703	/*
1704	 * Read the 'linkprop' property to know if this
1705	 * vsw device wants to get physical link updates.
1706	 */
1707	vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update);
1708
1709	/* read mtu */
1710	vsw_mtu_read(vswp, mdp, node, &vswp->mtu);
1711	if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) {
1712		vswp->mtu = ETHERMTU;
1713	}
1714	vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) +
1715	    VLAN_TAGSZ;
1716
1717	/* read vlan id properties of this vsw instance */
1718	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid,
1719	    &vswp->vids, &vswp->nvids, &vswp->default_vlan_id);
1720
1721	/* read priority-ether-types */
1722	vsw_read_pri_eth_types(vswp, mdp, node);
1723
1724	/* read bandwidth property of this vsw instance */
1725	vsw_bandwidth_read(vswp, mdp, node, &vswp->bandwidth);
1726
1727	D1(vswp, "%s: exit", __func__);
1728	return (0);
1729}
1730
1731/*
1732 * Read vlan id properties of the given MD node.
1733 * Arguments:
1734 *   arg:          device argument(vsw device or a port)
1735 *   type:         type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port)
1736 *   mdp:          machine description
1737 *   node:         md node cookie
1738 *
1739 * Returns:
1740 *   pvidp:        port-vlan-id of the node
1741 *   vidspp:       list of vlan-ids of the node
1742 *   nvidsp:       # of vlan-ids in the list
1743 *   default_idp:  default-vlan-id of the node(if node is vsw device)
1744 */
1745static void
1746vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
1747	uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp,
1748	uint16_t *default_idp)
1749{
1750	vsw_t		*vswp;
1751	vsw_port_t	*portp;
1752	char		*pvid_propname;
1753	char		*vid_propname;
1754	uint_t		nvids = 0;
1755	uint32_t	vids_size;
1756	int		rv;
1757	int		i;
1758	uint64_t	*data;
1759	uint64_t	val;
1760	int		size;
1761	int		inst;
1762
1763	if (type == VSW_LOCALDEV) {
1764
1765		vswp = (vsw_t *)arg;
1766		pvid_propname = vsw_pvid_propname;
1767		vid_propname = vsw_vid_propname;
1768		inst = vswp->instance;
1769
1770	} else if (type == VSW_VNETPORT) {
1771
1772		portp = (vsw_port_t *)arg;
1773		vswp = portp->p_vswp;
1774		pvid_propname = port_pvid_propname;
1775		vid_propname = port_vid_propname;
1776		inst = portp->p_instance;
1777
1778	} else {
1779		return;
1780	}
1781
1782	if (type == VSW_LOCALDEV && default_idp != NULL) {
1783		rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val);
1784		if (rv != 0) {
1785			DWARN(vswp, "%s: prop(%s) not found", __func__,
1786			    vsw_dvid_propname);
1787
1788			*default_idp = vsw_default_vlan_id;
1789		} else {
1790			*default_idp = val & 0xFFF;
1791			D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1792			    vsw_dvid_propname, inst, *default_idp);
1793		}
1794	}
1795
1796	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
1797	if (rv != 0) {
1798		DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname);
1799		*pvidp = vsw_default_vlan_id;
1800	} else {
1801
1802		*pvidp = val & 0xFFF;
1803		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1804		    pvid_propname, inst, *pvidp);
1805	}
1806
1807	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
1808	    &size);
1809	if (rv != 0) {
1810		D2(vswp, "%s: prop(%s) not found", __func__, vid_propname);
1811		size = 0;
1812	} else {
1813		size /= sizeof (uint64_t);
1814	}
1815	nvids = size;
1816
1817	if (nvids != 0) {
1818		D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst);
1819		vids_size = sizeof (vsw_vlanid_t) * nvids;
1820		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
1821		for (i = 0; i < nvids; i++) {
1822			(*vidspp)[i].vl_vid = data[i] & 0xFFFF;
1823			(*vidspp)[i].vl_set = B_FALSE;
1824			D2(vswp, " %d ", (*vidspp)[i].vl_vid);
1825		}
1826		D2(vswp, "\n");
1827	}
1828
1829	*nvidsp = nvids;
1830}
1831
1832static void
1833vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, mde_cookie_t node,
1834    uint64_t *bw)
1835{
1836	int		rv;
1837	uint64_t	val;
1838	vsw_t		*vswp;
1839
1840	vswp = portp->p_vswp;
1841
1842	rv = md_get_prop_val(mdp, node, port_maxbw_propname, &val);
1843
1844	if (rv != 0) {
1845		*bw = 0;
1846		D3(vswp, "%s: prop(%s) not found\n", __func__,
1847		    port_maxbw_propname);
1848	} else {
1849		*bw = val;
1850		D3(vswp, "%s: %s nodes found", __func__, port_maxbw_propname);
1851	}
1852}
1853
1854/*
1855 * This function reads "priority-ether-types" property from md. This property
1856 * is used to enable support for priority frames. Applications which need
1857 * guaranteed and timely delivery of certain high priority frames to/from
1858 * a vnet or vsw within ldoms, should configure this property by providing
1859 * the ether type(s) for which the priority facility is needed.
1860 * Normal data frames are delivered over a ldc channel using the descriptor
1861 * ring mechanism which is constrained by factors such as descriptor ring size,
1862 * the rate at which the ring is processed at the peer ldc end point, etc.
1863 * The priority mechanism provides an Out-Of-Band path to send/receive frames
1864 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
1865 * descriptor ring path and enables a more reliable and timely delivery of
1866 * frames to the peer.
1867 */
1868static void
1869vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
1870{
1871	int		rv;
1872	uint16_t	*types;
1873	uint64_t	*data;
1874	int		size;
1875	int		i;
1876	size_t		mblk_sz;
1877
1878	rv = md_get_prop_data(mdp, node, pri_types_propname,
1879	    (uint8_t **)&data, &size);
1880	if (rv != 0) {
1881		/*
1882		 * Property may not exist if we are running pre-ldoms1.1 f/w.
1883		 * Check if 'vsw_pri_eth_type' has been set in that case.
1884		 */
1885		if (vsw_pri_eth_type != 0) {
1886			size = sizeof (vsw_pri_eth_type);
1887			data = &vsw_pri_eth_type;
1888		} else {
1889			D3(vswp, "%s: prop(%s) not found", __func__,
1890			    pri_types_propname);
1891			size = 0;
1892		}
1893	}
1894
1895	if (size == 0) {
1896		vswp->pri_num_types = 0;
1897		return;
1898	}
1899
1900	/*
1901	 * we have some priority-ether-types defined;
1902	 * allocate a table of these types and also
1903	 * allocate a pool of mblks to transmit these
1904	 * priority packets.
1905	 */
1906	size /= sizeof (uint64_t);
1907	vswp->pri_num_types = size;
1908	vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
1909	for (i = 0, types = vswp->pri_types; i < size; i++) {
1910		types[i] = data[i] & 0xFFFF;
1911	}
1912	mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7;
1913	(void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp);
1914}
1915
1916static void
1917vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
1918{
1919	int		rv;
1920	int		inst;
1921	uint64_t	val;
1922	char		*mtu_propname;
1923
1924	mtu_propname = vsw_mtu_propname;
1925	inst = vswp->instance;
1926
1927	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
1928	if (rv != 0) {
1929		D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname);
1930		*mtu = vsw_ethermtu;
1931	} else {
1932
1933		*mtu = val & 0xFFFF;
1934		D2(vswp, "%s: %s(%d): (%d)\n", __func__,
1935		    mtu_propname, inst, *mtu);
1936	}
1937}
1938
1939/*
1940 * Update the mtu of the vsw device. We first check if the device has been
1941 * plumbed and if so fail the mtu update. Otherwise, we continue to update the
1942 * new mtu and reset all ports to initiate handshake re-negotiation with peers
1943 * using the new mtu.
1944 */
1945static int
1946vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
1947{
1948	int	rv;
1949
1950	WRITE_ENTER(&vswp->if_lockrw);
1951
1952	if (vswp->if_state & VSW_IF_UP) {
1953
1954		RW_EXIT(&vswp->if_lockrw);
1955
1956		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
1957		    " as the device is plumbed\n", vswp->instance);
1958		return (EBUSY);
1959
1960	} else {
1961
1962		D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
1963		    __func__, vswp->mtu, mtu);
1964
1965		vswp->mtu = mtu;
1966		vswp->max_frame_size = vswp->mtu +
1967		    sizeof (struct ether_header) + VLAN_TAGSZ;
1968
1969		rv = mac_maxsdu_update(vswp->if_mh, mtu);
1970		if (rv != 0) {
1971			cmn_err(CE_NOTE,
1972			    "!vsw%d: Unable to update mtu with mac"
1973			    " layer\n", vswp->instance);
1974		}
1975
1976		RW_EXIT(&vswp->if_lockrw);
1977
1978		/* Reset ports to renegotiate with the new mtu */
1979		vsw_reset_ports(vswp);
1980
1981	}
1982
1983	return (0);
1984}
1985
1986static void
1987vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
1988	boolean_t *pls)
1989{
1990	int		rv;
1991	uint64_t	val;
1992	char		*linkpropname;
1993
1994	linkpropname = vsw_linkprop_propname;
1995
1996	rv = md_get_prop_val(mdp, node, linkpropname, &val);
1997	if (rv != 0) {
1998		D3(vswp, "%s: prop(%s) not found", __func__, linkpropname);
1999		*pls = B_FALSE;
2000	} else {
2001
2002		*pls = (val & 0x1) ? B_TRUE : B_FALSE;
2003		D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname,
2004		    vswp->instance, *pls);
2005	}
2006}
2007
2008void
2009vsw_mac_link_update(vsw_t *vswp, link_state_t link_state)
2010{
2011	READ_ENTER(&vswp->if_lockrw);
2012
2013	if (vswp->if_state & VSW_IF_REG) {
2014		mac_link_update(vswp->if_mh, link_state);
2015	}
2016
2017	RW_EXIT(&vswp->if_lockrw);
2018}
2019
2020void
2021vsw_physlink_state_update(vsw_t *vswp)
2022{
2023	if (vswp->pls_update == B_TRUE) {
2024		vsw_mac_link_update(vswp, vswp->phys_link_state);
2025	}
2026	vsw_physlink_update_ports(vswp);
2027}
2028
2029static void
2030vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint64_t *bw)
2031{
2032	/* read the vsw bandwidth from md */
2033	int		rv;
2034	uint64_t	val;
2035
2036	rv = md_get_prop_val(mdp, node, vsw_maxbw_propname, &val);
2037	if (rv != 0) {
2038		*bw = 0;
2039		D3(vswp, "%s: prop(%s) not found", __func__,
2040		    vsw_maxbw_propname);
2041	} else {
2042		*bw = val;
2043		D3(vswp, "%s: %s(%d): (%ld)\n", __func__,
2044		    vsw_maxbw_propname, vswp->instance, *bw);
2045	}
2046}
2047
2048/*
2049 * Check to see if the relevant properties in the specified node have
2050 * changed, and if so take the appropriate action.
2051 *
2052 * If any of the properties are missing or invalid we don't take
2053 * any action, as this function should only be invoked when modifications
2054 * have been made to what we assume is a working configuration, which
2055 * we leave active.
2056 *
2057 * Note it is legal for this routine to be invoked even if none of the
2058 * properties in the port node within the MD have actually changed.
2059 */
2060static void
2061vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node)
2062{
2063	char		physname[LIFNAMSIZ];
2064	char		drv[LIFNAMSIZ];
2065	uint_t		ddi_instance;
2066	uint8_t		new_smode;
2067	int		i;
2068	uint64_t 	macaddr = 0;
2069	enum		{MD_init = 0x1,
2070				MD_physname = 0x2,
2071				MD_macaddr = 0x4,
2072				MD_smode = 0x8,
2073				MD_vlans = 0x10,
2074				MD_mtu = 0x20,
2075				MD_pls = 0x40,
2076				MD_bw = 0x80} updated;
2077	int		rv;
2078	uint16_t	pvid;
2079	vsw_vlanid_t	*vids;
2080	uint16_t	nvids;
2081	uint32_t	mtu;
2082	boolean_t	pls_update;
2083	uint64_t	maxbw;
2084
2085	updated = MD_init;
2086
2087	D1(vswp, "%s: enter", __func__);
2088
2089	/*
2090	 * Check if name of physical device in MD has changed.
2091	 */
2092	if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) {
2093		/*
2094		 * Do basic sanity check on new device name/instance,
2095		 * if its non NULL. It is valid for the device name to
2096		 * have changed from a non NULL to a NULL value, i.e.
2097		 * the vsw is being changed to 'routed' mode.
2098		 */
2099		if ((strlen(physname) != 0) &&
2100		    (ddi_parse(physname, drv,
2101		    &ddi_instance) != DDI_SUCCESS)) {
2102			cmn_err(CE_WARN, "!vsw%d: physical device %s is not"
2103			    " a valid device name/instance",
2104			    vswp->instance, physname);
2105			goto fail_reconf;
2106		}
2107
2108		if (strcmp(physname, vswp->physname)) {
2109			D2(vswp, "%s: device name changed from %s to %s",
2110			    __func__, vswp->physname, physname);
2111
2112			updated |= MD_physname;
2113		} else {
2114			D2(vswp, "%s: device name unchanged at %s",
2115			    __func__, vswp->physname);
2116		}
2117	} else {
2118		cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical "
2119		    "device from updated MD.", vswp->instance);
2120		goto fail_reconf;
2121	}
2122
2123	/*
2124	 * Check if MAC address has changed.
2125	 */
2126	if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) {
2127		cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD",
2128		    vswp->instance);
2129		goto fail_reconf;
2130	} else {
2131		uint64_t maddr = macaddr;
2132		READ_ENTER(&vswp->if_lockrw);
2133		for (i = ETHERADDRL - 1; i >= 0; i--) {
2134			if (vswp->if_addr.ether_addr_octet[i]
2135			    != (macaddr & 0xFF)) {
2136				D2(vswp, "%s: octet[%d] 0x%x != 0x%x",
2137				    __func__, i,
2138				    vswp->if_addr.ether_addr_octet[i],
2139				    (macaddr & 0xFF));
2140				updated |= MD_macaddr;
2141				macaddr = maddr;
2142				break;
2143			}
2144			macaddr >>= 8;
2145		}
2146		RW_EXIT(&vswp->if_lockrw);
2147		if (updated & MD_macaddr) {
2148			vsw_save_lmacaddr(vswp, macaddr);
2149		}
2150	}
2151
2152	/*
2153	 * Check if switching modes have changed.
2154	 */
2155	if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) {
2156		cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD",
2157		    vswp->instance, smode_propname);
2158		goto fail_reconf;
2159	} else {
2160		if (new_smode != vswp->smode) {
2161			D2(vswp, "%s: switching mode changed from %d to %d",
2162			    __func__, vswp->smode, new_smode);
2163
2164			updated |= MD_smode;
2165		}
2166	}
2167
2168	/* Read the vlan ids */
2169	vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids,
2170	    &nvids, NULL);
2171
2172	/* Determine if there are any vlan id updates */
2173	if ((pvid != vswp->pvid) ||		/* pvid changed? */
2174	    (nvids != vswp->nvids) ||		/* # of vids changed? */
2175	    ((nvids != 0) && (vswp->nvids != 0) &&	/* vids changed? */
2176	    !vsw_cmp_vids(vids, vswp->vids, nvids))) {
2177		updated |= MD_vlans;
2178	}
2179
2180	/* Read mtu */
2181	vsw_mtu_read(vswp, mdp, node, &mtu);
2182	if (mtu != vswp->mtu) {
2183		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
2184			updated |= MD_mtu;
2185		} else {
2186			cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
2187			    " as the specified value:%d is invalid\n",
2188			    vswp->instance, mtu);
2189		}
2190	}
2191
2192	/*
2193	 * Read the 'linkprop' property.
2194	 */
2195	vsw_linkprop_read(vswp, mdp, node, &pls_update);
2196	if (pls_update != vswp->pls_update) {
2197		updated |= MD_pls;
2198	}
2199
2200	/* Read bandwidth */
2201	vsw_bandwidth_read(vswp, mdp, node, &maxbw);
2202	if (maxbw != vswp->bandwidth) {
2203		if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) {
2204			updated |= MD_bw;
2205		} else {
2206			cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth"
2207			    " update as the specified value:%ld is invalid\n",
2208			    vswp->instance, maxbw);
2209		}
2210	}
2211
2212	/*
2213	 * Now make any changes which are needed...
2214	 */
2215	if (updated & MD_pls) {
2216
2217		/* save the updated property. */
2218		vswp->pls_update = pls_update;
2219
2220		if (pls_update == B_FALSE) {
2221			/*
2222			 * Phys link state update is now disabled for this vsw
2223			 * interface. If we had previously reported a link-down
2224			 * to the stack, undo that by sending a link-up.
2225			 */
2226			if (vswp->phys_link_state == LINK_STATE_DOWN) {
2227				vsw_mac_link_update(vswp, LINK_STATE_UP);
2228			}
2229		} else {
2230			/*
2231			 * Phys link state update is now enabled. Send up an
2232			 * update based on the current phys link state.
2233			 */
2234			if (vswp->smode & VSW_LAYER2) {
2235				vsw_mac_link_update(vswp,
2236				    vswp->phys_link_state);
2237			}
2238		}
2239
2240	}
2241
2242	if (updated & (MD_physname | MD_smode | MD_mtu)) {
2243
2244		/*
2245		 * Stop any pending thread to setup switching mode.
2246		 */
2247		vsw_setup_switching_stop(vswp);
2248
2249		/* Cleanup HybridIO */
2250		vsw_hio_cleanup(vswp);
2251
2252		/*
2253		 * Remove unicst, mcst addrs of vsw interface
2254		 * and ports from the physdev. This also closes
2255		 * the corresponding mac clients.
2256		 */
2257		vsw_unset_addrs(vswp);
2258
2259		/*
2260		 * Stop, detach and close the old device..
2261		 */
2262		mutex_enter(&vswp->mac_lock);
2263		vsw_mac_close(vswp);
2264		mutex_exit(&vswp->mac_lock);
2265
2266		/*
2267		 * Update phys name.
2268		 */
2269		if (updated & MD_physname) {
2270			cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s",
2271			    vswp->instance, vswp->physname, physname);
2272			(void) strncpy(vswp->physname,
2273			    physname, strlen(physname) + 1);
2274		}
2275
2276		/*
2277		 * Update array with the new switch mode values.
2278		 */
2279		if (updated & MD_smode) {
2280			vswp->smode = new_smode;
2281		}
2282
2283		/* Update mtu */
2284		if (updated & MD_mtu) {
2285			rv = vsw_mtu_update(vswp, mtu);
2286			if (rv != 0) {
2287				goto fail_update;
2288			}
2289		}
2290
2291		/*
2292		 * ..and attach, start the new device.
2293		 */
2294		rv = vsw_setup_switching(vswp);
2295		if (rv == EAGAIN) {
2296			/*
2297			 * Unable to setup switching mode.
2298			 * As the error is EAGAIN, schedule a thread to retry
2299			 * and return. Programming addresses of ports and
2300			 * vsw interface will be done by the thread when the
2301			 * switching setup completes successfully.
2302			 */
2303			if (vsw_setup_switching_start(vswp) != 0) {
2304				goto fail_update;
2305			}
2306			return;
2307
2308		} else if (rv) {
2309			goto fail_update;
2310		}
2311
2312		vsw_setup_switching_post_process(vswp);
2313	} else if (updated & MD_macaddr) {
2314		/*
2315		 * We enter here if only MD_macaddr is exclusively updated.
2316		 * If MD_physname and/or MD_smode are also updated, then
2317		 * as part of that, we would have implicitly processed
2318		 * MD_macaddr update (above).
2319		 */
2320		cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx",
2321		    vswp->instance, macaddr);
2322
2323		READ_ENTER(&vswp->if_lockrw);
2324		if (vswp->if_state & VSW_IF_UP) {
2325			/* reconfigure with new address */
2326			vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0);
2327
2328			/*
2329			 * Notify the MAC layer of the changed address.
2330			 */
2331			mac_unicst_update(vswp->if_mh,
2332			    (uint8_t *)&vswp->if_addr);
2333
2334		}
2335		RW_EXIT(&vswp->if_lockrw);
2336
2337	}
2338
2339	if (updated & MD_vlans) {
2340		/* Remove existing vlan ids from the hash table. */
2341		vsw_vlan_remove_ids(vswp, VSW_LOCALDEV);
2342
2343		if (vswp->if_state & VSW_IF_UP) {
2344			vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids);
2345		} else {
2346			if (vswp->nvids != 0) {
2347				kmem_free(vswp->vids,
2348				    sizeof (vsw_vlanid_t) * vswp->nvids);
2349			}
2350			vswp->vids = vids;
2351			vswp->nvids = nvids;
2352			vswp->pvid = pvid;
2353		}
2354
2355		/* add these new vlan ids into hash table */
2356		vsw_vlan_add_ids(vswp, VSW_LOCALDEV);
2357	} else {
2358		if (nvids != 0) {
2359			kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
2360		}
2361	}
2362
2363	if (updated & MD_bw) {
2364		vsw_update_bandwidth(vswp, NULL, VSW_LOCALDEV, maxbw);
2365	}
2366
2367	return;
2368
2369fail_reconf:
2370	cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance);
2371	return;
2372
2373fail_update:
2374	cmn_err(CE_WARN, "!vsw%d: re-configuration failed",
2375	    vswp->instance);
2376}
2377
2378/*
2379 * Read the port's md properties.
2380 */
2381static int
2382vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
2383	md_t *mdp, mde_cookie_t *node)
2384{
2385	uint64_t		ldc_id;
2386	uint8_t			*addrp;
2387	int			i, addrsz;
2388	int			num_nodes = 0, nchan = 0;
2389	int			listsz = 0;
2390	mde_cookie_t		*listp = NULL;
2391	struct ether_addr	ea;
2392	uint64_t		macaddr;
2393	uint64_t		inst = 0;
2394	uint64_t		val;
2395
2396	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
2397		DWARN(vswp, "%s: prop(%s) not found", __func__,
2398		    id_propname);
2399		return (1);
2400	}
2401
2402	/*
2403	 * Find the channel endpoint node(s) (which should be under this
2404	 * port node) which contain the channel id(s).
2405	 */
2406	if ((num_nodes = md_node_count(mdp)) <= 0) {
2407		DERR(vswp, "%s: invalid number of nodes found (%d)",
2408		    __func__, num_nodes);
2409		return (1);
2410	}
2411
2412	D2(vswp, "%s: %d nodes found", __func__, num_nodes);
2413
2414	/* allocate enough space for node list */
2415	listsz = num_nodes * sizeof (mde_cookie_t);
2416	listp = kmem_zalloc(listsz, KM_SLEEP);
2417
2418	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
2419	    md_find_name(mdp, "fwd"), listp);
2420
2421	if (nchan <= 0) {
2422		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
2423		kmem_free(listp, listsz);
2424		return (1);
2425	}
2426
2427	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
2428
2429	/* use property from first node found */
2430	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
2431		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
2432		    id_propname);
2433		kmem_free(listp, listsz);
2434		return (1);
2435	}
2436
2437	/* don't need list any more */
2438	kmem_free(listp, listsz);
2439
2440	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
2441
2442	/* read mac-address property */
2443	if (md_get_prop_data(mdp, *node, remaddr_propname,
2444	    &addrp, &addrsz)) {
2445		DWARN(vswp, "%s: prop(%s) not found",
2446		    __func__, remaddr_propname);
2447		return (1);
2448	}
2449
2450	if (addrsz < ETHERADDRL) {
2451		DWARN(vswp, "%s: invalid address size", __func__);
2452		return (1);
2453	}
2454
2455	macaddr = *((uint64_t *)addrp);
2456	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
2457
2458	for (i = ETHERADDRL - 1; i >= 0; i--) {
2459		ea.ether_addr_octet[i] = macaddr & 0xFF;
2460		macaddr >>= 8;
2461	}
2462
2463	/* now update all properties into the port */
2464	portp->p_vswp = vswp;
2465	portp->p_instance = inst;
2466	portp->addr_set = B_FALSE;
2467	ether_copy(&ea, &portp->p_macaddr);
2468	if (nchan > VSW_PORT_MAX_LDCS) {
2469		D2(vswp, "%s: using first of %d ldc ids",
2470		    __func__, nchan);
2471		nchan = VSW_PORT_MAX_LDCS;
2472	}
2473	portp->num_ldcs = nchan;
2474	portp->ldc_ids =
2475	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
2476	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);
2477
2478	/* read vlan id properties of this port node */
2479	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
2480	    &portp->vids, &portp->nvids, NULL);
2481
2482	/* Check if hybrid property is present */
2483	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
2484		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2485		portp->p_hio_enabled = B_TRUE;
2486	} else {
2487		portp->p_hio_enabled = B_FALSE;
2488	}
2489	/*
2490	 * Port hio capability determined after version
2491	 * negotiation, i.e., when we know the peer is HybridIO capable.
2492	 */
2493	portp->p_hio_capable = B_FALSE;
2494
2495	/* Read bandwidth of this port */
2496	vsw_port_read_bandwidth(portp, mdp, *node, &portp->p_bandwidth);
2497
2498	return (0);
2499}
2500
2501/*
2502 * Add a new port to the system.
2503 *
2504 * Returns 0 on success, 1 on failure.
2505 */
2506int
2507vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
2508{
2509	vsw_port_t	*portp;
2510	int		rv;
2511
2512	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
2513
2514	rv = vsw_port_read_props(portp, vswp, mdp, node);
2515	if (rv != 0) {
2516		kmem_free(portp, sizeof (*portp));
2517		return (1);
2518	}
2519
2520	rv = vsw_port_attach(portp);
2521	if (rv != 0) {
2522		DERR(vswp, "%s: failed to attach port", __func__);
2523		return (1);
2524	}
2525
2526	return (0);
2527}
2528
2529static int
2530vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
2531	md_t *prev_mdp, mde_cookie_t prev_mdex)
2532{
2533	uint64_t	cport_num;
2534	uint64_t	pport_num;
2535	vsw_port_list_t	*plistp;
2536	vsw_port_t	*portp;
2537	uint16_t	pvid;
2538	vsw_vlanid_t	*vids;
2539	uint16_t	nvids;
2540	uint64_t	val;
2541	boolean_t	hio_enabled = B_FALSE;
2542	uint64_t	maxbw;
2543	enum		{P_MD_init = 0x1,
2544				P_MD_vlans = 0x2,
2545				P_MD_hio = 0x4,
2546				P_MD_maxbw = 0x8} updated;
2547
2548	updated = P_MD_init;
2549
2550	/*
2551	 * For now, we get port updates only if vlan ids changed.
2552	 * We read the port num and do some sanity check.
2553	 */
2554	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
2555		return (1);
2556	}
2557
2558	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
2559		return (1);
2560	}
2561	if (cport_num != pport_num)
2562		return (1);
2563
2564	plistp = &(vswp->plist);
2565
2566	READ_ENTER(&plistp->lockrw);
2567
2568	portp = vsw_lookup_port(vswp, cport_num);
2569	if (portp == NULL) {
2570		RW_EXIT(&plistp->lockrw);
2571		return (1);
2572	}
2573
2574	/* Read the vlan ids */
2575	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
2576	    &vids, &nvids, NULL);
2577
2578	/* Determine if there are any vlan id updates */
2579	if ((pvid != portp->pvid) ||		/* pvid changed? */
2580	    (nvids != portp->nvids) ||		/* # of vids changed? */
2581	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
2582	    !vsw_cmp_vids(vids, portp->vids, nvids))) {
2583		updated |= P_MD_vlans;
2584	}
2585
2586	/* Check if hybrid property is present */
2587	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
2588		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
2589		hio_enabled = B_TRUE;
2590	}
2591
2592	if (portp->p_hio_enabled != hio_enabled) {
2593		updated |= P_MD_hio;
2594	}
2595
2596	/* Check if maxbw property is present */
2597	vsw_port_read_bandwidth(portp, curr_mdp, curr_mdex, &maxbw);
2598	if (maxbw != portp->p_bandwidth) {
2599		if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) {
2600			updated |= P_MD_maxbw;
2601		} else {
2602			cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth"
2603			    " update for port %d as the specified value:%ld"
2604			    " is invalid\n",
2605			    vswp->instance, portp->p_instance, maxbw);
2606		}
2607	}
2608
2609	if (updated & P_MD_vlans) {
2610		/* Remove existing vlan ids from the hash table. */
2611		vsw_vlan_remove_ids(portp, VSW_VNETPORT);
2612
2613		/* Reconfigure vlans with network device */
2614		vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);
2615
2616		/* add these new vlan ids into hash table */
2617		vsw_vlan_add_ids(portp, VSW_VNETPORT);
2618
2619		/* reset the port if it is vlan unaware (ver < 1.3) */
2620		vsw_vlan_unaware_port_reset(portp);
2621	}
2622
2623	if (updated & P_MD_hio) {
2624		vsw_hio_port_update(portp, hio_enabled);
2625	}
2626
2627	if (updated & P_MD_maxbw) {
2628		vsw_update_bandwidth(NULL, portp, VSW_VNETPORT, maxbw);
2629	}
2630
2631	RW_EXIT(&plistp->lockrw);
2632
2633	return (0);
2634}
2635
2636/*
2637 * vsw_mac_rx -- A common function to send packets to the interface.
2638 * By default this function check if the interface is UP or not, the
2639 * rest of the behaviour depends on the flags as below:
2640 *
2641 *	VSW_MACRX_PROMISC -- Check if the promisc mode set or not.
2642 *	VSW_MACRX_COPYMSG -- Make a copy of the message(s).
2643 *	VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack.
2644 */
2645void
2646vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
2647    mblk_t *mp, vsw_macrx_flags_t flags)
2648{
2649	mblk_t		*mpt;
2650
2651	D1(vswp, "%s:enter\n", __func__);
2652	READ_ENTER(&vswp->if_lockrw);
2653	/* Check if the interface is up */
2654	if (!(vswp->if_state & VSW_IF_UP)) {
2655		RW_EXIT(&vswp->if_lockrw);
2656		/* Free messages only if FREEMSG flag specified */
2657		if (flags & VSW_MACRX_FREEMSG) {
2658			freemsgchain(mp);
2659		}
2660		D1(vswp, "%s:exit\n", __func__);
2661		return;
2662	}
2663	/*
2664	 * If PROMISC flag is passed, then check if
2665	 * the interface is in the PROMISC mode.
2666	 * If not, drop the messages.
2667	 */
2668	if (flags & VSW_MACRX_PROMISC) {
2669		if (!(vswp->if_state & VSW_IF_PROMISC)) {
2670			RW_EXIT(&vswp->if_lockrw);
2671			/* Free messages only if FREEMSG flag specified */
2672			if (flags & VSW_MACRX_FREEMSG) {
2673				freemsgchain(mp);
2674			}
2675			D1(vswp, "%s:exit\n", __func__);
2676			return;
2677		}
2678	}
2679	RW_EXIT(&vswp->if_lockrw);
2680	/*
2681	 * If COPYMSG flag is passed, then make a copy
2682	 * of the message chain and send up the copy.
2683	 */
2684	if (flags & VSW_MACRX_COPYMSG) {
2685		mp = copymsgchain(mp);
2686		if (mp == NULL) {
2687			D1(vswp, "%s:exit\n", __func__);
2688			return;
2689		}
2690	}
2691
2692	D2(vswp, "%s: sending up stack", __func__);
2693
2694	mpt = NULL;
2695	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
2696	if (mp != NULL) {
2697		mac_rx(vswp->if_mh, mrh, mp);
2698	}
2699	D1(vswp, "%s:exit\n", __func__);
2700}
2701
2702/* copy mac address of vsw into soft state structure */
2703static void
2704vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
2705{
2706	int	i;
2707
2708	WRITE_ENTER(&vswp->if_lockrw);
2709	for (i = ETHERADDRL - 1; i >= 0; i--) {
2710		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
2711		macaddr >>= 8;
2712	}
2713	RW_EXIT(&vswp->if_lockrw);
2714}
2715
2716/* Compare VLAN ids, array size expected to be same. */
2717static boolean_t
2718vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
2719{
2720	int i, j;
2721	uint16_t vid;
2722
2723	for (i = 0; i < nvids; i++) {
2724		vid = vids1[i].vl_vid;
2725		for (j = 0; j < nvids; j++) {
2726			if (vid == vids2[i].vl_vid)
2727				break;
2728		}
2729		if (j == nvids) {
2730			return (B_FALSE);
2731		}
2732	}
2733	return (B_TRUE);
2734}
2735