1/*
2 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33
34#include "mlx4_en.h"
35
36#include <linux/mlx4/driver.h>
37#include <linux/mlx4/device.h>
38#include <linux/mlx4/cmd.h>
39#include <linux/mlx4/cq.h>
40
41#include <linux/delay.h>
42#include <net/ethernet.h>
43#include <net/if_vlan_var.h>
44#include <sys/sockio.h>
45
46static void mlx4_en_init_locked(struct mlx4_en_priv *priv);
47static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv);
48
49static void mlx4_en_vlan_rx_add_vid(void *arg, struct net_device *dev, u16 vid)
50{
51	struct mlx4_en_priv *priv = netdev_priv(dev);
52	int idx;
53	u8 field;
54
55	if (arg != priv)
56		return;
57
58	if ((vid == 0) || (vid > 4095))    /* Invalid */
59		return;
60	en_dbg(HW, priv, "adding VLAN:%d\n", vid);
61	idx = vid >> 5;
62	field = 1 << (vid & 0x1f);
63	spin_lock(&priv->vlan_lock);
64	priv->vlgrp_modified = true;
65	if (priv->vlan_unregister[idx] & field)
66		priv->vlan_unregister[idx] &= ~field;
67	else
68		priv->vlan_register[idx] |= field;
69	priv->vlans[idx] |= field;
70	spin_unlock(&priv->vlan_lock);
71}
72
73static void mlx4_en_vlan_rx_kill_vid(void *arg, struct net_device *dev, u16 vid)
74{
75	struct mlx4_en_priv *priv = netdev_priv(dev);
76	int idx;
77	u8 field;
78
79	if (arg != priv)
80		return;
81
82	if ((vid == 0) || (vid > 4095))    /* Invalid */
83		return;
84	en_dbg(HW, priv, "Killing VID:%d\n", vid);
85	idx = vid >> 5;
86	field = 1 << (vid & 0x1f);
87	spin_lock(&priv->vlan_lock);
88	priv->vlgrp_modified = true;
89	if (priv->vlan_register[idx] & field)
90		priv->vlan_register[idx] &= ~field;
91	else
92		priv->vlan_unregister[idx] |= field;
93	priv->vlans[idx] &= ~field;
94	spin_unlock(&priv->vlan_lock);
95}
96
97u64 mlx4_en_mac_to_u64(u8 *addr)
98{
99	u64 mac = 0;
100	int i;
101
102	for (i = 0; i < ETHER_ADDR_LEN; i++) {
103		mac <<= 8;
104		mac |= addr[i];
105	}
106	return mac;
107}
108
109static int mlx4_en_cache_mclist(struct net_device *dev, u64 **mcaddrp)
110{
111	struct ifmultiaddr *ifma;
112	u64 *mcaddr;
113	int cnt;
114	int i;
115
116	*mcaddrp = NULL;
117restart:
118	cnt = 0;
119	if_maddr_rlock(dev);
120	TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) {
121		if (ifma->ifma_addr->sa_family != AF_LINK)
122			continue;
123		if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen !=
124		    ETHER_ADDR_LEN)
125			continue;
126		cnt++;
127	}
128	if_maddr_runlock(dev);
129	if (cnt == 0)
130		return (0);
131	mcaddr = kmalloc(sizeof(u64) * cnt, GFP_KERNEL);
132	if (mcaddr == NULL)
133		return (0);
134	i = 0;
135	if_maddr_rlock(dev);
136	TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) {
137		if (ifma->ifma_addr->sa_family != AF_LINK)
138			continue;
139		if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen !=
140		    ETHER_ADDR_LEN)
141			continue;
142		/* Make sure the list didn't grow. */
143		if (i == cnt) {
144			if_maddr_runlock(dev);
145			kfree(mcaddr);
146			goto restart;
147		}
148		mcaddr[i++] = mlx4_en_mac_to_u64(
149		    LLADDR((struct sockaddr_dl *)ifma->ifma_addr));
150	}
151	if_maddr_runlock(dev);
152	*mcaddrp = mcaddr;
153	return (i);
154}
155
156static void mlx4_en_stop_port(struct net_device *dev)
157{
158	struct mlx4_en_priv *priv = netdev_priv(dev);
159
160	queue_work(priv->mdev->workqueue, &priv->stop_port_task);
161}
162
163static void mlx4_en_start_port(struct net_device *dev)
164{
165	struct mlx4_en_priv *priv = netdev_priv(dev);
166
167	queue_work(priv->mdev->workqueue, &priv->start_port_task);
168}
169
170static void mlx4_en_set_multicast(struct net_device *dev)
171{
172	struct mlx4_en_priv *priv = netdev_priv(dev);
173
174	if (!priv->port_up)
175		return;
176
177	queue_work(priv->mdev->workqueue, &priv->mcast_task);
178}
179
180static void mlx4_en_do_set_multicast(struct work_struct *work)
181{
182	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
183						 mcast_task);
184	struct net_device *dev = priv->dev;
185	struct mlx4_en_dev *mdev = priv->mdev;
186	int err;
187
188	mutex_lock(&mdev->state_lock);
189	if (!mdev->device_up) {
190		en_dbg(HW, priv, "Card is not up, "
191				 "ignoring multicast change.\n");
192		goto out;
193	}
194	if (!priv->port_up) {
195		en_dbg(HW, priv, "Port is down, "
196				 "ignoring  multicast change.\n");
197		goto out;
198	}
199
200	/*
201	 * Promsicuous mode: disable all filters
202	 */
203
204	if (dev->if_flags & IFF_PROMISC) {
205		if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) {
206			priv->flags |= MLX4_EN_FLAG_PROMISC;
207
208			/* Enable promiscouos mode */
209			err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
210						     priv->base_qpn, 1);
211			if (err)
212				en_err(priv, "Failed enabling "
213					     "promiscous mode\n");
214
215			/* Disable port multicast filter (unconditionally) */
216			err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
217						  0, MLX4_MCAST_DISABLE);
218			if (err)
219				en_err(priv, "Failed disabling "
220					     "multicast filter\n");
221
222			/* Disable port VLAN filter */
223			err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, NULL);
224			if (err)
225				en_err(priv, "Failed disabling VLAN filter\n");
226		}
227		goto out;
228	}
229
230	/*
231	 * Not in promiscous mode
232	 */
233
234	if (priv->flags & MLX4_EN_FLAG_PROMISC) {
235		priv->flags &= ~MLX4_EN_FLAG_PROMISC;
236
237		/* Disable promiscouos mode */
238		err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
239					     priv->base_qpn, 0);
240		if (err)
241			en_err(priv, "Failed disabling promiscous mode\n");
242
243		/* Enable port VLAN filter */
244		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv->port, priv->vlans);
245		if (err)
246			en_err(priv, "Failed enabling VLAN filter\n");
247	}
248
249	/* Enable/disable the multicast filter according to IFF_ALLMULTI */
250	if (dev->if_flags & IFF_ALLMULTI) {
251		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
252					  0, MLX4_MCAST_DISABLE);
253		if (err)
254			en_err(priv, "Failed disabling multicast filter\n");
255	} else {
256		u64 *mcaddr;
257		int mccount;
258		int i;
259
260		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
261					  0, MLX4_MCAST_DISABLE);
262		if (err)
263			en_err(priv, "Failed disabling multicast filter\n");
264
265		/* Flush mcast filter and init it with broadcast address */
266		mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST,
267				    1, MLX4_MCAST_CONFIG);
268
269		/* Update multicast list - we cache all addresses so they won't
270		 * change while HW is updated holding the command semaphor */
271		mccount = mlx4_en_cache_mclist(dev, &mcaddr);
272		for (i = 0; i < mccount; i++)
273			mlx4_SET_MCAST_FLTR(mdev->dev, priv->port,
274					    mcaddr[i], 0, MLX4_MCAST_CONFIG);
275		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
276					  0, MLX4_MCAST_ENABLE);
277		if (err)
278			en_err(priv, "Failed enabling multicast filter\n");
279
280		kfree(mcaddr);
281	}
282out:
283	mutex_unlock(&mdev->state_lock);
284}
285
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Poll every RX completion queue of the port: for each queue, take its
 * lock with interrupts saved, call napi_synchronize() and process the
 * queue with a budget argument of 0.
 * Only built when CONFIG_NET_POLL_CONTROLLER is defined.
 */
static void mlx4_en_netpoll(struct net_device *dev)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	unsigned long irq_state;
	int ring;

	for (ring = 0; ring < priv->rx_ring_num; ring++) {
		struct mlx4_en_cq *rx_cq = &priv->rx_cq[ring];

		spin_lock_irqsave(&rx_cq->lock, irq_state);
		napi_synchronize(&rx_cq->napi);
		mlx4_en_process_rx_cq(dev, rx_cq, 0);
		spin_unlock_irqrestore(&rx_cq->lock, irq_state);
	}
}
#endif
303
304static void mlx4_en_watchdog_timeout(void *arg)
305{
306	struct mlx4_en_priv *priv = arg;
307	struct mlx4_en_dev *mdev = priv->mdev;
308
309	en_dbg(DRV, priv, "Scheduling watchdog\n");
310	queue_work(mdev->workqueue, &priv->watchdog_task);
311	if (priv->port_up)
312		callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT,
313		    mlx4_en_watchdog_timeout, priv);
314}
315
316
317/* XXX This clears user settings in too many cases. */
318static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv)
319{
320	struct mlx4_en_cq *cq;
321	int i;
322
323	/* If we haven't received a specific coalescing setting
324	 * (module param), we set the moderation paramters as follows:
325	 * - moder_cnt is set to the number of mtu sized packets to
326	 *   satisfy our coelsing target.
327	 * - moder_time is set to a fixed value.
328	 */
329	priv->rx_frames = MLX4_EN_RX_COAL_TARGET / priv->dev->if_mtu + 1;
330	priv->rx_usecs = MLX4_EN_RX_COAL_TIME;
331	en_dbg(INTR, priv, "Default coalesing params for mtu:%ld - "
332			   "rx_frames:%d rx_usecs:%d\n",
333		 priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs);
334
335	/* Setup cq moderation params */
336	for (i = 0; i < priv->rx_ring_num; i++) {
337		cq = &priv->rx_cq[i];
338		cq->moder_cnt = priv->rx_frames;
339		cq->moder_time = priv->rx_usecs;
340		priv->last_moder_time[i] = MLX4_EN_AUTO_CONF;
341		priv->last_moder_packets[i] = 0;
342		priv->last_moder_bytes[i] = 0;
343	}
344
345	for (i = 0; i < priv->tx_ring_num; i++) {
346		cq = &priv->tx_cq[i];
347		cq->moder_cnt = MLX4_EN_TX_COAL_PKTS;
348		cq->moder_time = MLX4_EN_TX_COAL_TIME;
349	}
350
351	/* Reset auto-moderation params */
352	priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW;
353	priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW;
354	priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH;
355	priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH;
356	priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL;
357	priv->adaptive_rx_coal = 1;
358	priv->last_moder_jiffies = 0;
359	priv->last_moder_tx_packets = 0;
360}
361
362static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv)
363{
364	unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies);
365	struct mlx4_en_cq *cq;
366	unsigned long packets;
367	unsigned long rate;
368	unsigned long avg_pkt_size;
369	unsigned long rx_packets;
370	unsigned long rx_bytes;
371	unsigned long rx_pkt_diff;
372	int moder_time;
373	int ring, err;
374
375	if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ)
376		return;
377	for (ring = 0; ring < priv->rx_ring_num; ring++) {
378		spin_lock(&priv->stats_lock);
379		rx_packets = priv->rx_ring[ring].packets;
380		rx_bytes = priv->rx_ring[ring].bytes;
381		spin_unlock(&priv->stats_lock);
382
383		rx_pkt_diff = ((unsigned long) (rx_packets -
384				priv->last_moder_packets[ring]));
385		packets = rx_pkt_diff;
386		rate = packets * HZ / period;
387		avg_pkt_size = packets ? ((unsigned long) (rx_bytes -
388				priv->last_moder_bytes[ring])) / packets : 0;
389
390		/* Apply auto-moderation only when packet rate
391		* exceeds a rate that it matters */
392		if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) &&
393				avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) {
394			if (rate < priv->pkt_rate_low ||
395			    avg_pkt_size < MLX4_EN_AVG_PKT_SMALL)
396				moder_time = priv->rx_usecs_low;
397			else if (rate > priv->pkt_rate_high)
398				moder_time = priv->rx_usecs_high;
399			else
400				moder_time = (rate - priv->pkt_rate_low) *
401					(priv->rx_usecs_high - priv->rx_usecs_low) /
402					(priv->pkt_rate_high - priv->pkt_rate_low) +
403					priv->rx_usecs_low;
404		} else {
405			moder_time = priv->rx_usecs_low;
406		}
407
408		if (moder_time != priv->last_moder_time[ring]) {
409			priv->last_moder_time[ring] = moder_time;
410			cq = &priv->rx_cq[ring];
411			cq->moder_time = moder_time;
412			err = mlx4_en_set_cq_moder(priv, cq);
413			if (err)
414				en_err(priv, "Failed modifying moderation "
415					"for cq:%d\n", ring);
416		}
417		priv->last_moder_packets[ring] = rx_packets;
418		priv->last_moder_bytes[ring] = rx_bytes;
419	}
420
421	priv->last_moder_jiffies = jiffies;
422}
423
424static void mlx4_en_handle_vlans(struct mlx4_en_priv *priv)
425{
426	u8 vlan_register[VLAN_FLTR_SIZE];
427	u8 vlan_unregister[VLAN_FLTR_SIZE];
428	int i, j, idx;
429	u16 vid;
430
431	/* cache the vlan data for processing
432	 * done under lock to avoid changes during work */
433	spin_lock(&priv->vlan_lock);
434	for (i = 0; i < VLAN_FLTR_SIZE; i++) {
435		vlan_register[i] = priv->vlan_register[i];
436		priv->vlan_register[i] = 0;
437		vlan_unregister[i] = priv->vlan_unregister[i];
438		priv->vlan_unregister[i] = 0;
439	}
440	priv->vlgrp_modified = false;
441	spin_unlock(&priv->vlan_lock);
442
443	/* Configure the vlan filter
444	 * The vlgrp is updated with all the vids that need to be allowed */
445	if (mlx4_SET_VLAN_FLTR(priv->mdev->dev, priv->port, priv->vlans))
446		en_err(priv, "Failed configuring VLAN filter\n");
447
448	/* Configure the VLAN table */
449	for (i = 0; i < VLAN_FLTR_SIZE; i++) {
450		for (j = 0; j < 32; j++) {
451			vid = (i << 5) + j;
452			if (vlan_register[i] & (1 << j))
453				if (mlx4_register_vlan(priv->mdev->dev, priv->port, vid, &idx))
454					en_dbg(HW, priv, "failed registering vlan %d\n", vid);
455			if (vlan_unregister[i] & (1 << j)) {
456				if (!mlx4_find_cached_vlan(priv->mdev->dev, priv->port, vid, &idx))
457					mlx4_unregister_vlan(priv->mdev->dev, priv->port, idx);
458				else
459					en_dbg(HW, priv, "could not find vid %d in cache\n", vid);
460			}
461		}
462	}
463}
464
465static void mlx4_en_do_get_stats(struct work_struct *work)
466{
467	struct delayed_work *delay = to_delayed_work(work);
468	struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv,
469						 stats_task);
470	struct mlx4_en_dev *mdev = priv->mdev;
471	int err;
472
473	err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0);
474	if (err)
475		en_dbg(HW, priv, "Could not update stats \n");
476
477
478	mutex_lock(&mdev->state_lock);
479	if (mdev->device_up) {
480		if (priv->port_up) {
481			if (priv->vlgrp_modified)
482				mlx4_en_handle_vlans(priv);
483
484			mlx4_en_auto_moderation(priv);
485		}
486
487		queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
488	}
489	mlx4_en_QUERY_PORT(priv->mdev, priv->port);
490	mutex_unlock(&mdev->state_lock);
491}
492
493static void mlx4_en_linkstate(struct work_struct *work)
494{
495	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
496						 linkstate_task);
497	struct mlx4_en_dev *mdev = priv->mdev;
498	int linkstate = priv->link_state;
499
500	mutex_lock(&mdev->state_lock);
501	/* If observable port state changed set carrier state and
502	 * report to system log */
503	if (priv->last_link_state != linkstate) {
504		if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) {
505			if_link_state_change(priv->dev, LINK_STATE_DOWN);
506		} else {
507			en_info(priv, "Link Up\n");
508			if_link_state_change(priv->dev, LINK_STATE_UP);
509		}
510	}
511	priv->last_link_state = linkstate;
512	mutex_unlock(&mdev->state_lock);
513}
514
515static void mlx4_en_lock_and_stop_port(struct work_struct *work)
516{
517	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
518						 stop_port_task);
519	struct net_device *dev = priv->dev;
520	struct mlx4_en_dev *mdev = priv->mdev;
521
522	mutex_lock(&mdev->state_lock);
523	mlx4_en_do_stop_port(dev);
524	mutex_unlock(&mdev->state_lock);
525}
526
527static void mlx4_en_lock_and_start_port(struct work_struct *work)
528{
529	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
530						 start_port_task);
531	struct net_device *dev = priv->dev;
532	struct mlx4_en_dev *mdev = priv->mdev;
533
534	mutex_lock(&mdev->state_lock);
535	mlx4_en_do_start_port(dev);
536	mutex_unlock(&mdev->state_lock);
537}
538
539int mlx4_en_do_start_port(struct net_device *dev)
540{
541	struct mlx4_en_priv *priv = netdev_priv(dev);
542	struct mlx4_en_dev *mdev = priv->mdev;
543	struct mlx4_en_cq *cq;
544	struct mlx4_en_tx_ring *tx_ring;
545	u64 config;
546	int rx_index = 0;
547	int tx_index = 0;
548	int err = 0;
549	int i;
550	int j;
551
552	if (priv->port_up) {
553		en_dbg(DRV, priv, "start port called while port already up\n");
554		return 0;
555	}
556
557	/* Calculate Rx buf size */
558	dev->if_mtu = min(dev->if_mtu, priv->max_mtu);
559	mlx4_en_calc_rx_buf(dev);
560	en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size);
561
562	/* Configure rx cq's and rings */
563	err = mlx4_en_activate_rx_rings(priv);
564	if (err) {
565		en_err(priv, "Failed to activate RX rings\n");
566		return err;
567	}
568
569	for (i = 0; i < priv->rx_ring_num; i++) {
570		cq = &priv->rx_cq[i];
571
572		err = mlx4_en_activate_cq(priv, cq);
573		if (err) {
574			en_err(priv, "Failed activating Rx CQ\n");
575			goto cq_err;
576		}
577		for (j = 0; j < cq->size; j++)
578			cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK;
579		err = mlx4_en_set_cq_moder(priv, cq);
580		if (err) {
581			en_err(priv, "Failed setting cq moderation parameters");
582			mlx4_en_deactivate_cq(priv, cq);
583			goto cq_err;
584		}
585		mlx4_en_arm_cq(priv, cq);
586		priv->rx_ring[i].cqn = cq->mcq.cqn;
587		++rx_index;
588	}
589
590	err = mlx4_en_config_rss_steer(priv);
591	if (err) {
592		en_err(priv, "Failed configuring rss steering\n");
593		goto cq_err;
594	}
595
596	/* Configure tx cq's and rings */
597	for (i = 0; i < priv->tx_ring_num; i++) {
598		/* Configure cq */
599		cq = &priv->tx_cq[i];
600		err = mlx4_en_activate_cq(priv, cq);
601		if (err) {
602			en_err(priv, "Failed allocating Tx CQ\n");
603			goto tx_err;
604		}
605		err = mlx4_en_set_cq_moder(priv, cq);
606		if (err) {
607			en_err(priv, "Failed setting cq moderation parameters");
608			mlx4_en_deactivate_cq(priv, cq);
609			goto tx_err;
610		}
611		en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i);
612		cq->buf->wqe_index = cpu_to_be16(0xffff);
613
614		/* Configure ring */
615		tx_ring = &priv->tx_ring[i];
616		err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn);
617		if (err) {
618			en_err(priv, "Failed allocating Tx ring\n");
619			mlx4_en_deactivate_cq(priv, cq);
620			goto tx_err;
621		}
622		/* Set initial ownership of all Tx TXBBs to SW (1) */
623		for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
624			*((u32 *) (tx_ring->buf + j)) = 0xffffffff;
625		++tx_index;
626	}
627
628	/* Configure port */
629	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
630				    priv->rx_mb_size + ETHER_CRC_LEN,
631				    priv->prof->tx_pause,
632				    priv->prof->tx_ppp,
633				    priv->prof->rx_pause,
634				    priv->prof->rx_ppp);
635	if (err) {
636		en_err(priv, "Failed setting port general configurations "
637			     "for port %d, with error %d\n", priv->port, err);
638		goto tx_err;
639	}
640	/* Set default qp number */
641	err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0);
642	if (err) {
643		en_err(priv, "Failed setting default qp numbers\n");
644		goto tx_err;
645	}
646	/* Set port mac number */
647	en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port);
648	err = mlx4_register_mac(mdev->dev, priv->port,
649				mlx4_en_mac_to_u64(IF_LLADDR(dev)));
650	if (err < 0) {
651		en_err(priv, "Failed setting port mac err=%d\n", err);
652		goto tx_err;
653	}
654	mdev->mac_removed[priv->port] = 0;
655
656	/* Init port */
657	en_dbg(HW, priv, "Initializing port\n");
658	err = mlx4_INIT_PORT(mdev->dev, priv->port);
659	if (err) {
660		en_err(priv, "Failed Initializing port\n");
661		goto mac_err;
662	}
663
664	/* Set the various hardware offload abilities */
665	dev->if_hwassist = 0;
666	if (dev->if_capenable & IFCAP_TSO4)
667		dev->if_hwassist |= CSUM_TSO;
668	if (dev->if_capenable & IFCAP_TXCSUM)
669		dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
670	if (dev->if_capenable & IFCAP_RXCSUM)
671		priv->rx_csum = 1;
672	else
673		priv->rx_csum = 0;
674
675	err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
676	if (err) {
677		en_err(priv, "Failed to get WoL info, unable to modify\n");
678		goto wol_err;
679	}
680	if (dev->if_capenable & IFCAP_WOL_MAGIC) {
681		config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED |
682		    MLX4_EN_WOL_MAGIC;
683	} else {
684		config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC);
685		config |= MLX4_EN_WOL_DO_MODIFY;
686	}
687
688	err = mlx4_wol_write(priv->mdev->dev, config, priv->port);
689	if (err) {
690		en_err(priv, "Failed to set WoL information\n");
691		goto wol_err;
692	}
693
694	priv->port_up = true;
695
696	/* Populate multicast list */
697	mlx4_en_set_multicast(dev);
698
699	/* Enable the queues. */
700	dev->if_drv_flags &= ~IFF_DRV_OACTIVE;
701	dev->if_drv_flags |= IFF_DRV_RUNNING;
702
703	callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT,
704	    mlx4_en_watchdog_timeout, priv);
705
706	return 0;
707
708wol_err:
709	/* close port*/
710	mlx4_CLOSE_PORT(mdev->dev, priv->port);
711
712mac_err:
713	mlx4_unregister_mac(mdev->dev, priv->port, priv->mac);
714tx_err:
715	while (tx_index--) {
716		mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]);
717		mlx4_en_deactivate_cq(priv, &priv->tx_cq[tx_index]);
718	}
719
720	mlx4_en_release_rss_steer(priv);
721cq_err:
722	while (rx_index--)
723		mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]);
724	for (i = 0; i < priv->rx_ring_num; i++)
725		mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
726
727	return err; /* need to close devices */
728}
729
730
731void mlx4_en_do_stop_port(struct net_device *dev)
732{
733	struct mlx4_en_priv *priv = netdev_priv(dev);
734	struct mlx4_en_dev *mdev = priv->mdev;
735	int i;
736
737	if (!priv->port_up) {
738		en_dbg(DRV, priv, "stop port called while port already down\n");
739		return;
740	}
741
742	/* Set port as not active */
743	priv->port_up = false;
744
745	/* Unregister Mac address for the port */
746	mlx4_unregister_mac(mdev->dev, priv->port, priv->mac);
747	mdev->mac_removed[priv->port] = 1;
748
749	/* Free TX Rings */
750	for (i = 0; i < priv->tx_ring_num; i++) {
751		mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]);
752		mlx4_en_deactivate_cq(priv, &priv->tx_cq[i]);
753	}
754	msleep(10);
755
756	for (i = 0; i < priv->tx_ring_num; i++)
757		mlx4_en_free_tx_buf(dev, &priv->tx_ring[i]);
758
759	/* Free RSS qps */
760	mlx4_en_release_rss_steer(priv);
761
762	/* Free RX Rings */
763	for (i = 0; i < priv->rx_ring_num; i++) {
764		mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
765		mlx4_en_deactivate_cq(priv, &priv->rx_cq[i]);
766	}
767
768	/* close port*/
769	mlx4_CLOSE_PORT(mdev->dev, priv->port);
770
771	callout_stop(&priv->watchdog_timer);
772
773	dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
774}
775
776static void mlx4_en_restart(struct work_struct *work)
777{
778	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
779						 watchdog_task);
780	struct mlx4_en_dev *mdev = priv->mdev;
781	struct net_device *dev = priv->dev;
782	struct mlx4_en_tx_ring *ring;
783	int i;
784
785	if (priv->blocked == 0 || priv->port_up == 0)
786		return;
787	for (i = 0; i < priv->tx_ring_num; i++) {
788		ring = &priv->tx_ring[i];
789		if (ring->blocked &&
790		    ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks)
791			goto reset;
792	}
793	return;
794
795reset:
796	priv->port_stats.tx_timeout++;
797	en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port);
798
799	mutex_lock(&mdev->state_lock);
800	if (priv->port_up) {
801		mlx4_en_do_stop_port(dev);
802		if (mlx4_en_do_start_port(dev))
803			en_err(priv, "Failed restarting port %d\n", priv->port);
804	}
805	mutex_unlock(&mdev->state_lock);
806}
807
808
809static void
810mlx4_en_init(void *arg)
811{
812	struct mlx4_en_priv *priv;
813	struct mlx4_en_dev *mdev;
814
815	priv = arg;
816	mdev = priv->mdev;
817	mutex_lock(&mdev->state_lock);
818	mlx4_en_init_locked(priv);
819	mutex_unlock(&mdev->state_lock);
820}
821
822static void
823mlx4_en_init_locked(struct mlx4_en_priv *priv)
824{
825
826	struct mlx4_en_dev *mdev;
827	struct ifnet *dev;
828	int i;
829
830	dev = priv->dev;
831	mdev = priv->mdev;
832	if (dev->if_drv_flags & IFF_DRV_RUNNING)
833		mlx4_en_do_stop_port(dev);
834
835	if (!mdev->device_up) {
836		en_err(priv, "Cannot open - device down/disabled\n");
837		return;
838	}
839
840	/* Reset HW statistics and performance counters */
841	if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1))
842		en_dbg(HW, priv, "Failed dumping statistics\n");
843
844	memset(&priv->pstats, 0, sizeof(priv->pstats));
845
846	for (i = 0; i < priv->tx_ring_num; i++) {
847		priv->tx_ring[i].bytes = 0;
848		priv->tx_ring[i].packets = 0;
849	}
850	for (i = 0; i < priv->rx_ring_num; i++) {
851		priv->rx_ring[i].bytes = 0;
852		priv->rx_ring[i].packets = 0;
853	}
854
855	mlx4_en_set_default_moderation(priv);
856	if (mlx4_en_do_start_port(dev))
857		en_err(priv, "Failed starting port:%d\n", priv->port);
858}
859
860void mlx4_en_free_resources(struct mlx4_en_priv *priv)
861{
862	int i;
863
864	for (i = 0; i < priv->tx_ring_num; i++) {
865		if (priv->tx_ring[i].tx_info)
866			mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]);
867		if (priv->tx_cq[i].buf)
868			mlx4_en_destroy_cq(priv, &priv->tx_cq[i]);
869	}
870
871	for (i = 0; i < priv->rx_ring_num; i++) {
872		if (priv->rx_ring[i].rx_info)
873			mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i]);
874		if (priv->rx_cq[i].buf)
875			mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
876	}
877	/* Free the stats tree when we resize the rings. */
878	if (priv->sysctl)
879		sysctl_ctx_free(&priv->stat_ctx);
880
881}
882
883int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
884{
885	struct mlx4_en_port_profile *prof = priv->prof;
886	int i;
887
888	/* Create tx Rings */
889	for (i = 0; i < priv->tx_ring_num; i++) {
890		if (mlx4_en_create_cq(priv, &priv->tx_cq[i],
891				      prof->tx_ring_size, i, TX))
892			goto err;
893
894		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i],
895					   prof->tx_ring_size, TXBB_SIZE))
896			goto err;
897	}
898
899	/* Create rx Rings */
900	for (i = 0; i < priv->rx_ring_num; i++) {
901		if (mlx4_en_create_cq(priv, &priv->rx_cq[i],
902				      prof->rx_ring_size, i, RX))
903			goto err;
904
905		if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
906					   prof->rx_ring_size))
907			goto err;
908	}
909
910	/* Re-create stat sysctls in case the number of rings changed. */
911	mlx4_en_sysctl_stat(priv);
912
913	/* Populate Tx priority mappings */
914	mlx4_en_set_prio_map(priv, priv->tx_prio_map,
915			     priv->tx_ring_num - MLX4_EN_NUM_HASH_RINGS);
916
917	return 0;
918
919err:
920	en_err(priv, "Failed to allocate NIC resources\n");
921	return -ENOMEM;
922}
923
924
925void mlx4_en_destroy_netdev(struct net_device *dev)
926{
927	struct mlx4_en_priv *priv = netdev_priv(dev);
928	struct mlx4_en_dev *mdev = priv->mdev;
929
930	en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port);
931
932	if (priv->vlan_attach != NULL)
933		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
934	if (priv->vlan_detach != NULL)
935		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
936
937	/* Unregister device - this will close the port if it was up */
938	if (priv->registered)
939		ether_ifdetach(dev);
940
941	if (priv->allocated)
942		mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE);
943
944	mutex_lock(&mdev->state_lock);
945	mlx4_en_do_stop_port(dev);
946	mutex_unlock(&mdev->state_lock);
947
948	cancel_delayed_work(&priv->stats_task);
949	/* flush any pending task for this netdev */
950	flush_workqueue(mdev->workqueue);
951	callout_drain(&priv->watchdog_timer);
952
953	/* Detach the netdev so tasks would not attempt to access it */
954	mutex_lock(&mdev->state_lock);
955	mdev->pndev[priv->port] = NULL;
956	mutex_unlock(&mdev->state_lock);
957
958	mlx4_en_free_resources(priv);
959
960	if (priv->sysctl)
961		sysctl_ctx_free(&priv->conf_ctx);
962
963	mtx_destroy(&priv->stats_lock.m);
964	mtx_destroy(&priv->vlan_lock.m);
965	kfree(priv);
966	if_free(dev);
967}
968
969static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
970{
971	struct mlx4_en_priv *priv = netdev_priv(dev);
972	struct mlx4_en_dev *mdev = priv->mdev;
973	int err = 0;
974
975	en_dbg(DRV, priv, "Change MTU called - current:%ld new:%d\n",
976		 dev->if_mtu, new_mtu);
977
978	if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) {
979		en_err(priv, "Bad MTU size:%d.\n", new_mtu);
980		return -EPERM;
981	}
982	mutex_lock(&mdev->state_lock);
983	dev->if_mtu = new_mtu;
984	if (dev->if_drv_flags & IFF_DRV_RUNNING) {
985		if (!mdev->device_up) {
986			/* NIC is probably restarting - let watchdog task reset
987			 * the port */
988			en_dbg(DRV, priv, "Change MTU called with card down!?\n");
989		} else {
990			mlx4_en_do_stop_port(dev);
991			mlx4_en_set_default_moderation(priv);
992			err = mlx4_en_do_start_port(dev);
993			if (err) {
994				en_err(priv, "Failed restarting port:%d\n",
995					 priv->port);
996				queue_work(mdev->workqueue, &priv->watchdog_task);
997			}
998		}
999	}
1000	mutex_unlock(&mdev->state_lock);
1001	return 0;
1002}
1003
1004static int mlx4_en_calc_media(struct mlx4_en_priv *priv)
1005{
1006	int trans_type;
1007	int active;
1008
1009	active = IFM_ETHER;
1010	if (priv->last_link_state == MLX4_DEV_EVENT_PORT_DOWN)
1011		return (active);
1012	/*
1013	 * [ShaharK] mlx4_en_QUERY_PORT sleeps and cannot be called under a
1014	 * non-sleepable lock.
1015	 * I moved it to the periodic mlx4_en_do_get_stats.
1016 	if (mlx4_en_QUERY_PORT(priv->mdev, priv->port))
1017 		return (active);
1018	*/
1019	active |= IFM_FDX;
1020	trans_type = priv->port_state.transciver;
1021	/* XXX I don't know all of the transceiver values. */
1022	switch (priv->port_state.link_speed) {
1023	case 1000:
1024		active |= IFM_1000_T;
1025		break;
1026	case 10000:
1027		if (trans_type > 0 && trans_type <= 0xC)
1028			active |= IFM_10G_SR;
1029		else if (trans_type == 0x80 || trans_type == 0)
1030			active |= IFM_10G_CX4;
1031		break;
1032	case 40000:
1033		active |= IFM_40G_CR4;
1034		break;
1035	}
1036	if (priv->prof->tx_pause)
1037		active |= IFM_ETH_TXPAUSE;
1038	if (priv->prof->rx_pause)
1039		active |= IFM_ETH_RXPAUSE;
1040
1041	return (active);
1042}
1043
1044
1045static void mlx4_en_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
1046{
1047	struct mlx4_en_priv *priv;
1048
1049	priv = dev->if_softc;
1050	ifmr->ifm_status = IFM_AVALID;
1051	if (priv->last_link_state != MLX4_DEV_EVENT_PORT_DOWN)
1052		ifmr->ifm_status |= IFM_ACTIVE;
1053	ifmr->ifm_active = mlx4_en_calc_media(priv);
1054
1055	return;
1056}
1057
1058static int mlx4_en_media_change(struct ifnet *dev)
1059{
1060	struct mlx4_en_priv *priv;
1061        struct ifmedia *ifm;
1062	int rxpause;
1063	int txpause;
1064	int error;
1065
1066	priv = dev->if_softc;
1067	ifm = &priv->media;
1068	rxpause = txpause = 0;
1069	error = 0;
1070
1071	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1072		return (EINVAL);
1073        switch (IFM_SUBTYPE(ifm->ifm_media)) {
1074        case IFM_AUTO:
1075		break;
1076	case IFM_10G_SR:
1077	case IFM_10G_CX4:
1078	case IFM_1000_T:
1079		if (IFM_SUBTYPE(ifm->ifm_media) ==
1080		    IFM_SUBTYPE(mlx4_en_calc_media(priv)) &&
1081		    (ifm->ifm_media & IFM_FDX))
1082			break;
1083		/* Fallthrough */
1084	default:
1085                printf("%s: Only auto media type\n", if_name(dev));
1086                return (EINVAL);
1087	}
1088	/* Allow user to set/clear pause */
1089	if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE)
1090		rxpause = 1;
1091	if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE)
1092		txpause = 1;
1093	if (priv->prof->tx_pause != txpause || priv->prof->rx_pause != rxpause) {
1094		priv->prof->tx_pause = txpause;
1095		priv->prof->rx_pause = rxpause;
1096		error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port,
1097		     priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause,
1098		     priv->prof->tx_ppp, priv->prof->rx_pause,
1099		     priv->prof->rx_ppp);
1100	}
1101	return (error);
1102}
1103
1104static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data)
1105{
1106	struct mlx4_en_priv *priv;
1107	struct mlx4_en_dev *mdev;
1108	struct ifreq *ifr;
1109	int error;
1110	int mask;
1111
1112	error = 0;
1113	mask = 0;
1114	priv = dev->if_softc;
1115	mdev = priv->mdev;
1116	ifr = (struct ifreq *) data;
1117	switch (command) {
1118	case SIOCSIFMTU:
1119		error = -mlx4_en_change_mtu(dev, ifr->ifr_mtu);
1120		break;
1121	case SIOCSIFFLAGS:
1122		if (dev->if_flags & IFF_UP) {
1123			if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
1124				mlx4_en_start_port(dev);
1125			else
1126				mlx4_en_set_multicast(dev);
1127		} else {
1128			if (dev->if_drv_flags & IFF_DRV_RUNNING) {
1129				mlx4_en_stop_port(dev);
1130                                if_link_state_change(dev, LINK_STATE_DOWN);
1131                                /*
1132				 * Since mlx4_en_stop_port is defered we
1133				 * have to wait till it's finished.
1134				 */
1135                                for (int count=0; count<10; count++) {
1136                                        if (dev->if_drv_flags & IFF_DRV_RUNNING) {
1137                                                DELAY(20000);
1138                                        } else {
1139                                                break;
1140                                        }
1141                                }
1142			}
1143		}
1144		break;
1145	case SIOCADDMULTI:
1146	case SIOCDELMULTI:
1147		mlx4_en_set_multicast(dev);
1148		break;
1149	case SIOCSIFMEDIA:
1150	case SIOCGIFMEDIA:
1151		error = ifmedia_ioctl(dev, ifr, &priv->media, command);
1152		break;
1153	case SIOCSIFCAP:
1154		mutex_lock(&mdev->state_lock);
1155		mask = ifr->ifr_reqcap ^ dev->if_capenable;
1156		if (mask & IFCAP_HWCSUM)
1157			dev->if_capenable ^= IFCAP_HWCSUM;
1158		if (mask & IFCAP_TSO4)
1159			dev->if_capenable ^= IFCAP_TSO4;
1160		if (mask & IFCAP_LRO)
1161			dev->if_capenable ^= IFCAP_LRO;
1162		if (mask & IFCAP_VLAN_HWTAGGING)
1163			dev->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1164		if (mask & IFCAP_VLAN_HWFILTER)
1165			dev->if_capenable ^= IFCAP_VLAN_HWFILTER;
1166		if (mask & IFCAP_WOL_MAGIC)
1167			dev->if_capenable ^= IFCAP_WOL_MAGIC;
1168		if (dev->if_drv_flags & IFF_DRV_RUNNING)
1169			mlx4_en_init_locked(priv);
1170		mutex_unlock(&mdev->state_lock);
1171		VLAN_CAPABILITIES(dev);
1172		break;
1173	default:
1174		error = ether_ioctl(dev, command, data);
1175		break;
1176	}
1177
1178	return (error);
1179}
1180
1181static int mlx4_en_set_ring_size(struct net_device *dev,
1182    int rx_size, int tx_size)
1183{
1184	struct mlx4_en_priv *priv = netdev_priv(dev);
1185	struct mlx4_en_dev *mdev = priv->mdev;
1186	int port_up = 0;
1187	int err = 0;
1188
1189	rx_size = roundup_pow_of_two(rx_size);
1190	rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE);
1191	rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE);
1192	tx_size = roundup_pow_of_two(tx_size);
1193	tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE);
1194	tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE);
1195
1196	if (rx_size == (priv->port_up ?
1197	    priv->rx_ring[0].actual_size : priv->rx_ring[0].size) &&
1198	    tx_size == priv->tx_ring[0].size)
1199		return 0;
1200
1201	mutex_lock(&mdev->state_lock);
1202	if (priv->port_up) {
1203		port_up = 1;
1204		mlx4_en_do_stop_port(dev);
1205	}
1206	mlx4_en_free_resources(priv);
1207	priv->prof->tx_ring_size = tx_size;
1208	priv->prof->rx_ring_size = rx_size;
1209	err = mlx4_en_alloc_resources(priv);
1210	if (err) {
1211		en_err(priv, "Failed reallocating port resources\n");
1212		goto out;
1213	}
1214	if (port_up) {
1215		err = mlx4_en_do_start_port(dev);
1216		if (err)
1217			en_err(priv, "Failed starting port\n");
1218	}
1219out:
1220	mutex_unlock(&mdev->state_lock);
1221	return err;
1222}
1223
1224static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS)
1225{
1226	struct mlx4_en_priv *priv;
1227	int size;
1228	int error;
1229
1230	priv = arg1;
1231	size = priv->prof->rx_ring_size;
1232	error = sysctl_handle_int(oidp, &size, 0, req);
1233	if (error || !req->newptr)
1234		return (error);
1235	error = -mlx4_en_set_ring_size(priv->dev, size,
1236	    priv->prof->tx_ring_size);
1237
1238	return (error);
1239}
1240
1241static int mlx4_en_set_tx_ring_size(SYSCTL_HANDLER_ARGS)
1242{
1243	struct mlx4_en_priv *priv;
1244	int size;
1245	int error;
1246
1247	priv = arg1;
1248	size = priv->prof->tx_ring_size;
1249	error = sysctl_handle_int(oidp, &size, 0, req);
1250	if (error || !req->newptr)
1251		return (error);
1252	error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size,
1253	    size);
1254
1255	return (error);
1256}
1257
1258static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS)
1259{
1260	struct mlx4_en_priv *priv;
1261	int ppp;
1262	int error;
1263
1264	priv = arg1;
1265	ppp = priv->prof->tx_ppp;
1266	error = sysctl_handle_int(oidp, &ppp, 0, req);
1267	if (error || !req->newptr)
1268		return (error);
1269	if (ppp > 0xff || ppp < 0)
1270		return (-EINVAL);
1271	priv->prof->tx_ppp = ppp;
1272	error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port,
1273				       priv->rx_mb_size + ETHER_CRC_LEN,
1274				       priv->prof->tx_pause,
1275				       priv->prof->tx_ppp,
1276				       priv->prof->rx_pause,
1277				       priv->prof->rx_ppp);
1278
1279	return (error);
1280}
1281
1282static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS)
1283{
1284	struct mlx4_en_priv *priv;
1285	struct mlx4_en_dev *mdev;
1286	int tx_ring_num;
1287	int ppp;
1288	int error;
1289	int port_up;
1290
1291	port_up = 0;
1292	priv = arg1;
1293	mdev = priv->mdev;
1294	ppp = priv->prof->rx_ppp;
1295	error = sysctl_handle_int(oidp, &ppp, 0, req);
1296	if (error || !req->newptr)
1297		return (error);
1298	if (ppp > 0xff || ppp < 0)
1299		return (-EINVAL);
1300	/* See if we have to change the number of tx queues. */
1301	if (!ppp != !priv->prof->rx_ppp) {
1302		tx_ring_num = MLX4_EN_NUM_HASH_RINGS + 1 +
1303		    (!!ppp) * MLX4_EN_NUM_PPP_RINGS;
1304		mutex_lock(&mdev->state_lock);
1305		if (priv->port_up) {
1306			port_up = 1;
1307			mlx4_en_do_stop_port(priv->dev);
1308		}
1309		mlx4_en_free_resources(priv);
1310		priv->tx_ring_num = tx_ring_num;
1311		priv->prof->rx_ppp = ppp;
1312		error = -mlx4_en_alloc_resources(priv);
1313		if (error)
1314			en_err(priv, "Failed reallocating port resources\n");
1315		if (error == 0 && port_up) {
1316			error = -mlx4_en_do_start_port(priv->dev);
1317			if (error)
1318				en_err(priv, "Failed starting port\n");
1319		}
1320		mutex_unlock(&mdev->state_lock);
1321		return (error);
1322
1323	}
1324	priv->prof->rx_ppp = ppp;
1325	error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port,
1326				       priv->rx_mb_size + ETHER_CRC_LEN,
1327				       priv->prof->tx_pause,
1328				       priv->prof->tx_ppp,
1329				       priv->prof->rx_pause,
1330				       priv->prof->rx_ppp);
1331
1332	return (error);
1333}
1334
1335static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv)
1336{
1337	struct net_device *dev;
1338	struct sysctl_ctx_list *ctx;
1339	struct sysctl_oid *node;
1340	struct sysctl_oid_list *node_list;
1341	struct sysctl_oid *coal;
1342	struct sysctl_oid_list *coal_list;
1343
1344	dev = priv->dev;
1345	ctx = &priv->conf_ctx;
1346
1347	sysctl_ctx_init(ctx);
1348	priv->sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw),
1349	    OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet");
1350	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO,
1351	    "conf", CTLFLAG_RD, NULL, "Configuration");
1352	node_list = SYSCTL_CHILDREN(node);
1353
1354	SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable",
1355	    CTLFLAG_RW, &priv->msg_enable, 0,
1356	    "Driver message enable bitfield");
1357	SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings",
1358	    CTLTYPE_INT | CTLFLAG_RD, &priv->rx_ring_num, 0,
1359	    "Number of receive rings");
1360	SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings",
1361	    CTLTYPE_INT | CTLFLAG_RD, &priv->tx_ring_num, 0,
1362	    "Number of transmit rings");
1363	SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size",
1364	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
1365	    mlx4_en_set_rx_ring_size, "I", "Receive ring size");
1366	SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size",
1367	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
1368	    mlx4_en_set_tx_ring_size, "I", "Transmit ring size");
1369	SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "ip_reasm",
1370	    CTLFLAG_RW, &priv->ip_reasm, 0,
1371	    "Allow reassembly of IP fragments.");
1372	SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp",
1373	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
1374	    mlx4_en_set_tx_ppp, "I", "TX Per-priority pause");
1375	SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp",
1376	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0,
1377	    mlx4_en_set_rx_ppp, "I", "RX Per-priority pause");
1378
1379	/* Add coalescer configuration. */
1380	coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO,
1381	    "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration");
1382	coal_list = SYSCTL_CHILDREN(node);
1383	SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low",
1384	    CTLFLAG_RW, &priv->pkt_rate_low, 0,
1385	    "Packets per-second for minimum delay");
1386	SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low",
1387	    CTLFLAG_RW, &priv->rx_usecs_low, 0,
1388	    "Minimum RX delay in micro-seconds");
1389	SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high",
1390	    CTLFLAG_RW, &priv->pkt_rate_high, 0,
1391	    "Packets per-second for maximum delay");
1392	SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high",
1393	    CTLFLAG_RW, &priv->rx_usecs_high, 0,
1394	    "Maximum RX delay in micro-seconds");
1395	SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval",
1396	    CTLFLAG_RW, &priv->sample_interval, 0,
1397	    "adaptive frequency in units of HZ ticks");
1398	SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal",
1399	    CTLFLAG_RW, &priv->adaptive_rx_coal, 0,
1400	    "Enable adaptive rx coalescing");
1401}
1402
1403static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv)
1404{
1405	struct net_device *dev;
1406	struct sysctl_ctx_list *ctx;
1407	struct sysctl_oid *node;
1408	struct sysctl_oid_list *node_list;
1409	struct sysctl_oid *ring_node;
1410	struct sysctl_oid_list *ring_list;
1411	struct mlx4_en_tx_ring *tx_ring;
1412	struct mlx4_en_rx_ring *rx_ring;
1413	char namebuf[128];
1414	int i;
1415
1416	dev = priv->dev;
1417
1418	ctx = &priv->stat_ctx;
1419	sysctl_ctx_init(ctx);
1420	node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->sysctl), OID_AUTO,
1421	    "stat", CTLFLAG_RD, NULL, "Statistics");
1422	node_list = SYSCTL_CHILDREN(node);
1423
1424#ifdef MLX4_EN_PERF_STAT
1425	SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_poll", CTLFLAG_RD,
1426	    &priv->pstats.tx_poll, "TX Poll calls");
1427	SYSCTL_ADD_QUAD(ctx, node_list, OID_AUTO, "tx_pktsz_avg", CTLFLAG_RD,
1428	    &priv->pstats.tx_pktsz_avg, "TX average packet size");
1429	SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "inflight_avg", CTLFLAG_RD,
1430	    &priv->pstats.inflight_avg, "TX average packets in-flight");
1431	SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_coal_avg", CTLFLAG_RD,
1432	    &priv->pstats.tx_coal_avg, "TX average coalesced completions");
1433	SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_coal_avg", CTLFLAG_RD,
1434	    &priv->pstats.rx_coal_avg, "RX average coalesced completions");
1435#endif
1436
1437	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tso_packets", CTLFLAG_RD,
1438	    &priv->port_stats.tso_packets, "TSO packets sent");
1439	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "queue_stopped", CTLFLAG_RD,
1440	    &priv->port_stats.queue_stopped, "Queue full");
1441	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "wake_queue", CTLFLAG_RD,
1442	    &priv->port_stats.wake_queue, "Queue resumed after full");
1443	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_timeout", CTLFLAG_RD,
1444	    &priv->port_stats.tx_timeout, "Transmit timeouts");
1445	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_alloc_failed", CTLFLAG_RD,
1446	    &priv->port_stats.rx_alloc_failed, "RX failed to allocate mbuf");
1447	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_good", CTLFLAG_RD,
1448	    &priv->port_stats.rx_chksum_good, "RX checksum offload success");
1449	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_chksum_none", CTLFLAG_RD,
1450	    &priv->port_stats.rx_chksum_none, "RX without checksum offload");
1451	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_chksum_offload",
1452	    CTLFLAG_RD, &priv->port_stats.tx_chksum_offload,
1453	    "TX checksum offloads");
1454
1455	/* Could strdup the names and add in a loop.  This is simpler. */
1456	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "broadcast", CTLFLAG_RD,
1457	    &priv->pkstats.broadcast, "Broadcast packets");
1458	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio0", CTLFLAG_RD,
1459	    &priv->pkstats.tx_prio[0], "TX Priority 0 packets");
1460	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio1", CTLFLAG_RD,
1461	    &priv->pkstats.tx_prio[1], "TX Priority 1 packets");
1462	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio2", CTLFLAG_RD,
1463	    &priv->pkstats.tx_prio[2], "TX Priority 2 packets");
1464	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio3", CTLFLAG_RD,
1465	    &priv->pkstats.tx_prio[3], "TX Priority 3 packets");
1466	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio4", CTLFLAG_RD,
1467	    &priv->pkstats.tx_prio[4], "TX Priority 4 packets");
1468	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio5", CTLFLAG_RD,
1469	    &priv->pkstats.tx_prio[5], "TX Priority 5 packets");
1470	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio6", CTLFLAG_RD,
1471	    &priv->pkstats.tx_prio[6], "TX Priority 6 packets");
1472	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "tx_prio7", CTLFLAG_RD,
1473	    &priv->pkstats.tx_prio[7], "TX Priority 7 packets");
1474	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio0", CTLFLAG_RD,
1475	    &priv->pkstats.rx_prio[0], "RX Priority 0 packets");
1476	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio1", CTLFLAG_RD,
1477	    &priv->pkstats.rx_prio[1], "RX Priority 1 packets");
1478	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio2", CTLFLAG_RD,
1479	    &priv->pkstats.rx_prio[2], "RX Priority 2 packets");
1480	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio3", CTLFLAG_RD,
1481	    &priv->pkstats.rx_prio[3], "RX Priority 3 packets");
1482	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio4", CTLFLAG_RD,
1483	    &priv->pkstats.rx_prio[4], "RX Priority 4 packets");
1484	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio5", CTLFLAG_RD,
1485	    &priv->pkstats.rx_prio[5], "RX Priority 5 packets");
1486	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio6", CTLFLAG_RD,
1487	    &priv->pkstats.rx_prio[6], "RX Priority 6 packets");
1488	SYSCTL_ADD_ULONG(ctx, node_list, OID_AUTO, "rx_prio7", CTLFLAG_RD,
1489	    &priv->pkstats.rx_prio[7], "RX Priority 7 packets");
1490
1491	for (i = 0; i < priv->tx_ring_num; i++) {
1492		tx_ring = &priv->tx_ring[i];
1493		snprintf(namebuf, sizeof(namebuf), "tx_ring%d", i);
1494		ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf,
1495		    CTLFLAG_RD, NULL, "TX Ring");
1496		ring_list = SYSCTL_CHILDREN(ring_node);
1497		SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "packets",
1498		    CTLFLAG_RD, &tx_ring->packets, "TX packets");
1499		SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes",
1500		    CTLFLAG_RD, &tx_ring->bytes, "TX bytes");
1501		SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error",
1502		    CTLFLAG_RD, &tx_ring->errors, "TX soft errors");
1503
1504	}
1505	for (i = 0; i < priv->rx_ring_num; i++) {
1506		rx_ring = &priv->rx_ring[i];
1507		snprintf(namebuf, sizeof(namebuf), "rx_ring%d", i);
1508		ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf,
1509		    CTLFLAG_RD, NULL, "RX Ring");
1510		ring_list = SYSCTL_CHILDREN(ring_node);
1511		SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "packets",
1512		    CTLFLAG_RD, &rx_ring->packets, "RX packets");
1513		SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "bytes",
1514		    CTLFLAG_RD, &rx_ring->bytes, "RX bytes");
1515		SYSCTL_ADD_ULONG(ctx, ring_list, OID_AUTO, "error",
1516		    CTLFLAG_RD, &rx_ring->errors, "RX soft errors");
1517		SYSCTL_ADD_UINT(ctx, ring_list, OID_AUTO, "lro_queued",
1518		    CTLFLAG_RD, &rx_ring->lro.lro_queued, 0, "LRO Queued");
1519		SYSCTL_ADD_UINT(ctx, ring_list, OID_AUTO, "lro_flushed",
1520		    CTLFLAG_RD, &rx_ring->lro.lro_flushed, 0, "LRO Flushed");
1521	}
1522}
1523
1524int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
1525			struct mlx4_en_port_profile *prof)
1526{
1527	static volatile int mlx4_en_unit;
1528	struct net_device *dev;
1529	struct mlx4_en_priv *priv;
1530	uint8_t dev_addr[ETHER_ADDR_LEN];
1531	int err;
1532	int i;
1533
1534	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1535	dev = priv->dev = if_alloc(IFT_ETHER);
1536	if (dev == NULL) {
1537		mlx4_err(mdev, "Net device allocation failed\n");
1538		kfree(priv);
1539		return -ENOMEM;
1540	}
1541	dev->if_softc = priv;
1542	if_initname(dev, "mlxen", atomic_fetchadd_int(&mlx4_en_unit, 1));
1543	dev->if_mtu = ETHERMTU;
1544	dev->if_baudrate = 1000000000;
1545	dev->if_init = mlx4_en_init;
1546	dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1547	dev->if_ioctl = mlx4_en_ioctl;
1548	dev->if_transmit = mlx4_en_transmit;
1549	dev->if_qflush = mlx4_en_qflush;
1550	dev->if_snd.ifq_maxlen = prof->tx_ring_size;
1551
1552	/*
1553	 * Initialize driver private data
1554	 */
1555	priv->dev = dev;
1556	priv->mdev = mdev;
1557	priv->prof = prof;
1558	priv->port = port;
1559	priv->port_up = false;
1560	priv->rx_csum = 1;
1561	priv->flags = prof->flags;
1562	priv->tx_ring_num = prof->tx_ring_num;
1563	priv->rx_ring_num = prof->rx_ring_num;
1564	priv->mac_index = -1;
1565	priv->msg_enable = MLX4_EN_MSG_LEVEL;
1566	priv->ip_reasm = priv->mdev->profile.ip_reasm;
1567	mtx_init(&priv->stats_lock.m, "mlx4 stats", NULL, MTX_DEF);
1568	mtx_init(&priv->vlan_lock.m, "mlx4 vlan", NULL, MTX_DEF);
1569	INIT_WORK(&priv->start_port_task, mlx4_en_lock_and_start_port);
1570	INIT_WORK(&priv->stop_port_task, mlx4_en_lock_and_stop_port);
1571	INIT_WORK(&priv->mcast_task, mlx4_en_do_set_multicast);
1572	INIT_WORK(&priv->watchdog_task, mlx4_en_restart);
1573	INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate);
1574	INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats);
1575	callout_init(&priv->watchdog_timer, 1);
1576
1577	/* Query for default mac and max mtu */
1578	priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port];
1579	priv->mac = mdev->dev->caps.def_mac[priv->port];
1580
1581	if (ILLEGAL_MAC(priv->mac)) {
1582		en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n",
1583			 priv->port, priv->mac);
1584		err = -EINVAL;
1585		goto out;
1586	}
1587
1588	mlx4_en_sysctl_conf(priv);
1589
1590	err = mlx4_en_alloc_resources(priv);
1591	if (err)
1592		goto out;
1593
1594	/* Allocate page for receive rings */
1595	err = mlx4_alloc_hwq_res(mdev->dev, &priv->res,
1596				MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE);
1597	if (err) {
1598		en_err(priv, "Failed to allocate page for rx qps\n");
1599		goto out;
1600	}
1601	priv->allocated = 1;
1602
1603	/*
1604	 * Set driver features
1605	 */
1606	dev->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1607	dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
1608	dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
1609	dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
1610	if (mdev->LSO_support)
1611		dev->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_HWTSO;
1612	if (mdev->profile.num_lro)
1613		dev->if_capabilities |= IFCAP_LRO;
1614	dev->if_capenable = dev->if_capabilities;
1615	/*
1616	 * Setup wake-on-lan.
1617	 */
1618#if 0
1619	if (priv->mdev->dev->caps.wol) {
1620		u64 config;
1621		if (mlx4_wol_read(priv->mdev->dev, &config, priv->port) == 0) {
1622			if (config & MLX4_EN_WOL_MAGIC)
1623				dev->if_capabilities |= IFCAP_WOL_MAGIC;
1624			if (config & MLX4_EN_WOL_ENABLED)
1625				dev->if_capenable |= IFCAP_WOL_MAGIC;
1626		}
1627	}
1628#endif
1629
1630        /* Register for VLAN events */
1631	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1632            mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
1633	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1634            mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
1635
1636	mdev->pndev[priv->port] = dev;
1637
1638	priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN;
1639	if_link_state_change(dev, LINK_STATE_DOWN);
1640
1641	/* Set default MAC */
1642	for (i = 0; i < ETHER_ADDR_LEN; i++)
1643		dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i));
1644
1645	ether_ifattach(dev, dev_addr);
1646	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
1647	    mlx4_en_media_change, mlx4_en_media_status);
1648	ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL);
1649	ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL);
1650	ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL);
1651	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
1652	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
1653
1654	en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num);
1655	en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
1656
1657	priv->registered = 1;
1658	queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
1659
1660	return 0;
1661
1662out:
1663	mlx4_en_destroy_netdev(dev);
1664	return err;
1665}
1666
1667