This patch allows the virtio-net driver to report traffic distribution
across inbound/outbound queues through ethtool -S. The per-cpu
virtnet_stats structure is split into receive and transmit stats, which are
maintained on a per-receive_queue and per-send_queue basis.
virtnet_stats() is modified to aggregate interface-level statistics
from the per-queue statistics. Sample output below:
NIC statistics:
rxq0: rx_packets: 4357802
rxq0: rx_bytes: 292642052
txq0: tx_packets: 824540
txq0: tx_bytes: 55256404
rxq1: rx_packets: 0
rxq1: rx_bytes: 0
txq1: tx_packets: 1094268
txq1: tx_bytes: 73328316
rxq2: rx_packets: 0
rxq2: rx_bytes: 0
txq2: tx_packets: 1091466
txq2: tx_bytes: 73140566
rxq3: rx_packets: 0
rxq3: rx_bytes: 0
txq3: tx_packets: 1093043
txq3: tx_bytes: 73246142
Signed-off-by: Sriram Narasimhan <[email protected]>
---
drivers/net/virtio_net.c | 157 +++++++++++++++++++++++++++++++++++++---------
1 files changed, 128 insertions(+), 29 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3c23fdc..3c58c52 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -41,15 +41,46 @@ module_param(gso, bool, 0444);
#define VIRTNET_DRIVER_VERSION "1.0.0"
-struct virtnet_stats {
- struct u64_stats_sync tx_syncp;
+struct virtnet_rx_stats {
struct u64_stats_sync rx_syncp;
- u64 tx_bytes;
+ u64 rx_packets;
+ u64 rx_bytes;
+};
+
+struct virtnet_tx_stats {
+ struct u64_stats_sync tx_syncp;
u64 tx_packets;
+ u64 tx_bytes;
+};
- u64 rx_bytes;
- u64 rx_packets;
+struct virtnet_ethtool_stats {
+ char desc[ETH_GSTRING_LEN];
+ int type;
+ int size;
+ int offset;
+};
+
+enum {VIRTNET_STATS_TX, VIRTNET_STATS_RX};
+
+#define VIRTNET_RX_STATS_INFO(_struct, _field) \
+ {#_field, VIRTNET_STATS_RX, FIELD_SIZEOF(_struct, _field), \
+ offsetof(_struct, _field)}
+
+#define VIRTNET_TX_STATS_INFO(_struct, _field) \
+ {#_field, VIRTNET_STATS_TX, FIELD_SIZEOF(_struct, _field), \
+ offsetof(_struct, _field)}
+
+static const struct virtnet_ethtool_stats virtnet_et_rx_stats[] = {
+ VIRTNET_RX_STATS_INFO(struct virtnet_rx_stats, rx_packets),
+ VIRTNET_RX_STATS_INFO(struct virtnet_rx_stats, rx_bytes)
+};
+#define VIRTNET_RX_STATS_NUM (ARRAY_SIZE(virtnet_et_rx_stats))
+
+static const struct virtnet_ethtool_stats virtnet_et_tx_stats[] = {
+ VIRTNET_TX_STATS_INFO(struct virtnet_tx_stats, tx_packets),
+ VIRTNET_TX_STATS_INFO(struct virtnet_tx_stats, tx_bytes)
};
+#define VIRTNET_TX_STATS_NUM (ARRAY_SIZE(virtnet_et_tx_stats))
/* Internal representation of a send virtqueue */
struct send_queue {
@@ -61,6 +92,9 @@ struct send_queue {
/* Name of the send queue: output.$index */
char name[40];
+
+ /* Active send queue statistics */
+ struct virtnet_tx_stats stats;
};
/* Internal representation of a receive virtqueue */
@@ -81,6 +115,9 @@ struct receive_queue {
/* Name of this receive queue: input.$index */
char name[40];
+
+ /* Active receive queue statistics */
+ struct virtnet_rx_stats stats;
};
struct virtnet_info {
@@ -109,9 +146,6 @@ struct virtnet_info {
/* enable config space updates */
bool config_enable;
- /* Active statistics */
- struct virtnet_stats __percpu *stats;
-
/* Work struct for refilling if we run low on memory. */
struct delayed_work refill;
@@ -330,7 +364,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
struct net_device *dev = vi->dev;
- struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
+ struct virtnet_rx_stats *stats = &rq->stats;
struct sk_buff *skb;
struct page *page;
struct skb_vnet_hdr *hdr;
@@ -650,8 +684,7 @@ static void free_old_xmit_skbs(struct send_queue *sq)
{
struct sk_buff *skb;
unsigned int len;
- struct virtnet_info *vi = sq->vq->vdev->priv;
- struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
+ struct virtnet_tx_stats *stats = &sq->stats;
while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
pr_debug("Sent skb %p\n", skb);
@@ -841,24 +874,25 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
struct rtnl_link_stats64 *tot)
{
struct virtnet_info *vi = netdev_priv(dev);
- int cpu;
+ int i;
unsigned int start;
- for_each_possible_cpu(cpu) {
- struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ struct virtnet_tx_stats *tstats = &vi->sq[i].stats;
+ struct virtnet_rx_stats *rstats = &vi->rq[i].stats;
u64 tpackets, tbytes, rpackets, rbytes;
do {
- start = u64_stats_fetch_begin_bh(&stats->tx_syncp);
- tpackets = stats->tx_packets;
- tbytes = stats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&stats->tx_syncp, start));
+ start = u64_stats_fetch_begin_bh(&tstats->tx_syncp);
+ tpackets = tstats->tx_packets;
+ tbytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_bh(&tstats->tx_syncp, start));
do {
- start = u64_stats_fetch_begin_bh(&stats->rx_syncp);
- rpackets = stats->rx_packets;
- rbytes = stats->rx_bytes;
- } while (u64_stats_fetch_retry_bh(&stats->rx_syncp, start));
+ start = u64_stats_fetch_begin_bh(&rstats->rx_syncp);
+ rpackets = rstats->rx_packets;
+ rbytes = rstats->rx_bytes;
+ } while (u64_stats_fetch_retry_bh(&rstats->rx_syncp, start));
tot->rx_packets += rpackets;
tot->tx_packets += tpackets;
@@ -1177,12 +1211,83 @@ static void virtnet_get_channels(struct net_device *dev,
channels->other_count = 0;
}
+static void virtnet_get_stat_strings(struct net_device *dev,
+ u32 stringset,
+ u8 *data)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ int i, j;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < vi->max_queue_pairs; i++) {
+ for (j = 0; j < VIRTNET_RX_STATS_NUM; j++) {
+ sprintf(data, "rxq%d: %s", i,
+ virtnet_et_rx_stats[j].desc);
+ data += ETH_GSTRING_LEN;
+ }
+ for (j = 0; j < VIRTNET_TX_STATS_NUM; j++) {
+ sprintf(data, "txq%d: %s", i,
+ virtnet_et_tx_stats[j].desc);
+ data += ETH_GSTRING_LEN;
+ }
+ }
+ break;
+ }
+}
+
+static int virtnet_get_sset_count(struct net_device *dev, int stringset)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ switch (stringset) {
+ case ETH_SS_STATS:
+ return vi->max_queue_pairs *
+ (VIRTNET_RX_STATS_NUM + VIRTNET_TX_STATS_NUM);
+ default:
+ return -EINVAL;
+ }
+}
+
+static void virtnet_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ unsigned int i, base;
+ unsigned int start;
+
+ for (i = 0, base = 0; i < vi->max_queue_pairs; i++) {
+ struct virtnet_tx_stats *tstats = &vi->sq[i].stats;
+ struct virtnet_rx_stats *rstats = &vi->rq[i].stats;
+
+ do {
+ start = u64_stats_fetch_begin_bh(&rstats->rx_syncp);
+ data[base] = rstats->rx_packets;
+ data[base+1] = rstats->rx_bytes;
+ } while (u64_stats_fetch_retry_bh(&rstats->rx_syncp, start));
+
+ base += VIRTNET_RX_STATS_NUM;
+
+ do {
+ start = u64_stats_fetch_begin_bh(&tstats->tx_syncp);
+ data[base] = tstats->tx_packets;
+ data[base+1] = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_bh(&tstats->tx_syncp, start));
+
+ base += VIRTNET_TX_STATS_NUM;
+ }
+}
+
+
static const struct ethtool_ops virtnet_ethtool_ops = {
.get_drvinfo = virtnet_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_ringparam = virtnet_get_ringparam,
.set_channels = virtnet_set_channels,
.get_channels = virtnet_get_channels,
+ .get_strings = virtnet_get_stat_strings,
+ .get_sset_count = virtnet_get_sset_count,
+ .get_ethtool_stats = virtnet_get_ethtool_stats,
};
#define MIN_MTU 68
@@ -1531,14 +1636,11 @@ static int virtnet_probe(struct virtio_device *vdev)
vi->dev = dev;
vi->vdev = vdev;
vdev->priv = vi;
- vi->stats = alloc_percpu(struct virtnet_stats);
err = -ENOMEM;
- if (vi->stats == NULL)
- goto free;
vi->vq_index = alloc_percpu(int);
if (vi->vq_index == NULL)
- goto free_stats;
+ goto free;
mutex_init(&vi->config_lock);
vi->config_enable = true;
@@ -1616,8 +1718,6 @@ free_vqs:
virtnet_del_vqs(vi);
free_index:
free_percpu(vi->vq_index);
-free_stats:
- free_percpu(vi->stats);
free:
free_netdev(dev);
return err;
@@ -1653,7 +1753,6 @@ static void virtnet_remove(struct virtio_device *vdev)
flush_work(&vi->config_work);
free_percpu(vi->vq_index);
- free_percpu(vi->stats);
free_netdev(vi->dev);
}
--
1.7.1
On Thu, 2013-05-16 at 13:24 -0700, Sriram Narasimhan wrote:
> This patch allows the virtio-net driver to report traffic distribution
> across inbound/outbound queues through ethtool -S. The per-cpu
> virtnet_stats structure is split into receive and transmit stats, which are
> maintained on a per-receive_queue and per-send_queue basis.
> virtnet_stats() is modified to aggregate interface-level statistics
> from the per-queue statistics. Sample output below:
>
> NIC statistics:
> rxq0: rx_packets: 4357802
> rxq0: rx_bytes: 292642052
> txq0: tx_packets: 824540
> txq0: tx_bytes: 55256404
> rxq1: rx_packets: 0
> rxq1: rx_bytes: 0
> txq1: tx_packets: 1094268
> txq1: tx_bytes: 73328316
> rxq2: rx_packets: 0
> rxq2: rx_bytes: 0
> txq2: tx_packets: 1091466
> txq2: tx_bytes: 73140566
> rxq3: rx_packets: 0
> rxq3: rx_bytes: 0
> txq3: tx_packets: 1093043
> txq3: tx_bytes: 73246142
>
> Signed-off-by: Sriram Narasimhan <[email protected]>
[...]
That ordering is totally unreadable. I want to see patches for ethtool
to let the user order and aggregate the queue statistics in a sensible
way:
$ ethtool -S eth0 # default would show aggregated statistics
NIC statistics:
rx_packets: 4357802
rx_bytes: 292642052
tx_packets: 4103317
tx_bytes: 274971428
$ ethtool -S eth0 full group queue # full statistics, grouped by queue name
NIC statistics:
rxq0:
rx_packets: 4357802
rx_bytes: 292642052
rxq1:
rx_packets: 0
rx_bytes: 0
[...]
txq0:
tx_packets: 824540
tx_bytes: 55256404
txq1:
tx_packets: 1094268
tx_bytes: 73328316
[...]
$ ethtool -S eth0 full group statistic # full statistics, grouped by statistic name
NIC statistics:
rx_packets:
rxq0: 4357802
rxq1: 0
rxq2: 0
rxq3: 0
rx_bytes:
rxq0: 292642052
rxq1: 0
rxq2: 0
rxq3: 0
[...]
Maybe even:
$ ethtool -S eth0 full table
NIC statistics:
rx_packets rx_bytes
rxq0: 4357802 292642052
rxq1: 0 0
rxq2: 0 0
rxq3: 0 0
tx_packets tx_bytes
txq0: 824540 55256404
txq1: 1094268 73328316
txq2: 1091466 73140566
txq3: 1093043 73246142
(Difficult to do, but probably very useful!)
The queue names should be rx-<index> and tx-<index> like in sysfs.
We'll need to reach a consensus on the naming scheme for per-queue and
otherwise disaggregated statistics (see previous discussions in
<http://thread.gmane.org/gmane.linux.kernel.virtualization/15572/focus=15608>).
I don't much care what they look like as long as there's an
implementation for the ethtool side and they don't look too awful in
older versions of ethtool.
Ben.
--
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.
Thanks for the pointer to the earlier discussion thread.
I am open to any consensus format. I would also like to
get feedback from Michael on maintaining the stats in the
per-queue data structure.
Sriram
-----Original Message-----
From: Ben Hutchings [mailto:[email protected]]
Sent: Thursday, May 16, 2013 2:48 PM
To: Narasimhan, Sriram
Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
[...]
On Thu, May 16, 2013 at 01:24:29PM -0700, Sriram Narasimhan wrote:
> This patch allows the virtio-net driver to report traffic distribution
> across inbound/outbound queues through ethtool -S. The per-cpu
> virtnet_stats structure is split into receive and transmit stats, which are
> maintained on a per-receive_queue and per-send_queue basis.
> virtnet_stats() is modified to aggregate interface-level statistics
> from the per-queue statistics. Sample output below:
>
Thanks for the patch. The idea itself looks OK to me.
Ben Hutchings has already sent some comments,
so I won't repeat them. Some more minor comments
and questions below.
> NIC statistics:
> rxq0: rx_packets: 4357802
> rxq0: rx_bytes: 292642052
> txq0: tx_packets: 824540
> txq0: tx_bytes: 55256404
> rxq1: rx_packets: 0
> rxq1: rx_bytes: 0
> txq1: tx_packets: 1094268
> txq1: tx_bytes: 73328316
> rxq2: rx_packets: 0
> rxq2: rx_bytes: 0
> txq2: tx_packets: 1091466
> txq2: tx_bytes: 73140566
> rxq3: rx_packets: 0
> rxq3: rx_bytes: 0
> txq3: tx_packets: 1093043
> txq3: tx_bytes: 73246142
Interesting. This example implies that all packets are coming in
through the same RX queue - is this right?
If yes, that's worth exploring - it could be a tun bug -
and it shows how this patch is useful.
> Signed-off-by: Sriram Narasimhan <[email protected]>
BTW, while you are looking at the stats, one other interesting
thing to add could be checking more types of stats: number of exits,
queue full errors, etc.
> ---
> drivers/net/virtio_net.c | 157 +++++++++++++++++++++++++++++++++++++---------
> 1 files changed, 128 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 3c23fdc..3c58c52 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -41,15 +41,46 @@ module_param(gso, bool, 0444);
>
> #define VIRTNET_DRIVER_VERSION "1.0.0"
>
> -struct virtnet_stats {
> - struct u64_stats_sync tx_syncp;
> +struct virtnet_rx_stats {
> struct u64_stats_sync rx_syncp;
> - u64 tx_bytes;
> + u64 rx_packets;
> + u64 rx_bytes;
> +};
> +
> +struct virtnet_tx_stats {
> + struct u64_stats_sync tx_syncp;
> u64 tx_packets;
> + u64 tx_bytes;
> +};
>
> - u64 rx_bytes;
> - u64 rx_packets;
I think maintaining the stats in a per-queue data structure like this is
fine. If # of CPUs == # of queues, which is typical, we use the same
amount of memory. And each queue is accessed under a lock
or from the napi thread, so there are no races either.
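For reference, the writer side in the receive path would look something
like this with the new per-queue structs (a sketch mirroring the existing
receive_buf() code; the helper name is made up):

/* Sketch: napi-context writer bumping the per-queue rx counters
 * under the u64_stats seqcount, as receive_buf() does today. */
static void virtnet_rx_account(struct receive_queue *rq, unsigned int len)
{
        struct virtnet_rx_stats *stats = &rq->stats;

        u64_stats_update_begin(&stats->rx_syncp);
        stats->rx_packets++;
        stats->rx_bytes += len;
        u64_stats_update_end(&stats->rx_syncp);
}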
> +struct virtnet_ethtool_stats {
> + char desc[ETH_GSTRING_LEN];
> + int type;
> + int size;
> + int offset;
> +};
> +
> +enum {VIRTNET_STATS_TX, VIRTNET_STATS_RX};
> +
> +#define VIRTNET_RX_STATS_INFO(_struct, _field) \
> + {#_field, VIRTNET_STATS_RX, FIELD_SIZEOF(_struct, _field), \
> + offsetof(_struct, _field)}
> +
> +#define VIRTNET_TX_STATS_INFO(_struct, _field) \
> + {#_field, VIRTNET_STATS_TX, FIELD_SIZEOF(_struct, _field), \
> + offsetof(_struct, _field)}
> +
> +static const struct virtnet_ethtool_stats virtnet_et_rx_stats[] = {
> + VIRTNET_RX_STATS_INFO(struct virtnet_rx_stats, rx_packets),
> + VIRTNET_RX_STATS_INFO(struct virtnet_rx_stats, rx_bytes)
> +};
> +#define VIRTNET_RX_STATS_NUM (ARRAY_SIZE(virtnet_et_rx_stats))
> +
> +static const struct virtnet_ethtool_stats virtnet_et_tx_stats[] = {
> + VIRTNET_TX_STATS_INFO(struct virtnet_tx_stats, tx_packets),
> + VIRTNET_TX_STATS_INFO(struct virtnet_tx_stats, tx_bytes)
> };
> +#define VIRTNET_TX_STATS_NUM (ARRAY_SIZE(virtnet_et_tx_stats))
I'd prefer a full name: virtnet_ethtool_tx_stats, or
just virtnet_tx_stats.
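(As an aside: the offset/size fields these macros record would let a
generic copy loop replace the hand-written assignments further down.
A sketch, not part of the patch:)

/* Sketch: use the offsets recorded in virtnet_et_rx_stats to copy
 * each rx counter out of the per-queue struct generically. */
static void virtnet_copy_rx_stats(const struct virtnet_rx_stats *rstats,
                                  u64 *data)
{
        const char *base = (const char *)rstats;
        int j;

        for (j = 0; j < VIRTNET_RX_STATS_NUM; j++)
                data[j] = *(const u64 *)(base +
                                         virtnet_et_rx_stats[j].offset);
}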
> [...]
> +static void virtnet_get_ethtool_stats(struct net_device *dev,
> + struct ethtool_stats *stats,
> + u64 *data)
> +{
> + struct virtnet_info *vi = netdev_priv(dev);
> + unsigned int i, base;
> + unsigned int start;
> +
> + for (i = 0, base = 0; i < vi->max_queue_pairs; i++) {
> + struct virtnet_tx_stats *tstats = &vi->sq[i].stats;
> + struct virtnet_rx_stats *rstats = &vi->rq[i].stats;
> +
> + do {
> + start = u64_stats_fetch_begin_bh(&rstats->rx_syncp);
> + data[base] = rstats->rx_packets;
> + data[base+1] = rstats->rx_bytes;
nitpicking:
We normally have spaces around +, like this:
data[base + 1] = rstats->rx_bytes;
> + } while (u64_stats_fetch_retry_bh(&rstats->rx_syncp, start));
> +
> + base += VIRTNET_RX_STATS_NUM;
> +
> + do {
> + start = u64_stats_fetch_begin_bh(&tstats->tx_syncp);
> + data[base] = tstats->tx_packets;
> + data[base+1] = tstats->tx_bytes;
nitpicking:
Here, something strange happened to indentation.
> + } while (u64_stats_fetch_retry_bh(&tstats->tx_syncp, start));
> +
> + base += VIRTNET_TX_STATS_NUM;
> + }
> +}
> [...]
Thanks!
Hi Michael,
I was getting all packets on the same inbound queue, which
is why I added this support to virtio-net (and some more
instrumentation at tun as well). But it turned out to be
my misconfiguration - I did not enable IFF_MULTI_QUEUE on
the tap device, so real_num_tx_queues on the tap netdev was
always 1 (no tx distribution at tap). I am thinking about
adding a -q option to tunctl to specify the multi-queue flag on
the tap device.
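For reference, the flag has to be passed at TUNSETIFF time; a minimal
userspace sketch of what tunctl would need to do (error handling trimmed):

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

/* Sketch: create (or attach a queue to) a multi-queue tap device.
 * Each additional open with the same name attaches one more queue. */
static int tap_open_mq(const char *name)
{
        struct ifreq ifr;
        int fd = open("/dev/net/tun", O_RDWR);

        if (fd < 0)
                return -1;
        memset(&ifr, 0, sizeof(ifr));
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
        strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
        if (ioctl(fd, TUNSETIFF, &ifr) < 0)
                return -1;
        return fd;
}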
Yes, the number of exits will be most useful. I will look into
adding the other statistics you mention.
Sriram
-----Original Message-----
From: Michael S. Tsirkin [mailto:[email protected]]
Sent: Sunday, May 19, 2013 4:28 AM
To: Narasimhan, Sriram
Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
[...]
On Sun, May 19, 2013 at 04:09:48PM +0000, Narasimhan, Sriram wrote:
> Hi Michael,
>
> I was getting all packets on the same inbound queue, which
> is why I added this support to virtio-net (and some more
> instrumentation at tun as well). But it turned out to be
> my misconfiguration - I did not enable IFF_MULTI_QUEUE on
> the tap device, so real_num_tx_queues on the tap netdev was
> always 1 (no tx distribution at tap).
Interesting that qemu didn't fail.
> I am thinking about
> adding a -q option to tunctl to specify the multi-queue flag on
> the tap device.
Absolutely.
> Yes, the number of exits will be most useful. I will look into
> adding the other statistics you mention.
>
> Sriram
Pls note you'll need to switch to virtqueue_kick_prepare
to detect exits: virtqueue_kick doesn't let you know
whether there was an exit.
Also, it's best to make this a separate patch from the one
adding per-queue stats.
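Something like this, roughly (a sketch; the tx_exits counter is
hypothetical, it would be a new field in virtnet_tx_stats):

/* Sketch: open-code virtqueue_kick() so the notification - and hence
 * the VM exit - is visible and can be counted per queue. */
static void virtnet_kick_sq(struct send_queue *sq)
{
        if (virtqueue_kick_prepare(sq->vq)) {
                /* Notifying the host is what triggers the exit. */
                u64_stats_update_begin(&sq->stats.tx_syncp);
                sq->stats.tx_exits++;
                u64_stats_update_end(&sq->stats.tx_syncp);
                virtqueue_notify(sq->vq);
        }
}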
> [...]
Hi Michael,
Comments inline...
-----Original Message-----
From: Michael S. Tsirkin [mailto:[email protected]]
Sent: Sunday, May 19, 2013 1:03 PM
To: Narasimhan, Sriram
Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; Jason Wang
Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
On Sun, May 19, 2013 at 04:09:48PM +0000, Narasimhan, Sriram wrote:
> Hi Michael,
>
> I was getting all packets on the same inbound queue, which
> is why I added this support to virtio-net (and some more
> instrumentation at tun as well). But it turned out to be
> my misconfiguration - I did not enable IFF_MULTI_QUEUE on
> the tap device, so real_num_tx_queues on the tap netdev was
> always 1 (no tx distribution at tap).
Interesting that qemu didn't fail.
[Sriram] void tun_set_real_num_tx_queues() does not propagate
the EINVAL return from netif_set_real_num_tx_queues() for
txq > dev->num_tx_queues (which would be the case if the
tap device were not created with IFF_MULTI_QUEUE). I think
it would be better to fix the code to disable the new
queue and fail tun_attach() in this scenario. If you
agree, I can go ahead and create a separate patch for that.
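Roughly, the idea would be (a hypothetical, untested sketch):

/* Sketch: let the wrapper propagate the error instead of returning
 * void, so tun_attach() can refuse the extra queue. */
static int tun_set_real_num_tx_queues(struct net_device *dev,
                                      unsigned int txq)
{
        return netif_set_real_num_tx_queues(dev, txq);
}

        /* ...and at the call site in tun_attach() (hypothetical): */
        err = tun_set_real_num_tx_queues(tun->dev, tun->numqueues + 1);
        if (err)
                goto out;       /* don't attach the new queue */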
> I am thinking about
> adding a -q option to tunctl to specify the multi-queue flag on
> the tap device.
Absolutely.
[Sriram] OK, let me do that.
> Yes, the number of exits will be most useful. I will look into
> adding the other statistics you mention.
>
> Sriram
Pls note you'll need to switch to virtqueue_kick_prepare
to detect exits: virtqueue_kick doesn't let you know
whether there was an exit.
Also, it's best to make this a separate patch from the one
adding per-queue stats.
[Sriram] OK, I will cover only the per-queue statistics in
this patch. Also, I will address the indentation/data
structure name points that you mentioned in your earlier
email and send a new revision for this patch.
Sriram
> -----Original Message-----
> From: Michael S. Tsirkin [mailto:[email protected]]
> Sent: Sunday, May 19, 2013 4:28 AM
> To: Narasimhan, Sriram
> Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
> Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
>
> On Thu, May 16, 2013 at 01:24:29PM -0700, Sriram Narasimhan wrote:
> > This patch allows virtio-net driver to report traffic distribution
> > to inbound/outbound queues through ethtool -S. The per_cpu
> > virtnet_stats is split into receive and transmit stats and are
> > maintained on a per receive_queue and send_queue basis.
> > virtnet_stats() is modified to aggregate interface level statistics
> > from per-queue statistics. Sample output below:
> >
>
> Thanks for the patch. The idea itself looks OK to me.
> Ben Hutchings already sent some comments
> so I won't repeat them. Some minor more comments
> and questions below.
>
> > NIC statistics:
> > rxq0: rx_packets: 4357802
> > rxq0: rx_bytes: 292642052
> > txq0: tx_packets: 824540
> > txq0: tx_bytes: 55256404
> > rxq1: rx_packets: 0
> > rxq1: rx_bytes: 0
> > txq1: tx_packets: 1094268
> > txq1: tx_bytes: 73328316
> > rxq2: rx_packets: 0
> > rxq2: rx_bytes: 0
> > txq2: tx_packets: 1091466
> > txq2: tx_bytes: 73140566
> > rxq3: rx_packets: 0
> > rxq3: rx_bytes: 0
> > txq3: tx_packets: 1093043
> > txq3: tx_bytes: 73246142
>
> Interesting. This example implies that all packets are coming in
> through the same RX queue - is this right?
> If yes that's worth exploring - could be a tun bug -
> and shows how this patch is useful.
>
> > Signed-off-by: Sriram Narasimhan <[email protected]>
>
> BTW, while you are looking at the stats, one other interesting
> thing to add could be checking more types of stats: number of exits,
> queue full errors, etc.
>
> > ---
> > drivers/net/virtio_net.c | 157 +++++++++++++++++++++++++++++++++++++---------
> > 1 files changed, 128 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 3c23fdc..3c58c52 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -41,15 +41,46 @@ module_param(gso, bool, 0444);
> >
> > #define VIRTNET_DRIVER_VERSION "1.0.0"
> >
> > -struct virtnet_stats {
> > - struct u64_stats_sync tx_syncp;
> > +struct virtnet_rx_stats {
> > struct u64_stats_sync rx_syncp;
> > - u64 tx_bytes;
> > + u64 rx_packets;
> > + u64 rx_bytes;
> > +};
> > +
> > +struct virtnet_tx_stats {
> > + struct u64_stats_sync tx_syncp;
> > u64 tx_packets;
> > + u64 tx_bytes;
> > +};
> >
> > - u64 rx_bytes;
> > - u64 rx_packets;
>
> I think maintaining the stats in a per-queue data structure like this is
> fine. if # of CPUs == # of queues which is typical, we use same amount
> of memory. And each queue access is under a lock,
> or from napi thread, so no races either.
>
> > +struct virtnet_ethtool_stats {
> > + char desc[ETH_GSTRING_LEN];
> > + int type;
> > + int size;
> > + int offset;
> > +};
> > +
> > +enum {VIRTNET_STATS_TX, VIRTNET_STATS_RX};
> > +
> > +#define VIRTNET_RX_STATS_INFO(_struct, _field) \
> > + {#_field, VIRTNET_STATS_RX, FIELD_SIZEOF(_struct, _field), \
> > + offsetof(_struct, _field)}
> > +
> > +#define VIRTNET_TX_STATS_INFO(_struct, _field) \
> > + {#_field, VIRTNET_STATS_TX, FIELD_SIZEOF(_struct, _field), \
> > + offsetof(_struct, _field)}
> > +
> > +static const struct virtnet_ethtool_stats virtnet_et_rx_stats[] = {
> > + VIRTNET_RX_STATS_INFO(struct virtnet_rx_stats, rx_packets),
> > + VIRTNET_RX_STATS_INFO(struct virtnet_rx_stats, rx_bytes)
> > +};
> > +#define VIRTNET_RX_STATS_NUM (ARRAY_SIZE(virtnet_et_rx_stats))
> > +
> > +static const struct virtnet_ethtool_stats virtnet_et_tx_stats[] = {
> > + VIRTNET_TX_STATS_INFO(struct virtnet_tx_stats, tx_packets),
> > + VIRTNET_TX_STATS_INFO(struct virtnet_tx_stats, tx_bytes)
> > };
> > +#define VIRTNET_TX_STATS_NUM (ARRAY_SIZE(virtnet_et_tx_stats))
>
> I'd prefer a full name: virtnet_ethtool_tx_stats, or
> just virtnet_tx_stats.
>
> >
> > /* Internal representation of a send virtqueue */
> > struct send_queue {
> > @@ -61,6 +92,9 @@ struct send_queue {
> >
> > /* Name of the send queue: output.$index */
> > char name[40];
> > +
> > + /* Active send queue statistics */
> > + struct virtnet_tx_stats stats;
> > };
> >
> > /* Internal representation of a receive virtqueue */
> > @@ -81,6 +115,9 @@ struct receive_queue {
> >
> > /* Name of this receive queue: input.$index */
> > char name[40];
> > +
> > + /* Active receive queue statistics */
> > + struct virtnet_rx_stats stats;
> > };
> >
> > struct virtnet_info {
> > @@ -109,9 +146,6 @@ struct virtnet_info {
> > /* enable config space updates */
> > bool config_enable;
> >
> > - /* Active statistics */
> > - struct virtnet_stats __percpu *stats;
> > -
> > /* Work struct for refilling if we run low on memory. */
> > struct delayed_work refill;
> >
> > @@ -330,7 +364,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
> > {
> > struct virtnet_info *vi = rq->vq->vdev->priv;
> > struct net_device *dev = vi->dev;
> > - struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
> > + struct virtnet_rx_stats *stats = &rq->stats;
> > struct sk_buff *skb;
> > struct page *page;
> > struct skb_vnet_hdr *hdr;
> > @@ -650,8 +684,7 @@ static void free_old_xmit_skbs(struct send_queue *sq)
> > {
> > struct sk_buff *skb;
> > unsigned int len;
> > - struct virtnet_info *vi = sq->vq->vdev->priv;
> > - struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
> > + struct virtnet_tx_stats *stats = &sq->stats;
> >
> > while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> > pr_debug("Sent skb %p\n", skb);
> > @@ -841,24 +874,25 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
> > struct rtnl_link_stats64 *tot)
> > {
> > struct virtnet_info *vi = netdev_priv(dev);
> > - int cpu;
> > + int i;
> > unsigned int start;
> >
> > - for_each_possible_cpu(cpu) {
> > - struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
> > + for (i = 0; i < vi->max_queue_pairs; i++) {
> > + struct virtnet_tx_stats *tstats = &vi->sq[i].stats;
> > + struct virtnet_rx_stats *rstats = &vi->rq[i].stats;
> > u64 tpackets, tbytes, rpackets, rbytes;
> >
> > do {
> > - start = u64_stats_fetch_begin_bh(&stats->tx_syncp);
> > - tpackets = stats->tx_packets;
> > - tbytes = stats->tx_bytes;
> > - } while (u64_stats_fetch_retry_bh(&stats->tx_syncp, start));
> > + start = u64_stats_fetch_begin_bh(&tstats->tx_syncp);
> > + tpackets = tstats->tx_packets;
> > + tbytes = tstats->tx_bytes;
> > + } while (u64_stats_fetch_retry_bh(&tstats->tx_syncp, start));
> >
> > do {
> > - start = u64_stats_fetch_begin_bh(&stats->rx_syncp);
> > - rpackets = stats->rx_packets;
> > - rbytes = stats->rx_bytes;
> > - } while (u64_stats_fetch_retry_bh(&stats->rx_syncp, start));
> > + start = u64_stats_fetch_begin_bh(&rstats->rx_syncp);
> > + rpackets = rstats->rx_packets;
> > + rbytes = rstats->rx_bytes;
> > + } while (u64_stats_fetch_retry_bh(&rstats->rx_syncp, start));
> >
> > tot->rx_packets += rpackets;
> > tot->tx_packets += tpackets;
> > @@ -1177,12 +1211,83 @@ static void virtnet_get_channels(struct net_device *dev,
> > channels->other_count = 0;
> > }
> >
> > +static void virtnet_get_stat_strings(struct net_device *dev,
> > + u32 stringset,
> > + u8 *data)
> > +{
> > + struct virtnet_info *vi = netdev_priv(dev);
> > + int i, j;
> > +
> > + switch (stringset) {
> > + case ETH_SS_STATS:
> > + for (i = 0; i < vi->max_queue_pairs; i++) {
> > + for (j = 0; j < VIRTNET_RX_STATS_NUM; j++) {
> > + sprintf(data, "rxq%d: %s", i,
> > + virtnet_et_rx_stats[j].desc);
> > + data += ETH_GSTRING_LEN;
> > + }
> > + for (j = 0; j < VIRTNET_TX_STATS_NUM; j++) {
> > + sprintf(data, "txq%d: %s", i,
> > + virtnet_et_tx_stats[j].desc);
> > + data += ETH_GSTRING_LEN;
> > + }
> > + }
> > + break;
> > + }
> > +}
> > +
> > +static int virtnet_get_sset_count(struct net_device *dev, int stringset)
> > +{
> > + struct virtnet_info *vi = netdev_priv(dev);
> > + switch (stringset) {
> > + case ETH_SS_STATS:
> > + return vi->max_queue_pairs *
> > + (VIRTNET_RX_STATS_NUM + VIRTNET_TX_STATS_NUM);
> > + default:
> > + return -EINVAL;
> > + }
> > +}
> > +
> > +static void virtnet_get_ethtool_stats(struct net_device *dev,
> > + struct ethtool_stats *stats,
> > + u64 *data)
> > +{
> > + struct virtnet_info *vi = netdev_priv(dev);
> > + unsigned int i, base;
> > + unsigned int start;
> > +
> > + for (i = 0, base = 0; i < vi->max_queue_pairs; i++) {
> > + struct virtnet_tx_stats *tstats = &vi->sq[i].stats;
> > + struct virtnet_rx_stats *rstats = &vi->rq[i].stats;
> > +
> > + do {
> > + start = u64_stats_fetch_begin_bh(&rstats->rx_syncp);
> > + data[base] = rstats->rx_packets;
> > + data[base+1] = rstats->rx_bytes;
>
> nitpicking:
> We normally has spaces around +, like this:
> data[base + 1] = rstats->rx_bytes;
>
> > + } while (u64_stats_fetch_retry_bh(&rstats->rx_syncp, start));
> > +
> > + base += VIRTNET_RX_STATS_NUM;
> > +
> > + do {
> > + start = u64_stats_fetch_begin_bh(&tstats->tx_syncp);
> > + data[base] = tstats->tx_packets;
> > + data[base+1] = tstats->tx_bytes;
>
>
> nitpicking:
> Here, something strange happened to indentation.
>
> > + } while (u64_stats_fetch_retry_bh(&tstats->tx_syncp, start));
> > +
> > + base += VIRTNET_TX_STATS_NUM;
> > + }
> > +}
> > +
> > +
> > static const struct ethtool_ops virtnet_ethtool_ops = {
> > .get_drvinfo = virtnet_get_drvinfo,
> > .get_link = ethtool_op_get_link,
> > .get_ringparam = virtnet_get_ringparam,
> > .set_channels = virtnet_set_channels,
> > .get_channels = virtnet_get_channels,
> > + .get_strings = virtnet_get_stat_strings,
> > + .get_sset_count = virtnet_get_sset_count,
> > + .get_ethtool_stats = virtnet_get_ethtool_stats,
> > };
> >
> > #define MIN_MTU 68
> > @@ -1531,14 +1636,11 @@ static int virtnet_probe(struct virtio_device *vdev)
> > vi->dev = dev;
> > vi->vdev = vdev;
> > vdev->priv = vi;
> > - vi->stats = alloc_percpu(struct virtnet_stats);
> > err = -ENOMEM;
> > - if (vi->stats == NULL)
> > - goto free;
> >
> > vi->vq_index = alloc_percpu(int);
> > if (vi->vq_index == NULL)
> > - goto free_stats;
> > + goto free;
> >
> > mutex_init(&vi->config_lock);
> > vi->config_enable = true;
> > @@ -1616,8 +1718,6 @@ free_vqs:
> > virtnet_del_vqs(vi);
> > free_index:
> > free_percpu(vi->vq_index);
> > -free_stats:
> > - free_percpu(vi->stats);
> > free:
> > free_netdev(dev);
> > return err;
> > @@ -1653,7 +1753,6 @@ static void virtnet_remove(struct virtio_device *vdev)
> > flush_work(&vi->config_work);
> >
> > free_percpu(vi->vq_index);
> > - free_percpu(vi->stats);
> > free_netdev(vi->dev);
> > }
>
> Thanks!
>
> > --
> > 1.7.1
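A quick aside on the stats table design reviewed above: each VIRTNET_RX_STATS_INFO()/VIRTNET_TX_STATS_INFO() entry already records the field's size and offsetof() value, so the copy loop in virtnet_get_ethtool_stats() could be table-driven instead of hard-coding data[base] and data[base + 1]. A minimal sketch, not part of the posted patch (the helper name is made up):

static void virtnet_copy_stats(u64 *data, const void *stats,
			       const struct virtnet_ethtool_stats *table,
			       unsigned int num)
{
	unsigned int j;

	/* Each table entry records where its u64 counter lives inside
	 * the per-queue stats struct, so one loop covers any counter. */
	for (j = 0; j < num; j++)
		data[j] = *(const u64 *)((const u8 *)stats + table[j].offset);
}

The callers would still wrap this in the u64_stats_fetch_begin_bh()/u64_stats_fetch_retry_bh() loop; the gain is that adding a new counter becomes a one-line *_STATS_INFO() change.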
On 05/20/2013 06:56 AM, Narasimhan, Sriram wrote:
> Hi Michael,
>
> Comments inline...
>
> -----Original Message-----
> From: Michael S. Tsirkin [mailto:[email protected]]
> Sent: Sunday, May 19, 2013 1:03 PM
> To: Narasimhan, Sriram
> Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; Jason Wang
> Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
>
> On Sun, May 19, 2013 at 04:09:48PM +0000, Narasimhan, Sriram wrote:
>> Hi Michael,
>>
>> I was getting all packets on the same inbound queue which
>> is why I added this support to virtio-net (and some more
>> instrumentation at tun as well). But, it turned out to be
>> my misconfiguration - I did not enable IFF_MULTI_QUEUE on
>> the tap device, so the real_num_tx_queues on tap netdev was
>> always 1 (no tx distribution at tap).
> Interesting that qemu didn't fail.
>
> [Sriram] void tun_set_real_num_tx_queues() does not propagate
> the -EINVAL returned by netif_set_real_num_tx_queues() for
> txq > dev->num_tx_queues (which would be the case if the
> tap device were not created with IFF_MULTI_QUEUE). I think
> it would be better to fix the code to disable the new
> queue and fail tun_attach() in this scenario. If you
> agree, I can go ahead and create a separate patch for that.
>
But tun_attach() checks the TUN_TAP_MQ flag first, so it should fail?
How was the tap created?
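As a side note, the change Sriram describes could look roughly like this; a sketch only, with the tun.c context assumed rather than verified:

static int tun_set_real_num_tx_queues(struct tun_struct *tun)
{
	/* Propagate the -EINVAL from netif_set_real_num_tx_queues()
	 * (txq > dev->num_tx_queues) instead of dropping it, so that
	 * tun_attach() can fail the attach of the extra queue. */
	return netif_set_real_num_tx_queues(tun->dev, tun->numqueues);
}

tun_attach() would then check the return value and back out the new queue on error.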
>> I am thinking about
>> adding a -q option to tunctl to specify multi-queue flag on
>> the tap device.
> Absolutely.
>
> [Sriram] OK, let me do that.
>
>> Yes, number of exits will be most useful. I will look into
>> adding the other statistics you mention.
>>
>> Sriram
> Pls note you'll need to switch to virtqueue_kick_prepare
> to detect exits: virtqueue_kick doesn't let you know
> whether there was an exit.
>
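To make that concrete, counting exits would mean splitting the kick roughly as below; tx_exits is a hypothetical counter, not something the posted patch carries:

	if (virtqueue_kick_prepare(sq->vq)) {
		/* virtqueue_kick() hides whether the host was actually
		 * notified; the prepare/notify pair exposes it. */
		virtqueue_notify(sq->vq);
		u64_stats_update_begin(&sq->stats.tx_syncp);
		sq->stats.tx_exits++;	/* hypothetical counter */
		u64_stats_update_end(&sq->stats.tx_syncp);
	}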
> Also, it's best to make this a separate patch from the one
> adding per-queue stats.
>
> [Sriram] OK, I will cover only the per-queue statistics in
> this patch. Also, I will address the indentation/data
> structure name points that you mentioned in your earlier
> email and send a new revision for this patch.
>
> Sriram
Better have some performance numbers to make sure nothing was slowed down.
Thanks
>
>> -----Original Message-----
>> From: Michael S. Tsirkin [mailto:[email protected]]
>> Sent: Sunday, May 19, 2013 4:28 AM
>> Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
>>
>> On Thu, May 16, 2013 at 01:24:29PM -0700, Sriram Narasimhan wrote:
>>> This patch allows virtio-net driver to report traffic distribution
>>> to inbound/outbound queues through ethtool -S. [...]
>>
>> Thanks for the patch. The idea itself looks OK to me.
>> Ben Hutchings already sent some comments
>> so I won't repeat them. Some minor more comments
>> and questions below.
>>
>>> [...]
>>
>> Interesting. This example implies that all packets are coming in
>> through the same RX queue - is this right?
>> If yes that's worth exploring - could be a tun bug -
>> and shows how this patch is useful.
>>
>> BTW, while you are looking at the stats, one other interesting
>> thing to add could be checking more types of stats: number of exits,
>> queue full errors, etc.
>>
>>> [...]
>>
>> I think maintaining the stats in a per-queue data structure like this is
>> fine. if # of CPUs == # of queues which is typical, we use same amount
>> of memory. And each queue access is under a lock,
>> or from napi thread, so no races either.
>>
>>> [...]
>>
>> I'd prefer a full name: virtnet_ethtool_tx_stats, or
>> just virtnet_tx_stats.
>>
>>> [...]
On Sun, May 19, 2013 at 10:56:16PM +0000, Narasimhan, Sriram wrote:
> Hi Michael,
>
> Comments inline...
>
> On Sun, May 19, 2013 at 04:09:48PM +0000, Narasimhan, Sriram wrote:
> > Hi Michael,
> >
> > I was getting all packets on the same inbound queue which
> > is why I added this support to virtio-net (and some more
> > instrumentation at tun as well). But, it turned out to be
> > my misconfiguration - I did not enable IFF_MULTI_QUEUE on
> > the tap device, so the real_num_tx_queues on tap netdev was
> > always 1 (no tx distribution at tap).
>
> Interesting that qemu didn't fail.
>
> [Sriram] void tun_set_real_num_tx_queues() does not propagate
> the -EINVAL returned by netif_set_real_num_tx_queues() for
> txq > dev->num_tx_queues (which would be the case if the
> tap device were not created with IFF_MULTI_QUEUE). I think
> it would be better to fix the code to disable the new
> queue and fail tun_attach()
You mean fail TUNSETQUEUE?
> in this scenario. If you
> agree, I can go ahead and create a separate patch for that.
Hmm, I'm not sure I understand what happens, so it's hard for me to tell.
I think this code was supposed to handle it:

	err = -EBUSY;
	if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1)
		goto out;
why doesn't it?
> [...]
-----Original Message-----
From: Michael S. Tsirkin [mailto:[email protected]]
Sent: Monday, May 20, 2013 2:59 AM
To: Narasimhan, Sriram
Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; Jason Wang
Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
[...]
> [Sriram] void tun_set_real_num_tx_queues() does not propagate
> the -EINVAL returned by netif_set_real_num_tx_queues() for
> txq > dev->num_tx_queues (which would be the case if the
> tap device were not created with IFF_MULTI_QUEUE). I think
> it would be better to fix the code to disable the new
> queue and fail tun_attach()
You mean fail TUNSETQUEUE?
[Sriram] No I meant TUNSETIFF. FYI, I am using QEMU 1.4.50.
I created the tap device using tunctl. This does not
specify the IFF_MULTI_QUEUE flag during create so 1 netdev
queue is allocated. But, when the tun device is closed,
tun_detach decrements tun->numqueues from 1 to 0.
The following were the options I passed to qemu:
-netdev tap,id=hostnet1,vhost=on,ifname=tap1,queues=4
-device virtio-net-pci,netdev=hostnet1,id=net1,mac=52:54:00:9b:8e:24,mq=on,vectors=9,ctrl_vq=on
> in this scenario. If you
> agree, I can go ahead and create a separate patch for that.
Hmm, I'm not sure I understand what happens, so it's hard for me to tell.
I think this code was supposed to handle it:

	err = -EBUSY;
	if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1)
		goto out;
why doesn't it?
[Sriram] This question was raised by Jason as well.
When QEMU is started with multiple queues on the tap
device, it calls TUNSETIFF on the existing tap device with
IFF_MULTI_QUEUE set. The above code falls through since
tun->numqueues = 0 due to the previous tun_detach() during
close. At the end of this, tun_set_iff() sets TUN_TAP_MQ
flag for the tun data structure. From that point onwards,
subsequent TUNSETIFF will fall through resulting in the
mismatch in #queues between tun and netdev structures.
>
> > I am thinking about
> > adding a -q option to tunctl to specify multi-queue flag on
> > the tap device.
>
> Absolutely.
>
> [Sriram] OK, let me do that.
[Sriram] I am planning to add ip tuntap multi-queue option
instead of tunctl.
Sriram
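(A rough sketch of what such an ip tuntap invocation could look like; the multi_queue keyword here is an assumption about the eventual iproute2 syntax, not shipped behavior:

	ip tuntap add dev tap1 mode tap multi_queue

this would create the tap device with IFF_MULTI_QUEUE set from the start, avoiding the mismatch described above.)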
> [...]
On 05/21/2013 09:26 AM, Narasimhan, Sriram wrote:
> [...]
>
> Hmm, I'm not sure I understand what happens, so it's hard for me to tell.
>
> I think this code was supposed to handle it:
>
> 	err = -EBUSY;
> 	if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1)
> 		goto out;
>
> why doesn't it?
>
> [Sriram] This question was raised by Jason as well.
> When QEMU is started with multiple queues on the tap
> device, it calls TUNSETIFF on the existing tap device with
> IFF_MULTI_QUEUE set. The above code falls through since
> tun->numqueues = 0 due to the previous tun_detach() during
> close. At the end of this, tun_set_iff() sets TUN_TAP_MQ
> flag for the tun data structure. From that point onwards,
> subsequent TUNSETIFF will fall through resulting in the
> mismatch in #queues between tun and netdev structures.
>
Thanks, I think I get it. The problem is that we only allocate a
one-queue netdevice when IFF_MULTI_QUEUE was not set, so resetting the
multiqueue flag on a persistent device should be forbidden. Looks like
we need the following patch. Could you please test this?
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index f042b03..d4fc2bd 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1585,6 +1585,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		else
 			return -EINVAL;
 
+		if (((ifr->ifr_flags & IFF_MULTI_QUEUE) == IFF_MULTI_QUEUE) ^
+		    ((tun->flags & TUN_TAP_MQ) == TUN_TAP_MQ))
+			return -EINVAL;
+
 		if (tun_not_capable(tun))
 			return -EPERM;
 		err = security_tun_dev_open(tun->security);
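(The XOR check is equivalent to requiring that the TUNSETIFF request and the persistent device agree on multi-queue; as a sketch of the same logic:

	bool want_mq = !!(ifr->ifr_flags & IFF_MULTI_QUEUE);
	bool have_mq = !!(tun->flags & TUN_TAP_MQ);

	if (want_mq != have_mq)
		return -EINVAL;

so a single-queue TUNSETIFF on a multi-queue persistent device is rejected too, not just the case Sriram hit.)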
>>> I am thinking about
>>> adding a -q option to tunctl to specify multi-queue flag on
>>> the tap device.
>> Absolutely.
>>
>> [Sriram] OK, let me do that.
>
> [Sriram] I am planning to add ip tuntap multi-queue option
> instead of tunctl.
>
> Sriram
>
>>> Yes, number of exits will be most useful. I will look into
>>> adding the other statistics you mention.
>>>
>>> Sriram
>> Pls note you'll need to switch to virtqueue_kick_prepare
>> to detect exits: virtqueue_kick doesn't let you know
>> whether there was an exit.
>>
>> Also, it's best to make this a separate patch from the one
>> adding per-queue stats.
>>
>> [Sriram] OK, I will cover only the per-queue statistics in
>> this patch. Also, I will address the indentation/data
>> structure name points that you mentioned in your earlier
>> email and send a new revision for this patch.
>>
>> Sriram
>>
>>> -----Original Message-----
>>> From: Michael S. Tsirkin [mailto:[email protected]]
>>> Sent: Sunday, May 19, 2013 4:28 AM
>>> To: Narasimhan, Sriram
>>> Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
>>> Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
>>>
>>> On Thu, May 16, 2013 at 01:24:29PM -0700, Sriram Narasimhan wrote:
>>>> This patch allows virtio-net driver to report traffic distribution
>>>> to inbound/outbound queues through ethtool -S. The per_cpu
>>>> virtnet_stats is split into receive and transmit stats and are
>>>> maintained on a per receive_queue and send_queue basis.
>>>> virtnet_stats() is modified to aggregate interface level statistics
>>>> from per-queue statistics. Sample output below:
>>>>
>>> Thanks for the patch. The idea itself looks OK to me.
>>> Ben Hutchings already sent some comments
>>> so I won't repeat them. Some minor more comments
>>> and questions below.
>>>
>>>> NIC statistics:
>>>> rxq0: rx_packets: 4357802
>>>> rxq0: rx_bytes: 292642052
>>>> txq0: tx_packets: 824540
>>>> txq0: tx_bytes: 55256404
>>>> rxq1: rx_packets: 0
>>>> rxq1: rx_bytes: 0
>>>> txq1: tx_packets: 1094268
>>>> txq1: tx_bytes: 73328316
>>>> rxq2: rx_packets: 0
>>>> rxq2: rx_bytes: 0
>>>> txq2: tx_packets: 1091466
>>>> txq2: tx_bytes: 73140566
>>>> rxq3: rx_packets: 0
>>>> rxq3: rx_bytes: 0
>>>> txq3: tx_packets: 1093043
>>>> txq3: tx_bytes: 73246142
>>> Interesting. This example implies that all packets are coming in
>>> through the same RX queue - is this right?
>>> If yes that's worth exploring - could be a tun bug -
>>> and shows how this patch is useful.
>>>
>>>> Signed-off-by: Sriram Narasimhan <[email protected]>
>>> BTW, while you are looking at the stats, one other interesting
>>> thing to add could be checking more types of stats: number of exits,
>>> queue full errors, etc.
>>>
>>>> ---
>>>> drivers/net/virtio_net.c | 157 +++++++++++++++++++++++++++++++++++++---------
>>>> 1 files changed, 128 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>> index 3c23fdc..3c58c52 100644
>>>> --- a/drivers/net/virtio_net.c
>>>> +++ b/drivers/net/virtio_net.c
>>>> @@ -41,15 +41,46 @@ module_param(gso, bool, 0444);
>>>>
>>>> #define VIRTNET_DRIVER_VERSION "1.0.0"
>>>>
>>>> -struct virtnet_stats {
>>>> - struct u64_stats_sync tx_syncp;
>>>> +struct virtnet_rx_stats {
>>>> struct u64_stats_sync rx_syncp;
>>>> - u64 tx_bytes;
>>>> + u64 rx_packets;
>>>> + u64 rx_bytes;
>>>> +};
>>>> +
>>>> +struct virtnet_tx_stats {
>>>> + struct u64_stats_sync tx_syncp;
>>>> u64 tx_packets;
>>>> + u64 tx_bytes;
>>>> +};
>>>>
>>>> - u64 rx_bytes;
>>>> - u64 rx_packets;
>>> I think maintaining the stats in a per-queue data structure like this is
>>> fine. if # of CPUs == # of queues which is typical, we use same amount
>>> of memory. And each queue access is under a lock,
>>> or from napi thread, so no races either.
>>>
>>>> +struct virtnet_ethtool_stats {
>>>> + char desc[ETH_GSTRING_LEN];
>>>> + int type;
>>>> + int size;
>>>> + int offset;
>>>> +};
>>>> +
>>>> +enum {VIRTNET_STATS_TX, VIRTNET_STATS_RX};
>>>> +
>>>> +#define VIRTNET_RX_STATS_INFO(_struct, _field) \
>>>> + {#_field, VIRTNET_STATS_RX, FIELD_SIZEOF(_struct, _field), \
>>>> + offsetof(_struct, _field)}
>>>> +
>>>> +#define VIRTNET_TX_STATS_INFO(_struct, _field) \
>>>> + {#_field, VIRTNET_STATS_TX, FIELD_SIZEOF(_struct, _field), \
>>>> + offsetof(_struct, _field)}
>>>> +
>>>> +static const struct virtnet_ethtool_stats virtnet_et_rx_stats[] = {
>>>> + VIRTNET_RX_STATS_INFO(struct virtnet_rx_stats, rx_packets),
>>>> + VIRTNET_RX_STATS_INFO(struct virtnet_rx_stats, rx_bytes)
>>>> +};
>>>> +#define VIRTNET_RX_STATS_NUM (ARRAY_SIZE(virtnet_et_rx_stats))
>>>> +
>>>> +static const struct virtnet_ethtool_stats virtnet_et_tx_stats[] = {
>>>> + VIRTNET_TX_STATS_INFO(struct virtnet_tx_stats, tx_packets),
>>>> + VIRTNET_TX_STATS_INFO(struct virtnet_tx_stats, tx_bytes)
>>>> };
>>>> +#define VIRTNET_TX_STATS_NUM (ARRAY_SIZE(virtnet_et_tx_stats))
>>> I'd prefer a full name: virtnet_ethtool_tx_stats, or
>>> just virtnet_tx_stats.
>>>
>>>>
>>>> /* Internal representation of a send virtqueue */
>>>> struct send_queue {
>>>> @@ -61,6 +92,9 @@ struct send_queue {
>>>>
>>>> /* Name of the send queue: output.$index */
>>>> char name[40];
>>>> +
>>>> + /* Active send queue statistics */
>>>> + struct virtnet_tx_stats stats;
>>>> };
>>>>
>>>> /* Internal representation of a receive virtqueue */
>>>> @@ -81,6 +115,9 @@ struct receive_queue {
>>>>
>>>> /* Name of this receive queue: input.$index */
>>>> char name[40];
>>>> +
>>>> + /* Active receive queue statistics */
>>>> + struct virtnet_rx_stats stats;
>>>> };
>>>>
>>>> struct virtnet_info {
>>>> @@ -109,9 +146,6 @@ struct virtnet_info {
>>>> /* enable config space updates */
>>>> bool config_enable;
>>>>
>>>> - /* Active statistics */
>>>> - struct virtnet_stats __percpu *stats;
>>>> -
>>>> /* Work struct for refilling if we run low on memory. */
>>>> struct delayed_work refill;
>>>>
>>>> @@ -330,7 +364,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
>>>> {
>>>> struct virtnet_info *vi = rq->vq->vdev->priv;
>>>> struct net_device *dev = vi->dev;
>>>> - struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
>>>> + struct virtnet_rx_stats *stats = &rq->stats;
>>>> struct sk_buff *skb;
>>>> struct page *page;
>>>> struct skb_vnet_hdr *hdr;
>>>> @@ -650,8 +684,7 @@ static void free_old_xmit_skbs(struct send_queue *sq)
>>>> {
>>>> struct sk_buff *skb;
>>>> unsigned int len;
>>>> - struct virtnet_info *vi = sq->vq->vdev->priv;
>>>> - struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
>>>> + struct virtnet_tx_stats *stats = &sq->stats;
>>>>
>>>> while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
>>>> pr_debug("Sent skb %p\n", skb);
>>>> @@ -841,24 +874,25 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
>>>> struct rtnl_link_stats64 *tot)
>>>> {
>>>> struct virtnet_info *vi = netdev_priv(dev);
>>>> - int cpu;
>>>> + int i;
>>>> unsigned int start;
>>>>
>>>> - for_each_possible_cpu(cpu) {
>>>> - struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
>>>> + for (i = 0; i < vi->max_queue_pairs; i++) {
>>>> + struct virtnet_tx_stats *tstats = &vi->sq[i].stats;
>>>> + struct virtnet_rx_stats *rstats = &vi->rq[i].stats;
>>>> u64 tpackets, tbytes, rpackets, rbytes;
>>>>
>>>> do {
>>>> - start = u64_stats_fetch_begin_bh(&stats->tx_syncp);
>>>> - tpackets = stats->tx_packets;
>>>> - tbytes = stats->tx_bytes;
>>>> - } while (u64_stats_fetch_retry_bh(&stats->tx_syncp, start));
>>>> + start = u64_stats_fetch_begin_bh(&tstats->tx_syncp);
>>>> + tpackets = tstats->tx_packets;
>>>> + tbytes = tstats->tx_bytes;
>>>> + } while (u64_stats_fetch_retry_bh(&tstats->tx_syncp, start));
>>>>
>>>> do {
>>>> - start = u64_stats_fetch_begin_bh(&stats->rx_syncp);
>>>> - rpackets = stats->rx_packets;
>>>> - rbytes = stats->rx_bytes;
>>>> - } while (u64_stats_fetch_retry_bh(&stats->rx_syncp, start));
>>>> + start = u64_stats_fetch_begin_bh(&rstats->rx_syncp);
>>>> + rpackets = rstats->rx_packets;
>>>> + rbytes = rstats->rx_bytes;
>>>> + } while (u64_stats_fetch_retry_bh(&rstats->rx_syncp, start));
>>>>
>>>> tot->rx_packets += rpackets;
>>>> tot->tx_packets += tpackets;
>>>> @@ -1177,12 +1211,83 @@ static void virtnet_get_channels(struct net_device *dev,
>>>> channels->other_count = 0;
>>>> }
>>>>
>>>> +static void virtnet_get_stat_strings(struct net_device *dev,
>>>> + u32 stringset,
>>>> + u8 *data)
>>>> +{
>>>> + struct virtnet_info *vi = netdev_priv(dev);
>>>> + int i, j;
>>>> +
>>>> + switch (stringset) {
>>>> + case ETH_SS_STATS:
>>>> + for (i = 0; i < vi->max_queue_pairs; i++) {
>>>> + for (j = 0; j < VIRTNET_RX_STATS_NUM; j++) {
>>>> + sprintf(data, "rxq%d: %s", i,
>>>> + virtnet_et_rx_stats[j].desc);
>>>> + data += ETH_GSTRING_LEN;
>>>> + }
>>>> + for (j = 0; j < VIRTNET_TX_STATS_NUM; j++) {
>>>> + sprintf(data, "txq%d: %s", i,
>>>> + virtnet_et_tx_stats[j].desc);
>>>> + data += ETH_GSTRING_LEN;
>>>> + }
>>>> + }
>>>> + break;
>>>> + }
>>>> +}
>>>> +
>>>> +static int virtnet_get_sset_count(struct net_device *dev, int stringset)
>>>> +{
>>>> + struct virtnet_info *vi = netdev_priv(dev);
>>>> + switch (stringset) {
>>>> + case ETH_SS_STATS:
>>>> + return vi->max_queue_pairs *
>>>> + (VIRTNET_RX_STATS_NUM + VIRTNET_TX_STATS_NUM);
>>>> + default:
>>>> + return -EINVAL;
>>>> + }
>>>> +}
>>>> +
>>>> +static void virtnet_get_ethtool_stats(struct net_device *dev,
>>>> + struct ethtool_stats *stats,
>>>> + u64 *data)
>>>> +{
>>>> + struct virtnet_info *vi = netdev_priv(dev);
>>>> + unsigned int i, base;
>>>> + unsigned int start;
>>>> +
>>>> + for (i = 0, base = 0; i < vi->max_queue_pairs; i++) {
>>>> + struct virtnet_tx_stats *tstats = &vi->sq[i].stats;
>>>> + struct virtnet_rx_stats *rstats = &vi->rq[i].stats;
>>>> +
>>>> + do {
>>>> + start = u64_stats_fetch_begin_bh(&rstats->rx_syncp);
>>>> + data[base] = rstats->rx_packets;
>>>> + data[base+1] = rstats->rx_bytes;
>>> nitpicking:
>>> We normally have spaces around +, like this:
>>> data[base + 1] = rstats->rx_bytes;
>>>
>>>> + } while (u64_stats_fetch_retry_bh(&rstats->rx_syncp, start));
>>>> +
>>>> + base += VIRTNET_RX_STATS_NUM;
>>>> +
>>>> + do {
>>>> + start = u64_stats_fetch_begin_bh(&tstats->tx_syncp);
>>>> + data[base] = tstats->tx_packets;
>>>> + data[base+1] = tstats->tx_bytes;
>>>
>>> nitpicking:
>>> Here, something strange happened to indentation.
>>>
>>>> + } while (u64_stats_fetch_retry_bh(&tstats->tx_syncp, start));
>>>> +
>>>> + base += VIRTNET_TX_STATS_NUM;
>>>> + }
>>>> +}
>>>> +
>>>> +
>>>> static const struct ethtool_ops virtnet_ethtool_ops = {
>>>> .get_drvinfo = virtnet_get_drvinfo,
>>>> .get_link = ethtool_op_get_link,
>>>> .get_ringparam = virtnet_get_ringparam,
>>>> .set_channels = virtnet_set_channels,
>>>> .get_channels = virtnet_get_channels,
>>>> + .get_strings = virtnet_get_stat_strings,
>>>> + .get_sset_count = virtnet_get_sset_count,
>>>> + .get_ethtool_stats = virtnet_get_ethtool_stats,
>>>> };
>>>>
>>>> #define MIN_MTU 68
>>>> @@ -1531,14 +1636,11 @@ static int virtnet_probe(struct virtio_device *vdev)
>>>> vi->dev = dev;
>>>> vi->vdev = vdev;
>>>> vdev->priv = vi;
>>>> - vi->stats = alloc_percpu(struct virtnet_stats);
>>>> err = -ENOMEM;
>>>> - if (vi->stats == NULL)
>>>> - goto free;
>>>>
>>>> vi->vq_index = alloc_percpu(int);
>>>> if (vi->vq_index == NULL)
>>>> - goto free_stats;
>>>> + goto free;
>>>>
>>>> mutex_init(&vi->config_lock);
>>>> vi->config_enable = true;
>>>> @@ -1616,8 +1718,6 @@ free_vqs:
>>>> virtnet_del_vqs(vi);
>>>> free_index:
>>>> free_percpu(vi->vq_index);
>>>> -free_stats:
>>>> - free_percpu(vi->stats);
>>>> free:
>>>> free_netdev(dev);
>>>> return err;
>>>> @@ -1653,7 +1753,6 @@ static void virtnet_remove(struct virtio_device *vdev)
>>>> flush_work(&vi->config_work);
>>>>
>>>> free_percpu(vi->vq_index);
>>>> - free_percpu(vi->stats);
>>>> free_netdev(vi->dev);
>>>> }
>>> Thanks!
>>>
>>>> --
>>>> 1.7.1
On Tue, May 21, 2013 at 01:11:28PM +0800, Jason Wang wrote:
> On 05/21/2013 09:26 AM, Narasimhan, Sriram wrote:
> >
> > -----Original Message-----
> > From: Michael S. Tsirkin [mailto:[email protected]]
> > Sent: Monday, May 20, 2013 2:59 AM
> > To: Narasimhan, Sriram
> > Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; Jason Wang
> > Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
> >
> > On Sun, May 19, 2013 at 10:56:16PM +0000, Narasimhan, Sriram wrote:
> >> Hi Michael,
> >>
> >> Comments inline...
> >>
> >> -----Original Message-----
> >> From: Michael S. Tsirkin [mailto:[email protected]]
> >> Sent: Sunday, May 19, 2013 1:03 PM
> >> To: Narasimhan, Sriram
> >> Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; Jason Wang
> >> Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
> >>
> >> On Sun, May 19, 2013 at 04:09:48PM +0000, Narasimhan, Sriram wrote:
> >>> Hi Michael,
> >>>
> >>> I was getting all packets on the same inbound queue which
> >>> is why I added this support to virtio-net (and some more
> >>> instrumentation at tun as well). But, it turned out to be
> >>> my misconfiguration - I did not enable IFF_MULTI_QUEUE on
> >>> the tap device, so the real_num_tx_queues on tap netdev was
> >>> always 1 (no tx distribution at tap).
> >> Interesting that qemu didn't fail.
> >>
> >> [Sriram] void tun_set_real_num_tx_queues() does not propagate
> >> the EINVAL returned by netif_set_real_num_tx_queues() when
> >> txq > dev->num_tx_queues (which would be the case if the
> >> tap device were not created with IFF_MULTI_QUEUE). I think
> >> it would be better to fix the code to disable the new
> >> queue and fail tun_attach()
> > You mean fail TUNSETQUEUE?
> >
> > [Sriram] No, I meant TUNSETIFF. FYI, I am using QEMU 1.4.50.
> > I created the tap device using tunctl, which does not set
> > the IFF_MULTI_QUEUE flag at create time, so only 1 netdev
> > queue is allocated. But when the tun device is closed,
> > tun_detach() decrements tun->numqueues from 1 to 0.
> >
> > The following were the options I passed to qemu:
> > -netdev tap,id=hostnet1,vhost=on,ifname=tap1,queues=4
> > -device virtio-net-pci,netdev=hostnet1,id=net1,mac=52:54:00:9b:8e:24,mq=on,vectors=9,ctrl_vq=on
> >
> >
> >> in this scenario. If you
> >> agree, I can go ahead and create a separate patch for that.
> > Hmm, I'm not sure I understand what happens, so it's hard for me to tell.
> >
> > I think this code was supposed to handle it:
> >         err = -EBUSY;
> >         if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1)
> >                 goto out;
> >
> > why doesn't it?
> >
> > [Sriram] This question was raised by Jason as well.
> > When QEMU is started with multiple queues on the tap
> > device, it calls TUNSETIFF on the existing tap device with
> > IFF_MULTI_QUEUE set. The above check falls through since
> > tun->numqueues is 0 due to the previous tun_detach() during
> > close. At the end of this, tun_set_iff() sets the TUN_TAP_MQ
> > flag in the tun data structure. From that point onwards,
> > subsequent TUNSETIFF calls fall through as well, resulting in
> > the mismatch in #queues between the tun and netdev structures.
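> >
> > Spelling out the sequence as an annotated sketch (step comments are
> > mine; the check is the one quoted above):
> >
> >         /* 1. tunctl: TUNSETIFF without IFF_MULTI_QUEUE -> 1-queue netdev */
> >         /* 2. close(): tun_detach() leaves tun->numqueues == 0 */
> >         /* 3. qemu: TUNSETIFF again, this time with IFF_MULTI_QUEUE set */
> >         err = -EBUSY;
> >         if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1)
> >                 goto out;       /* not taken: numqueues is 0, so no -EBUSY */
> >         /* 4. tun_set_iff() then sets TUN_TAP_MQ on the 1-queue netdev */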
> >
>
> Thanks, I think I get it. The problem is that we only allocate a
> one-queue netdevice when IFF_MULTI_QUEUE is not set, so resetting the
> multiqueue flag on a persistent device should be forbidden. Looks like
> we need the following patch. Could you please test it?
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index f042b03..d4fc2bd 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1585,6 +1585,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  	else
>  		return -EINVAL;
> 
> +	if (((ifr->ifr_flags & IFF_MULTI_QUEUE) == IFF_MULTI_QUEUE) ^
> +	    ((tun->flags & TUN_TAP_MQ) == TUN_TAP_MQ))
> +		return -EINVAL;
> +
>  	if (tun_not_capable(tun))
>  		return -EPERM;
>  	err = security_tun_dev_open(tun->security);
That's too complex, I think. Let's convert to bool, like this:
/* Make sure we don't change IFF_MULTI_QUEUE flag */
if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) != !!(tun->flags & TUN_TAP_MQ))
        return -EINVAL;
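
In case the double negation looks odd: !! collapses any non-zero mask
test to exactly 1, so the two sides compare as plain booleans even
though the two flags live at different bit positions. A standalone
illustration (not kernel code; flag values made up for the example):

        #include <stdio.h>

        #define IFF_MULTI_QUEUE 0x0100  /* illustrative values only */
        #define TUN_TAP_MQ      0x1000

        int main(void)
        {
                unsigned ifr_flags = IFF_MULTI_QUEUE; /* userspace asks for MQ */
                unsigned tun_flags = TUN_TAP_MQ;      /* device created with MQ */

                /* The raw masks differ (0x0100 vs 0x1000), but !! maps
                 * both to 1, so matching requests compare equal. */
                if (!!(ifr_flags & IFF_MULTI_QUEUE) != !!(tun_flags & TUN_TAP_MQ))
                        printf("mismatch: would return -EINVAL\n");
                else
                        printf("flags agree: TUNSETIFF may proceed\n");
                return 0;
        }
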
You'll want to mention it's a stable candidate when you post this,
but I think you are not supposed to copy stable directly - davem does that himself.
--
MST
On Sun, May 19, 2013 at 10:56:16PM +0000, Narasimhan, Sriram wrote:
> Hi Michael,
>
> Comments inline...
>
> -----Original Message-----
> From: Michael S. Tsirkin [mailto:[email protected]]
> Sent: Sunday, May 19, 2013 1:03 PM
> To: Narasimhan, Sriram
> Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; Jason Wang
> Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
>
> On Sun, May 19, 2013 at 04:09:48PM +0000, Narasimhan, Sriram wrote:
> > Hi Michael,
> >
> > I was getting all packets on the same inbound queue which
> > is why I added this support to virtio-net (and some more
> > instrumentation at tun as well). But, it turned out to be
> > my misconfiguration - I did not enable IFF_MULTI_QUEUE on
> > the tap device, so the real_num_tx_queues on tap netdev was
> > always 1 (no tx distribution at tap).
>
> Interesting that qemu didn't fail.
>
> [Sriram] void tun_set_real_num_tx_queues() does not propagate
> the EINVAL returned by netif_set_real_num_tx_queues() when
> txq > dev->num_tx_queues (which would be the case if the
> tap device were not created with IFF_MULTI_QUEUE). I think
> it would be better to fix the code to disable the new
> queue and fail tun_attach() in this scenario. If you
> agree, I can go ahead and create a separate patch for that.
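>
> For reference, the error gets lost because the helper's return type is
> void; roughly this shape (approximate, from memory - not a verbatim
> quote of tun.c):
>
>         static void tun_set_real_num_tx_queues(struct tun_struct *tun)
>         {
>                 /* netif_set_real_num_tx_queues() can fail with -EINVAL
>                  * when tun->numqueues > dev->num_tx_queues, but the
>                  * return value is dropped here, so the caller never
>                  * sees the failure. */
>                 netif_set_real_num_tx_queues(tun->dev, tun->numqueues);
>         }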
>
>
> > I am thinking about
> > adding a -q option to tunctl to set the multi-queue flag on
> > the tap device.
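> >
> > (Hypothetical usage, just to illustrate the idea: "tunctl -t tap1 -q"
> > would simply OR IFF_MULTI_QUEUE into ifr.ifr_flags before the
> > TUNSETIFF ioctl.)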
>
> Absolutely.
>
> [Sriram] OK, let me do that.
>
> > Yes, the number of exits will be most useful. I will look into
> > adding the other statistics you mention.
> >
> > Sriram
>
> Pls note you'll need to switch to virtqueue_kick_prepare
> to detect exits: virtqueue_kick doesn't let you know
> whether there was an exit.
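>
> Roughly like this in the xmit path (untested sketch; tx_kicks is a
> hypothetical new per-queue counter, not something in this patch):
>
>         if (virtqueue_kick_prepare(sq->vq)) {
>                 /* Notifying the host is what triggers the exit. */
>                 virtqueue_notify(sq->vq);
>                 u64_stats_update_begin(&sq->stats.tx_syncp);
>                 sq->stats.tx_kicks++;
>                 u64_stats_update_end(&sq->stats.tx_syncp);
>         }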
>
> Also, it's best to make this a separate patch from the one
> adding per-queue stats.
>
> [Sriram] OK, I will cover only the per-queue statistics in
> this patch. Also, I will address the indentation/data
> structure name points that you mentioned in your earlier
> email and send a new revision for this patch.
>
> Sriram
Still plan to look into this?
Might be nice to have for 3.12.
> > -----Original Message-----
> > From: Michael S. Tsirkin [mailto:[email protected]]
> > Sent: Sunday, May 19, 2013 4:28 AM
> > To: Narasimhan, Sriram
> > Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
> > Subject: Re: [PATCH] virtio-net: Reporting traffic queue distribution statistics through ethtool
> >
> > On Thu, May 16, 2013 at 01:24:29PM -0700, Sriram Narasimhan wrote:
> > > This patch allows virtio-net driver to report traffic distribution
> > > to inbound/outbound queues through ethtool -S. The per_cpu
> > > virtnet_stats is split into receive and transmit stats and are
> > > maintained on a per receive_queue and send_queue basis.
> > > virtnet_stats() is modified to aggregate interface level statistics
> > > from per-queue statistics. Sample output below:
> > >
> >
> > Thanks for the patch. The idea itself looks OK to me.
> > Ben Hutchings already sent some comments
> > so I won't repeat them. Some more minor comments
> > and questions below.
> >
> > > NIC statistics:
> > > rxq0: rx_packets: 4357802
> > > rxq0: rx_bytes: 292642052
> > > txq0: tx_packets: 824540
> > > txq0: tx_bytes: 55256404
> > > rxq1: rx_packets: 0
> > > rxq1: rx_bytes: 0
> > > txq1: tx_packets: 1094268
> > > txq1: tx_bytes: 73328316
> > > rxq2: rx_packets: 0
> > > rxq2: rx_bytes: 0
> > > txq2: tx_packets: 1091466
> > > txq2: tx_bytes: 73140566
> > > rxq3: rx_packets: 0
> > > rxq3: rx_bytes: 0
> > > txq3: tx_packets: 1093043
> > > txq3: tx_bytes: 73246142
> >
> > Interesting. This example implies that all packets are coming in
> > through the same RX queue - is this right?
> > If yes, that's worth exploring - it could be a tun bug -
> > and it shows how this patch is useful.
> >
> > > Signed-off-by: Sriram Narasimhan <[email protected]>
> >
> > BTW, while you are looking at the stats, one other interesting
> > thing to add could be checking more types of stats: number of exits,
> > queue full errors, etc.
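> >
> > For "queue full", one option is a hypothetical per-queue counter
> > bumped where start_xmit() notices the ring filling up, e.g. (sketch):
> >
> >         if (sq->vq->num_free < 2 + MAX_SKB_FRAGS) {
> >                 netif_stop_subqueue(dev, qnum);
> >                 sq->stats.tx_queue_full++; /* hypothetical counter */
> >         }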
> >
> > > ---
> > > drivers/net/virtio_net.c | 157 +++++++++++++++++++++++++++++++++++++---------
> > > 1 files changed, 128 insertions(+), 29 deletions(-)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index 3c23fdc..3c58c52 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -41,15 +41,46 @@ module_param(gso, bool, 0444);
> > >
> > > #define VIRTNET_DRIVER_VERSION "1.0.0"
> > >
> > > -struct virtnet_stats {
> > > - struct u64_stats_sync tx_syncp;
> > > +struct virtnet_rx_stats {
> > > struct u64_stats_sync rx_syncp;
> > > - u64 tx_bytes;
> > > + u64 rx_packets;
> > > + u64 rx_bytes;
> > > +};
> > > +
> > > +struct virtnet_tx_stats {
> > > + struct u64_stats_sync tx_syncp;
> > > u64 tx_packets;
> > > + u64 tx_bytes;
> > > +};
> > >
> > > - u64 rx_bytes;
> > > - u64 rx_packets;
> >
> > I think maintaining the stats in a per-queue data structure like this is
> > fine. If # of CPUs == # of queues, which is typical, we use the same
> > amount of memory. And each queue is accessed either under a lock
> > or from the napi thread, so there are no races either.
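> >
> > (For context, this is the update side those syncp fields pair with -
> > roughly what the receive path does with the new per-queue struct; a
> > sketch, not a hunk from this patch:)
> >
> >         u64_stats_update_begin(&stats->rx_syncp);
> >         stats->rx_bytes += skb->len;
> >         stats->rx_packets++;
> >         u64_stats_update_end(&stats->rx_syncp);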
> >
> > > +struct virtnet_ethtool_stats {
> > > + char desc[ETH_GSTRING_LEN];
> > > + int type;
> > > + int size;
> > > + int offset;
> > > +};
> > > +
> > > +enum {VIRTNET_STATS_TX, VIRTNET_STATS_RX};
> > > +
> > > +#define VIRTNET_RX_STATS_INFO(_struct, _field) \
> > > + {#_field, VIRTNET_STATS_RX, FIELD_SIZEOF(_struct, _field), \
> > > + offsetof(_struct, _field)}
> > > +
> > > +#define VIRTNET_TX_STATS_INFO(_struct, _field) \
> > > + {#_field, VIRTNET_STATS_TX, FIELD_SIZEOF(_struct, _field), \
> > > + offsetof(_struct, _field)}
> > > +
> > > +static const struct virtnet_ethtool_stats virtnet_et_rx_stats[] = {
> > > + VIRTNET_RX_STATS_INFO(struct virtnet_rx_stats, rx_packets),
> > > + VIRTNET_RX_STATS_INFO(struct virtnet_rx_stats, rx_bytes)
> > > +};
> > > +#define VIRTNET_RX_STATS_NUM (ARRAY_SIZE(virtnet_et_rx_stats))
> > > +
> > > +static const struct virtnet_ethtool_stats virtnet_et_tx_stats[] = {
> > > + VIRTNET_TX_STATS_INFO(struct virtnet_tx_stats, tx_packets),
> > > + VIRTNET_TX_STATS_INFO(struct virtnet_tx_stats, tx_bytes)
> > > };
> > > +#define VIRTNET_TX_STATS_NUM (ARRAY_SIZE(virtnet_et_tx_stats))
> >
> > I'd prefer a full name: virtnet_ethtool_tx_stats, or
> > just virtnet_tx_stats.
> >
> > >
> > > /* Internal representation of a send virtqueue */
> > > struct send_queue {
> > > @@ -61,6 +92,9 @@ struct send_queue {
> > >
> > > /* Name of the send queue: output.$index */
> > > char name[40];
> > > +
> > > + /* Active send queue statistics */
> > > + struct virtnet_tx_stats stats;
> > > };
> > >
> > > /* Internal representation of a receive virtqueue */
> > > @@ -81,6 +115,9 @@ struct receive_queue {
> > >
> > > /* Name of this receive queue: input.$index */
> > > char name[40];
> > > +
> > > + /* Active receive queue statistics */
> > > + struct virtnet_rx_stats stats;
> > > };
> > >
> > > struct virtnet_info {
> > > @@ -109,9 +146,6 @@ struct virtnet_info {
> > > /* enable config space updates */
> > > bool config_enable;
> > >
> > > - /* Active statistics */
> > > - struct virtnet_stats __percpu *stats;
> > > -
> > > /* Work struct for refilling if we run low on memory. */
> > > struct delayed_work refill;
> > >
> > > @@ -330,7 +364,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
> > > {
> > > struct virtnet_info *vi = rq->vq->vdev->priv;
> > > struct net_device *dev = vi->dev;
> > > - struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
> > > + struct virtnet_rx_stats *stats = &rq->stats;
> > > struct sk_buff *skb;
> > > struct page *page;
> > > struct skb_vnet_hdr *hdr;
> > > @@ -650,8 +684,7 @@ static void free_old_xmit_skbs(struct send_queue *sq)
> > > {
> > > struct sk_buff *skb;
> > > unsigned int len;
> > > - struct virtnet_info *vi = sq->vq->vdev->priv;
> > > - struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
> > > + struct virtnet_tx_stats *stats = &sq->stats;
> > >
> > > while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
> > > pr_debug("Sent skb %p\n", skb);
> > > @@ -841,24 +874,25 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
> > > struct rtnl_link_stats64 *tot)
> > > {
> > > struct virtnet_info *vi = netdev_priv(dev);
> > > - int cpu;
> > > + int i;
> > > unsigned int start;
> > >
> > > - for_each_possible_cpu(cpu) {
> > > - struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
> > > + for (i = 0; i < vi->max_queue_pairs; i++) {
> > > + struct virtnet_tx_stats *tstats = &vi->sq[i].stats;
> > > + struct virtnet_rx_stats *rstats = &vi->rq[i].stats;
> > > u64 tpackets, tbytes, rpackets, rbytes;
> > >
> > > do {
> > > - start = u64_stats_fetch_begin_bh(&stats->tx_syncp);
> > > - tpackets = stats->tx_packets;
> > > - tbytes = stats->tx_bytes;
> > > - } while (u64_stats_fetch_retry_bh(&stats->tx_syncp, start));
> > > + start = u64_stats_fetch_begin_bh(&tstats->tx_syncp);
> > > + tpackets = tstats->tx_packets;
> > > + tbytes = tstats->tx_bytes;
> > > + } while (u64_stats_fetch_retry_bh(&tstats->tx_syncp, start));
> > >
> > > do {
> > > - start = u64_stats_fetch_begin_bh(&stats->rx_syncp);
> > > - rpackets = stats->rx_packets;
> > > - rbytes = stats->rx_bytes;
> > > - } while (u64_stats_fetch_retry_bh(&stats->rx_syncp, start));
> > > + start = u64_stats_fetch_begin_bh(&rstats->rx_syncp);
> > > + rpackets = rstats->rx_packets;
> > > + rbytes = rstats->rx_bytes;
> > > + } while (u64_stats_fetch_retry_bh(&rstats->rx_syncp, start));
> > >
> > > tot->rx_packets += rpackets;
> > > tot->tx_packets += tpackets;
> > > @@ -1177,12 +1211,83 @@ static void virtnet_get_channels(struct net_device *dev,
> > > channels->other_count = 0;
> > > }
> > >
> > > +static void virtnet_get_stat_strings(struct net_device *dev,
> > > + u32 stringset,
> > > + u8 *data)
> > > +{
> > > + struct virtnet_info *vi = netdev_priv(dev);
> > > + int i, j;
> > > +
> > > + switch (stringset) {
> > > + case ETH_SS_STATS:
> > > + for (i = 0; i < vi->max_queue_pairs; i++) {
> > > + for (j = 0; j < VIRTNET_RX_STATS_NUM; j++) {
> > > + sprintf(data, "rxq%d: %s", i,
> > > + virtnet_et_rx_stats[j].desc);
> > > + data += ETH_GSTRING_LEN;
> > > + }
> > > + for (j = 0; j < VIRTNET_TX_STATS_NUM; j++) {
> > > + sprintf(data, "txq%d: %s", i,
> > > + virtnet_et_tx_stats[j].desc);
> > > + data += ETH_GSTRING_LEN;
> > > + }
> > > + }
> > > + break;
> > > + }
> > > +}
> > > +
> > > +static int virtnet_get_sset_count(struct net_device *dev, int stringset)
> > > +{
> > > + struct virtnet_info *vi = netdev_priv(dev);
> > > + switch (stringset) {
> > > + case ETH_SS_STATS:
> > > + return vi->max_queue_pairs *
> > > + (VIRTNET_RX_STATS_NUM + VIRTNET_TX_STATS_NUM);
> > > + default:
> > > + return -EINVAL;
> > > + }
> > > +}
> > > +
> > > +static void virtnet_get_ethtool_stats(struct net_device *dev,
> > > + struct ethtool_stats *stats,
> > > + u64 *data)
> > > +{
> > > + struct virtnet_info *vi = netdev_priv(dev);
> > > + unsigned int i, base;
> > > + unsigned int start;
> > > +
> > > + for (i = 0, base = 0; i < vi->max_queue_pairs; i++) {
> > > + struct virtnet_tx_stats *tstats = &vi->sq[i].stats;
> > > + struct virtnet_rx_stats *rstats = &vi->rq[i].stats;
> > > +
> > > + do {
> > > + start = u64_stats_fetch_begin_bh(&rstats->rx_syncp);
> > > + data[base] = rstats->rx_packets;
> > > + data[base+1] = rstats->rx_bytes;
> >
> > nitpicking:
> > We normally have spaces around +, like this:
> > data[base + 1] = rstats->rx_bytes;
> >
> > > + } while (u64_stats_fetch_retry_bh(&rstats->rx_syncp, start));
> > > +
> > > + base += VIRTNET_RX_STATS_NUM;
> > > +
> > > + do {
> > > + start = u64_stats_fetch_begin_bh(&tstats->tx_syncp);
> > > + data[base] = tstats->tx_packets;
> > > + data[base+1] = tstats->tx_bytes;
> >
> >
> > nitpicking:
> > Here, something strange happened to indentation.
> >
> > > + } while (u64_stats_fetch_retry_bh(&tstats->tx_syncp, start));
> > > +
> > > + base += VIRTNET_TX_STATS_NUM;
> > > + }
> > > +}
> > > +
> > > +
> > > static const struct ethtool_ops virtnet_ethtool_ops = {
> > > .get_drvinfo = virtnet_get_drvinfo,
> > > .get_link = ethtool_op_get_link,
> > > .get_ringparam = virtnet_get_ringparam,
> > > .set_channels = virtnet_set_channels,
> > > .get_channels = virtnet_get_channels,
> > > + .get_strings = virtnet_get_stat_strings,
> > > + .get_sset_count = virtnet_get_sset_count,
> > > + .get_ethtool_stats = virtnet_get_ethtool_stats,
> > > };
> > >
> > > #define MIN_MTU 68
> > > @@ -1531,14 +1636,11 @@ static int virtnet_probe(struct virtio_device *vdev)
> > > vi->dev = dev;
> > > vi->vdev = vdev;
> > > vdev->priv = vi;
> > > - vi->stats = alloc_percpu(struct virtnet_stats);
> > > err = -ENOMEM;
> > > - if (vi->stats == NULL)
> > > - goto free;
> > >
> > > vi->vq_index = alloc_percpu(int);
> > > if (vi->vq_index == NULL)
> > > - goto free_stats;
> > > + goto free;
> > >
> > > mutex_init(&vi->config_lock);
> > > vi->config_enable = true;
> > > @@ -1616,8 +1718,6 @@ free_vqs:
> > > virtnet_del_vqs(vi);
> > > free_index:
> > > free_percpu(vi->vq_index);
> > > -free_stats:
> > > - free_percpu(vi->stats);
> > > free:
> > > free_netdev(dev);
> > > return err;
> > > @@ -1653,7 +1753,6 @@ static void virtnet_remove(struct virtio_device *vdev)
> > > flush_work(&vi->config_work);
> > >
> > > free_percpu(vi->vq_index);
> > > - free_percpu(vi->stats);
> > > free_netdev(vi->dev);
> > > }
> >
> > Thanks!
> >
> > > --
> > > 1.7.1