From: Julio Faracco <[email protected]>
To enable dev_watchdog, virtio_net must define a tx_timeout handler
(.ndo_tx_timeout). This is only a skeleton that emits a warning message
identifying the specific device queue on which the event occurred. The
function also increments the tx_timeouts statistic and counts the event
as an error (one error per queue), reporting it.
Signed-off-by: Julio Faracco <[email protected]>
Signed-off-by: Daiane Mendes <[email protected]>
Cc: Jason Wang <[email protected]>
---
drivers/net/virtio_net.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 27f9b212c9f5..4b703b4b9441 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2585,6 +2585,29 @@ static int virtnet_set_features(struct net_device *dev,
return 0;
}
+static void virtnet_tx_timeout(struct net_device *dev)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ u32 i;
+
+ /* find the stopped queue the same way dev_watchdog() does */
+ for (i = 0; i < vi->curr_queue_pairs; i++) {
+ struct send_queue *sq = &vi->sq[i];
+
+ if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
+ continue;
+
+ u64_stats_update_begin(&sq->stats.syncp);
+ sq->stats.tx_timeouts++;
+ u64_stats_update_end(&sq->stats.syncp);
+
+ netdev_warn(dev, "TX timeout on send queue: %d, sq: %s, vq: %d, name: %s\n",
+ i, sq->name, sq->vq->index, sq->vq->name);
+
+ dev->stats.tx_errors++;
+ }
+}
+
static const struct net_device_ops virtnet_netdev = {
.ndo_open = virtnet_open,
.ndo_stop = virtnet_close,
@@ -2600,6 +2623,7 @@ static const struct net_device_ops virtnet_netdev = {
.ndo_features_check = passthru_features_check,
.ndo_get_phys_port_name = virtnet_get_phys_port_name,
.ndo_set_features = virtnet_set_features,
+ .ndo_tx_timeout = virtnet_tx_timeout,
};
static void virtnet_config_changed_work(struct work_struct *work)
@@ -3018,6 +3042,9 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->netdev_ops = &virtnet_netdev;
dev->features = NETIF_F_HIGHDMA;
+ /* Set up dev_watchdog cycle. */
+ dev->watchdog_timeo = 5 * HZ;
+
dev->ethtool_ops = &virtnet_ethtool_ops;
SET_NETDEV_DEV(dev, &vdev->dev);
--
2.21.0
On Sun, Oct 06, 2019 at 03:45:15PM -0300, [email protected] wrote:
> From: Julio Faracco <[email protected]>
>
> To enable dev_watchdog, virtio_net should have a tx_timeout defined
> (.ndo_tx_timeout). This is only a skeleton to throw a warn message. It
> notifies the event in some specific queue of device. This function
> still counts tx_timeout statistic and consider this event as an error
> (one error per queue), reporting it.
>
> Signed-off-by: Julio Faracco <[email protected]>
> Signed-off-by: Daiane Mendes <[email protected]>
> Cc: Jason Wang <[email protected]>
> ---
> drivers/net/virtio_net.c | 27 +++++++++++++++++++++++++++
> 1 file changed, 27 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 27f9b212c9f5..4b703b4b9441 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2585,6 +2585,29 @@ static int virtnet_set_features(struct net_device *dev,
> return 0;
> }
>
> +static void virtnet_tx_timeout(struct net_device *dev)
> +{
> + struct virtnet_info *vi = netdev_priv(dev);
> + u32 i;
> +
> + /* find the stopped queue the same way dev_watchdog() does */
not really - the watchdog actually looks at trans_start.
> + for (i = 0; i < vi->curr_queue_pairs; i++) {
> + struct send_queue *sq = &vi->sq[i];
> +
> + if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
> + continue;
> +
> + u64_stats_update_begin(&sq->stats.syncp);
> + sq->stats.tx_timeouts++;
> + u64_stats_update_end(&sq->stats.syncp);
> +
> + netdev_warn(dev, "TX timeout on send queue: %d, sq: %s, vq: %d, name: %s\n",
> + i, sq->name, sq->vq->index, sq->vq->name);
this seems to assume any running queue is timed out.
doesn't look right.
also - there's already a warning in this case in the core. do we need another one?
> + dev->stats.tx_errors++;
> + }
> +}
> +
> static const struct net_device_ops virtnet_netdev = {
> .ndo_open = virtnet_open,
> .ndo_stop = virtnet_close,
> @@ -2600,6 +2623,7 @@ static const struct net_device_ops virtnet_netdev = {
> .ndo_features_check = passthru_features_check,
> .ndo_get_phys_port_name = virtnet_get_phys_port_name,
> .ndo_set_features = virtnet_set_features,
> + .ndo_tx_timeout = virtnet_tx_timeout,
> };
>
> static void virtnet_config_changed_work(struct work_struct *work)
> @@ -3018,6 +3042,9 @@ static int virtnet_probe(struct virtio_device *vdev)
> dev->netdev_ops = &virtnet_netdev;
> dev->features = NETIF_F_HIGHDMA;
>
> + /* Set up dev_watchdog cycle. */
> + dev->watchdog_timeo = 5 * HZ;
> +
Seems to be still broken with napi_tx = false.
> dev->ethtool_ops = &virtnet_ethtool_ops;
> SET_NETDEV_DEV(dev, &vdev->dev);
>
> --
> 2.21.0
Em seg, 7 de out de 2019 às 04:51, Michael S. Tsirkin <[email protected]> escreveu:
>
> On Sun, Oct 06, 2019 at 03:45:15PM -0300, [email protected] wrote:
> > From: Julio Faracco <[email protected]>
> >
> > To enable dev_watchdog, virtio_net should have a tx_timeout defined
> > (.ndo_tx_timeout). This is only a skeleton to throw a warn message. It
> > notifies the event in some specific queue of device. This function
> > still counts tx_timeout statistic and consider this event as an error
> > (one error per queue), reporting it.
> >
> > Signed-off-by: Julio Faracco <[email protected]>
> > Signed-off-by: Daiane Mendes <[email protected]>
> > Cc: Jason Wang <[email protected]>
> > ---
> > drivers/net/virtio_net.c | 27 +++++++++++++++++++++++++++
> > 1 file changed, 27 insertions(+)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 27f9b212c9f5..4b703b4b9441 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -2585,6 +2585,29 @@ static int virtnet_set_features(struct net_device *dev,
> > return 0;
> > }
> >
> > +static void virtnet_tx_timeout(struct net_device *dev)
> > +{
> > + struct virtnet_info *vi = netdev_priv(dev);
> > + u32 i;
> > +
> > + /* find the stopped queue the same way dev_watchdog() does */
>
> not really - the watchdog actually looks at trans_start.
The comments are wrong. It is the negative logic from dev_watchdog.
The watchdog requires the queue to be stopped AND the timeout to have expired.
If the queue is not stopped, that queue has not reached a timeout event.
So, continue... Do not report a timeout.
>
> > + for (i = 0; i < vi->curr_queue_pairs; i++) {
> > + struct send_queue *sq = &vi->sq[i];
> > +
> > + if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
> > + continue;
> > +
> > + u64_stats_update_begin(&sq->stats.syncp);
> > + sq->stats.tx_timeouts++;
> > + u64_stats_update_end(&sq->stats.syncp);
> > +
> > + netdev_warn(dev, "TX timeout on send queue: %d, sq: %s, vq: %d, name: %s\n",
> > + i, sq->name, sq->vq->index, sq->vq->name);
>
> this seems to assume any running queue is timed out.
> doesn't look right.
>
> also - there's already a warning in this case in the core. do we need another one?
Here, it can be a debug message if the idea is to enhance debugging information.
Other enhancements could be made to enable or disable debug messages,
using ethtool methods for instance.
>
> > + dev->stats.tx_errors++;
>
>
>
> > + }
> > +}
> > +
> > static const struct net_device_ops virtnet_netdev = {
> > .ndo_open = virtnet_open,
> > .ndo_stop = virtnet_close,
> > @@ -2600,6 +2623,7 @@ static const struct net_device_ops virtnet_netdev = {
> > .ndo_features_check = passthru_features_check,
> > .ndo_get_phys_port_name = virtnet_get_phys_port_name,
> > .ndo_set_features = virtnet_set_features,
> > + .ndo_tx_timeout = virtnet_tx_timeout,
> > };
> >
> > static void virtnet_config_changed_work(struct work_struct *work)
> > @@ -3018,6 +3042,9 @@ static int virtnet_probe(struct virtio_device *vdev)
> > dev->netdev_ops = &virtnet_netdev;
> > dev->features = NETIF_F_HIGHDMA;
> >
> > + /* Set up dev_watchdog cycle. */
> > + dev->watchdog_timeo = 5 * HZ;
> > +
>
> Seems to be still broken with napi_tx = false.
>
> > dev->ethtool_ops = &virtnet_ethtool_ops;
> > SET_NETDEV_DEV(dev, &vdev->dev);
> >
> > --
> > 2.21.0
Em seg, 7 de out de 2019 às 11:03, Julio Faracco <[email protected]> escreveu:
>
> Em seg, 7 de out de 2019 às 04:51, Michael S. Tsirkin <[email protected]> escreveu:
> >
> > On Sun, Oct 06, 2019 at 03:45:15PM -0300, [email protected] wrote:
> > > From: Julio Faracco <[email protected]>
> > >
> > > To enable dev_watchdog, virtio_net should have a tx_timeout defined
> > > (.ndo_tx_timeout). This is only a skeleton to throw a warn message. It
> > > notifies the event in some specific queue of device. This function
> > > still counts tx_timeout statistic and consider this event as an error
> > > (one error per queue), reporting it.
> > >
> > > Signed-off-by: Julio Faracco <[email protected]>
> > > Signed-off-by: Daiane Mendes <[email protected]>
> > > Cc: Jason Wang <[email protected]>
> > > ---
> > > drivers/net/virtio_net.c | 27 +++++++++++++++++++++++++++
> > > 1 file changed, 27 insertions(+)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index 27f9b212c9f5..4b703b4b9441 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -2585,6 +2585,29 @@ static int virtnet_set_features(struct net_device *dev,
> > > return 0;
> > > }
> > >
> > > +static void virtnet_tx_timeout(struct net_device *dev)
> > > +{
> > > + struct virtnet_info *vi = netdev_priv(dev);
> > > + u32 i;
> > > +
> > > + /* find the stopped queue the same way dev_watchdog() does */
> >
> > not really - the watchdog actually looks at trans_start.
>
> The comments are wrong. It is the negative logic from dev_watchdog.
> Watchdog requires queue stopped AND timeout.
>
> If the queue is not stopped, this queue does not reached a timeout event.
> So, continue... Do not report a timeout.
>
> >
> > > + for (i = 0; i < vi->curr_queue_pairs; i++) {
> > > + struct send_queue *sq = &vi->sq[i];
> > > +
> > > + if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
> > > + continue;
> > > +
> > > + u64_stats_update_begin(&sq->stats.syncp);
> > > + sq->stats.tx_timeouts++;
> > > + u64_stats_update_end(&sq->stats.syncp);
> > > +
> > > + netdev_warn(dev, "TX timeout on send queue: %d, sq: %s, vq: %d, name: %s\n",
> > > + i, sq->name, sq->vq->index, sq->vq->name);
> >
> > this seems to assume any running queue is timed out.
> > doesn't look right.
> >
> > also - there's already a warning in this case in the core. do we need another one?
>
> Here, it can be a debug message if the idea is enhance debugging information.
> Other enhancements can be done to enable or disable debug messages.
> Using ethtool methods for instance.
Observation...
Another important point: the kernel will throw WARN_ONCE only if
ndo_tx_timeout() is implemented.
Even if we are adding an extra/unnecessary netdev_warn(), we need this
function to enable dev_watchdog().
>
> >
> > > + dev->stats.tx_errors++;
> >
> >
> >
> > > + }
> > > +}
> > > +
> > > static const struct net_device_ops virtnet_netdev = {
> > > .ndo_open = virtnet_open,
> > > .ndo_stop = virtnet_close,
> > > @@ -2600,6 +2623,7 @@ static const struct net_device_ops virtnet_netdev = {
> > > .ndo_features_check = passthru_features_check,
> > > .ndo_get_phys_port_name = virtnet_get_phys_port_name,
> > > .ndo_set_features = virtnet_set_features,
> > > + .ndo_tx_timeout = virtnet_tx_timeout,
> > > };
> > >
> > > static void virtnet_config_changed_work(struct work_struct *work)
> > > @@ -3018,6 +3042,9 @@ static int virtnet_probe(struct virtio_device *vdev)
> > > dev->netdev_ops = &virtnet_netdev;
> > > dev->features = NETIF_F_HIGHDMA;
> > >
> > > + /* Set up dev_watchdog cycle. */
> > > + dev->watchdog_timeo = 5 * HZ;
> > > +
> >
> > Seems to be still broken with napi_tx = false.
> >
> > > dev->ethtool_ops = &virtnet_ethtool_ops;
> > > SET_NETDEV_DEV(dev, &vdev->dev);
> > >
> > > --
> > > 2.21.0
On 2019/10/7 下午3:51, Michael S. Tsirkin wrote:
> On Sun, Oct 06, 2019 at 03:45:15PM -0300, [email protected] wrote:
>> From: Julio Faracco <[email protected]>
>>
>> To enable dev_watchdog, virtio_net should have a tx_timeout defined
>> (.ndo_tx_timeout). This is only a skeleton to throw a warn message. It
>> notifies the event in some specific queue of device. This function
>> still counts tx_timeout statistic and consider this event as an error
>> (one error per queue), reporting it.
>>
>> Signed-off-by: Julio Faracco <[email protected]>
>> Signed-off-by: Daiane Mendes <[email protected]>
>> Cc: Jason Wang <[email protected]>
>> ---
>> drivers/net/virtio_net.c | 27 +++++++++++++++++++++++++++
>> 1 file changed, 27 insertions(+)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index 27f9b212c9f5..4b703b4b9441 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -2585,6 +2585,29 @@ static int virtnet_set_features(struct net_device *dev,
>> return 0;
>> }
>>
>> +static void virtnet_tx_timeout(struct net_device *dev)
>> +{
>> + struct virtnet_info *vi = netdev_priv(dev);
>> + u32 i;
>> +
>> + /* find the stopped queue the same way dev_watchdog() does */
> not really - the watchdog actually looks at trans_start.
>
>> + for (i = 0; i < vi->curr_queue_pairs; i++) {
>> + struct send_queue *sq = &vi->sq[i];
>> +
>> + if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
>> + continue;
>> +
>> + u64_stats_update_begin(&sq->stats.syncp);
>> + sq->stats.tx_timeouts++;
>> + u64_stats_update_end(&sq->stats.syncp);
>> +
>> + netdev_warn(dev, "TX timeout on send queue: %d, sq: %s, vq: %d, name: %s\n",
>> + i, sq->name, sq->vq->index, sq->vq->name);
> this seems to assume any running queue is timed out.
> doesn't look right.
>
> also - there's already a warning in this case in the core. do we need another one?
>
>> + dev->stats.tx_errors++;
>
>
>> + }
>> +}
>> +
>> static const struct net_device_ops virtnet_netdev = {
>> .ndo_open = virtnet_open,
>> .ndo_stop = virtnet_close,
>> @@ -2600,6 +2623,7 @@ static const struct net_device_ops virtnet_netdev = {
>> .ndo_features_check = passthru_features_check,
>> .ndo_get_phys_port_name = virtnet_get_phys_port_name,
>> .ndo_set_features = virtnet_set_features,
>> + .ndo_tx_timeout = virtnet_tx_timeout,
>> };
>>
>> static void virtnet_config_changed_work(struct work_struct *work)
>> @@ -3018,6 +3042,9 @@ static int virtnet_probe(struct virtio_device *vdev)
>> dev->netdev_ops = &virtnet_netdev;
>> dev->features = NETIF_F_HIGHDMA;
>>
>> + /* Set up dev_watchdog cycle. */
>> + dev->watchdog_timeo = 5 * HZ;
>> +
> Seems to be still broken with napi_tx = false.
With napi_tx = false, we still get a tx interrupt after we stop the queue,
which looks fine, I believe?
Thanks
>
>> dev->ethtool_ops = &virtnet_ethtool_ops;
>> SET_NETDEV_DEV(dev, &vdev->dev);
>>
>> --
>> 2.21.0
On 2019/10/7 上午2:45, [email protected] wrote:
> From: Julio Faracco <[email protected]>
>
> To enable dev_watchdog, virtio_net should have a tx_timeout defined
> (.ndo_tx_timeout). This is only a skeleton to throw a warn message. It
> notifies the event in some specific queue of device. This function
> still counts tx_timeout statistic and consider this event as an error
> (one error per queue), reporting it.
>
> Signed-off-by: Julio Faracco <[email protected]>
> Signed-off-by: Daiane Mendes <[email protected]>
> Cc: Jason Wang <[email protected]>
> ---
> drivers/net/virtio_net.c | 27 +++++++++++++++++++++++++++
> 1 file changed, 27 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 27f9b212c9f5..4b703b4b9441 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2585,6 +2585,29 @@ static int virtnet_set_features(struct net_device *dev,
> return 0;
> }
>
> +static void virtnet_tx_timeout(struct net_device *dev)
> +{
> + struct virtnet_info *vi = netdev_priv(dev);
> + u32 i;
> +
> + /* find the stopped queue the same way dev_watchdog() does */
> + for (i = 0; i < vi->curr_queue_pairs; i++) {
> + struct send_queue *sq = &vi->sq[i];
> +
> + if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
> + continue;
> +
> + u64_stats_update_begin(&sq->stats.syncp);
> + sq->stats.tx_timeouts++;
> + u64_stats_update_end(&sq->stats.syncp);
> +
> + netdev_warn(dev, "TX timeout on send queue: %d, sq: %s, vq: %d, name: %s\n",
> + i, sq->name, sq->vq->index, sq->vq->name);
If this is just a warning for a specific queue, maybe it's better to do it
in dev_watchdog()?
Or we may want more information, like the avail and used indices, etc.
And usually there will be a reset; is there any reason for not doing this?
Thanks
> +
> + dev->stats.tx_errors++;
> + }
> +}
> +
> static const struct net_device_ops virtnet_netdev = {
> .ndo_open = virtnet_open,
> .ndo_stop = virtnet_close,
> @@ -2600,6 +2623,7 @@ static const struct net_device_ops virtnet_netdev = {
> .ndo_features_check = passthru_features_check,
> .ndo_get_phys_port_name = virtnet_get_phys_port_name,
> .ndo_set_features = virtnet_set_features,
> + .ndo_tx_timeout = virtnet_tx_timeout,
> };
>
> static void virtnet_config_changed_work(struct work_struct *work)
> @@ -3018,6 +3042,9 @@ static int virtnet_probe(struct virtio_device *vdev)
> dev->netdev_ops = &virtnet_netdev;
> dev->features = NETIF_F_HIGHDMA;
>
> + /* Set up dev_watchdog cycle. */
> + dev->watchdog_timeo = 5 * HZ;
> +
> dev->ethtool_ops = &virtnet_ethtool_ops;
> SET_NETDEV_DEV(dev, &vdev->dev);
>