2023-02-13 22:48:42

by Si-Wei Liu

[permalink] [raw]
Subject: [PATCH v2] vdpa/mlx5: should not activate virtq object when suspended

Otherwise the virtqueue object to instate could point to invalid address
that was unmapped from the MTT:

mlx5_core 0000:41:04.2: mlx5_cmd_out_err:782:(pid 8321):
CREATE_GENERAL_OBJECT(0xa00) op_mod(0xd) failed, status
bad parameter(0x3), syndrome (0x5fa1c), err(-22)

Fixes: cae15c2ed8e6 ("vdpa/mlx5: Implement susupend virtqueue callback")
Cc: Eli Cohen <[email protected]>
Signed-off-by: Si-Wei Liu <[email protected]>

---
v2: removed the change for improving warning message
---
drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 3a6dbbc6..d7e8ca0 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -165,6 +165,7 @@ struct mlx5_vdpa_net {
u32 cur_num_vqs;
u32 rqt_size;
bool nb_registered;
+ bool suspended;
struct notifier_block nb;
struct vdpa_callback config_cb;
struct mlx5_vdpa_wq_ent cvq_ent;
@@ -2411,7 +2412,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
if (err)
goto err_mr;

- if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
+ if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || ndev->suspended)
goto err_mr;

restore_channels_info(ndev);
@@ -2580,6 +2581,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
mlx5_vdpa_destroy_mr(&ndev->mvdev);
ndev->mvdev.status = 0;
ndev->cur_num_vqs = 0;
+ ndev->suspended = false;
ndev->mvdev.cvq.received_desc = 0;
ndev->mvdev.cvq.completed_desc = 0;
memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
@@ -2815,6 +2817,8 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
struct mlx5_vdpa_virtqueue *mvq;
int i;

+ mlx5_vdpa_info(mvdev, "suspending device\n");
+
down_write(&ndev->reslock);
ndev->nb_registered = false;
mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
@@ -2824,6 +2828,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
suspend_vq(ndev, mvq);
}
mlx5_vdpa_cvq_suspend(mvdev);
+ ndev->suspended = true;
up_write(&ndev->reslock);
return 0;
}
--
1.8.3.1



2023-02-14 06:21:10

by Jason Wang

[permalink] [raw]
Subject: Re: [PATCH v2] vdpa/mlx5: should not activate virtq object when suspended

On Tue, Feb 14, 2023 at 6:48 AM Si-Wei Liu <[email protected]> wrote:
>
> Otherwise the virtqueue object to instate could point to invalid address
> that was unmapped from the MTT:
>
> mlx5_core 0000:41:04.2: mlx5_cmd_out_err:782:(pid 8321):
> CREATE_GENERAL_OBJECT(0xa00) op_mod(0xd) failed, status
> bad parameter(0x3), syndrome (0x5fa1c), err(-22)
>
> Fixes: cae15c2ed8e6 ("vdpa/mlx5: Implement susupend virtqueue callback")
> Cc: Eli Cohen <[email protected]>
> Signed-off-by: Si-Wei Liu <[email protected]>
>
> ---
> v2: removed the change for improving warning message
> ---
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++++++-
> 1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 3a6dbbc6..d7e8ca0 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -165,6 +165,7 @@ struct mlx5_vdpa_net {
> u32 cur_num_vqs;
> u32 rqt_size;
> bool nb_registered;
> + bool suspended;

Any reason we make this net specific? (or is it better to use
mlx5_vdpa_dev structure?)


> struct notifier_block nb;
> struct vdpa_callback config_cb;
> struct mlx5_vdpa_wq_ent cvq_ent;
> @@ -2411,7 +2412,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
> if (err)
> goto err_mr;
>
> - if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || ndev->suspended)
> goto err_mr;
>
> restore_channels_info(ndev);
> @@ -2580,6 +2581,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
> mlx5_vdpa_destroy_mr(&ndev->mvdev);
> ndev->mvdev.status = 0;
> ndev->cur_num_vqs = 0;
> + ndev->suspended = false;
> ndev->mvdev.cvq.received_desc = 0;
> ndev->mvdev.cvq.completed_desc = 0;
> memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
> @@ -2815,6 +2817,8 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
> struct mlx5_vdpa_virtqueue *mvq;
> int i;
>
> + mlx5_vdpa_info(mvdev, "suspending device\n");
> +

Is this better to show the info after the device has been suspended?

Thanks

> down_write(&ndev->reslock);
> ndev->nb_registered = false;
> mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
> @@ -2824,6 +2828,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
> suspend_vq(ndev, mvq);
> }
> mlx5_vdpa_cvq_suspend(mvdev);
> + ndev->suspended = true;
> up_write(&ndev->reslock);
> return 0;
> }
> --
> 1.8.3.1
>


2023-02-14 13:41:45

by Eli Cohen

[permalink] [raw]
Subject: Re: [PATCH v2] vdpa/mlx5: should not activate virtq object when suspended


On 14/02/2023 8:20, Jason Wang wrote:
> On Tue, Feb 14, 2023 at 6:48 AM Si-Wei Liu <[email protected]> wrote:
>> Otherwise the virtqueue object to instate could point to invalid address
>> that was unmapped from the MTT:
>>
>> mlx5_core 0000:41:04.2: mlx5_cmd_out_err:782:(pid 8321):
>> CREATE_GENERAL_OBJECT(0xa00) op_mod(0xd) failed, status
>> bad parameter(0x3), syndrome (0x5fa1c), err(-22)
>>
>> Fixes: cae15c2ed8e6 ("vdpa/mlx5: Implement susupend virtqueue callback")
>> Cc: Eli Cohen <[email protected]>
>> Signed-off-by: Si-Wei Liu <[email protected]>
>>
>> ---
>> v2: removed the change for improving warning message
>> ---
>> drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++++++-
>> 1 file changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 3a6dbbc6..d7e8ca0 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -165,6 +165,7 @@ struct mlx5_vdpa_net {
>> u32 cur_num_vqs;
>> u32 rqt_size;
>> bool nb_registered;
>> + bool suspended;
> Any reason we make this net specific? (or is it better to use
> mlx5_vdpa_dev structure?)
>
>
>> struct notifier_block nb;
>> struct vdpa_callback config_cb;
>> struct mlx5_vdpa_wq_ent cvq_ent;
>> @@ -2411,7 +2412,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
>> if (err)
>> goto err_mr;
>>
>> - if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
>> + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || ndev->suspended)
>> goto err_mr;
>>
>> restore_channels_info(ndev);
>> @@ -2580,6 +2581,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>> mlx5_vdpa_destroy_mr(&ndev->mvdev);
>> ndev->mvdev.status = 0;
>> ndev->cur_num_vqs = 0;
>> + ndev->suspended = false;
>> ndev->mvdev.cvq.received_desc = 0;
>> ndev->mvdev.cvq.completed_desc = 0;
>> memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
>> @@ -2815,6 +2817,8 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
>> struct mlx5_vdpa_virtqueue *mvq;
>> int i;
>>
>> + mlx5_vdpa_info(mvdev, "suspending device\n");
>> +
> Is this better to show the info after the device has been suspended?

Besides this comment of Jason,

Reviewed-by: Eli Cohen <[email protected]>

>
> Thanks
>
>> down_write(&ndev->reslock);
>> ndev->nb_registered = false;
>> mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
>> @@ -2824,6 +2828,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
>> suspend_vq(ndev, mvq);
>> }
>> mlx5_vdpa_cvq_suspend(mvdev);
>> + ndev->suspended = true;
>> up_write(&ndev->reslock);
>> return 0;
>> }
>> --
>> 1.8.3.1
>>

2023-02-15 01:20:52

by Si-Wei Liu

[permalink] [raw]
Subject: Re: [PATCH v2] vdpa/mlx5: should not activate virtq object when suspended



On 2/13/2023 10:20 PM, Jason Wang wrote:
> On Tue, Feb 14, 2023 at 6:48 AM Si-Wei Liu <[email protected]> wrote:
>> Otherwise the virtqueue object to instate could point to invalid address
>> that was unmapped from the MTT:
>>
>> mlx5_core 0000:41:04.2: mlx5_cmd_out_err:782:(pid 8321):
>> CREATE_GENERAL_OBJECT(0xa00) op_mod(0xd) failed, status
>> bad parameter(0x3), syndrome (0x5fa1c), err(-22)
>>
>> Fixes: cae15c2ed8e6 ("vdpa/mlx5: Implement susupend virtqueue callback")
>> Cc: Eli Cohen <[email protected]>
>> Signed-off-by: Si-Wei Liu <[email protected]>
>>
>> ---
>> v2: removed the change for improving warning message
>> ---
>> drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 ++++++-
>> 1 file changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 3a6dbbc6..d7e8ca0 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -165,6 +165,7 @@ struct mlx5_vdpa_net {
>> u32 cur_num_vqs;
>> u32 rqt_size;
>> bool nb_registered;
>> + bool suspended;
> Any reason we make this net specific? (or is it better to use
> mlx5_vdpa_dev structure?)
Yep, I can make this change, suspend should not be net specific.

>
>
>> struct notifier_block nb;
>> struct vdpa_callback config_cb;
>> struct mlx5_vdpa_wq_ent cvq_ent;
>> @@ -2411,7 +2412,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
>> if (err)
>> goto err_mr;
>>
>> - if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
>> + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || ndev->suspended)
>> goto err_mr;
>>
>> restore_channels_info(ndev);
>> @@ -2580,6 +2581,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
>> mlx5_vdpa_destroy_mr(&ndev->mvdev);
>> ndev->mvdev.status = 0;
>> ndev->cur_num_vqs = 0;
>> + ndev->suspended = false;
>> ndev->mvdev.cvq.received_desc = 0;
>> ndev->mvdev.cvq.completed_desc = 0;
>> memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
>> @@ -2815,6 +2817,8 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
>> struct mlx5_vdpa_virtqueue *mvq;
>> int i;
>>
>> + mlx5_vdpa_info(mvdev, "suspending device\n");
>> +
> Is this better to show the info after the device has been suspended?
I'm not sure, the current convention of this driver (for e.g.
mlx5_vdpa_reset) is to log info at the function entry to inform some
action is about to happen, and log a warning if running into any
failure; otherwise if no subsequent warning but other logs following the
info notice, it means the action has been successfully done. I thought
at least there's some value to log at the start of the function just in
case it's getting stuck in the middle.

Thanks,
-Siwei

>
> Thanks
>
>> down_write(&ndev->reslock);
>> ndev->nb_registered = false;
>> mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
>> @@ -2824,6 +2828,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
>> suspend_vq(ndev, mvq);
>> }
>> mlx5_vdpa_cvq_suspend(mvdev);
>> + ndev->suspended = true;
>> up_write(&ndev->reslock);
>> return 0;
>> }
>> --
>> 1.8.3.1
>>