2021-10-30 00:59:04

by Dexuan Cui

[permalink] [raw]
Subject: [PATCH net-next 4/4] net: mana: Support hibernation and kexec

Implement the suspend/resume/shutdown callbacks for hibernation/kexec.

Add mana_gd_setup() and mana_gd_cleanup() for some common code, and
use them in the mand_gd_* callbacks.

Reuse mana_probe/remove() for the hibernation path.

Signed-off-by: Dexuan Cui <[email protected]>
---
.../net/ethernet/microsoft/mana/gdma_main.c | 140 +++++++++++++-----
drivers/net/ethernet/microsoft/mana/mana.h | 4 +-
drivers/net/ethernet/microsoft/mana/mana_en.c | 72 +++++++--
3 files changed, 164 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 599dfd5e6090..c96ac81212f7 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1258,6 +1258,52 @@ static void mana_gd_remove_irqs(struct pci_dev *pdev)
gc->irq_contexts = NULL;
}

+static int mana_gd_setup(struct pci_dev *pdev)
+{
+ struct gdma_context *gc = pci_get_drvdata(pdev);
+ int err;
+
+ mana_gd_init_registers(pdev);
+ mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
+
+ err = mana_gd_setup_irqs(pdev);
+ if (err)
+ return err;
+
+ err = mana_hwc_create_channel(gc);
+ if (err)
+ goto remove_irq;
+
+ err = mana_gd_verify_vf_version(pdev);
+ if (err)
+ goto destroy_hwc;
+
+ err = mana_gd_query_max_resources(pdev);
+ if (err)
+ goto destroy_hwc;
+
+ err = mana_gd_detect_devices(pdev);
+ if (err)
+ goto destroy_hwc;
+
+ return 0;
+
+destroy_hwc:
+ mana_hwc_destroy_channel(gc);
+remove_irq:
+ mana_gd_remove_irqs(pdev);
+ return err;
+}
+
+static void mana_gd_cleanup(struct pci_dev *pdev)
+{
+ struct gdma_context *gc = pci_get_drvdata(pdev);
+
+ mana_hwc_destroy_channel(gc);
+
+ mana_gd_remove_irqs(pdev);
+}
+
static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct gdma_context *gc;
@@ -1287,6 +1333,9 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (!gc)
goto release_region;

+ mutex_init(&gc->eq_test_event_mutex);
+ pci_set_drvdata(pdev, gc);
+
bar0_va = pci_iomap(pdev, bar, 0);
if (!bar0_va)
goto free_gc;
@@ -1294,47 +1343,23 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
gc->bar0_va = bar0_va;
gc->dev = &pdev->dev;

- pci_set_drvdata(pdev, gc);
-
- mana_gd_init_registers(pdev);

- mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
-
- err = mana_gd_setup_irqs(pdev);
+ err = mana_gd_setup(pdev);
if (err)
goto unmap_bar;

- mutex_init(&gc->eq_test_event_mutex);
-
- err = mana_hwc_create_channel(gc);
+ err = mana_probe(&gc->mana, false);
if (err)
- goto remove_irq;
-
- err = mana_gd_verify_vf_version(pdev);
- if (err)
- goto remove_irq;
-
- err = mana_gd_query_max_resources(pdev);
- if (err)
- goto remove_irq;
-
- err = mana_gd_detect_devices(pdev);
- if (err)
- goto remove_irq;
-
- err = mana_probe(&gc->mana);
- if (err)
- goto clean_up_gdma;
+ goto cleanup_gd;

return 0;

-clean_up_gdma:
- mana_hwc_destroy_channel(gc);
-remove_irq:
- mana_gd_remove_irqs(pdev);
+cleanup_gd:
+ mana_gd_cleanup(pdev);
unmap_bar:
pci_iounmap(pdev, bar0_va);
free_gc:
+ pci_set_drvdata(pdev, NULL);
vfree(gc);
release_region:
pci_release_regions(pdev);
@@ -1349,11 +1374,9 @@ static void mana_gd_remove(struct pci_dev *pdev)
{
struct gdma_context *gc = pci_get_drvdata(pdev);

- mana_remove(&gc->mana);
+ mana_remove(&gc->mana, false);

- mana_hwc_destroy_channel(gc);
-
- mana_gd_remove_irqs(pdev);
+ mana_gd_cleanup(pdev);

pci_iounmap(pdev, gc->bar0_va);

@@ -1364,6 +1387,52 @@ static void mana_gd_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
}

+/* The 'state' parameter is not used. */
+static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct gdma_context *gc = pci_get_drvdata(pdev);
+
+ mana_remove(&gc->mana, true);
+
+ mana_gd_cleanup(pdev);
+
+ return 0;
+}
+
+/* In case the NIC hardware stops working, the suspend and resume callbacks will
+ * fail -- if this happens, it's safer to just report an error than try to undo
+ * what has been done.
+ */
+static int mana_gd_resume(struct pci_dev *pdev)
+{
+ struct gdma_context *gc = pci_get_drvdata(pdev);
+ int err;
+
+ err = mana_gd_setup(pdev);
+ if (err)
+ return err;
+
+ err = mana_probe(&gc->mana, true);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/* Quiesce the device for kexec. This is also called upon reboot/shutdown. */
+static void mana_gd_shutdown(struct pci_dev *pdev)
+{
+ struct gdma_context *gc = pci_get_drvdata(pdev);
+
+ dev_info(&pdev->dev, "Shutdown was calledd\n");
+
+ mana_remove(&gc->mana, true);
+
+ mana_gd_cleanup(pdev);
+
+ pci_disable_device(pdev);
+}
+
#ifndef PCI_VENDOR_ID_MICROSOFT
#define PCI_VENDOR_ID_MICROSOFT 0x1414
#endif
@@ -1378,6 +1447,9 @@ static struct pci_driver mana_driver = {
.id_table = mana_id_table,
.probe = mana_gd_probe,
.remove = mana_gd_remove,
+ .suspend = mana_gd_suspend,
+ .resume = mana_gd_resume,
+ .shutdown = mana_gd_shutdown,
};

module_pci_driver(mana_driver);
diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h
index fc98a5ba5ed0..d047ee876f12 100644
--- a/drivers/net/ethernet/microsoft/mana/mana.h
+++ b/drivers/net/ethernet/microsoft/mana/mana.h
@@ -374,8 +374,8 @@ int mana_alloc_queues(struct net_device *ndev);
int mana_attach(struct net_device *ndev);
int mana_detach(struct net_device *ndev, bool from_close);

-int mana_probe(struct gdma_dev *gd);
-void mana_remove(struct gdma_dev *gd);
+int mana_probe(struct gdma_dev *gd, bool resuming);
+void mana_remove(struct gdma_dev *gd, bool suspending);

extern const struct ethtool_ops mana_ethtool_ops;

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 4ff5a1fc506f..820585d45a61 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1828,11 +1828,12 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
return err;
}

-int mana_probe(struct gdma_dev *gd)
+int mana_probe(struct gdma_dev *gd, bool resuming)
{
struct gdma_context *gc = gd->gdma_context;
+ struct mana_context *ac = gd->driver_data;
struct device *dev = gc->dev;
- struct mana_context *ac;
+ u16 num_ports = 0;
int err;
int i;

@@ -1844,44 +1845,70 @@ int mana_probe(struct gdma_dev *gd)
if (err)
return err;

- ac = kzalloc(sizeof(*ac), GFP_KERNEL);
- if (!ac)
- return -ENOMEM;
+ if (!resuming) {
+ ac = kzalloc(sizeof(*ac), GFP_KERNEL);
+ if (!ac)
+ return -ENOMEM;

- ac->gdma_dev = gd;
- ac->num_ports = 1;
- gd->driver_data = ac;
+ ac->gdma_dev = gd;
+ gd->driver_data = ac;
+ }

err = mana_create_eq(ac);
if (err)
goto out;

err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
- MANA_MICRO_VERSION, &ac->num_ports);
+ MANA_MICRO_VERSION, &num_ports);
if (err)
goto out;

+ if (!resuming) {
+ ac->num_ports = num_ports;
+ } else {
+ if (ac->num_ports != num_ports) {
+ dev_err(dev, "The number of vPorts changed: %d->%d\n",
+ ac->num_ports, num_ports);
+ err = -EPROTO;
+ goto out;
+ }
+ }
+
+ if (ac->num_ports == 0)
+ dev_err(dev, "Failed to detect any vPort\n");
+
if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
ac->num_ports = MAX_PORTS_IN_MANA_DEV;

- for (i = 0; i < ac->num_ports; i++) {
- err = mana_probe_port(ac, i, &ac->ports[i]);
- if (err)
- break;
+ if (!resuming) {
+ for (i = 0; i < ac->num_ports; i++) {
+ err = mana_probe_port(ac, i, &ac->ports[i]);
+ if (err)
+ break;
+ }
+ } else {
+ for (i = 0; i < ac->num_ports; i++) {
+ rtnl_lock();
+ err = mana_attach(ac->ports[i]);
+ rtnl_unlock();
+ if (err)
+ break;
+ }
}
out:
if (err)
- mana_remove(gd);
+ mana_remove(gd, false);

return err;
}

-void mana_remove(struct gdma_dev *gd)
+void mana_remove(struct gdma_dev *gd, bool suspending)
{
struct gdma_context *gc = gd->gdma_context;
struct mana_context *ac = gd->driver_data;
struct device *dev = gc->dev;
struct net_device *ndev;
+ int err;
int i;

for (i = 0; i < ac->num_ports; i++) {
@@ -1897,7 +1924,16 @@ void mana_remove(struct gdma_dev *gd)
*/
rtnl_lock();

- mana_detach(ndev, false);
+ err = mana_detach(ndev, false);
+ if (err)
+ netdev_err(ndev, "Failed to detach vPort %d: %d\n",
+ i, err);
+
+ if (suspending) {
+ /* No need to unregister the ndev. */
+ rtnl_unlock();
+ continue;
+ }

unregister_netdevice(ndev);

@@ -1910,6 +1946,10 @@ void mana_remove(struct gdma_dev *gd)

out:
mana_gd_deregister_device(gd);
+
+ if (suspending)
+ return;
+
gd->driver_data = NULL;
gd->gdma_context = NULL;
kfree(ac);
--
2.17.1


2021-10-30 15:59:29

by Haiyang Zhang

[permalink] [raw]
Subject: RE: [PATCH net-next 4/4] net: mana: Support hibernation and kexec



> -----Original Message-----
> From: Dexuan Cui <[email protected]>
> Sent: Friday, October 29, 2021 8:54 PM
> To: [email protected]; [email protected]; [email protected]; Haiyang
> Zhang <[email protected]>; [email protected]
> Cc: KY Srinivasan <[email protected]>; [email protected];
> [email protected]; [email protected]; linux-
> [email protected]; Shachar Raindel <[email protected]>; Paul
> Rosswurm <[email protected]>; [email protected]; vkuznets
> <[email protected]>; Dexuan Cui <[email protected]>
> Subject: [PATCH net-next 4/4] net: mana: Support hibernation and kexec
>
> Implement the suspend/resume/shutdown callbacks for hibernation/kexec.
>
> Add mana_gd_setup() and mana_gd_cleanup() for some common code, and
> use them in the mand_gd_* callbacks.
>
> Reuse mana_probe/remove() for the hibernation path.
>
> Signed-off-by: Dexuan Cui <[email protected]>
> ---
> .../net/ethernet/microsoft/mana/gdma_main.c | 140 +++++++++++++-----
> drivers/net/ethernet/microsoft/mana/mana.h | 4 +-
> drivers/net/ethernet/microsoft/mana/mana_en.c | 72 +++++++--
> 3 files changed, 164 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index 599dfd5e6090..c96ac81212f7 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -1258,6 +1258,52 @@ static void mana_gd_remove_irqs(struct pci_dev
> *pdev)
> gc->irq_contexts = NULL;
> }
>
> +static int mana_gd_setup(struct pci_dev *pdev)
> +{
> + struct gdma_context *gc = pci_get_drvdata(pdev);
> + int err;
> +
> + mana_gd_init_registers(pdev);
> + mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
> +
> + err = mana_gd_setup_irqs(pdev);
> + if (err)
> + return err;
> +
> + err = mana_hwc_create_channel(gc);
> + if (err)
> + goto remove_irq;
> +
> + err = mana_gd_verify_vf_version(pdev);
> + if (err)
> + goto destroy_hwc;
> +
> + err = mana_gd_query_max_resources(pdev);
> + if (err)
> + goto destroy_hwc;
> +
> + err = mana_gd_detect_devices(pdev);
> + if (err)
> + goto destroy_hwc;
> +
> + return 0;
> +
> +destroy_hwc:
> + mana_hwc_destroy_channel(gc);
> +remove_irq:
> + mana_gd_remove_irqs(pdev);
> + return err;
> +}
> +
> +static void mana_gd_cleanup(struct pci_dev *pdev)
> +{
> + struct gdma_context *gc = pci_get_drvdata(pdev);
> +
> + mana_hwc_destroy_channel(gc);
> +
> + mana_gd_remove_irqs(pdev);
> +}
> +
> static int mana_gd_probe(struct pci_dev *pdev, const struct
> pci_device_id *ent)
> {
> struct gdma_context *gc;
> @@ -1287,6 +1333,9 @@ static int mana_gd_probe(struct pci_dev *pdev,
> const struct pci_device_id *ent)
> if (!gc)
> goto release_region;
>
> + mutex_init(&gc->eq_test_event_mutex);
> + pci_set_drvdata(pdev, gc);
> +
> bar0_va = pci_iomap(pdev, bar, 0);
> if (!bar0_va)
> goto free_gc;
> @@ -1294,47 +1343,23 @@ static int mana_gd_probe(struct pci_dev *pdev,
> const struct pci_device_id *ent)
> gc->bar0_va = bar0_va;
> gc->dev = &pdev->dev;
>
> - pci_set_drvdata(pdev, gc);
> -
> - mana_gd_init_registers(pdev);
>
> - mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base);
> -
> - err = mana_gd_setup_irqs(pdev);
> + err = mana_gd_setup(pdev);
> if (err)
> goto unmap_bar;
>
> - mutex_init(&gc->eq_test_event_mutex);
> -
> - err = mana_hwc_create_channel(gc);
> + err = mana_probe(&gc->mana, false);
> if (err)
> - goto remove_irq;
> -
> - err = mana_gd_verify_vf_version(pdev);
> - if (err)
> - goto remove_irq;
> -
> - err = mana_gd_query_max_resources(pdev);
> - if (err)
> - goto remove_irq;
> -
> - err = mana_gd_detect_devices(pdev);
> - if (err)
> - goto remove_irq;
> -
> - err = mana_probe(&gc->mana);
> - if (err)
> - goto clean_up_gdma;
> + goto cleanup_gd;
>
> return 0;
>
> -clean_up_gdma:
> - mana_hwc_destroy_channel(gc);
> -remove_irq:
> - mana_gd_remove_irqs(pdev);
> +cleanup_gd:
> + mana_gd_cleanup(pdev);
> unmap_bar:
> pci_iounmap(pdev, bar0_va);
> free_gc:
> + pci_set_drvdata(pdev, NULL);
> vfree(gc);
> release_region:
> pci_release_regions(pdev);
> @@ -1349,11 +1374,9 @@ static void mana_gd_remove(struct pci_dev *pdev)
> {
> struct gdma_context *gc = pci_get_drvdata(pdev);
>
> - mana_remove(&gc->mana);
> + mana_remove(&gc->mana, false);
>
> - mana_hwc_destroy_channel(gc);
> -
> - mana_gd_remove_irqs(pdev);
> + mana_gd_cleanup(pdev);
>
> pci_iounmap(pdev, gc->bar0_va);
>
> @@ -1364,6 +1387,52 @@ static void mana_gd_remove(struct pci_dev *pdev)
> pci_disable_device(pdev);
> }
>
> +/* The 'state' parameter is not used. */
> +static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state)
> +{
> + struct gdma_context *gc = pci_get_drvdata(pdev);
> +
> + mana_remove(&gc->mana, true);
> +
> + mana_gd_cleanup(pdev);
> +
> + return 0;
> +}
> +
> +/* In case the NIC hardware stops working, the suspend and resume
> callbacks will
> + * fail -- if this happens, it's safer to just report an error than try
> to undo
> + * what has been done.
> + */
> +static int mana_gd_resume(struct pci_dev *pdev)
> +{
> + struct gdma_context *gc = pci_get_drvdata(pdev);
> + int err;
> +
> + err = mana_gd_setup(pdev);
> + if (err)
> + return err;
> +
> + err = mana_probe(&gc->mana, true);
> + if (err)
> + return err;
> +
> + return 0;
> +}
> +
> +/* Quiesce the device for kexec. This is also called upon
> reboot/shutdown. */
> +static void mana_gd_shutdown(struct pci_dev *pdev)
> +{
> + struct gdma_context *gc = pci_get_drvdata(pdev);
> +
> + dev_info(&pdev->dev, "Shutdown was calledd\n");
> +
> + mana_remove(&gc->mana, true);
> +
> + mana_gd_cleanup(pdev);
> +
> + pci_disable_device(pdev);
> +}
> +
> #ifndef PCI_VENDOR_ID_MICROSOFT
> #define PCI_VENDOR_ID_MICROSOFT 0x1414
> #endif
> @@ -1378,6 +1447,9 @@ static struct pci_driver mana_driver = {
> .id_table = mana_id_table,
> .probe = mana_gd_probe,
> .remove = mana_gd_remove,
> + .suspend = mana_gd_suspend,
> + .resume = mana_gd_resume,
> + .shutdown = mana_gd_shutdown,
> };
>
> module_pci_driver(mana_driver);
> diff --git a/drivers/net/ethernet/microsoft/mana/mana.h
> b/drivers/net/ethernet/microsoft/mana/mana.h
> index fc98a5ba5ed0..d047ee876f12 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana.h
> +++ b/drivers/net/ethernet/microsoft/mana/mana.h
> @@ -374,8 +374,8 @@ int mana_alloc_queues(struct net_device *ndev);
> int mana_attach(struct net_device *ndev);
> int mana_detach(struct net_device *ndev, bool from_close);
>
> -int mana_probe(struct gdma_dev *gd);
> -void mana_remove(struct gdma_dev *gd);
> +int mana_probe(struct gdma_dev *gd, bool resuming);
> +void mana_remove(struct gdma_dev *gd, bool suspending);
>
> extern const struct ethtool_ops mana_ethtool_ops;
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c
> b/drivers/net/ethernet/microsoft/mana/mana_en.c
> index 4ff5a1fc506f..820585d45a61 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1828,11 +1828,12 @@ static int mana_probe_port(struct mana_context
> *ac, int port_idx,
> return err;
> }
>
> -int mana_probe(struct gdma_dev *gd)
> +int mana_probe(struct gdma_dev *gd, bool resuming)
> {
> struct gdma_context *gc = gd->gdma_context;
> + struct mana_context *ac = gd->driver_data;
> struct device *dev = gc->dev;
> - struct mana_context *ac;
> + u16 num_ports = 0;
> int err;
> int i;
>
> @@ -1844,44 +1845,70 @@ int mana_probe(struct gdma_dev *gd)
> if (err)
> return err;
>
> - ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> - if (!ac)
> - return -ENOMEM;
> + if (!resuming) {
> + ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> + if (!ac)
> + return -ENOMEM;
>
> - ac->gdma_dev = gd;
> - ac->num_ports = 1;
> - gd->driver_data = ac;
> + ac->gdma_dev = gd;
> + gd->driver_data = ac;
> + }
>
> err = mana_create_eq(ac);
> if (err)
> goto out;
>
> err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION,
> MANA_MINOR_VERSION,
> - MANA_MICRO_VERSION, &ac->num_ports);
> + MANA_MICRO_VERSION, &num_ports);
> if (err)
> goto out;
>
> + if (!resuming) {
> + ac->num_ports = num_ports;
> + } else {
> + if (ac->num_ports != num_ports) {
> + dev_err(dev, "The number of vPorts changed: %d->%d\n",
> + ac->num_ports, num_ports);
> + err = -EPROTO;
> + goto out;
> + }
> + }
> +
> + if (ac->num_ports == 0)
> + dev_err(dev, "Failed to detect any vPort\n");
> +
> if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
> ac->num_ports = MAX_PORTS_IN_MANA_DEV;
>
> - for (i = 0; i < ac->num_ports; i++) {
> - err = mana_probe_port(ac, i, &ac->ports[i]);
> - if (err)
> - break;
> + if (!resuming) {
> + for (i = 0; i < ac->num_ports; i++) {
> + err = mana_probe_port(ac, i, &ac->ports[i]);
> + if (err)
> + break;
> + }
> + } else {
> + for (i = 0; i < ac->num_ports; i++) {
> + rtnl_lock();
> + err = mana_attach(ac->ports[i]);
> + rtnl_unlock();
> + if (err)
> + break;
> + }
> }
> out:
> if (err)
> - mana_remove(gd);
> + mana_remove(gd, false);

The "goto out" can happen in both resuming true/false cases,
should the error handling path deal with the two cases
differently?


Other parts look good.

Reviewed-by: Haiyang Zhang <[email protected]>


>
> return err;
> }
>
> -void mana_remove(struct gdma_dev *gd)
> +void mana_remove(struct gdma_dev *gd, bool suspending)
> {
> struct gdma_context *gc = gd->gdma_context;
> struct mana_context *ac = gd->driver_data;
> struct device *dev = gc->dev;
> struct net_device *ndev;
> + int err;
> int i;
>
> for (i = 0; i < ac->num_ports; i++) {
> @@ -1897,7 +1924,16 @@ void mana_remove(struct gdma_dev *gd)
> */
> rtnl_lock();
>
> - mana_detach(ndev, false);
> + err = mana_detach(ndev, false);
> + if (err)
> + netdev_err(ndev, "Failed to detach vPort %d: %d\n",
> + i, err);
> +
> + if (suspending) {
> + /* No need to unregister the ndev. */
> + rtnl_unlock();
> + continue;
> + }
>
> unregister_netdevice(ndev);
>
> @@ -1910,6 +1946,10 @@ void mana_remove(struct gdma_dev *gd)
>
> out:
> mana_gd_deregister_device(gd);
> +
> + if (suspending)
> + return;
> +
> gd->driver_data = NULL;
> gd->gdma_context = NULL;
> kfree(ac);
> --
> 2.17.1

2021-11-01 07:07:00

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH net-next 4/4] net: mana: Support hibernation and kexec

> From: Haiyang Zhang <[email protected]>
> Sent: Saturday, October 30, 2021 8:55 AM
> >
> > @@ -1844,44 +1845,70 @@ int mana_probe(struct gdma_dev *gd)
> > if (err)
> > return err;
> >
> > - ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> > - if (!ac)
> > - return -ENOMEM;
> > + if (!resuming) {
> > + ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> > + if (!ac)
> > + return -ENOMEM;
> >
> > - ac->gdma_dev = gd;
> > - ac->num_ports = 1;
> > - gd->driver_data = ac;
> > + ac->gdma_dev = gd;
> > + gd->driver_data = ac;
> > + }
> >
> > err = mana_create_eq(ac);
> > if (err)
> > goto out;
> >
> > err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION,
> > MANA_MINOR_VERSION,
> > - MANA_MICRO_VERSION, &ac->num_ports);
> > + MANA_MICRO_VERSION, &num_ports);
> > if (err)
> > goto out;
> >
> > + if (!resuming) {
> > + ac->num_ports = num_ports;
> > + } else {
> > + if (ac->num_ports != num_ports) {
> > + dev_err(dev, "The number of vPorts changed: %d->%d\n",
> > + ac->num_ports, num_ports);
> > + err = -EPROTO;
> > + goto out;
> > + }
> > + }
> > +
> > + if (ac->num_ports == 0)
> > + dev_err(dev, "Failed to detect any vPort\n");
> > +
> > if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
> > ac->num_ports = MAX_PORTS_IN_MANA_DEV;
> >
> > - for (i = 0; i < ac->num_ports; i++) {
> > - err = mana_probe_port(ac, i, &ac->ports[i]);
> > - if (err)
> > - break;
> > + if (!resuming) {
> > + for (i = 0; i < ac->num_ports; i++) {
> > + err = mana_probe_port(ac, i, &ac->ports[i]);
> > + if (err)
> > + break;
> > + }
> > + } else {
> > + for (i = 0; i < ac->num_ports; i++) {
> > + rtnl_lock();
> > + err = mana_attach(ac->ports[i]);
> > + rtnl_unlock();
> > + if (err)
> > + break;
> > + }
> > }
> > out:
> > if (err)
> > - mana_remove(gd);
> > + mana_remove(gd, false);
>
> The "goto out" can happen in both resuming true/false cases,
> should the error handling path deal with the two cases
> differently?

Let me make the below change in v2. Please let me know
if any further change is needed:

--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1850,8 +1850,10 @@ int mana_probe(struct gdma_dev *gd, bool resuming)

if (!resuming) {
ac = kzalloc(sizeof(*ac), GFP_KERNEL);
- if (!ac)
- return -ENOMEM;
+ if (!ac) {
+ err = -ENOMEM;
+ goto out;
+ }

ac->gdma_dev = gd;
gd->driver_data = ac;
@@ -1872,8 +1874,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
if (ac->num_ports != num_ports) {
dev_err(dev, "The number of vPorts changed: %d->%d\n",
ac->num_ports, num_ports);
- err = -EPROTO;
- goto out;
+ /* It's unsafe to proceed. */
+ return -EPROTO;
}
}

@@ -1886,22 +1888,36 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
if (!resuming) {
for (i = 0; i < ac->num_ports; i++) {
err = mana_probe_port(ac, i, &ac->ports[i]);
- if (err)
- break;
+ if (err) {
+ dev_err(dev, "Failed to probe vPort %u: %d\n",
+ i, err);
+ goto out;
+ }
}
} else {
for (i = 0; i < ac->num_ports; i++) {
rtnl_lock();
err = mana_attach(ac->ports[i]);
rtnl_unlock();
- if (err)
- break;
+
+ if (err) {
+ netdev_err(ac->ports[i],
+ "Failed to resume vPort %u: %d\n",
+ i, err);
+ return err;
+ }
}
}
+
+ return 0;
out:
- if (err)
- mana_remove(gd, false);
+ /* In the resuming path, it's safer to leave the device in the failed
+ * state than try to invoke mana_detach().
+ */
+ if (resuming)
+ return err;

+ mana_remove(gd, false);
return err;
}

@@ -1919,7 +1935,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
if (!ndev) {
if (i == 0)
dev_err(dev, "No net device to remove\n");
- goto out;
+ break;
}

/* All cleanup actions should stay after rtnl_lock(), otherwise

For your easy reviewing, the new code of the function in v2 will be:

int mana_probe(struct gdma_dev *gd, bool resuming)
{
struct gdma_context *gc = gd->gdma_context;
struct mana_context *ac = gd->driver_data;
struct device *dev = gc->dev;
u16 num_ports = 0;
int err;
int i;

dev_info(dev,
"Microsoft Azure Network Adapter protocol version: %d.%d.%d\n",
MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION);

err = mana_gd_register_device(gd);
if (err)
return err;

if (!resuming) {
ac = kzalloc(sizeof(*ac), GFP_KERNEL);
if (!ac) {
err = -ENOMEM;
goto out;
}

ac->gdma_dev = gd;
gd->driver_data = ac;
}

err = mana_create_eq(ac);
if (err)
goto out;

err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
MANA_MICRO_VERSION, &num_ports);
if (err)
goto out;

if (!resuming) {
ac->num_ports = num_ports;
} else {
if (ac->num_ports != num_ports) {
dev_err(dev, "The number of vPorts changed: %d->%d\n",
ac->num_ports, num_ports);
/* It's unsafe to proceed. */
return -EPROTO;
}
}

if (ac->num_ports == 0)
dev_err(dev, "Failed to detect any vPort\n");

if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
ac->num_ports = MAX_PORTS_IN_MANA_DEV;

if (!resuming) {
for (i = 0; i < ac->num_ports; i++) {
err = mana_probe_port(ac, i, &ac->ports[i]);
if (err) {
dev_err(dev, "Failed to probe vPort %u: %d\n",
i, err);
goto out;
}
}
} else {
for (i = 0; i < ac->num_ports; i++) {
rtnl_lock();
err = mana_attach(ac->ports[i]);
rtnl_unlock();

if (err) {
netdev_err(ac->ports[i],
"Failed to resume vPort %u: %d\n",
i, err);
return err;
}
}
}

return 0;
out:
/* In the resuming path, it's safer to leave the device in the failed
* state than try to invoke mana_detach().
*/
if (resuming)
return err;

mana_remove(gd, false);
return err;
}

2021-11-01 15:15:28

by Haiyang Zhang

[permalink] [raw]
Subject: RE: [PATCH net-next 4/4] net: mana: Support hibernation and kexec



> -----Original Message-----
> From: Dexuan Cui <[email protected]>
> Sent: Monday, November 1, 2021 3:03 AM
> To: Haiyang Zhang <[email protected]>; [email protected];
> [email protected]; [email protected]; [email protected]
> Cc: KY Srinivasan <[email protected]>; [email protected];
> [email protected]; [email protected]; linux-
> [email protected]; Shachar Raindel <[email protected]>; Paul
> Rosswurm <[email protected]>; [email protected]; vkuznets
> <[email protected]>
> Subject: RE: [PATCH net-next 4/4] net: mana: Support hibernation and
> kexec
>
> > From: Haiyang Zhang <[email protected]>
> > Sent: Saturday, October 30, 2021 8:55 AM
> > >
> > > @@ -1844,44 +1845,70 @@ int mana_probe(struct gdma_dev *gd)
> > > if (err)
> > > return err;
> > >
> > > - ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> > > - if (!ac)
> > > - return -ENOMEM;
> > > + if (!resuming) {
> > > + ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> > > + if (!ac)
> > > + return -ENOMEM;
> > >
> > > - ac->gdma_dev = gd;
> > > - ac->num_ports = 1;
> > > - gd->driver_data = ac;
> > > + ac->gdma_dev = gd;
> > > + gd->driver_data = ac;
> > > + }
> > >
> > > err = mana_create_eq(ac);
> > > if (err)
> > > goto out;
> > >
> > > err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION,
> > > MANA_MINOR_VERSION,
> > > - MANA_MICRO_VERSION, &ac->num_ports);
> > > + MANA_MICRO_VERSION, &num_ports);
> > > if (err)
> > > goto out;
> > >
> > > + if (!resuming) {
> > > + ac->num_ports = num_ports;
> > > + } else {
> > > + if (ac->num_ports != num_ports) {
> > > + dev_err(dev, "The number of vPorts changed: %d->%d\n",
> > > + ac->num_ports, num_ports);
> > > + err = -EPROTO;
> > > + goto out;
> > > + }
> > > + }
> > > +
> > > + if (ac->num_ports == 0)
> > > + dev_err(dev, "Failed to detect any vPort\n");
> > > +
> > > if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
> > > ac->num_ports = MAX_PORTS_IN_MANA_DEV;
> > >
> > > - for (i = 0; i < ac->num_ports; i++) {
> > > - err = mana_probe_port(ac, i, &ac->ports[i]);
> > > - if (err)
> > > - break;
> > > + if (!resuming) {
> > > + for (i = 0; i < ac->num_ports; i++) {
> > > + err = mana_probe_port(ac, i, &ac->ports[i]);
> > > + if (err)
> > > + break;
> > > + }
> > > + } else {
> > > + for (i = 0; i < ac->num_ports; i++) {
> > > + rtnl_lock();
> > > + err = mana_attach(ac->ports[i]);
> > > + rtnl_unlock();
> > > + if (err)
> > > + break;
> > > + }
> > > }
> > > out:
> > > if (err)
> > > - mana_remove(gd);
> > > + mana_remove(gd, false);
> >
> > The "goto out" can happen in both resuming true/false cases,
> > should the error handling path deal with the two cases
> > differently?
>
> Let me make the below change in v2. Please let me know
> if any further change is needed:
>
> --- a/drivers/net/ethernet/microsoft/mana/mana_en.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
> @@ -1850,8 +1850,10 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
>
> if (!resuming) {
> ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> - if (!ac)
> - return -ENOMEM;
> + if (!ac) {
> + err = -ENOMEM;
> + goto out;
> + }
>
> ac->gdma_dev = gd;
> gd->driver_data = ac;
> @@ -1872,8 +1874,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
> if (ac->num_ports != num_ports) {
> dev_err(dev, "The number of vPorts changed: %d-
> >%d\n",
> ac->num_ports, num_ports);
> - err = -EPROTO;
> - goto out;
> + /* It's unsafe to proceed. */
> + return -EPROTO;
> }
> }
>
> @@ -1886,22 +1888,36 @@ int mana_probe(struct gdma_dev *gd, bool
> resuming)
> if (!resuming) {
> for (i = 0; i < ac->num_ports; i++) {
> err = mana_probe_port(ac, i, &ac->ports[i]);
> - if (err)
> - break;
> + if (err) {
> + dev_err(dev, "Failed to probe
> vPort %u: %d\n",
> + i, err);
> + goto out;
> + }
> }
> } else {
> for (i = 0; i < ac->num_ports; i++) {
> rtnl_lock();
> err = mana_attach(ac->ports[i]);
> rtnl_unlock();
> - if (err)
> - break;
> +
> + if (err) {
> + netdev_err(ac->ports[i],
> + "Failed to resume
> vPort %u: %d\n",
> + i, err);
> + return err;
> + }
> }
> }
> +
> + return 0;
> out:
> - if (err)
> - mana_remove(gd, false);
> + /* In the resuming path, it's safer to leave the device in the
> failed
> + * state than try to invoke mana_detach().
> + */
> + if (resuming)
> + return err;
>
> + mana_remove(gd, false);
> return err;
> }
>
> @@ -1919,7 +1935,7 @@ void mana_remove(struct gdma_dev *gd, bool
> suspending)
> if (!ndev) {
> if (i == 0)
> dev_err(dev, "No net device to
> remove\n");
> - goto out;
> + break;
> }
>
> /* All cleanup actions should stay after rtnl_lock(),
> otherwise
>
> For your easy reviewing, the new code of the function in v2 will be:
>
> int mana_probe(struct gdma_dev *gd, bool resuming)
> {
> struct gdma_context *gc = gd->gdma_context;
> struct mana_context *ac = gd->driver_data;
> struct device *dev = gc->dev;
> u16 num_ports = 0;
> int err;
> int i;
>
> dev_info(dev,
> "Microsoft Azure Network Adapter protocol
> version: %d.%d.%d\n",
> MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
> MANA_MICRO_VERSION);
>
> err = mana_gd_register_device(gd);
> if (err)
> return err;
>
> if (!resuming) {
> ac = kzalloc(sizeof(*ac), GFP_KERNEL);
> if (!ac) {
> err = -ENOMEM;
> goto out;
> }
>
> ac->gdma_dev = gd;
> gd->driver_data = ac;
> }
>
> err = mana_create_eq(ac);
> if (err)
> goto out;
>
> err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION,
> MANA_MINOR_VERSION,
> MANA_MICRO_VERSION, &num_ports);
> if (err)
> goto out;
>
> if (!resuming) {
> ac->num_ports = num_ports;
> } else {
> if (ac->num_ports != num_ports) {
> dev_err(dev, "The number of vPorts changed: %d-
> >%d\n",
> ac->num_ports, num_ports);
> /* It's unsafe to proceed. */
> return -EPROTO;
> }
> }
>
> if (ac->num_ports == 0)
> dev_err(dev, "Failed to detect any vPort\n");
>
> if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
> ac->num_ports = MAX_PORTS_IN_MANA_DEV;
>
> if (!resuming) {
> for (i = 0; i < ac->num_ports; i++) {
> err = mana_probe_port(ac, i, &ac->ports[i]);
> if (err) {
> dev_err(dev, "Failed to probe
> vPort %u: %d\n",
> i, err);
> goto out;
> }
> }
> } else {
> for (i = 0; i < ac->num_ports; i++) {
> rtnl_lock();
> err = mana_attach(ac->ports[i]);
> rtnl_unlock();
>
> if (err) {
> netdev_err(ac->ports[i],
> "Failed to resume
> vPort %u: %d\n",
> i, err);
> return err;
> }
> }
> }
>
> return 0;
> out:
> /* In the resuming path, it's safer to leave the device in the
> failed
> * state than try to invoke mana_detach().
> */
> if (resuming)
> return err;
>
> mana_remove(gd, false);
> return err;
> }

The new code looks good!

Thanks,
- Haiyang

2021-11-01 16:43:54

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH net-next 4/4] net: mana: Support hibernation and kexec

> From: Haiyang Zhang <[email protected]>
> Sent: Monday, November 1, 2021 8:12 AM
> ...
> The new code looks good!
>
> Thanks,
> - Haiyang

The v1 patchset has been merged into net-next.git by
[email protected] ...

I'll post an incremental patchset.

Thanks,
Dexuan