2021-10-30 00:57:22

by Dexuan Cui

[permalink] [raw]
Subject: [PATCH net-next 3/4] net: mana: Improve the HWC error handling

Currently when the HWC creation fails, the error handling is flawed,
e.g. if mana_hwc_create_channel() -> mana_hwc_establish_channel() fails,
the resources acquired in mana_hwc_init_queues() is not released.

Enhance mana_hwc_destroy_channel() to do the proper cleanup work and
call it accordingly.

Signed-off-by: Dexuan Cui <[email protected]>
---
.../net/ethernet/microsoft/mana/gdma_main.c | 4 --
.../net/ethernet/microsoft/mana/hw_channel.c | 71 ++++++++-----------
2 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 8a9ee2885f8c..599dfd5e6090 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -1330,8 +1330,6 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent)

clean_up_gdma:
mana_hwc_destroy_channel(gc);
- vfree(gc->cq_table);
- gc->cq_table = NULL;
remove_irq:
mana_gd_remove_irqs(pdev);
unmap_bar:
@@ -1354,8 +1352,6 @@ static void mana_gd_remove(struct pci_dev *pdev)
mana_remove(&gc->mana);

mana_hwc_destroy_channel(gc);
- vfree(gc->cq_table);
- gc->cq_table = NULL;

mana_gd_remove_irqs(pdev);

diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c
index c1310ea1c216..851de2b81fa4 100644
--- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
+++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
@@ -309,9 +309,6 @@ static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self)

static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq)
{
- if (!hwc_cq)
- return;
-
kfree(hwc_cq->comp_buf);

if (hwc_cq->gdma_cq)
@@ -448,9 +445,6 @@ static void mana_hwc_dealloc_dma_buf(struct hw_channel_context *hwc,
static void mana_hwc_destroy_wq(struct hw_channel_context *hwc,
struct hwc_wq *hwc_wq)
{
- if (!hwc_wq)
- return;
-
mana_hwc_dealloc_dma_buf(hwc, hwc_wq->msg_buf);

if (hwc_wq->gdma_wq)
@@ -623,6 +617,7 @@ static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth,
*max_req_msg_size = hwc->hwc_init_max_req_msg_size;
*max_resp_msg_size = hwc->hwc_init_max_resp_msg_size;

+ /* Both were set in mana_hwc_init_event_handler(). */
if (WARN_ON(cq->id >= gc->max_num_cqs))
return -EPROTO;

@@ -638,9 +633,6 @@ static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth,
static int mana_hwc_init_queues(struct hw_channel_context *hwc, u16 q_depth,
u32 max_req_msg_size, u32 max_resp_msg_size)
{
- struct hwc_wq *hwc_rxq = NULL;
- struct hwc_wq *hwc_txq = NULL;
- struct hwc_cq *hwc_cq = NULL;
int err;

err = mana_hwc_init_inflight_msg(hwc, q_depth);
@@ -653,44 +645,32 @@ static int mana_hwc_init_queues(struct hw_channel_context *hwc, u16 q_depth,
err = mana_hwc_create_cq(hwc, q_depth * 2,
mana_hwc_init_event_handler, hwc,
mana_hwc_rx_event_handler, hwc,
- mana_hwc_tx_event_handler, hwc, &hwc_cq);
+ mana_hwc_tx_event_handler, hwc, &hwc->cq);
if (err) {
dev_err(hwc->dev, "Failed to create HWC CQ: %d\n", err);
goto out;
}
- hwc->cq = hwc_cq;

err = mana_hwc_create_wq(hwc, GDMA_RQ, q_depth, max_req_msg_size,
- hwc_cq, &hwc_rxq);
+ hwc->cq, &hwc->rxq);
if (err) {
dev_err(hwc->dev, "Failed to create HWC RQ: %d\n", err);
goto out;
}
- hwc->rxq = hwc_rxq;

err = mana_hwc_create_wq(hwc, GDMA_SQ, q_depth, max_resp_msg_size,
- hwc_cq, &hwc_txq);
+ hwc->cq, &hwc->txq);
if (err) {
dev_err(hwc->dev, "Failed to create HWC SQ: %d\n", err);
goto out;
}
- hwc->txq = hwc_txq;

hwc->num_inflight_msg = q_depth;
hwc->max_req_msg_size = max_req_msg_size;

return 0;
out:
- if (hwc_txq)
- mana_hwc_destroy_wq(hwc, hwc_txq);
-
- if (hwc_rxq)
- mana_hwc_destroy_wq(hwc, hwc_rxq);
-
- if (hwc_cq)
- mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq);
-
- mana_gd_free_res_map(&hwc->inflight_msg_res);
+ /* mana_hwc_create_channel() will do the cleanup.*/
return err;
}

@@ -718,6 +698,9 @@ int mana_hwc_create_channel(struct gdma_context *gc)
gd->pdid = INVALID_PDID;
gd->doorbell = INVALID_DOORBELL;

+ /* mana_hwc_init_queues() only creates the required data structures,
+ * and doesn't touch the HWC device.
+ */
err = mana_hwc_init_queues(hwc, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH,
HW_CHANNEL_MAX_REQUEST_SIZE,
HW_CHANNEL_MAX_RESPONSE_SIZE);
@@ -743,42 +726,50 @@ int mana_hwc_create_channel(struct gdma_context *gc)

return 0;
out:
- kfree(hwc);
+ mana_hwc_destroy_channel(gc);
return err;
}

void mana_hwc_destroy_channel(struct gdma_context *gc)
{
struct hw_channel_context *hwc = gc->hwc.driver_data;
- struct hwc_caller_ctx *ctx;

- mana_smc_teardown_hwc(&gc->shm_channel, false);
+ if (!hwc)
+ return;
+
+ /* gc->max_num_cqs is set in mana_hwc_init_event_handler(). If it's
+ * non-zero, the HWC worked and we should tear down the HWC here.
+ */
+ if (gc->max_num_cqs > 0) {
+ mana_smc_teardown_hwc(&gc->shm_channel, false);
+ gc->max_num_cqs = 0;
+ }

- ctx = hwc->caller_ctx;
- kfree(ctx);
+ kfree(hwc->caller_ctx);
hwc->caller_ctx = NULL;

- mana_hwc_destroy_wq(hwc, hwc->txq);
- hwc->txq = NULL;
+ if (hwc->txq)
+ mana_hwc_destroy_wq(hwc, hwc->txq);

- mana_hwc_destroy_wq(hwc, hwc->rxq);
- hwc->rxq = NULL;
+ if (hwc->rxq)
+ mana_hwc_destroy_wq(hwc, hwc->rxq);

- mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq);
- hwc->cq = NULL;
+ if (hwc->cq)
+ mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq);

mana_gd_free_res_map(&hwc->inflight_msg_res);

hwc->num_inflight_msg = 0;

- if (hwc->gdma_dev->pdid != INVALID_PDID) {
- hwc->gdma_dev->doorbell = INVALID_DOORBELL;
- hwc->gdma_dev->pdid = INVALID_PDID;
- }
+ hwc->gdma_dev->doorbell = INVALID_DOORBELL;
+ hwc->gdma_dev->pdid = INVALID_PDID;

kfree(hwc);
gc->hwc.driver_data = NULL;
gc->hwc.gdma_context = NULL;
+
+ vfree(gc->cq_table);
+ gc->cq_table = NULL;
}

int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
--
2.17.1


2021-10-30 15:40:32

by Haiyang Zhang

[permalink] [raw]
Subject: RE: [PATCH net-next 3/4] net: mana: Improve the HWC error handling



> -----Original Message-----
> From: Dexuan Cui <[email protected]>
> Sent: Friday, October 29, 2021 8:54 PM
> To: [email protected]; [email protected]; [email protected]; Haiyang
> Zhang <[email protected]>; [email protected]
> Cc: KY Srinivasan <[email protected]>; [email protected];
> [email protected]; [email protected]; linux-
> [email protected]; Shachar Raindel <[email protected]>; Paul
> Rosswurm <[email protected]>; [email protected]; vkuznets
> <[email protected]>; Dexuan Cui <[email protected]>
> Subject: [PATCH net-next 3/4] net: mana: Improve the HWC error handling
>
> Currently when the HWC creation fails, the error handling is flawed, e.g.
> if mana_hwc_create_channel() -> mana_hwc_establish_channel() fails, the
> resources acquired in mana_hwc_init_queues() is not released.
>
> Enhance mana_hwc_destroy_channel() to do the proper cleanup work and
> call it accordingly.
>
> Signed-off-by: Dexuan Cui <[email protected]>
> ---
> .../net/ethernet/microsoft/mana/gdma_main.c | 4 --
> .../net/ethernet/microsoft/mana/hw_channel.c | 71 ++++++++-----------
> 2 files changed, 31 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> index 8a9ee2885f8c..599dfd5e6090 100644
> --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
> +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
> @@ -1330,8 +1330,6 @@ static int mana_gd_probe(struct pci_dev *pdev,
> const struct pci_device_id *ent)
>
> clean_up_gdma:
> mana_hwc_destroy_channel(gc);
> - vfree(gc->cq_table);
> - gc->cq_table = NULL;
> remove_irq:
> mana_gd_remove_irqs(pdev);
> unmap_bar:
> @@ -1354,8 +1352,6 @@ static void mana_gd_remove(struct pci_dev *pdev)
> mana_remove(&gc->mana);
>
> mana_hwc_destroy_channel(gc);
> - vfree(gc->cq_table);
> - gc->cq_table = NULL;
>
> mana_gd_remove_irqs(pdev);
>
> diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c
> b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> index c1310ea1c216..851de2b81fa4 100644
> --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c
> +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c
> @@ -309,9 +309,6 @@ static void mana_hwc_comp_event(void *ctx, struct
> gdma_queue *q_self)
>
> static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq
> *hwc_cq) {
> - if (!hwc_cq)
> - return;
> -
> kfree(hwc_cq->comp_buf);
>
> if (hwc_cq->gdma_cq)
> @@ -448,9 +445,6 @@ static void mana_hwc_dealloc_dma_buf(struct
> hw_channel_context *hwc, static void mana_hwc_destroy_wq(struct
> hw_channel_context *hwc,
> struct hwc_wq *hwc_wq)
> {
> - if (!hwc_wq)
> - return;
> -
> mana_hwc_dealloc_dma_buf(hwc, hwc_wq->msg_buf);
>
> if (hwc_wq->gdma_wq)
> @@ -623,6 +617,7 @@ static int mana_hwc_establish_channel(struct
> gdma_context *gc, u16 *q_depth,
> *max_req_msg_size = hwc->hwc_init_max_req_msg_size;
> *max_resp_msg_size = hwc->hwc_init_max_resp_msg_size;
>
> + /* Both were set in mana_hwc_init_event_handler(). */
> if (WARN_ON(cq->id >= gc->max_num_cqs))
> return -EPROTO;
>
> @@ -638,9 +633,6 @@ static int mana_hwc_establish_channel(struct
> gdma_context *gc, u16 *q_depth, static int mana_hwc_init_queues(struct
> hw_channel_context *hwc, u16 q_depth,
> u32 max_req_msg_size, u32 max_resp_msg_size) {
> - struct hwc_wq *hwc_rxq = NULL;
> - struct hwc_wq *hwc_txq = NULL;
> - struct hwc_cq *hwc_cq = NULL;
> int err;
>
> err = mana_hwc_init_inflight_msg(hwc, q_depth); @@ -653,44 +645,32
> @@ static int mana_hwc_init_queues(struct hw_channel_context *hwc, u16
> q_depth,
> err = mana_hwc_create_cq(hwc, q_depth * 2,
> mana_hwc_init_event_handler, hwc,
> mana_hwc_rx_event_handler, hwc,
> - mana_hwc_tx_event_handler, hwc, &hwc_cq);
> + mana_hwc_tx_event_handler, hwc, &hwc->cq);
> if (err) {
> dev_err(hwc->dev, "Failed to create HWC CQ: %d\n", err);
> goto out;
> }
> - hwc->cq = hwc_cq;
>
> err = mana_hwc_create_wq(hwc, GDMA_RQ, q_depth, max_req_msg_size,
> - hwc_cq, &hwc_rxq);
> + hwc->cq, &hwc->rxq);
> if (err) {
> dev_err(hwc->dev, "Failed to create HWC RQ: %d\n", err);
> goto out;
> }
> - hwc->rxq = hwc_rxq;
>
> err = mana_hwc_create_wq(hwc, GDMA_SQ, q_depth, max_resp_msg_size,
> - hwc_cq, &hwc_txq);
> + hwc->cq, &hwc->txq);
> if (err) {
> dev_err(hwc->dev, "Failed to create HWC SQ: %d\n", err);
> goto out;
> }
> - hwc->txq = hwc_txq;
>
> hwc->num_inflight_msg = q_depth;
> hwc->max_req_msg_size = max_req_msg_size;
>
> return 0;
> out:
> - if (hwc_txq)
> - mana_hwc_destroy_wq(hwc, hwc_txq);
> -
> - if (hwc_rxq)
> - mana_hwc_destroy_wq(hwc, hwc_rxq);
> -
> - if (hwc_cq)
> - mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq);
> -
> - mana_gd_free_res_map(&hwc->inflight_msg_res);
> + /* mana_hwc_create_channel() will do the cleanup.*/
> return err;
> }
>
> @@ -718,6 +698,9 @@ int mana_hwc_create_channel(struct gdma_context *gc)
> gd->pdid = INVALID_PDID;
> gd->doorbell = INVALID_DOORBELL;
>
> + /* mana_hwc_init_queues() only creates the required data structures,
> + * and doesn't touch the HWC device.
> + */
> err = mana_hwc_init_queues(hwc, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH,
> HW_CHANNEL_MAX_REQUEST_SIZE,
> HW_CHANNEL_MAX_RESPONSE_SIZE);
> @@ -743,42 +726,50 @@ int mana_hwc_create_channel(struct gdma_context
> *gc)
>
> return 0;
> out:
> - kfree(hwc);
> + mana_hwc_destroy_channel(gc);
> return err;
> }
>
> void mana_hwc_destroy_channel(struct gdma_context *gc) {
> struct hw_channel_context *hwc = gc->hwc.driver_data;
> - struct hwc_caller_ctx *ctx;
>
> - mana_smc_teardown_hwc(&gc->shm_channel, false);
> + if (!hwc)
> + return;
> +
> + /* gc->max_num_cqs is set in mana_hwc_init_event_handler(). If it's
> + * non-zero, the HWC worked and we should tear down the HWC here.
> + */
> + if (gc->max_num_cqs > 0) {
> + mana_smc_teardown_hwc(&gc->shm_channel, false);
> + gc->max_num_cqs = 0;
> + }
>
> - ctx = hwc->caller_ctx;
> - kfree(ctx);
> + kfree(hwc->caller_ctx);
> hwc->caller_ctx = NULL;
>
> - mana_hwc_destroy_wq(hwc, hwc->txq);
> - hwc->txq = NULL;
> + if (hwc->txq)
> + mana_hwc_destroy_wq(hwc, hwc->txq);
>
> - mana_hwc_destroy_wq(hwc, hwc->rxq);
> - hwc->rxq = NULL;
> + if (hwc->rxq)
> + mana_hwc_destroy_wq(hwc, hwc->rxq);
>
> - mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq);
> - hwc->cq = NULL;
> + if (hwc->cq)
> + mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq);
>
> mana_gd_free_res_map(&hwc->inflight_msg_res);
>
> hwc->num_inflight_msg = 0;
>
> - if (hwc->gdma_dev->pdid != INVALID_PDID) {
> - hwc->gdma_dev->doorbell = INVALID_DOORBELL;
> - hwc->gdma_dev->pdid = INVALID_PDID;
> - }
> + hwc->gdma_dev->doorbell = INVALID_DOORBELL;
> + hwc->gdma_dev->pdid = INVALID_PDID;
>
> kfree(hwc);
> gc->hwc.driver_data = NULL;
> gc->hwc.gdma_context = NULL;
> +
> + vfree(gc->cq_table);
> + gc->cq_table = NULL;
> }
>
> int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
> --
> 2.17.1

Reviewed-by: Haiyang Zhang <[email protected]>