2023-10-13 03:53:07

by liulongfang

[permalink] [raw]
Subject: [PATCH] crypto: hisilicon/qm - fix EQ/AEQ interrupt issue

During hisilicon accelerator live migration operation. In order to
prevent the problem of EQ/AEQ interrupt loss. Migration driver will
trigger an EQ/AEQ doorbell at the end of the migration.

This operation may cause double interruption of EQ/AEQ events.
To ensure that the EQ/AEQ interrupt processing function is normal.
The interrupt handling functionality of EQ/AEQ needs to be updated.
Used to handle repeated interrupts event.

Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration")
Signed-off-by: Longfang Liu <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 105 +++++++++++++---------------------
include/linux/hisi_acc_qm.h | 1 +
2 files changed, 41 insertions(+), 65 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index f3b55c044dd3..c12dedcd6bba 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -854,47 +854,15 @@ static void qm_poll_req_cb(struct hisi_qp *qp)
qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ, qp->qp_status.cq_head, 1);
}

-static int qm_get_complete_eqe_num(struct hisi_qm_poll_data *poll_data)
-{
- struct hisi_qm *qm = poll_data->qm;
- struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
- u16 eq_depth = qm->eq_depth;
- int eqe_num = 0;
- u16 cqn;
-
- while (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
- cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
- poll_data->qp_finish_id[eqe_num] = cqn;
- eqe_num++;
-
- if (qm->status.eq_head == eq_depth - 1) {
- qm->status.eqc_phase = !qm->status.eqc_phase;
- eqe = qm->eqe;
- qm->status.eq_head = 0;
- } else {
- eqe++;
- qm->status.eq_head++;
- }
-
- if (eqe_num == (eq_depth >> 1) - 1)
- break;
- }
-
- qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
-
- return eqe_num;
-}
-
static void qm_work_process(struct work_struct *work)
{
struct hisi_qm_poll_data *poll_data =
container_of(work, struct hisi_qm_poll_data, work);
struct hisi_qm *qm = poll_data->qm;
+ u16 eqe_num = poll_data->eqe_num;
struct hisi_qp *qp;
- int eqe_num, i;
+ int i;

- /* Get qp id of completed tasks and re-enable the interrupt. */
- eqe_num = qm_get_complete_eqe_num(poll_data);
for (i = eqe_num - 1; i >= 0; i--) {
qp = &qm->qp_array[poll_data->qp_finish_id[i]];
if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
@@ -910,39 +878,55 @@ static void qm_work_process(struct work_struct *work)
}
}

-static bool do_qm_eq_irq(struct hisi_qm *qm)
+static void qm_get_complete_eqe_num(struct hisi_qm *qm)
{
struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
- struct hisi_qm_poll_data *poll_data;
- u16 cqn;
+ struct hisi_qm_poll_data *poll_data = NULL;
+ u16 eq_depth = qm->eq_depth;
+ u16 cqn, eqe_num = 0;

- if (!readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
- return false;
+ if (QM_EQE_PHASE(eqe) != qm->status.eqc_phase) {
+ atomic64_inc(&qm->debug.dfx.err_irq_cnt);
+ qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+ return;
+ }

- if (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
+ cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
+ if (unlikely(cqn >= qm->qp_num))
+ return;
+ poll_data = &qm->poll_data[cqn];
+
+ while (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
- poll_data = &qm->poll_data[cqn];
- queue_work(qm->wq, &poll_data->work);
+ poll_data->qp_finish_id[eqe_num] = cqn;
+ eqe_num++;
+
+ if (qm->status.eq_head == eq_depth - 1) {
+ qm->status.eqc_phase = !qm->status.eqc_phase;
+ eqe = qm->eqe;
+ qm->status.eq_head = 0;
+ } else {
+ eqe++;
+ qm->status.eq_head++;
+ }

- return true;
+ if (eqe_num == (eq_depth >> 1) - 1)
+ break;
}

- return false;
+ poll_data->eqe_num = eqe_num;
+ queue_work(qm->wq, &poll_data->work);
+ qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
}

static irqreturn_t qm_eq_irq(int irq, void *data)
{
struct hisi_qm *qm = data;
- bool ret;
-
- ret = do_qm_eq_irq(qm);
- if (ret)
- return IRQ_HANDLED;

- atomic64_inc(&qm->debug.dfx.err_irq_cnt);
- qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+ /* Get qp id of completed tasks and re-enable the interrupt */
+ qm_get_complete_eqe_num(qm);

- return IRQ_NONE;
+ return IRQ_HANDLED;
}

static irqreturn_t qm_mb_cmd_irq(int irq, void *data)
@@ -1024,6 +1008,8 @@ static irqreturn_t qm_aeq_thread(int irq, void *data)
u16 aeq_depth = qm->aeq_depth;
u32 type, qp_id;

+ atomic64_inc(&qm->debug.dfx.aeq_irq_cnt);
+
while (QM_AEQE_PHASE(aeqe) == qm->status.aeqc_phase) {
type = (le32_to_cpu(aeqe->dw0) >> QM_AEQE_TYPE_SHIFT) &
QM_AEQE_TYPE_MASK;
@@ -1062,17 +1048,6 @@ static irqreturn_t qm_aeq_thread(int irq, void *data)
return IRQ_HANDLED;
}

-static irqreturn_t qm_aeq_irq(int irq, void *data)
-{
- struct hisi_qm *qm = data;
-
- atomic64_inc(&qm->debug.dfx.aeq_irq_cnt);
- if (!readl(qm->io_base + QM_VF_AEQ_INT_SOURCE))
- return IRQ_NONE;
-
- return IRQ_WAKE_THREAD;
-}
-
static void qm_init_qp_status(struct hisi_qp *qp)
{
struct hisi_qp_status *qp_status = &qp->qp_status;
@@ -4997,8 +4972,8 @@ static int qm_register_aeq_irq(struct hisi_qm *qm)
return 0;

irq_vector = val & QM_IRQ_VECTOR_MASK;
- ret = request_threaded_irq(pci_irq_vector(pdev, irq_vector), qm_aeq_irq,
- qm_aeq_thread, 0, qm->dev_name, qm);
+ ret = request_threaded_irq(pci_irq_vector(pdev, irq_vector), NULL,
+ qm_aeq_thread, IRQF_ONESHOT, qm->dev_name, qm);
if (ret)
dev_err(&pdev->dev, "failed to request eq irq, ret = %d", ret);

diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 34c64a02712c..369206363569 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -276,6 +276,7 @@ struct hisi_qm_poll_data {
struct hisi_qm *qm;
struct work_struct work;
u16 *qp_finish_id;
+ u16 eqe_num;
};

/**
--
2.24.0


2023-10-20 05:57:46

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH] crypto: hisilicon/qm - fix EQ/AEQ interrupt issue

On Fri, Oct 13, 2023 at 11:49:57AM +0800, Longfang Liu wrote:
> During hisilicon accelerator live migration operation. In order to
> prevent the problem of EQ/AEQ interrupt loss. Migration driver will
> trigger an EQ/AEQ doorbell at the end of the migration.
>
> This operation may cause double interruption of EQ/AEQ events.
> To ensure that the EQ/AEQ interrupt processing function is normal.
> The interrupt handling functionality of EQ/AEQ needs to be updated.
> Used to handle repeated interrupts event.
>
> Fixes: b0eed085903e ("hisi_acc_vfio_pci: Add support for VFIO live migration")
> Signed-off-by: Longfang Liu <[email protected]>
> ---
> drivers/crypto/hisilicon/qm.c | 105 +++++++++++++---------------------
> include/linux/hisi_acc_qm.h | 1 +
> 2 files changed, 41 insertions(+), 65 deletions(-)

Patch applied. Thanks.
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt