2021-12-11 11:30:09

by Weili Qian

[permalink] [raw]
Subject: [PATCH 0/6] crypto: hisilicon/qm - handling abnormal interrupts event

When the hardware reports abnormal interrupt event, the driver needs to
handle it according to the error type, such as function reset and
disable queue.

Weili Qian (6):
crypto: hisilicon/qm - remove unnecessary device memory reset
crypto: hisilicon/qm - code movement
crypto: hisilicon/qm - modify the handling method after abnormal
interruption
crypto: hisilicon/qm - use request_threaded_irq instead
crypto: hisilicon/qm - reset function if event queue overflows
crypto: hisilicon/qm - disable queue when 'CQ' error

drivers/crypto/hisilicon/qm.c | 278 ++++++++++++++++++++++------------
1 file changed, 183 insertions(+), 95 deletions(-)

--
2.33.0



2021-12-11 11:30:19

by Weili Qian

[permalink] [raw]
Subject: [PATCH 1/6] crypto: hisilicon/qm - remove unnecessary device memory reset

The internal memory of the device needs to be reset only when
the device is globally initialized. Other scenarios, such as
function reset, do not need to perform reset.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index bea8622d80e6..fba6a26089aa 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3581,10 +3581,6 @@ static int __hisi_qm_start(struct hisi_qm *qm)
WARN_ON(!qm->qdma.va);

if (qm->fun_type == QM_HW_PF) {
- ret = qm_dev_mem_reset(qm);
- if (ret)
- return ret;
-
ret = hisi_qm_set_vft(qm, 0, qm->qp_base, qm->qp_num);
if (ret)
return ret;
@@ -5083,6 +5079,12 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
if (qm->err_ini->open_axi_master_ooo)
qm->err_ini->open_axi_master_ooo(qm);

+ ret = qm_dev_mem_reset(qm);
+ if (ret) {
+ pci_err(pdev, "failed to reset device memory\n");
+ return ret;
+ }
+
ret = qm_restart(qm);
if (ret) {
pci_err(pdev, "Failed to start QM!\n");
@@ -5857,6 +5859,14 @@ int hisi_qm_init(struct hisi_qm *qm)
goto err_irq_register;
}

+ if (qm->fun_type == QM_HW_PF) {
+ ret = qm_dev_mem_reset(qm);
+ if (ret) {
+ dev_err(dev, "failed to reset device memory\n");
+ goto err_irq_register;
+ }
+ }
+
if (qm->mode == UACCE_MODE_SVA) {
ret = qm_alloc_uacce(qm);
if (ret < 0)
@@ -6014,8 +6024,11 @@ static int qm_rebuild_for_resume(struct hisi_qm *qm)

qm_cmd_init(qm);
hisi_qm_dev_err_init(qm);
+ ret = qm_dev_mem_reset(qm);
+ if (ret)
+ pci_err(pdev, "failed to reset device memory\n");

- return 0;
+ return ret;
}

/**
--
2.33.0


2021-12-11 11:30:23

by Weili Qian

[permalink] [raw]
Subject: [PATCH 3/6] crypto: hisilicon/qm - modify the handling method after abnormal interruption

After processing an interrupt event and the interrupt function is
enabled by writing the QM_DOORBELL_CMD_AEQ register, the hardware
may generate new interrupt events due to processing other user's task
when the subsequent interrupt events have not been processed. The new
interrupt event will disrupt the current normal processing flow and
cause other problems.

Therefore, the operation of writing the QM_DOORBELL_CMD_AEQ doorbell
register needs to be placed after all interrupt events processing
are completed.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index d124800b67e6..beea3a0fd0a5 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -1015,10 +1015,10 @@ static irqreturn_t qm_aeq_irq(int irq, void *data)
aeqe++;
qm->status.aeq_head++;
}
-
- qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, qm->status.aeq_head, 0);
}

+ qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, qm->status.aeq_head, 0);
+
return IRQ_HANDLED;
}

--
2.33.0


2021-12-11 11:30:24

by Weili Qian

[permalink] [raw]
Subject: [PATCH 5/6] crypto: hisilicon/qm - reset function if event queue overflows

If the hardware reports the event queue overflow by the abnormal interrupt,
the driver needs to reset the function and re-enable the event queue
interrupt and abnormal interrupt.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 68 +++++++++++++++++++++++++++++------
1 file changed, 57 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 93abe1feb0b7..6c61f9d25f75 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -89,6 +89,7 @@

#define QM_AEQE_PHASE(aeqe) ((le32_to_cpu((aeqe)->dw0) >> 16) & 0x1)
#define QM_AEQE_TYPE_SHIFT 17
+#define QM_EQ_OVERFLOW 1

#define QM_DOORBELL_CMD_SQ 0
#define QM_DOORBELL_CMD_CQ 1
@@ -988,6 +989,35 @@ static void qm_set_qp_disable(struct hisi_qp *qp, int offset)
mb();
}

+static void qm_reset_function(struct hisi_qm *qm)
+{
+ struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev));
+ struct device *dev = &qm->pdev->dev;
+ int ret;
+
+ if (qm_check_dev_error(pf_qm))
+ return;
+
+ ret = qm_reset_prepare_ready(qm);
+ if (ret) {
+ dev_err(dev, "reset function not ready\n");
+ return;
+ }
+
+ ret = hisi_qm_stop(qm, QM_FLR);
+ if (ret) {
+ dev_err(dev, "failed to stop qm when reset function\n");
+ goto clear_bit;
+ }
+
+ ret = hisi_qm_start(qm);
+ if (ret)
+ dev_err(dev, "failed to start qm when reset function\n");
+
+clear_bit:
+ qm_reset_bit_clear(qm);
+}
+
static irqreturn_t qm_aeq_thread(int irq, void *data)
{
struct hisi_qm *qm = data;
@@ -996,12 +1026,17 @@ static irqreturn_t qm_aeq_thread(int irq, void *data)

while (QM_AEQE_PHASE(aeqe) == qm->status.aeqc_phase) {
type = le32_to_cpu(aeqe->dw0) >> QM_AEQE_TYPE_SHIFT;
- if (type < ARRAY_SIZE(qm_fifo_overflow))
- dev_err(&qm->pdev->dev, "%s overflow\n",
- qm_fifo_overflow[type]);
- else
+
+ switch (type) {
+ case QM_EQ_OVERFLOW:
+ dev_err(&qm->pdev->dev, "eq overflow, reset function\n");
+ qm_reset_function(qm);
+ return IRQ_HANDLED;
+ default:
dev_err(&qm->pdev->dev, "unknown error type %u\n",
type);
+ break;
+ }

if (qm->status.aeq_head == QM_Q_DEPTH - 1) {
qm->status.aeqc_phase = !qm->status.aeqc_phase;
@@ -3545,6 +3580,22 @@ static void qm_init_eq_aeq_status(struct hisi_qm *qm)
status->aeqc_phase = true;
}

+static void qm_enable_eq_aeq_interrupts(struct hisi_qm *qm)
+{
+ /* Clear eq/aeq interrupt source */
+ qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, qm->status.aeq_head, 0);
+ qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+
+ writel(0x0, qm->io_base + QM_VF_EQ_INT_MASK);
+ writel(0x0, qm->io_base + QM_VF_AEQ_INT_MASK);
+}
+
+static void qm_disable_eq_aeq_interrupts(struct hisi_qm *qm)
+{
+ writel(0x1, qm->io_base + QM_VF_EQ_INT_MASK);
+ writel(0x1, qm->io_base + QM_VF_AEQ_INT_MASK);
+}
+
static int qm_eq_ctx_cfg(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
@@ -3646,9 +3697,7 @@ static int __hisi_qm_start(struct hisi_qm *qm)
return ret;

qm_init_prefetch(qm);
-
- writel(0x0, qm->io_base + QM_VF_EQ_INT_MASK);
- writel(0x0, qm->io_base + QM_VF_AEQ_INT_MASK);
+ qm_enable_eq_aeq_interrupts(qm);

return 0;
}
@@ -3796,10 +3845,7 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r)
hisi_qm_set_hw_reset(qm, QM_RESET_STOP_RX_OFFSET);
}

- /* Mask eq and aeq irq */
- writel(0x1, qm->io_base + QM_VF_EQ_INT_MASK);
- writel(0x1, qm->io_base + QM_VF_AEQ_INT_MASK);
-
+ qm_disable_eq_aeq_interrupts(qm);
if (qm->fun_type == QM_HW_PF) {
ret = hisi_qm_set_vft(qm, 0, 0, 0);
if (ret < 0) {
--
2.33.0


2021-12-11 11:30:24

by Weili Qian

[permalink] [raw]
Subject: [PATCH 4/6] crypto: hisilicon/qm - use request_threaded_irq instead

The abnormal interrupt method needs to be changed, and the changed method
needs to be locked in order to maintain atomicity. Therefore,
replace request_irq() with request_threaded_irq().

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index beea3a0fd0a5..93abe1feb0b7 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -988,16 +988,12 @@ static void qm_set_qp_disable(struct hisi_qp *qp, int offset)
mb();
}

-static irqreturn_t qm_aeq_irq(int irq, void *data)
+static irqreturn_t qm_aeq_thread(int irq, void *data)
{
struct hisi_qm *qm = data;
struct qm_aeqe *aeqe = qm->aeqe + qm->status.aeq_head;
u32 type;

- atomic64_inc(&qm->debug.dfx.aeq_irq_cnt);
- if (!readl(qm->io_base + QM_VF_AEQ_INT_SOURCE))
- return IRQ_NONE;
-
while (QM_AEQE_PHASE(aeqe) == qm->status.aeqc_phase) {
type = le32_to_cpu(aeqe->dw0) >> QM_AEQE_TYPE_SHIFT;
if (type < ARRAY_SIZE(qm_fifo_overflow))
@@ -1022,6 +1018,17 @@ static irqreturn_t qm_aeq_irq(int irq, void *data)
return IRQ_HANDLED;
}

+static irqreturn_t qm_aeq_irq(int irq, void *data)
+{
+ struct hisi_qm *qm = data;
+
+ atomic64_inc(&qm->debug.dfx.aeq_irq_cnt);
+ if (!readl(qm->io_base + QM_VF_AEQ_INT_SOURCE))
+ return IRQ_NONE;
+
+ return IRQ_WAKE_THREAD;
+}
+
static void qm_irq_unregister(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
@@ -5299,8 +5306,10 @@ static int qm_irq_register(struct hisi_qm *qm)
return ret;

if (qm->ver > QM_HW_V1) {
- ret = request_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR),
- qm_aeq_irq, 0, qm->dev_name, qm);
+ ret = request_threaded_irq(pci_irq_vector(pdev,
+ QM_AEQ_EVENT_IRQ_VECTOR),
+ qm_aeq_irq, qm_aeq_thread,
+ 0, qm->dev_name, qm);
if (ret)
goto err_aeq_irq;

--
2.33.0


2021-12-11 11:30:24

by Weili Qian

[permalink] [raw]
Subject: [PATCH 6/6] crypto: hisilicon/qm - disable queue when 'CQ' error

If the hardware reports the 'CQ' overflow or 'CQE' error by the abnormal
interrupt, disable the queue and stop tasks send to hardware.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 6c61f9d25f75..b1fe9c7b8cc8 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -89,7 +89,10 @@

#define QM_AEQE_PHASE(aeqe) ((le32_to_cpu((aeqe)->dw0) >> 16) & 0x1)
#define QM_AEQE_TYPE_SHIFT 17
+#define QM_AEQE_CQN_MASK GENMASK(15, 0)
+#define QM_CQ_OVERFLOW 0
#define QM_EQ_OVERFLOW 1
+#define QM_CQE_ERROR 2

#define QM_DOORBELL_CMD_SQ 0
#define QM_DOORBELL_CMD_CQ 1
@@ -989,6 +992,15 @@ static void qm_set_qp_disable(struct hisi_qp *qp, int offset)
mb();
}

+static void qm_disable_qp(struct hisi_qm *qm, u32 qp_id)
+{
+ struct hisi_qp *qp = &qm->qp_array[qp_id];
+
+ qm_set_qp_disable(qp, QM_RESET_STOP_TX_OFFSET);
+ hisi_qm_stop_qp(qp);
+ qm_set_qp_disable(qp, QM_RESET_STOP_RX_OFFSET);
+}
+
static void qm_reset_function(struct hisi_qm *qm)
{
struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev));
@@ -1022,16 +1034,24 @@ static irqreturn_t qm_aeq_thread(int irq, void *data)
{
struct hisi_qm *qm = data;
struct qm_aeqe *aeqe = qm->aeqe + qm->status.aeq_head;
- u32 type;
+ u32 type, qp_id;

while (QM_AEQE_PHASE(aeqe) == qm->status.aeqc_phase) {
type = le32_to_cpu(aeqe->dw0) >> QM_AEQE_TYPE_SHIFT;
+ qp_id = le32_to_cpu(aeqe->dw0) & QM_AEQE_CQN_MASK;

switch (type) {
case QM_EQ_OVERFLOW:
dev_err(&qm->pdev->dev, "eq overflow, reset function\n");
qm_reset_function(qm);
return IRQ_HANDLED;
+ case QM_CQ_OVERFLOW:
+ dev_err(&qm->pdev->dev, "cq overflow, stop qp(%u)\n",
+ qp_id);
+ fallthrough;
+ case QM_CQE_ERROR:
+ qm_disable_qp(qm, qp_id);
+ break;
default:
dev_err(&qm->pdev->dev, "unknown error type %u\n",
type);
--
2.33.0


2021-12-11 11:30:26

by Weili Qian

[permalink] [raw]
Subject: [PATCH 2/6] crypto: hisilicon/qm - code movement

This patch does not change any code, just code movement. Preparing for
next patch.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 138 +++++++++++++++++-----------------
1 file changed, 69 insertions(+), 69 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index fba6a26089aa..d124800b67e6 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -605,6 +605,75 @@ static bool qm_qp_avail_state(struct hisi_qm *qm, struct hisi_qp *qp,
return avail;
}

+static u32 qm_get_hw_error_status(struct hisi_qm *qm)
+{
+ return readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
+}
+
+static u32 qm_get_dev_err_status(struct hisi_qm *qm)
+{
+ return qm->err_ini->get_dev_hw_err_status(qm);
+}
+
+/* Check if the error causes the master ooo block */
+static int qm_check_dev_error(struct hisi_qm *qm)
+{
+ u32 val, dev_val;
+
+ if (qm->fun_type == QM_HW_VF)
+ return 0;
+
+ val = qm_get_hw_error_status(qm);
+ dev_val = qm_get_dev_err_status(qm);
+
+ if (qm->ver < QM_HW_V3)
+ return (val & QM_ECC_MBIT) ||
+ (dev_val & qm->err_info.ecc_2bits_mask);
+
+ return (val & readl(qm->io_base + QM_OOO_SHUTDOWN_SEL)) ||
+ (dev_val & (~qm->err_info.dev_ce_mask));
+}
+
+static int qm_wait_reset_finish(struct hisi_qm *qm)
+{
+ int delay = 0;
+
+ /* All reset requests need to be queued for processing */
+ while (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
+ msleep(++delay);
+ if (delay > QM_RESET_WAIT_TIMEOUT)
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int qm_reset_prepare_ready(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
+
+ /*
+ * PF and VF on host doesnot support resetting at the
+ * same time on Kunpeng920.
+ */
+ if (qm->ver < QM_HW_V3)
+ return qm_wait_reset_finish(pf_qm);
+
+ return qm_wait_reset_finish(qm);
+}
+
+static void qm_reset_bit_clear(struct hisi_qm *qm)
+{
+ struct pci_dev *pdev = qm->pdev;
+ struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
+
+ if (qm->ver < QM_HW_V3)
+ clear_bit(QM_RESETTING, &pf_qm->misc_ctl);
+
+ clear_bit(QM_RESETTING, &qm->misc_ctl);
+}
+
static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd,
u64 base, u16 queue, bool op)
{
@@ -2108,35 +2177,6 @@ static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm)
return ACC_ERR_RECOVERED;
}

-static u32 qm_get_hw_error_status(struct hisi_qm *qm)
-{
- return readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
-}
-
-static u32 qm_get_dev_err_status(struct hisi_qm *qm)
-{
- return qm->err_ini->get_dev_hw_err_status(qm);
-}
-
-/* Check if the error causes the master ooo block */
-static int qm_check_dev_error(struct hisi_qm *qm)
-{
- u32 val, dev_val;
-
- if (qm->fun_type == QM_HW_VF)
- return 0;
-
- val = qm_get_hw_error_status(qm);
- dev_val = qm_get_dev_err_status(qm);
-
- if (qm->ver < QM_HW_V3)
- return (val & QM_ECC_MBIT) ||
- (dev_val & qm->err_info.ecc_2bits_mask);
-
- return (val & readl(qm->io_base + QM_OOO_SHUTDOWN_SEL)) ||
- (dev_val & (~qm->err_info.dev_ce_mask));
-}
-
static int qm_get_mb_cmd(struct hisi_qm *qm, u64 *msg, u16 fun_num)
{
struct qm_mailbox mailbox;
@@ -4754,46 +4794,6 @@ static int qm_try_stop_vfs(struct hisi_qm *qm, u64 cmd,
return ret;
}

-static int qm_wait_reset_finish(struct hisi_qm *qm)
-{
- int delay = 0;
-
- /* All reset requests need to be queued for processing */
- while (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
- msleep(++delay);
- if (delay > QM_RESET_WAIT_TIMEOUT)
- return -EBUSY;
- }
-
- return 0;
-}
-
-static int qm_reset_prepare_ready(struct hisi_qm *qm)
-{
- struct pci_dev *pdev = qm->pdev;
- struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
-
- /*
- * PF and VF on host doesnot support resetting at the
- * same time on Kunpeng920.
- */
- if (qm->ver < QM_HW_V3)
- return qm_wait_reset_finish(pf_qm);
-
- return qm_wait_reset_finish(qm);
-}
-
-static void qm_reset_bit_clear(struct hisi_qm *qm)
-{
- struct pci_dev *pdev = qm->pdev;
- struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
-
- if (qm->ver < QM_HW_V3)
- clear_bit(QM_RESETTING, &pf_qm->misc_ctl);
-
- clear_bit(QM_RESETTING, &qm->misc_ctl);
-}
-
static int qm_controller_reset_prepare(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
--
2.33.0


2021-12-17 08:40:04

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH 0/6] crypto: hisilicon/qm - handling abnormal interrupts event

On Sat, Dec 11, 2021 at 07:25:13PM +0800, Weili Qian wrote:
> When the hardware reports abnormal interrupt event, the driver needs to
> handle it according to the error type, such as function reset and
> disable queue.
>
> Weili Qian (6):
> crypto: hisilicon/qm - remove unnecessary device memory reset
> crypto: hisilicon/qm - code movement
> crypto: hisilicon/qm - modify the handling method after abnormal
> interruption
> crypto: hisilicon/qm - use request_threaded_irq instead
> crypto: hisilicon/qm - reset function if event queue overflows
> crypto: hisilicon/qm - disable queue when 'CQ' error
>
> drivers/crypto/hisilicon/qm.c | 278 ++++++++++++++++++++++------------
> 1 file changed, 183 insertions(+), 95 deletions(-)

All applied. Thanks.
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt