2022-06-09 12:38:18

by Weili Qian

[permalink] [raw]
Subject: [PATCH 0/3] crypto: hisilicon/qm - modify event interrupt processing

This patchset contains following updates:
1. Modify accelerator devices event irq processing.
2. Some cleanups.

Weili Qian (3):
crypto: hisilicon/qm - add functions for releasing resources
crypto: hisilicon/qm - move alloc qm->wq to qm.c
crypto: hisilicon/qm - modify event irq processing

drivers/crypto/hisilicon/qm.c | 203 +++++++++++++++--------
drivers/crypto/hisilicon/sec2/sec_main.c | 24 +--
drivers/crypto/hisilicon/zip/zip_main.c | 17 +-
include/linux/hisi_acc_qm.h | 8 +-
4 files changed, 141 insertions(+), 111 deletions(-)

--
2.33.0


2022-06-09 12:38:18

by Weili Qian

[permalink] [raw]
Subject: [PATCH 2/3] crypto: hisilicon/qm - move alloc qm->wq to qm.c

Before stopping the function, the driver needs to flush all the remaining
work about event irq. Therefore, accelerator drivers use a private
workqueue(qm->wq) to handle event irq instead of the system workqueue.
This patch moves alloc workqueue from sec_main.c and zip_main.c to qm.c.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 35 +++++++++++++++++-------
drivers/crypto/hisilicon/sec2/sec_main.c | 24 +---------------
drivers/crypto/hisilicon/zip/zip_main.c | 17 +-----------
3 files changed, 27 insertions(+), 49 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 903896ab5be5..f8d36b68494e 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -959,10 +959,7 @@ static irqreturn_t do_qm_irq(int irq, void *data)
struct hisi_qm *qm = (struct hisi_qm *)data;

/* the workqueue created by device driver of QM */
- if (qm->wq)
- queue_work(qm->wq, &qm->work);
- else
- schedule_work(&qm->work);
+ queue_work(qm->wq, &qm->work);

return IRQ_HANDLED;
}
@@ -3134,11 +3131,8 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp)
if (ret)
dev_err(dev, "Failed to drain out data for stopping!\n");

- if (qp->qm->wq)
- flush_workqueue(qp->qm->wq);
- else
- flush_work(&qp->qm->work);

+ flush_workqueue(qp->qm->wq);
if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used)))
qp_stop_fail_cb(qp);

@@ -3672,6 +3666,11 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
debug->qm_last_words = NULL;
}

+static void hisi_qm_unint_work(struct hisi_qm *qm)
+{
+ destroy_workqueue(qm->wq);
+}
+
static void hisi_qm_memory_uninit(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
@@ -3698,6 +3697,7 @@ void hisi_qm_uninit(struct hisi_qm *qm)
qm_last_regs_uninit(qm);

qm_cmd_uninit(qm);
+ hisi_qm_unint_work(qm);
down_write(&qm->qps_lock);

if (!qm_avail_state(qm, QM_CLOSE)) {
@@ -6022,7 +6022,7 @@ static int hisi_qm_pci_init(struct hisi_qm *qm)
return ret;
}

-static void hisi_qm_init_work(struct hisi_qm *qm)
+static int hisi_qm_init_work(struct hisi_qm *qm)
{
INIT_WORK(&qm->work, qm_work_process);
if (qm->fun_type == QM_HW_PF)
@@ -6030,6 +6030,16 @@ static void hisi_qm_init_work(struct hisi_qm *qm)

if (qm->ver > QM_HW_V2)
INIT_WORK(&qm->cmd_process, qm_cmd_process);
+
+ qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
+ WQ_UNBOUND, num_online_cpus(),
+ pci_name(qm->pdev));
+ if (!qm->wq) {
+ pci_err(qm->pdev, "failed to alloc workqueue!\n");
+ return -ENOMEM;
+ }
+
+ return 0;
}

static int hisi_qp_alloc_memory(struct hisi_qm *qm)
@@ -6180,7 +6190,10 @@ int hisi_qm_init(struct hisi_qm *qm)
if (ret)
goto err_alloc_uacce;

- hisi_qm_init_work(qm);
+ ret = hisi_qm_init_work(qm);
+ if (ret)
+ goto err_free_qm_memory;
+
qm_cmd_init(qm);
atomic_set(&qm->status.flags, QM_INIT);

@@ -6188,6 +6201,8 @@ int hisi_qm_init(struct hisi_qm *qm)

return 0;

+err_free_qm_memory:
+ hisi_qm_memory_uninit(qm);
err_alloc_uacce:
if (qm->use_sva) {
uacce_remove(qm->uacce);
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 4d85d2cbf376..bdb690aaed12 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -1002,8 +1002,6 @@ static int sec_pf_probe_init(struct sec_dev *sec)

static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
{
- int ret;
-
qm->pdev = pdev;
qm->ver = pdev->revision;
qm->algs = "cipher\ndigest\naead";
@@ -1029,25 +1027,7 @@ static int sec_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
qm->qp_num = SEC_QUEUE_NUM_V1 - SEC_PF_DEF_Q_NUM;
}

- /*
- * WQ_HIGHPRI: SEC request must be low delayed,
- * so need a high priority workqueue.
- * WQ_UNBOUND: SEC task is likely with long
- * running CPU intensive workloads.
- */
- qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
- WQ_UNBOUND, num_online_cpus(),
- pci_name(qm->pdev));
- if (!qm->wq) {
- pci_err(qm->pdev, "fail to alloc workqueue\n");
- return -ENOMEM;
- }
-
- ret = hisi_qm_init(qm);
- if (ret)
- destroy_workqueue(qm->wq);
-
- return ret;
+ return hisi_qm_init(qm);
}

static void sec_qm_uninit(struct hisi_qm *qm)
@@ -1078,8 +1058,6 @@ static int sec_probe_init(struct sec_dev *sec)
static void sec_probe_uninit(struct hisi_qm *qm)
{
hisi_qm_dev_err_uninit(qm);
-
- destroy_workqueue(qm->wq);
}

static void sec_iommu_used_check(struct sec_dev *sec)
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 9c925e9c0a2d..c3303d99acac 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -990,8 +990,6 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)

static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
{
- int ret;
-
qm->pdev = pdev;
qm->ver = pdev->revision;
if (pdev->revision >= QM_HW_V3)
@@ -1021,25 +1019,12 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev)
qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM;
}

- qm->wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_MEM_RECLAIM |
- WQ_UNBOUND, num_online_cpus(),
- pci_name(qm->pdev));
- if (!qm->wq) {
- pci_err(qm->pdev, "fail to alloc workqueue\n");
- return -ENOMEM;
- }
-
- ret = hisi_qm_init(qm);
- if (ret)
- destroy_workqueue(qm->wq);
-
- return ret;
+ return hisi_qm_init(qm);
}

static void hisi_zip_qm_uninit(struct hisi_qm *qm)
{
hisi_qm_uninit(qm);
- destroy_workqueue(qm->wq);
}

static int hisi_zip_probe_init(struct hisi_zip *hisi_zip)
--
2.33.0

2022-06-09 12:38:23

by Weili Qian

[permalink] [raw]
Subject: [PATCH 3/3] crypto: hisilicon/qm - modify event irq processing

When the driver receives an event interrupt, the driver will enable
the event interrupt after handling all completed tasks on the function,
tasks on the function are parsed through only one thread. If the task's
user callback takes time, other tasks on the function will be blocked.

Therefore, the event irq processing is modified as follows:
1. Obtain the ID of the queue that completes the task.
2. Enable event interrupt.
3. Parse the completed tasks in the queue and call the user callback.
Enabling event interrupt in advance can quickly report pending event
interrupts and process tasks in multiple threads.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 142 ++++++++++++++++++++++------------
include/linux/hisi_acc_qm.h | 8 +-
2 files changed, 99 insertions(+), 51 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index f8d36b68494e..ad83c194d664 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -877,13 +877,6 @@ static void qm_pm_put_sync(struct hisi_qm *qm)
pm_runtime_put_autosuspend(dev);
}

-static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
-{
- u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
-
- return &qm->qp_array[cqn];
-}
-
static void qm_cq_head_update(struct hisi_qp *qp)
{
if (qp->qp_status.cq_head == QM_Q_DEPTH - 1) {
@@ -894,47 +887,37 @@ static void qm_cq_head_update(struct hisi_qp *qp)
}
}

-static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
+static void qm_poll_req_cb(struct hisi_qp *qp)
{
- if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
- return;
-
- if (qp->event_cb) {
- qp->event_cb(qp);
- return;
- }
-
- if (qp->req_cb) {
- struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
-
- while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
- dma_rmb();
- qp->req_cb(qp, qp->sqe + qm->sqe_size *
- le16_to_cpu(cqe->sq_head));
- qm_cq_head_update(qp);
- cqe = qp->cqe + qp->qp_status.cq_head;
- qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
- qp->qp_status.cq_head, 0);
- atomic_dec(&qp->qp_status.used);
- }
+ struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
+ struct hisi_qm *qm = qp->qm;

- /* set c_flag */
+ while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
+ dma_rmb();
+ qp->req_cb(qp, qp->sqe + qm->sqe_size *
+ le16_to_cpu(cqe->sq_head));
+ qm_cq_head_update(qp);
+ cqe = qp->cqe + qp->qp_status.cq_head;
qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
- qp->qp_status.cq_head, 1);
+ qp->qp_status.cq_head, 0);
+ atomic_dec(&qp->qp_status.used);
}
+
+ /* set c_flag */
+ qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ, qp->qp_status.cq_head, 1);
}

-static void qm_work_process(struct work_struct *work)
+static int qm_get_complete_eqe_num(struct hisi_qm_poll_data *poll_data)
{
- struct hisi_qm *qm = container_of(work, struct hisi_qm, work);
+ struct hisi_qm *qm = poll_data->qm;
struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
- struct hisi_qp *qp;
int eqe_num = 0;
+ u16 cqn;

while (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
+ cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
+ poll_data->qp_finish_id[eqe_num] = cqn;
eqe_num++;
- qp = qm_to_hisi_qp(qm, eqe);
- qm_poll_qp(qp, qm);

if (qm->status.eq_head == QM_EQ_DEPTH - 1) {
qm->status.eqc_phase = !qm->status.eqc_phase;
@@ -945,34 +928,70 @@ static void qm_work_process(struct work_struct *work)
qm->status.eq_head++;
}

- if (eqe_num == QM_EQ_DEPTH / 2 - 1) {
- eqe_num = 0;
- qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
- }
+ if (eqe_num == (QM_EQ_DEPTH >> 1) - 1)
+ break;
}

qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+
+ return eqe_num;
}

-static irqreturn_t do_qm_irq(int irq, void *data)
+static void qm_work_process(struct work_struct *work)
{
- struct hisi_qm *qm = (struct hisi_qm *)data;
+ struct hisi_qm_poll_data *poll_data =
+ container_of(work, struct hisi_qm_poll_data, work);
+ struct hisi_qm *qm = poll_data->qm;
+ struct hisi_qp *qp;
+ int eqe_num, i;

- /* the workqueue created by device driver of QM */
- queue_work(qm->wq, &qm->work);
+ /* Get qp id of completed tasks and re-enable the interrupt. */
+ eqe_num = qm_get_complete_eqe_num(poll_data);
+ for (i = eqe_num - 1; i >= 0; i--) {
+ qp = &qm->qp_array[poll_data->qp_finish_id[i]];
+ if (unlikely(atomic_read(&qp->qp_status.flags) == QP_STOP))
+ continue;

- return IRQ_HANDLED;
+ if (qp->event_cb) {
+ qp->event_cb(qp);
+ continue;
+ }
+
+ if (likely(qp->req_cb))
+ qm_poll_req_cb(qp);
+ }
+}
+
+static bool do_qm_irq(struct hisi_qm *qm)
+{
+ struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
+ struct hisi_qm_poll_data *poll_data;
+ u16 cqn;
+
+ if (!readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
+ return false;
+
+ if (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
+ cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
+ poll_data = &qm->poll_data[cqn];
+ queue_work(qm->wq, &poll_data->work);
+
+ return true;
+ }
+
+ return false;
}

static irqreturn_t qm_irq(int irq, void *data)
{
struct hisi_qm *qm = data;
+ bool ret;

- if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
- return do_qm_irq(irq, data);
+ ret = do_qm_irq(qm);
+ if (ret)
+ return IRQ_HANDLED;

atomic64_inc(&qm->debug.dfx.err_irq_cnt);
- dev_err(&qm->pdev->dev, "invalid int source\n");
qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);

return IRQ_NONE;
@@ -3551,8 +3570,10 @@ static void hisi_qp_memory_uninit(struct hisi_qm *qm, int num)
for (i = num - 1; i >= 0; i--) {
qdma = &qm->qp_array[i].qdma;
dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
+ kfree(qm->poll_data[i].qp_finish_id);
}

+ kfree(qm->poll_data);
kfree(qm->qp_array);
}

@@ -3561,12 +3582,18 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
struct device *dev = &qm->pdev->dev;
size_t off = qm->sqe_size * QM_Q_DEPTH;
struct hisi_qp *qp;
+ int ret = -ENOMEM;
+
+ qm->poll_data[id].qp_finish_id = kcalloc(qm->qp_num, sizeof(u16),
+ GFP_KERNEL);
+ if (!qm->poll_data[id].qp_finish_id)
+ return -ENOMEM;

qp = &qm->qp_array[id];
qp->qdma.va = dma_alloc_coherent(dev, dma_size, &qp->qdma.dma,
GFP_KERNEL);
if (!qp->qdma.va)
- return -ENOMEM;
+ goto err_free_qp_finish_id;

qp->sqe = qp->qdma.va;
qp->sqe_dma = qp->qdma.dma;
@@ -3577,6 +3604,10 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
qp->qp_id = id;

return 0;
+
+err_free_qp_finish_id:
+ kfree(qm->poll_data[id].qp_finish_id);
+ return ret;
}

static void hisi_qm_pre_init(struct hisi_qm *qm)
@@ -6024,7 +6055,11 @@ static int hisi_qm_pci_init(struct hisi_qm *qm)

static int hisi_qm_init_work(struct hisi_qm *qm)
{
- INIT_WORK(&qm->work, qm_work_process);
+ int i;
+
+ for (i = 0; i < qm->qp_num; i++)
+ INIT_WORK(&qm->poll_data[i].work, qm_work_process);
+
if (qm->fun_type == QM_HW_PF)
INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);

@@ -6052,11 +6087,18 @@ static int hisi_qp_alloc_memory(struct hisi_qm *qm)
if (!qm->qp_array)
return -ENOMEM;

+ qm->poll_data = kcalloc(qm->qp_num, sizeof(struct hisi_qm_poll_data), GFP_KERNEL);
+ if (!qm->poll_data) {
+ kfree(qm->qp_array);
+ return -ENOMEM;
+ }
+
/* one more page for device or qp statuses */
qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
sizeof(struct qm_cqe) * QM_Q_DEPTH;
qp_dma_size = PAGE_ALIGN(qp_dma_size) + PAGE_SIZE;
for (i = 0; i < qm->qp_num; i++) {
+ qm->poll_data[i].qm = qm;
ret = hisi_qp_memory_init(qm, qp_dma_size, i);
if (ret)
goto err_init_qp_mem;
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 6cabafffd0dd..116e8bd68c99 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -265,6 +265,12 @@ struct hisi_qm_list {
void (*unregister_from_crypto)(struct hisi_qm *qm);
};

+struct hisi_qm_poll_data {
+ struct hisi_qm *qm;
+ struct work_struct work;
+ u16 *qp_finish_id;
+};
+
struct hisi_qm {
enum qm_hw_ver ver;
enum qm_fun_type fun_type;
@@ -302,6 +308,7 @@ struct hisi_qm {
struct rw_semaphore qps_lock;
struct idr qp_idr;
struct hisi_qp *qp_array;
+ struct hisi_qm_poll_data *poll_data;

struct mutex mailbox_lock;

@@ -312,7 +319,6 @@ struct hisi_qm {
u32 error_mask;

struct workqueue_struct *wq;
- struct work_struct work;
struct work_struct rst_work;
struct work_struct cmd_process;

--
2.33.0

2022-06-09 12:38:32

by Weili Qian

[permalink] [raw]
Subject: [PATCH 1/3] crypto: hisilicon/qm - add functions for releasing resources

The resources allocated by hisi_qm_memory_init() are released by
hisi_qm_uninit(). Add hisi_qm_memory_uninit() to release resources,
no functional change.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index b4ca2eb034d7..903896ab5be5 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3672,6 +3672,21 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
debug->qm_last_words = NULL;
}

+static void hisi_qm_memory_uninit(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+
+ hisi_qp_memory_uninit(qm, qm->qp_num);
+ if (qm->qdma.va) {
+ hisi_qm_cache_wb(qm);
+ dma_free_coherent(dev, qm->qdma.size,
+ qm->qdma.va, qm->qdma.dma);
+ }
+
+ idr_destroy(&qm->qp_idr);
+ kfree(qm->factor);
+}
+
/**
* hisi_qm_uninit() - Uninitialize qm.
* @qm: The qm needed uninit.
@@ -3680,13 +3695,9 @@ static void qm_last_regs_uninit(struct hisi_qm *qm)
*/
void hisi_qm_uninit(struct hisi_qm *qm)
{
- struct pci_dev *pdev = qm->pdev;
- struct device *dev = &pdev->dev;
-
qm_last_regs_uninit(qm);

qm_cmd_uninit(qm);
- kfree(qm->factor);
down_write(&qm->qps_lock);

if (!qm_avail_state(qm, QM_CLOSE)) {
@@ -3694,14 +3705,7 @@ void hisi_qm_uninit(struct hisi_qm *qm)
return;
}

- hisi_qp_memory_uninit(qm, qm->qp_num);
- idr_destroy(&qm->qp_idr);
-
- if (qm->qdma.va) {
- hisi_qm_cache_wb(qm);
- dma_free_coherent(dev, qm->qdma.size,
- qm->qdma.va, qm->qdma.dma);
- }
+ hisi_qm_memory_uninit(qm);
hisi_qm_set_state(qm, QM_NOT_READY);
up_write(&qm->qps_lock);

--
2.33.0

2022-06-17 09:22:41

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH 0/3] crypto: hisilicon/qm - modify event interrupt processing

On Thu, Jun 09, 2022 at 08:31:16PM +0800, Weili Qian wrote:
> This patchset contains following updates:
> 1. Modify accelerator devices event irq processing.
> 2. Some cleanups.
>
> Weili Qian (3):
> crypto: hisilicon/qm - add functions for releasing resources
> crypto: hisilicon/qm - move alloc qm->wq to qm.c
> crypto: hisilicon/qm - modify event irq processing
>
> drivers/crypto/hisilicon/qm.c | 203 +++++++++++++++--------
> drivers/crypto/hisilicon/sec2/sec_main.c | 24 +--
> drivers/crypto/hisilicon/zip/zip_main.c | 17 +-
> include/linux/hisi_acc_qm.h | 8 +-
> 4 files changed, 141 insertions(+), 111 deletions(-)

All applied. Thanks.
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt