2023-08-11 14:23:51

by Weili Qian

[permalink] [raw]
Subject: [PATCH v2 0/7] crypto: hisilicon - fix some issues in hisilicon drivers

This patchset fixes some issues of the HiSilicon accelerator drivers.

The first patch uses 128bit atomic operations to access mailbox instead of
the generic IO interface. The reason is that one QM hardware entity in
one accelerator servers QM mailbox MMIO interfaces in related PF and VFs.
A mutex cannot lock mailbox processes in different functions.

The second patch allocs memory for mailbox openration when the driver is
bound to the device. The software directly returns after waiting for the
mailbox times out, but the hardware does not cancel the operation. If the
temporary memory is used, the hardware may access the memory after it is
released.

The third patch enables the maximum number of queues supported by the
device instead of returning error, when the maximum number of queues is
less than the default value.

The fourth patch checks the number of queues on the function before
algorithm registering to crypto subsystem. If the number of queues
does not meet the minimum number of queues for task execution, the
function is not registered to crypto to avoid process initialization
failure.

The fifth patch adds a cond_resched() to prevent soft lockup.
The sixth patch fixes aeq type value.
The last patch increases function communication waiting time so that the
PF can communicate with all VFs.

v2:
- Re-describe the issues resolved by these patches.
- Fix some code styles.

Longfang Liu (1):
crypto: hisilicon/qm - fix PF queue parameter issue

Weili Qian (6):
crypto: hisilicon/qm - obtain the mailbox configuration at one time
crypto: hisilicon/qm - alloc buffer to set and get xqc
crypto: hisilicon/qm - check function qp num before alg register
crypto: hisilicon/qm - prevent soft lockup in qm_poll_req_cb()'s loop
crypto: hisilicon/qm - fix the type value of aeq
crypto: hisilicon/qm - increase function communication waiting time

drivers/crypto/hisilicon/debugfs.c | 75 ++-
drivers/crypto/hisilicon/hpre/hpre_crypto.c | 25 +-
drivers/crypto/hisilicon/hpre/hpre_main.c | 19 +-
drivers/crypto/hisilicon/qm.c | 567 ++++++++++----------
drivers/crypto/hisilicon/qm_common.h | 6 +-
drivers/crypto/hisilicon/sec2/sec_crypto.c | 31 +-
drivers/crypto/hisilicon/sec2/sec_main.c | 29 +-
drivers/crypto/hisilicon/zip/zip_crypto.c | 29 +-
drivers/crypto/hisilicon/zip/zip_main.c | 19 +-
include/linux/hisi_acc_qm.h | 39 +-
10 files changed, 475 insertions(+), 364 deletions(-)
mode change 100644 => 100755 drivers/crypto/hisilicon/qm.c

--
2.33.0



2023-08-11 14:23:51

by Weili Qian

[permalink] [raw]
Subject: [PATCH v2 1/7] crypto: hisilicon/qm - obtain the mailbox configuration at one time

The malibox needs to be triggered by a 128bit atomic operation. The
reason is that one QM hardware entity in one accelerator servers QM
mailbox MMIO interfaces in related PF and VFs. A mutex cannot lock
mailbox processes in different functions. When multiple functions access
the mailbox simultaneously, if the generic IO interface readq/writeq
is used to access the mailbox, the data read from mailbox or written to
mailbox is unpredictable. Therefore, the generic IO interface is changed
to a 128bit atomic operation.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 160 ++++++++++++++++++++++------------
include/linux/hisi_acc_qm.h | 1 -
2 files changed, 105 insertions(+), 56 deletions(-)
mode change 100644 => 100755 drivers/crypto/hisilicon/qm.c

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
old mode 100644
new mode 100755
index a99fd589445c..13cd2617170e
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -33,6 +33,10 @@
#define QM_MB_CMD_DATA_SHIFT 32
#define QM_MB_CMD_DATA_MASK GENMASK(31, 0)
#define QM_MB_STATUS_MASK GENMASK(12, 9)
+#define QM_MB_BUSY_MASK BIT(13)
+#define QM_MB_SIZE 16
+#define QM_MB_MAX_WAIT_CNT 6000
+#define QM_MB_WAIT_READY_CNT 10

/* sqc shift */
#define QM_SQ_HOP_NUM_SHIFT 0
@@ -597,17 +601,6 @@ static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd,
mailbox->rsvd = 0;
}

-/* return 0 mailbox ready, -ETIMEDOUT hardware timeout */
-int hisi_qm_wait_mb_ready(struct hisi_qm *qm)
-{
- u32 val;
-
- return readl_relaxed_poll_timeout(qm->io_base + QM_MB_CMD_SEND_BASE,
- val, !((val >> QM_MB_BUSY_SHIFT) &
- 0x1), POLL_PERIOD, POLL_TIMEOUT);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_wait_mb_ready);
-
/* 128 bit should be written to hardware at one time to trigger a mailbox */
static void qm_mb_write(struct hisi_qm *qm, const void *src)
{
@@ -618,7 +611,7 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)
#endif

if (!IS_ENABLED(CONFIG_ARM64)) {
- memcpy_toio(fun_base, src, 16);
+ memcpy_toio(fun_base, src, QM_MB_SIZE);
dma_wmb();
return;
}
@@ -635,35 +628,95 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)
#endif
}

-static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox)
+/* 128 bit should be read from hardware at one time */
+static void qm_mb_read(struct hisi_qm *qm, void *dst)
{
- int ret;
- u32 val;
+ const void __iomem *fun_base = qm->io_base + QM_MB_CMD_SEND_BASE;
+
+#if IS_ENABLED(CONFIG_ARM64)
+ unsigned long tmp0 = 0, tmp1 = 0;
+#endif

- if (unlikely(hisi_qm_wait_mb_ready(qm))) {
- dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n");
- ret = -EBUSY;
- goto mb_busy;
+ if (!IS_ENABLED(CONFIG_ARM64)) {
+ memcpy_fromio(dst, fun_base, QM_MB_SIZE);
+ dma_wmb();
+ return;
}

- qm_mb_write(qm, mailbox);
+#if IS_ENABLED(CONFIG_ARM64)
+ asm volatile("ldp %0, %1, %3\n"
+ "stp %0, %1, %2\n"
+ "dmb oshst\n"
+ : "=&r" (tmp0),
+ "=&r" (tmp1),
+ "+Q" (*((char *)dst))
+ : "Q" (*((char __iomem *)fun_base))
+ : "memory");
+#endif
+}
+
+int hisi_qm_wait_mb_ready(struct hisi_qm *qm)
+{
+ struct qm_mailbox mailbox;
+ int i = 0;
+
+ while (i++ < QM_MB_WAIT_READY_CNT) {
+ qm_mb_read(qm, &mailbox);
+ if (!(le16_to_cpu(mailbox.w0) & QM_MB_BUSY_MASK))
+ return 0;

- if (unlikely(hisi_qm_wait_mb_ready(qm))) {
- dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n");
- ret = -ETIMEDOUT;
- goto mb_busy;
+ usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX);
}

- val = readl(qm->io_base + QM_MB_CMD_SEND_BASE);
- if (val & QM_MB_STATUS_MASK) {
- dev_err(&qm->pdev->dev, "QM mailbox operation failed!\n");
- ret = -EIO;
- goto mb_busy;
+ dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n");
+
+ return -EBUSY;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_wait_mb_ready);
+
+static int qm_wait_mb_finish(struct hisi_qm *qm, struct qm_mailbox *mailbox)
+{
+ struct device *dev = &qm->pdev->dev;
+ int i = 0;
+
+ while (++i) {
+ qm_mb_read(qm, mailbox);
+ if (!(le16_to_cpu(mailbox->w0) & QM_MB_BUSY_MASK))
+ break;
+
+ if (i == QM_MB_MAX_WAIT_CNT) {
+ dev_err(dev, "QM mailbox operation timeout!\n");
+ return -ETIMEDOUT;
+ }
+
+ usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX);
+ }
+
+ if (le16_to_cpu(mailbox->w0) & QM_MB_STATUS_MASK) {
+ dev_err(dev, "QM mailbox operation failed!\n");
+ return -EIO;
}

return 0;
+}
+
+static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox)
+{
+ int ret;

-mb_busy:
+ ret = hisi_qm_wait_mb_ready(qm);
+ if (ret)
+ goto mb_err_cnt_increase;
+
+ qm_mb_write(qm, mailbox);
+
+ ret = qm_wait_mb_finish(qm, mailbox);
+ if (ret)
+ goto mb_err_cnt_increase;
+
+ return 0;
+
+mb_err_cnt_increase:
atomic64_inc(&qm->debug.dfx.mb_err_cnt);
return ret;
}
@@ -687,6 +740,24 @@ int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
}
EXPORT_SYMBOL_GPL(hisi_qm_mb);

+static int hisi_qm_mb_read(struct hisi_qm *qm, u64 *base, u8 cmd, u16 queue)
+{
+ struct qm_mailbox mailbox;
+ int ret;
+
+ qm_mb_pre_init(&mailbox, cmd, 0, queue, 1);
+ mutex_lock(&qm->mailbox_lock);
+ ret = qm_mb_nolock(qm, &mailbox);
+ mutex_unlock(&qm->mailbox_lock);
+ if (ret)
+ return ret;
+
+ *base = le32_to_cpu(mailbox.base_l) |
+ ((u64)le32_to_cpu(mailbox.base_h) << 32);
+
+ return 0;
+}
+
static void qm_db_v1(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority)
{
u64 doorbell;
@@ -1308,12 +1379,10 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
u64 sqc_vft;
int ret;

- ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
+ ret = hisi_qm_mb_read(qm, &sqc_vft, QM_MB_CMD_SQC_VFT_V2, 0);
if (ret)
return ret;

- sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
- ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32);
*base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2);
*number = (QM_SQC_VFT_NUM_MASK_V2 &
(sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1;
@@ -1484,25 +1553,6 @@ static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm)
return ACC_ERR_RECOVERED;
}

-static int qm_get_mb_cmd(struct hisi_qm *qm, u64 *msg, u16 fun_num)
-{
- struct qm_mailbox mailbox;
- int ret;
-
- qm_mb_pre_init(&mailbox, QM_MB_CMD_DST, 0, fun_num, 0);
- mutex_lock(&qm->mailbox_lock);
- ret = qm_mb_nolock(qm, &mailbox);
- if (ret)
- goto err_unlock;
-
- *msg = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
- ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32);
-
-err_unlock:
- mutex_unlock(&qm->mailbox_lock);
- return ret;
-}
-
static void qm_clear_cmd_interrupt(struct hisi_qm *qm, u64 vf_mask)
{
u32 val;
@@ -1522,7 +1572,7 @@ static void qm_handle_vf_msg(struct hisi_qm *qm, u32 vf_id)
u64 msg;
int ret;

- ret = qm_get_mb_cmd(qm, &msg, vf_id);
+ ret = hisi_qm_mb_read(qm, &msg, QM_MB_CMD_DST, vf_id);
if (ret) {
dev_err(dev, "failed to get msg from VF(%u)!\n", vf_id);
return;
@@ -4755,7 +4805,7 @@ static int qm_wait_pf_reset_finish(struct hisi_qm *qm)
* Whether message is got successfully,
* VF needs to ack PF by clearing the interrupt.
*/
- ret = qm_get_mb_cmd(qm, &msg, 0);
+ ret = hisi_qm_mb_read(qm, &msg, QM_MB_CMD_DST, 0);
qm_clear_cmd_interrupt(qm, 0);
if (ret) {
dev_err(dev, "failed to get msg from PF in reset done!\n");
@@ -4809,7 +4859,7 @@ static void qm_handle_cmd_msg(struct hisi_qm *qm, u32 fun_num)
* Get the msg from source by sending mailbox. Whether message is got
* successfully, destination needs to ack source by clearing the interrupt.
*/
- ret = qm_get_mb_cmd(qm, &msg, fun_num);
+ ret = hisi_qm_mb_read(qm, &msg, QM_MB_CMD_DST, fun_num);
qm_clear_cmd_interrupt(qm, BIT(fun_num));
if (ret) {
dev_err(dev, "failed to get msg from source!\n");
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 39fbfb4be944..0f83c19a8f36 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -52,7 +52,6 @@
#define QM_MB_OP_SHIFT 14
#define QM_MB_CMD_DATA_ADDR_L 0x304
#define QM_MB_CMD_DATA_ADDR_H 0x308
-#define QM_MB_MAX_WAIT_CNT 6000

/* doorbell */
#define QM_DOORBELL_CMD_SQ 0
--
2.33.0


2023-08-11 14:23:51

by Weili Qian

[permalink] [raw]
Subject: [PATCH v2 5/7] crypto: hisilicon/qm - prevent soft lockup in qm_poll_req_cb()'s loop

The function qm_poll_req_cb() may take a while due to complex req_cb,
so soft lockup may occur in kernel with preemption disabled.

The error logs:
watchdog: BUG: soft lockup - CPU#23 stuck for 23s! [kworker/u262:1:1407]
[ 1461.978428][ C23] Call trace:
[ 1461.981890][ C23] complete+0x8c/0xf0
[ 1461.986031][ C23] kcryptd_async_done+0x154/0x1f4 [dm_crypt]
[ 1461.992154][ C23] sec_skcipher_callback+0x7c/0xf4 [hisi_sec2]
[ 1461.998446][ C23] sec_req_cb+0x104/0x1f4 [hisi_sec2]
[ 1462.003950][ C23] qm_poll_req_cb+0xcc/0x150 [hisi_qm]
[ 1462.009531][ C23] qm_work_process+0x60/0xc0 [hisi_qm]
[ 1462.015101][ C23] process_one_work+0x1c4/0x470
[ 1462.020052][ C23] worker_thread+0x150/0x3c4
[ 1462.024735][ C23] kthread+0x108/0x13c
[ 1462.028889][ C23] ret_from_fork+0x10/0x18

Add a cond_resched() to prevent that.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 376f6a3c5c7f..c9ed30b181bb 100755
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -958,6 +958,8 @@ static void qm_poll_req_cb(struct hisi_qp *qp)
qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
qp->qp_status.cq_head, 0);
atomic_dec(&qp->qp_status.used);
+
+ cond_resched();
}

/* set c_flag */
--
2.33.0


2023-08-11 14:23:51

by Weili Qian

[permalink] [raw]
Subject: [PATCH v2 4/7] crypto: hisilicon/qm - check function qp num before alg register

When the Kunpeng accelerator executes tasks such as encryption
and decryption have minimum requirements on the number of device
queues. If the number of queues does not meet the requirement,
the process initialization will fail. Therefore, the driver checks
the number of queues on the device before registering the algorithm.
If the number does not meet the requirements, the driver does not register
the algorithm to crypto subsystem.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/hpre/hpre_crypto.c | 25 ++++++++++-
drivers/crypto/hisilicon/hpre/hpre_main.c | 14 +++---
drivers/crypto/hisilicon/qm.c | 47 +++++++--------------
drivers/crypto/hisilicon/sec2/sec_crypto.c | 31 ++++++++++++--
drivers/crypto/hisilicon/sec2/sec_main.c | 24 +++++------
drivers/crypto/hisilicon/zip/zip_crypto.c | 29 ++++++++++++-
drivers/crypto/hisilicon/zip/zip_main.c | 14 +++---
include/linux/hisi_acc_qm.h | 18 +++++++-
8 files changed, 138 insertions(+), 64 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 9a1c61be32cc..764532a6ca82 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -57,6 +57,9 @@ struct hpre_ctx;
#define HPRE_DRV_ECDH_MASK_CAP BIT(2)
#define HPRE_DRV_X25519_MASK_CAP BIT(5)

+static DEFINE_MUTEX(hpre_algs_lock);
+static unsigned int hpre_available_devs;
+
typedef void (*hpre_cb)(struct hpre_ctx *ctx, void *sqe);

struct hpre_rsa_ctx {
@@ -2202,11 +2205,17 @@ static void hpre_unregister_x25519(struct hisi_qm *qm)

int hpre_algs_register(struct hisi_qm *qm)
{
- int ret;
+ int ret = 0;
+
+ mutex_lock(&hpre_algs_lock);
+ if (hpre_available_devs) {
+ hpre_available_devs++;
+ goto unlock;
+ }

ret = hpre_register_rsa(qm);
if (ret)
- return ret;
+ goto unlock;

ret = hpre_register_dh(qm);
if (ret)
@@ -2220,6 +2229,9 @@ int hpre_algs_register(struct hisi_qm *qm)
if (ret)
goto unreg_ecdh;

+ hpre_available_devs++;
+ mutex_unlock(&hpre_algs_lock);
+
return ret;

unreg_ecdh:
@@ -2228,13 +2240,22 @@ int hpre_algs_register(struct hisi_qm *qm)
hpre_unregister_dh(qm);
unreg_rsa:
hpre_unregister_rsa(qm);
+unlock:
+ mutex_unlock(&hpre_algs_lock);
return ret;
}

void hpre_algs_unregister(struct hisi_qm *qm)
{
+ mutex_lock(&hpre_algs_lock);
+ if (--hpre_available_devs)
+ goto unlock;
+
hpre_unregister_x25519(qm);
hpre_unregister_ecdh(qm);
hpre_unregister_dh(qm);
hpre_unregister_rsa(qm);
+
+unlock:
+ mutex_unlock(&hpre_algs_lock);
}
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 1709e649f0d1..b02fdd5b2df8 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -107,6 +107,7 @@
#define HPRE_VIA_MSI_DSM 1
#define HPRE_SQE_MASK_OFFSET 8
#define HPRE_SQE_MASK_LEN 24
+#define HPRE_CTX_Q_NUM_DEF 1

#define HPRE_DFX_BASE 0x301000
#define HPRE_DFX_COMMON1 0x301400
@@ -1399,10 +1400,11 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret)
dev_warn(&pdev->dev, "init debugfs fail!\n");

- ret = hisi_qm_alg_register(qm, &hpre_devices);
+ hisi_qm_add_list(qm, &hpre_devices);
+ ret = hisi_qm_alg_register(qm, &hpre_devices, HPRE_CTX_Q_NUM_DEF);
if (ret < 0) {
pci_err(pdev, "fail to register algs to crypto!\n");
- goto err_with_qm_start;
+ goto err_qm_del_list;
}

if (qm->uacce) {
@@ -1424,9 +1426,10 @@ static int hpre_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;

err_with_alg_register:
- hisi_qm_alg_unregister(qm, &hpre_devices);
+ hisi_qm_alg_unregister(qm, &hpre_devices, HPRE_CTX_Q_NUM_DEF);

-err_with_qm_start:
+err_qm_del_list:
+ hisi_qm_del_list(qm, &hpre_devices);
hpre_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);

@@ -1446,7 +1449,8 @@ static void hpre_remove(struct pci_dev *pdev)

hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &hpre_devices);
- hisi_qm_alg_unregister(qm, &hpre_devices);
+ hisi_qm_alg_unregister(qm, &hpre_devices, HPRE_CTX_Q_NUM_DEF);
+ hisi_qm_del_list(qm, &hpre_devices);
if (qm->fun_type == QM_HW_PF && qm->vfs_num)
hisi_qm_sriov_disable(pdev, true);

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 6623707363bb..376f6a3c5c7f 100755
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4833,63 +4833,48 @@ static void qm_cmd_process(struct work_struct *cmd_process)
}

/**
- * hisi_qm_alg_register() - Register alg to crypto and add qm to qm_list.
+ * hisi_qm_alg_register() - Register alg to crypto.
* @qm: The qm needs add.
* @qm_list: The qm list.
+ * @guard: Guard of qp_num.
*
- * This function adds qm to qm list, and will register algorithm to
- * crypto when the qm list is empty.
+ * Register algorithm to crypto when the function is satisfy guard.
*/
-int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
+int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard)
{
struct device *dev = &qm->pdev->dev;
- int flag = 0;
- int ret = 0;
-
- mutex_lock(&qm_list->lock);
- if (list_empty(&qm_list->list))
- flag = 1;
- list_add_tail(&qm->list, &qm_list->list);
- mutex_unlock(&qm_list->lock);

if (qm->ver <= QM_HW_V2 && qm->use_sva) {
dev_info(dev, "HW V2 not both use uacce sva mode and hardware crypto algs.\n");
return 0;
}

- if (flag) {
- ret = qm_list->register_to_crypto(qm);
- if (ret) {
- mutex_lock(&qm_list->lock);
- list_del(&qm->list);
- mutex_unlock(&qm_list->lock);
- }
+ if (qm->qp_num < guard) {
+ dev_info(dev, "qp_num is less than task need.\n");
+ return 0;
}

- return ret;
+ return qm_list->register_to_crypto(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_alg_register);

/**
- * hisi_qm_alg_unregister() - Unregister alg from crypto and delete qm from
- * qm list.
+ * hisi_qm_alg_unregister() - Unregister alg from crypto.
* @qm: The qm needs delete.
* @qm_list: The qm list.
+ * @guard: Guard of qp_num.
*
- * This function deletes qm from qm list, and will unregister algorithm
- * from crypto when the qm list is empty.
+ * Unregister algorithm from crypto when the last function is satisfy guard.
*/
-void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
+void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard)
{
- mutex_lock(&qm_list->lock);
- list_del(&qm->list);
- mutex_unlock(&qm_list->lock);
-
if (qm->ver <= QM_HW_V2 && qm->use_sva)
return;

- if (list_empty(&qm_list->list))
- qm_list->unregister_from_crypto(qm);
+ if (qm->qp_num < guard)
+ return;
+
+ qm_list->unregister_from_crypto(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_alg_unregister);

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 074e50ef512c..99a28a132f8a 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -104,6 +104,9 @@
#define IV_CTR_INIT 0x1
#define IV_BYTE_OFFSET 0x8

+static DEFINE_MUTEX(sec_algs_lock);
+static unsigned int sec_available_devs;
+
struct sec_skcipher {
u64 alg_msk;
struct skcipher_alg alg;
@@ -2544,16 +2547,31 @@ static int sec_register_aead(u64 alg_mask)
int sec_register_to_crypto(struct hisi_qm *qm)
{
u64 alg_mask = sec_get_alg_bitmap(qm, SEC_DRV_ALG_BITMAP_HIGH, SEC_DRV_ALG_BITMAP_LOW);
- int ret;
+ int ret = 0;
+
+ mutex_lock(&sec_algs_lock);
+ if (sec_available_devs) {
+ sec_available_devs++;
+ goto unlock;
+ }

ret = sec_register_skcipher(alg_mask);
if (ret)
- return ret;
+ goto unlock;

ret = sec_register_aead(alg_mask);
if (ret)
- sec_unregister_skcipher(alg_mask, ARRAY_SIZE(sec_skciphers));
+ goto unreg_skcipher;

+ sec_available_devs++;
+ mutex_unlock(&sec_algs_lock);
+
+ return 0;
+
+unreg_skcipher:
+ sec_unregister_skcipher(alg_mask, ARRAY_SIZE(sec_skciphers));
+unlock:
+ mutex_unlock(&sec_algs_lock);
return ret;
}

@@ -2561,6 +2579,13 @@ void sec_unregister_from_crypto(struct hisi_qm *qm)
{
u64 alg_mask = sec_get_alg_bitmap(qm, SEC_DRV_ALG_BITMAP_HIGH, SEC_DRV_ALG_BITMAP_LOW);

+ mutex_lock(&sec_algs_lock);
+ if (--sec_available_devs)
+ goto unlock;
+
sec_unregister_aead(alg_mask, ARRAY_SIZE(sec_aeads));
sec_unregister_skcipher(alg_mask, ARRAY_SIZE(sec_skciphers));
+
+unlock:
+ mutex_unlock(&sec_algs_lock);
}
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 62bd8936a915..0e56a47eb862 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -1234,15 +1234,11 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret)
pci_warn(pdev, "Failed to init debugfs!\n");

- if (qm->qp_num >= ctx_q_num) {
- ret = hisi_qm_alg_register(qm, &sec_devices);
- if (ret < 0) {
- pr_err("Failed to register driver to crypto.\n");
- goto err_qm_stop;
- }
- } else {
- pci_warn(qm->pdev,
- "Failed to use kernel mode, qp not enough!\n");
+ hisi_qm_add_list(qm, &sec_devices);
+ ret = hisi_qm_alg_register(qm, &sec_devices, ctx_q_num);
+ if (ret < 0) {
+ pr_err("Failed to register driver to crypto.\n");
+ goto err_qm_del_list;
}

if (qm->uacce) {
@@ -1264,9 +1260,9 @@ static int sec_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;

err_alg_unregister:
- if (qm->qp_num >= ctx_q_num)
- hisi_qm_alg_unregister(qm, &sec_devices);
-err_qm_stop:
+ hisi_qm_alg_unregister(qm, &sec_devices, ctx_q_num);
+err_qm_del_list:
+ hisi_qm_del_list(qm, &sec_devices);
sec_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);
err_probe_uninit:
@@ -1283,8 +1279,8 @@ static void sec_remove(struct pci_dev *pdev)

hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &sec_devices);
- if (qm->qp_num >= ctx_q_num)
- hisi_qm_alg_unregister(qm, &sec_devices);
+ hisi_qm_alg_unregister(qm, &sec_devices, ctx_q_num);
+ hisi_qm_del_list(qm, &sec_devices);

if (qm->fun_type == QM_HW_PF && qm->vfs_num)
hisi_qm_sriov_disable(pdev, true);
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
index 6608971d10cd..0443205cd45c 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -42,6 +42,9 @@
#define HZIP_ALG_ZLIB GENMASK(1, 0)
#define HZIP_ALG_GZIP GENMASK(3, 2)

+static DEFINE_MUTEX(zip_algs_lock);
+static unsigned int zip_available_devs;
+
static const u8 zlib_head[HZIP_ZLIB_HEAD_SIZE] = {0x78, 0x9c};
static const u8 gzip_head[HZIP_GZIP_HEAD_SIZE] = {
0x1f, 0x8b, 0x08, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x03
@@ -821,19 +824,41 @@ int hisi_zip_register_to_crypto(struct hisi_qm *qm)
{
int ret = 0;

+ mutex_lock(&zip_algs_lock);
+ if (zip_available_devs) {
+ zip_available_devs++;
+ goto unlock;
+ }
+
ret = hisi_zip_register_zlib(qm);
if (ret)
- return ret;
+ goto unlock;

ret = hisi_zip_register_gzip(qm);
if (ret)
- hisi_zip_unregister_zlib(qm);
+ goto unreg_zlib;
+
+ zip_available_devs++;
+ mutex_unlock(&zip_algs_lock);

+ return 0;
+
+unreg_zlib:
+ hisi_zip_unregister_zlib(qm);
+unlock:
+ mutex_unlock(&zip_algs_lock);
return ret;
}

void hisi_zip_unregister_from_crypto(struct hisi_qm *qm)
{
+ mutex_lock(&zip_algs_lock);
+ if (--zip_available_devs)
+ goto unlock;
+
hisi_zip_unregister_zlib(qm);
hisi_zip_unregister_gzip(qm);
+
+unlock:
+ mutex_unlock(&zip_algs_lock);
}
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 84dbaeb07ea8..0b5727056f5c 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -66,6 +66,7 @@
#define HZIP_SQE_SIZE 128
#define HZIP_PF_DEF_Q_NUM 64
#define HZIP_PF_DEF_Q_BASE 0
+#define HZIP_CTX_Q_NUM_DEF 2

#define HZIP_SOFT_CTRL_CNT_CLR_CE 0x301000
#define HZIP_SOFT_CTRL_CNT_CLR_CE_BIT BIT(0)
@@ -1231,10 +1232,11 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret)
pci_err(pdev, "failed to init debugfs (%d)!\n", ret);

- ret = hisi_qm_alg_register(qm, &zip_devices);
+ hisi_qm_add_list(qm, &zip_devices);
+ ret = hisi_qm_alg_register(qm, &zip_devices, HZIP_CTX_Q_NUM_DEF);
if (ret < 0) {
pci_err(pdev, "failed to register driver to crypto!\n");
- goto err_qm_stop;
+ goto err_qm_del_list;
}

if (qm->uacce) {
@@ -1256,9 +1258,10 @@ static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;

err_qm_alg_unregister:
- hisi_qm_alg_unregister(qm, &zip_devices);
+ hisi_qm_alg_unregister(qm, &zip_devices, HZIP_CTX_Q_NUM_DEF);

-err_qm_stop:
+err_qm_del_list:
+ hisi_qm_del_list(qm, &zip_devices);
hisi_zip_debugfs_exit(qm);
hisi_qm_stop(qm, QM_NORMAL);

@@ -1278,7 +1281,8 @@ static void hisi_zip_remove(struct pci_dev *pdev)

hisi_qm_pm_uninit(qm);
hisi_qm_wait_task_finish(qm, &zip_devices);
- hisi_qm_alg_unregister(qm, &zip_devices);
+ hisi_qm_alg_unregister(qm, &zip_devices, HZIP_CTX_Q_NUM_DEF);
+ hisi_qm_del_list(qm, &zip_devices);

if (qm->fun_type == QM_HW_PF && qm->vfs_num)
hisi_qm_sriov_disable(pdev, true);
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 576a0601c72e..1b5082ed48d9 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -490,6 +490,20 @@ static inline void hisi_qm_init_list(struct hisi_qm_list *qm_list)
mutex_init(&qm_list->lock);
}

+static inline void hisi_qm_add_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
+{
+ mutex_lock(&qm_list->lock);
+ list_add_tail(&qm->list, &qm_list->list);
+ mutex_unlock(&qm_list->lock);
+}
+
+static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
+{
+ mutex_lock(&qm_list->lock);
+ list_del(&qm->list);
+ mutex_unlock(&qm_list->lock);
+}
+
int hisi_qm_init(struct hisi_qm *qm);
void hisi_qm_uninit(struct hisi_qm *qm);
int hisi_qm_start(struct hisi_qm *qm);
@@ -535,8 +549,8 @@ int hisi_qm_alloc_qps_node(struct hisi_qm_list *qm_list, int qp_num,
void hisi_qm_free_qps(struct hisi_qp **qps, int qp_num);
void hisi_qm_dev_shutdown(struct pci_dev *pdev);
void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
-int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
-void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list);
+int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard);
+void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list, int guard);
int hisi_qm_resume(struct device *dev);
int hisi_qm_suspend(struct device *dev);
void hisi_qm_pm_uninit(struct hisi_qm *qm);
--
2.33.0


2023-08-11 14:23:51

by Weili Qian

[permalink] [raw]
Subject: [PATCH v2 2/7] crypto: hisilicon/qm - alloc buffer to set and get xqc

If the temporarily applied memory is used to set or get the xqc
information, the driver releases the memory immediately after the
hardware mailbox operation time exceeds the driver waiting time.
However, the hardware does not cancel the operation, so the hardware
may write data to released memory.

Therefore, when the driver is bound to a device, the driver reserves
memory for the xqc configuration. The subsequent xqc configuration
uses the reserved memory to prevent hardware from accessing the
released memory.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/debugfs.c | 75 +++---
drivers/crypto/hisilicon/qm.c | 330 ++++++++++++---------------
drivers/crypto/hisilicon/qm_common.h | 5 +-
include/linux/hisi_acc_qm.h | 13 ++
4 files changed, 190 insertions(+), 233 deletions(-)

diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c
index 2cc1591949db..7e8186fe0512 100644
--- a/drivers/crypto/hisilicon/debugfs.c
+++ b/drivers/crypto/hisilicon/debugfs.c
@@ -137,8 +137,8 @@ static void dump_show(struct hisi_qm *qm, void *info,
static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name)
{
struct device *dev = &qm->pdev->dev;
- struct qm_sqc *sqc, *sqc_curr;
- dma_addr_t sqc_dma;
+ struct qm_sqc *sqc_curr;
+ struct qm_sqc sqc;
u32 qp_id;
int ret;

@@ -151,35 +151,29 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name)
return -EINVAL;
}

- sqc = hisi_qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma);
- if (IS_ERR(sqc))
- return PTR_ERR(sqc);
+ ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1);
+ if (!ret) {
+ dump_show(qm, &sqc, sizeof(struct qm_sqc), name);

- ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 1);
- if (ret) {
- down_read(&qm->qps_lock);
- if (qm->sqc) {
- sqc_curr = qm->sqc + qp_id;
+ return 0;
+ }

- dump_show(qm, sqc_curr, sizeof(*sqc), "SOFT SQC");
- }
- up_read(&qm->qps_lock);
+ down_read(&qm->qps_lock);
+ if (qm->sqc) {
+ sqc_curr = qm->sqc + qp_id;

- goto free_ctx;
+ dump_show(qm, sqc_curr, sizeof(*sqc_curr), "SOFT SQC");
}
+ up_read(&qm->qps_lock);

- dump_show(qm, sqc, sizeof(*sqc), name);
-
-free_ctx:
- hisi_qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma);
return 0;
}

static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name)
{
struct device *dev = &qm->pdev->dev;
- struct qm_cqc *cqc, *cqc_curr;
- dma_addr_t cqc_dma;
+ struct qm_cqc *cqc_curr;
+ struct qm_cqc cqc;
u32 qp_id;
int ret;

@@ -192,34 +186,29 @@ static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name)
return -EINVAL;
}

- cqc = hisi_qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma);
- if (IS_ERR(cqc))
- return PTR_ERR(cqc);
+ ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1);
+ if (!ret) {
+ dump_show(qm, &cqc, sizeof(struct qm_cqc), name);

- ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 1);
- if (ret) {
- down_read(&qm->qps_lock);
- if (qm->cqc) {
- cqc_curr = qm->cqc + qp_id;
+ return 0;
+ }

- dump_show(qm, cqc_curr, sizeof(*cqc), "SOFT CQC");
- }
- up_read(&qm->qps_lock);
+ down_read(&qm->qps_lock);
+ if (qm->cqc) {
+ cqc_curr = qm->cqc + qp_id;

- goto free_ctx;
+ dump_show(qm, cqc_curr, sizeof(*cqc_curr), "SOFT CQC");
}
+ up_read(&qm->qps_lock);

- dump_show(qm, cqc, sizeof(*cqc), name);
-
-free_ctx:
- hisi_qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma);
return 0;
}

static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name)
{
struct device *dev = &qm->pdev->dev;
- dma_addr_t xeqc_dma;
+ struct qm_aeqc aeqc;
+ struct qm_eqc eqc;
size_t size;
void *xeqc;
int ret;
@@ -233,23 +222,19 @@ static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name)
if (!strcmp(name, "EQC")) {
cmd = QM_MB_CMD_EQC;
size = sizeof(struct qm_eqc);
+ xeqc = &eqc;
} else {
cmd = QM_MB_CMD_AEQC;
size = sizeof(struct qm_aeqc);
+ xeqc = &aeqc;
}

- xeqc = hisi_qm_ctx_alloc(qm, size, &xeqc_dma);
- if (IS_ERR(xeqc))
- return PTR_ERR(xeqc);
-
- ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1);
+ ret = qm_set_and_get_xqc(qm, cmd, xeqc, 0, 1);
if (ret)
- goto err_free_ctx;
+ return ret;

dump_show(qm, xeqc, size, name);

-err_free_ctx:
- hisi_qm_ctx_free(qm, size, xeqc, &xeqc_dma);
return ret;
}

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 13cd2617170e..d0b923dc5415 100755
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -50,7 +50,7 @@
#define QM_QC_PASID_ENABLE_SHIFT 7

#define QM_SQ_TYPE_MASK GENMASK(3, 0)
-#define QM_SQ_TAIL_IDX(sqc) ((le16_to_cpu((sqc)->w11) >> 6) & 0x1)
+#define QM_SQ_TAIL_IDX(sqc) ((le16_to_cpu((sqc).w11) >> 6) & 0x1)

/* cqc shift */
#define QM_CQ_HOP_NUM_SHIFT 0
@@ -62,7 +62,7 @@

#define QM_CQE_PHASE(cqe) (le16_to_cpu((cqe)->w7) & 0x1)
#define QM_QC_CQE_SIZE 4
-#define QM_CQ_TAIL_IDX(cqc) ((le16_to_cpu((cqc)->w11) >> 6) & 0x1)
+#define QM_CQ_TAIL_IDX(cqc) ((le16_to_cpu((cqc).w11) >> 6) & 0x1)

/* eqc shift */
#define QM_EQE_AEQE_SIZE (2UL << 12)
@@ -257,19 +257,6 @@
#define QM_MK_SQC_DW3_V2(sqe_sz, sq_depth) \
((((u32)sq_depth) - 1) | ((u32)ilog2(sqe_sz) << QM_SQ_SQE_SIZE_SHIFT))

-#define INIT_QC_COMMON(qc, base, pasid) do { \
- (qc)->head = 0; \
- (qc)->tail = 0; \
- (qc)->base_l = cpu_to_le32(lower_32_bits(base)); \
- (qc)->base_h = cpu_to_le32(upper_32_bits(base)); \
- (qc)->dw3 = 0; \
- (qc)->w8 = 0; \
- (qc)->rsvd0 = 0; \
- (qc)->pasid = cpu_to_le16(pasid); \
- (qc)->w11 = 0; \
- (qc)->rsvd1 = 0; \
-} while (0)
-
enum vft_type {
SQC_VFT = 0,
CQC_VFT,
@@ -758,6 +745,59 @@ static int hisi_qm_mb_read(struct hisi_qm *qm, u64 *base, u8 cmd, u16 queue)
return 0;
}

+/* op 0: set xqc information to hardware, 1: get xqc information from hardware. */
+int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op)
+{
+ struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev));
+ struct qm_mailbox mailbox;
+ dma_addr_t xqc_dma;
+ void *tmp_xqc;
+ size_t size;
+ int ret;
+
+ switch (cmd) {
+ case QM_MB_CMD_SQC:
+ size = sizeof(struct qm_sqc);
+ tmp_xqc = qm->xqc_buf.sqc;
+ xqc_dma = qm->xqc_buf.sqc_dma;
+ break;
+ case QM_MB_CMD_CQC:
+ size = sizeof(struct qm_cqc);
+ tmp_xqc = qm->xqc_buf.cqc;
+ xqc_dma = qm->xqc_buf.cqc_dma;
+ break;
+ case QM_MB_CMD_EQC:
+ size = sizeof(struct qm_eqc);
+ tmp_xqc = qm->xqc_buf.eqc;
+ xqc_dma = qm->xqc_buf.eqc_dma;
+ break;
+ case QM_MB_CMD_AEQC:
+ size = sizeof(struct qm_aeqc);
+ tmp_xqc = qm->xqc_buf.aeqc;
+ xqc_dma = qm->xqc_buf.aeqc_dma;
+ break;
+ }
+
+ /* Setting xqc will fail if master OOO is blocked. */
+ if (qm_check_dev_error(pf_qm)) {
+ dev_err(&qm->pdev->dev, "failed to send mailbox since qm is stop!\n");
+ return -EIO;
+ }
+
+ mutex_lock(&qm->mailbox_lock);
+ if (!op)
+ memcpy(tmp_xqc, xqc, size);
+
+ qm_mb_pre_init(&mailbox, cmd, xqc_dma, qp_id, op);
+ ret = qm_mb_nolock(qm, &mailbox);
+ if (!ret && op)
+ memcpy(xqc, tmp_xqc, size);
+
+ mutex_unlock(&qm->mailbox_lock);
+
+ return ret;
+}
+
static void qm_db_v1(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority)
{
u64 doorbell;
@@ -1390,45 +1430,6 @@ static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
return 0;
}

-void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
- dma_addr_t *dma_addr)
-{
- struct device *dev = &qm->pdev->dev;
- void *ctx_addr;
-
- ctx_addr = kzalloc(ctx_size, GFP_KERNEL);
- if (!ctx_addr)
- return ERR_PTR(-ENOMEM);
-
- *dma_addr = dma_map_single(dev, ctx_addr, ctx_size, DMA_FROM_DEVICE);
- if (dma_mapping_error(dev, *dma_addr)) {
- dev_err(dev, "DMA mapping error!\n");
- kfree(ctx_addr);
- return ERR_PTR(-ENOMEM);
- }
-
- return ctx_addr;
-}
-
-void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size,
- const void *ctx_addr, dma_addr_t *dma_addr)
-{
- struct device *dev = &qm->pdev->dev;
-
- dma_unmap_single(dev, *dma_addr, ctx_size, DMA_FROM_DEVICE);
- kfree(ctx_addr);
-}
-
-static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
-{
- return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
-}
-
-static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
-{
- return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
-}
-
static void qm_hw_error_init_v1(struct hisi_qm *qm)
{
writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
@@ -2002,84 +2003,50 @@ static void hisi_qm_release_qp(struct hisi_qp *qp)
static int qm_sq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
{
struct hisi_qm *qm = qp->qm;
- struct device *dev = &qm->pdev->dev;
enum qm_hw_ver ver = qm->ver;
- struct qm_sqc *sqc;
- dma_addr_t sqc_dma;
- int ret;
+ struct qm_sqc sqc = {0};

- sqc = kzalloc(sizeof(struct qm_sqc), GFP_KERNEL);
- if (!sqc)
- return -ENOMEM;
-
- INIT_QC_COMMON(sqc, qp->sqe_dma, pasid);
if (ver == QM_HW_V1) {
- sqc->dw3 = cpu_to_le32(QM_MK_SQC_DW3_V1(0, 0, 0, qm->sqe_size));
- sqc->w8 = cpu_to_le16(qp->sq_depth - 1);
+ sqc.dw3 = cpu_to_le32(QM_MK_SQC_DW3_V1(0, 0, 0, qm->sqe_size));
+ sqc.w8 = cpu_to_le16(qp->sq_depth - 1);
} else {
- sqc->dw3 = cpu_to_le32(QM_MK_SQC_DW3_V2(qm->sqe_size, qp->sq_depth));
- sqc->w8 = 0; /* rand_qc */
+ sqc.dw3 = cpu_to_le32(QM_MK_SQC_DW3_V2(qm->sqe_size, qp->sq_depth));
+ sqc.w8 = 0; /* rand_qc */
}
- sqc->cq_num = cpu_to_le16(qp_id);
- sqc->w13 = cpu_to_le16(QM_MK_SQC_W13(0, 1, qp->alg_type));
+ sqc.cq_num = cpu_to_le16(qp_id);
+ sqc.w13 = cpu_to_le16(QM_MK_SQC_W13(0, 1, qp->alg_type));
+ sqc.base_l = cpu_to_le32(lower_32_bits(qp->sqe_dma));
+ sqc.base_h = cpu_to_le32(upper_32_bits(qp->sqe_dma));
+ sqc.pasid = cpu_to_le16(pasid);

if (ver >= QM_HW_V3 && qm->use_sva && !qp->is_in_kernel)
- sqc->w11 = cpu_to_le16(QM_QC_PASID_ENABLE <<
- QM_QC_PASID_ENABLE_SHIFT);
-
- sqc_dma = dma_map_single(dev, sqc, sizeof(struct qm_sqc),
- DMA_TO_DEVICE);
- if (dma_mapping_error(dev, sqc_dma)) {
- kfree(sqc);
- return -ENOMEM;
- }
-
- ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0);
- dma_unmap_single(dev, sqc_dma, sizeof(struct qm_sqc), DMA_TO_DEVICE);
- kfree(sqc);
+ sqc.w11 = cpu_to_le16(QM_QC_PASID_ENABLE << QM_QC_PASID_ENABLE_SHIFT);

- return ret;
+ return qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 0);
}

static int qm_cq_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
{
struct hisi_qm *qm = qp->qm;
- struct device *dev = &qm->pdev->dev;
enum qm_hw_ver ver = qm->ver;
- struct qm_cqc *cqc;
- dma_addr_t cqc_dma;
- int ret;
-
- cqc = kzalloc(sizeof(struct qm_cqc), GFP_KERNEL);
- if (!cqc)
- return -ENOMEM;
+ struct qm_cqc cqc = {0};

- INIT_QC_COMMON(cqc, qp->cqe_dma, pasid);
if (ver == QM_HW_V1) {
- cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V1(0, 0, 0,
- QM_QC_CQE_SIZE));
- cqc->w8 = cpu_to_le16(qp->cq_depth - 1);
+ cqc.dw3 = cpu_to_le32(QM_MK_CQC_DW3_V1(0, 0, 0, QM_QC_CQE_SIZE));
+ cqc.w8 = cpu_to_le16(qp->cq_depth - 1);
} else {
- cqc->dw3 = cpu_to_le32(QM_MK_CQC_DW3_V2(QM_QC_CQE_SIZE, qp->cq_depth));
- cqc->w8 = 0; /* rand_qc */
+ cqc.dw3 = cpu_to_le32(QM_MK_CQC_DW3_V2(QM_QC_CQE_SIZE, qp->cq_depth));
+ cqc.w8 = 0; /* rand_qc */
}
- cqc->dw6 = cpu_to_le32(1 << QM_CQ_PHASE_SHIFT | 1 << QM_CQ_FLAG_SHIFT);
+ cqc.dw6 = cpu_to_le32(1 << QM_CQ_PHASE_SHIFT | 1 << QM_CQ_FLAG_SHIFT);
+ cqc.base_l = cpu_to_le32(lower_32_bits(qp->cqe_dma));
+ cqc.base_h = cpu_to_le32(upper_32_bits(qp->cqe_dma));
+ cqc.pasid = cpu_to_le16(pasid);

if (ver >= QM_HW_V3 && qm->use_sva && !qp->is_in_kernel)
- cqc->w11 = cpu_to_le16(QM_QC_PASID_ENABLE);
+ cqc.w11 = cpu_to_le16(QM_QC_PASID_ENABLE);

- cqc_dma = dma_map_single(dev, cqc, sizeof(struct qm_cqc),
- DMA_TO_DEVICE);
- if (dma_mapping_error(dev, cqc_dma)) {
- kfree(cqc);
- return -ENOMEM;
- }
-
- ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0);
- dma_unmap_single(dev, cqc_dma, sizeof(struct qm_cqc), DMA_TO_DEVICE);
- kfree(cqc);
-
- return ret;
+ return qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 0);
}

static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, u32 pasid)
@@ -2169,14 +2136,11 @@ static void qp_stop_fail_cb(struct hisi_qp *qp)
*/
static int qm_drain_qp(struct hisi_qp *qp)
{
- size_t size = sizeof(struct qm_sqc) + sizeof(struct qm_cqc);
struct hisi_qm *qm = qp->qm;
struct device *dev = &qm->pdev->dev;
- struct qm_sqc *sqc;
- struct qm_cqc *cqc;
- dma_addr_t dma_addr;
+ struct qm_sqc sqc;
+ struct qm_cqc cqc;
int ret = 0, i = 0;
- void *addr;

/* No need to judge if master OOO is blocked. */
if (qm_check_dev_error(qm))
@@ -2190,44 +2154,32 @@ static int qm_drain_qp(struct hisi_qp *qp)
return ret;
}

- addr = hisi_qm_ctx_alloc(qm, size, &dma_addr);
- if (IS_ERR(addr)) {
- dev_err(dev, "Failed to alloc ctx for sqc and cqc!\n");
- return -ENOMEM;
- }
-
while (++i) {
- ret = qm_dump_sqc_raw(qm, dma_addr, qp->qp_id);
+ ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp->qp_id, 1);
if (ret) {
dev_err_ratelimited(dev, "Failed to dump sqc!\n");
- break;
+ return ret;
}
- sqc = addr;

- ret = qm_dump_cqc_raw(qm, (dma_addr + sizeof(struct qm_sqc)),
- qp->qp_id);
+ ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp->qp_id, 1);
if (ret) {
dev_err_ratelimited(dev, "Failed to dump cqc!\n");
- break;
+ return ret;
}
- cqc = addr + sizeof(struct qm_sqc);

- if ((sqc->tail == cqc->tail) &&
+ if ((sqc.tail == cqc.tail) &&
(QM_SQ_TAIL_IDX(sqc) == QM_CQ_TAIL_IDX(cqc)))
break;

if (i == MAX_WAIT_COUNTS) {
dev_err(dev, "Fail to empty queue %u!\n", qp->qp_id);
- ret = -EBUSY;
- break;
+ return -EBUSY;
}

usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX);
}

- hisi_qm_ctx_free(qm, size, addr, &dma_addr);
-
- return ret;
+ return 0;
}

static int qm_stop_qp_nolock(struct hisi_qp *qp)
@@ -2940,11 +2892,20 @@ static void hisi_qm_unint_work(struct hisi_qm *qm)
destroy_workqueue(qm->wq);
}

+static void hisi_qm_free_rsv_buf(struct hisi_qm *qm)
+{
+ struct qm_dma *xqc_dma = &qm->xqc_buf.qcdma;
+ struct device *dev = &qm->pdev->dev;
+
+ dma_free_coherent(dev, xqc_dma->size, xqc_dma->va, xqc_dma->dma);
+}
+
static void hisi_qm_memory_uninit(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;

hisi_qp_memory_uninit(qm, qm->qp_num);
+ hisi_qm_free_rsv_buf(qm);
if (qm->qdma.va) {
hisi_qm_cache_wb(qm);
dma_free_coherent(dev, qm->qdma.size,
@@ -3066,62 +3027,26 @@ static void qm_disable_eq_aeq_interrupts(struct hisi_qm *qm)

static int qm_eq_ctx_cfg(struct hisi_qm *qm)
{
- struct device *dev = &qm->pdev->dev;
- struct qm_eqc *eqc;
- dma_addr_t eqc_dma;
- int ret;
+ struct qm_eqc eqc = {0};

- eqc = kzalloc(sizeof(struct qm_eqc), GFP_KERNEL);
- if (!eqc)
- return -ENOMEM;
-
- eqc->base_l = cpu_to_le32(lower_32_bits(qm->eqe_dma));
- eqc->base_h = cpu_to_le32(upper_32_bits(qm->eqe_dma));
+ eqc.base_l = cpu_to_le32(lower_32_bits(qm->eqe_dma));
+ eqc.base_h = cpu_to_le32(upper_32_bits(qm->eqe_dma));
if (qm->ver == QM_HW_V1)
- eqc->dw3 = cpu_to_le32(QM_EQE_AEQE_SIZE);
- eqc->dw6 = cpu_to_le32(((u32)qm->eq_depth - 1) | (1 << QM_EQC_PHASE_SHIFT));
+ eqc.dw3 = cpu_to_le32(QM_EQE_AEQE_SIZE);
+ eqc.dw6 = cpu_to_le32(((u32)qm->eq_depth - 1) | (1 << QM_EQC_PHASE_SHIFT));

- eqc_dma = dma_map_single(dev, eqc, sizeof(struct qm_eqc),
- DMA_TO_DEVICE);
- if (dma_mapping_error(dev, eqc_dma)) {
- kfree(eqc);
- return -ENOMEM;
- }
-
- ret = hisi_qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0);
- dma_unmap_single(dev, eqc_dma, sizeof(struct qm_eqc), DMA_TO_DEVICE);
- kfree(eqc);
-
- return ret;
+ return qm_set_and_get_xqc(qm, QM_MB_CMD_EQC, &eqc, 0, 0);
}

static int qm_aeq_ctx_cfg(struct hisi_qm *qm)
{
- struct device *dev = &qm->pdev->dev;
- struct qm_aeqc *aeqc;
- dma_addr_t aeqc_dma;
- int ret;
-
- aeqc = kzalloc(sizeof(struct qm_aeqc), GFP_KERNEL);
- if (!aeqc)
- return -ENOMEM;
+ struct qm_aeqc aeqc = {0};

- aeqc->base_l = cpu_to_le32(lower_32_bits(qm->aeqe_dma));
- aeqc->base_h = cpu_to_le32(upper_32_bits(qm->aeqe_dma));
- aeqc->dw6 = cpu_to_le32(((u32)qm->aeq_depth - 1) | (1 << QM_EQC_PHASE_SHIFT));
+ aeqc.base_l = cpu_to_le32(lower_32_bits(qm->aeqe_dma));
+ aeqc.base_h = cpu_to_le32(upper_32_bits(qm->aeqe_dma));
+ aeqc.dw6 = cpu_to_le32(((u32)qm->aeq_depth - 1) | (1 << QM_EQC_PHASE_SHIFT));

- aeqc_dma = dma_map_single(dev, aeqc, sizeof(struct qm_aeqc),
- DMA_TO_DEVICE);
- if (dma_mapping_error(dev, aeqc_dma)) {
- kfree(aeqc);
- return -ENOMEM;
- }
-
- ret = hisi_qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0);
- dma_unmap_single(dev, aeqc_dma, sizeof(struct qm_aeqc), DMA_TO_DEVICE);
- kfree(aeqc);
-
- return ret;
+ return qm_set_and_get_xqc(qm, QM_MB_CMD_AEQC, &aeqc, 0, 0);
}

static int qm_eq_aeq_ctx_cfg(struct hisi_qm *qm)
@@ -5353,6 +5278,37 @@ static int hisi_qp_alloc_memory(struct hisi_qm *qm)
return ret;
}

+static int hisi_qm_alloc_rsv_buf(struct hisi_qm *qm)
+{
+ struct qm_rsv_buf *xqc_buf = &qm->xqc_buf;
+ struct qm_dma *xqc_dma = &xqc_buf->qcdma;
+ struct device *dev = &qm->pdev->dev;
+ size_t off = 0;
+
+#define QM_XQC_BUF_INIT(xqc_buf, type) do { \
+ (xqc_buf)->type = ((xqc_buf)->qcdma.va + (off)); \
+ (xqc_buf)->type##_dma = (xqc_buf)->qcdma.dma + (off); \
+ off += QMC_ALIGN(sizeof(struct qm_##type)); \
+} while (0)
+
+ xqc_dma->size = QMC_ALIGN(sizeof(struct qm_eqc)) +
+ QMC_ALIGN(sizeof(struct qm_aeqc)) +
+ QMC_ALIGN(sizeof(struct qm_sqc)) +
+ QMC_ALIGN(sizeof(struct qm_cqc));
+
+ xqc_dma->va = dma_alloc_coherent(dev, xqc_dma->size, &xqc_dma->dma,
+ GFP_ATOMIC);
+ if (!xqc_dma->va)
+ return -ENOMEM;
+
+ QM_XQC_BUF_INIT(xqc_buf, eqc);
+ QM_XQC_BUF_INIT(xqc_buf, aeqc);
+ QM_XQC_BUF_INIT(xqc_buf, sqc);
+ QM_XQC_BUF_INIT(xqc_buf, cqc);
+
+ return 0;
+}
+
static int hisi_qm_memory_init(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
@@ -5394,13 +5350,19 @@ static int hisi_qm_memory_init(struct hisi_qm *qm)
QM_INIT_BUF(qm, sqc, qm->qp_num);
QM_INIT_BUF(qm, cqc, qm->qp_num);

+ ret = hisi_qm_alloc_rsv_buf(qm);
+ if (ret)
+ goto err_free_qdma;
+
ret = hisi_qp_alloc_memory(qm);
if (ret)
- goto err_alloc_qp_array;
+ goto err_free_reserve_buf;

return 0;

-err_alloc_qp_array:
+err_free_reserve_buf:
+ hisi_qm_free_rsv_buf(qm);
+err_free_qdma:
dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma);
err_destroy_idr:
idr_destroy(&qm->qp_idr);
diff --git a/drivers/crypto/hisilicon/qm_common.h b/drivers/crypto/hisilicon/qm_common.h
index 1406a422d455..db96c8bf9692 100644
--- a/drivers/crypto/hisilicon/qm_common.h
+++ b/drivers/crypto/hisilicon/qm_common.h
@@ -77,10 +77,7 @@ static const char * const qm_s[] = {
"init", "start", "close", "stop",
};

-void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
- dma_addr_t *dma_addr);
-void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size,
- const void *ctx_addr, dma_addr_t *dma_addr);
+int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op);
void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm);
void hisi_qm_set_algqos_init(struct hisi_qm *qm);

diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 0f83c19a8f36..3f33d6d99999 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -284,6 +284,18 @@ struct qm_err_isolate {
struct list_head qm_hw_errs;
};

+struct qm_rsv_buf {
+ struct qm_sqc *sqc;
+ struct qm_cqc *cqc;
+ struct qm_eqc *eqc;
+ struct qm_aeqc *aeqc;
+ dma_addr_t sqc_dma;
+ dma_addr_t cqc_dma;
+ dma_addr_t eqc_dma;
+ dma_addr_t aeqc_dma;
+ struct qm_dma qcdma;
+};
+
struct hisi_qm {
enum qm_hw_ver ver;
enum qm_fun_type fun_type;
@@ -316,6 +328,7 @@ struct hisi_qm {
dma_addr_t cqc_dma;
dma_addr_t eqe_dma;
dma_addr_t aeqe_dma;
+ struct qm_rsv_buf xqc_buf;

struct hisi_qm_status status;
const struct hisi_qm_err_ini *err_ini;
--
2.33.0


2023-08-11 14:23:59

by Weili Qian

[permalink] [raw]
Subject: [PATCH v2 6/7] crypto: hisilicon/qm - fix the type value of aeq

The type of aeq has only 17 to 20 bits, but 17 to 31 bits are read in
function qm_aeq_thread(), fix it.

Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index c9ed30b181bb..a1539974f95a 100755
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -73,6 +73,7 @@

#define QM_AEQE_PHASE(aeqe) ((le32_to_cpu((aeqe)->dw0) >> 16) & 0x1)
#define QM_AEQE_TYPE_SHIFT 17
+#define QM_AEQE_TYPE_MASK 0xf
#define QM_AEQE_CQN_MASK GENMASK(15, 0)
#define QM_CQ_OVERFLOW 0
#define QM_EQ_OVERFLOW 1
@@ -1137,7 +1138,8 @@ static irqreturn_t qm_aeq_thread(int irq, void *data)
u32 type, qp_id;

while (QM_AEQE_PHASE(aeqe) == qm->status.aeqc_phase) {
- type = le32_to_cpu(aeqe->dw0) >> QM_AEQE_TYPE_SHIFT;
+ type = (le32_to_cpu(aeqe->dw0) >> QM_AEQE_TYPE_SHIFT) &
+ QM_AEQE_TYPE_MASK;
qp_id = le32_to_cpu(aeqe->dw0) & QM_AEQE_CQN_MASK;

switch (type) {
--
2.33.0


2023-08-11 14:23:59

by Weili Qian

[permalink] [raw]
Subject: [PATCH v2 7/7] crypto: hisilicon/qm - increase function communication waiting time

When multiple VFs, for example, 63, are enabled, the communication
between the PF and all VFs cannot be completed within the current
waiting time. Therefore, adjust waiting time more than the maximum
mailbox execution time.

Fixes: 3cd53a27c2fc ("crypto: hisilicon/qm - add callback to support communication")
Signed-off-by: Weili Qian <[email protected]>
---
drivers/crypto/hisilicon/qm.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index a1539974f95a..5d633f774c83 100755
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -181,9 +181,9 @@
#define QM_IFC_INT_DISABLE BIT(0)
#define QM_IFC_INT_STATUS_MASK BIT(0)
#define QM_IFC_INT_SET_MASK BIT(0)
-#define QM_WAIT_DST_ACK 10
-#define QM_MAX_PF_WAIT_COUNT 10
-#define QM_MAX_VF_WAIT_COUNT 40
+#define QM_WAIT_DST_ACK 100
+#define QM_MAX_PF_WAIT_COUNT 50
+#define QM_MAX_VF_WAIT_COUNT 100
#define QM_VF_RESET_WAIT_US 20000
#define QM_VF_RESET_WAIT_CNT 3000
#define QM_VF_RESET_WAIT_TIMEOUT_US \
--
2.33.0


2023-08-20 07:04:33

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH v2 1/7] crypto: hisilicon/qm - obtain the mailbox configuration at one time

On Fri, Aug 18, 2023 at 12:21:02PM +0200, Ard Biesheuvel wrote:
>
> IIRC there have been other cases (ThunderX?) where 128 bit MMIO
> accessors were needed for some peripheral, but common arm64 helpers
> were rejected on the basis that this atomic behavior is not
> architectural.
>
> Obviously, using inline asm in the driver is not the right way either,
> so perhaps we should consider introducing some common infrastructure
> anyway, including some expectation management about their guarantees.

The ones in

drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h

look better. So perhaps copy them into hisilicon?

Cheers,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

2023-08-21 11:24:50

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v2 1/7] crypto: hisilicon/qm - obtain the mailbox configuration at one time

On Mon, Aug 21, 2023 at 04:19:54PM +0800, Herbert Xu wrote:
> On Sat, Aug 19, 2023 at 09:33:18AM +0200, Ard Biesheuvel wrote:
> >
> > No, that otx2_write128() routine looks buggy, actually, The ! at the
> > end means writeback, and so the register holding addr will be
> > modified, which is not reflect in the asm constraints. It also lacks a
> > barrier.
>
> OK. But at least having a helper called write128 looks a lot
> cleaner than just having unexplained assembly in the code.

I guess we want something similar to how writeq() is handled on 32-bit
architectures (see include/linux/io-64-nonatomic-{hi-lo,lo-hi}.h.

It's then CPU-dependent on whether you get atomicity.

Will

2023-08-22 11:52:07

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH v2 1/7] crypto: hisilicon/qm - obtain the mailbox configuration at one time

On Mon, Aug 21, 2023, at 08:45, Weili Qian wrote:
> On 2023/8/21 18:26, Will Deacon wrote:
>> On Mon, Aug 21, 2023 at 04:19:54PM +0800, Herbert Xu wrote:

> Thanks for the review.
>
> Since the HiSilicon accelerator devices are supported only on the ARM64
> platform,
> the following 128bit read and write interfaces are added to the driver,
> is this OK?
>
> #if defined(CONFIG_ARM64)
> static void qm_write128(void __iomem *addr, const void *buffer)
> {

The naming makes it specific to the driver, which is not
appropriate for a global definition. Just follow the
generic naming. I guess you wouldn't have to do both
the readl/writel and the ioread32/iowrite32 variants, so
just start with the ioread/iowrite version. That also
avoids having to come up with a new letter.

You have the arguments in the wrong order compared to iowrite32(),
which is very confusing.

Instead of the pointer to the buffer, I'd suggest passing the
value by value here, to make it behave like the other ones.

This does mean it won't build on targets that don't
define u128, but I think we can handle that in a Kconfig
symbol.

> unsigned long tmp0 = 0, tmp1 = 0;

Don't initialize local variable to zero, that is generally
a bad idea because it hides cases where they are not
initialized properly.

> asm volatile("ldp %0, %1, %3\n"
> "stp %0, %1, %2\n"

This is missing the endian-swap for big-endian kernels.

> "dmb oshst\n"

You have the barrier at the wrong end of the helper, it
needs to before the actual store to have the intended
effect.

Also, you should really use the generic __io_bw() helper
rather than open-coding it.

> : "=&r" (tmp0),
> "=&r" (tmp1),

The tmp0/tmp1 registers are technically a clobber, not
an in/out, though ideally these should be turned
into inputs.

> "+Q" (*((char __iomem *)addr))
> : "Q" (*((char *)buffer))

wrong length

> : "memory");
> }

The memory clobber is usually part of the barrier.

> static void qm_read128(void *buffer, const void __iomem *addr)
> {
> unsigned long tmp0 = 0, tmp1 = 0;
>
> asm volatile("ldp %0, %1, %3\n"
> "stp %0, %1, %2\n"
> "dmb oshst\n"
> : "=&r" (tmp0),
> "=&r" (tmp1),
> "+Q" (*((char *)buffer))
> : "Q" (*((char __iomem *)addr))
> : "memory");
> }

Same thing, but you are missing the control dependency
from __io_ar() here, rather than just open-coding it.

> #else
> static void qm_write128(void __iomem *addr, const void *buffer)
> {
>
> }

This is missing the entire implementation?

Arnd

2023-08-30 20:34:56

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH v2 1/7] crypto: hisilicon/qm - obtain the mailbox configuration at one time

On Thu, Aug 24, 2023, at 23:07, Weili Qian wrote:
> On 2023/8/21 22:36, Ard Biesheuvel wrote:
>> On Mon, 21 Aug 2023 at 14:45, Weili Qian <[email protected]> wrote:

>>> : "Q" (*((char *)buffer))
>>
>> This constraint describes the first byte of buffer, which might cause
>> problems because the asm reads the entire buffer not just the first
>> byte.
> I don't understand why constraint describes the first byte of
> buffer,and the compilation result seems to be ok.
>
> 1811 1afc: a9400a61 ldp x1, x2, [x19]
> 1812 1b00: a9000801 stp x1, x2, [x0]
> 1813 1b04: d50332bf dmb oshst
> Maybe I'm wrong about 'Q', could you explain it or where can I learn
> more about this?

The "Q" is not the problem here, it's the cast to (char *), which
tells the compiler that only the first byte is used here, and
allows it to not actually store the rest of the buffer into
memory.

It's not a problem on the __iomem pointer side, since gcc never
accesses those directly, and for the version taking a __u128 literal
or two __u64 registers it is also ok.

>>> unsigned long tmp0 = 0, tmp1 = 0;
>>>
>>> asm volatile("ldp %0, %1, %3\n"
>>> "stp %0, %1, %2\n"
>>> "dmb oshst\n"
>>
>> Is this the right barrier for a read?
> Should be "dmb oshld\n".

As I said, this needs to be __io_ar(), which might be
defined in a different way.

>>
>> Have you tried using __uint128_t accesses instead?
>>
>> E.g., something like
>>
>> static void qm_write128(void __iomem *addr, const void *buffer)
>> {
>> volatile __uint128_t *dst = (volatile __uint128_t __force *)addr;
>> const __uint128_t *src __aligned(1) = buffer;
>>
>> WRITE_ONCE(*dst, *src);
>> dma_wmb();
>> }
>>
>> should produce the right instruction sequence, and works on all
>> architectures that have CONFIG_ARCH_SUPPORTS_INT128=y
>>
>
> I tried this, but WRITE_ONCE does not support type __uint128_t.
> ->WRITE_ONCE
> ->compiletime_assert_rwonce_type
> ->compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
> "Unsupported access size for {READ,WRITE}_ONCE().")

On top of that, WRITE_ONCE() does not guarantee atomicity, and
dma_wmb() might not be the correct barrier.

> So can we define generic IO helpers based on patchset
> https://lore.kernel.org/all/[email protected]/
> Part of the implementation is as follows:
>
> add writeo() in include/asm-generic/io.h
>
> #ifdef CONFIG_ARCH_SUPPORTS_INT128
> #ifndef writeo
> #define writeo writeo
> static inline void writeo(__uint128_t value, volatile void __iomem
> *addr)
> {
> __io_bw();
> __raw_writeo((__uint128_t __force)__cpu_to_le128(value), addr);
> //__cpu_to_le128 will implement.
> __io_aw();
> }
> #endif
> #endif /* CONFIG_ARCH_SUPPORTS_INT128 */

Right, this is fairly close to what we need. The 'o' notation
might be slightly controversial, which is why I suggested
definining only iowrite128() to avoid having to agree on
the correct letter for 16-byte stores.

> in arch/arm64/include/asm/io.h
>
> #ifdef CONFIG_ARCH_SUPPORTS_INT128
> #define __raw_writeo __raw_writeo
> static __always_inline void __raw_writeo(__uint128_t val, volatile
> void __iomem *addr)
> {
> u64 hi, lo;
>
> lo = (u64)val;
> hi = (u64)(val >> 64);
>
> asm volatile ("stp %x0, %x1, [%2]\n" :: "rZ"(lo), "rZ"(hi), "r"(addr));
> }
> #endif /* CONFIG_ARCH_SUPPORTS_INT128 */

This definition looks fine.

> And add io{read|write}128bits in include/linux/io-64-nonatomic-{hi-lo,lo-hi}.h.
> static inline void lo_hi_writeo(__uint128_t val, volatile void __iomem *addr)
> {
> writeq(val, addr);
> writeq(val >> 64, addr);
> }

This also looks fine.

Arnd

2023-08-31 15:14:12

by Weili Qian

[permalink] [raw]
Subject: Re: [PATCH v2 1/7] crypto: hisilicon/qm - obtain the mailbox configuration at one time



On 2023/8/30 3:37, Arnd Bergmann wrote:
> On Thu, Aug 24, 2023, at 23:07, Weili Qian wrote:
>> On 2023/8/21 22:36, Ard Biesheuvel wrote:
>>> On Mon, 21 Aug 2023 at 14:45, Weili Qian <[email protected]> wrote:
>
>>>> : "Q" (*((char *)buffer))
>>>
>>> This constraint describes the first byte of buffer, which might cause
>>> problems because the asm reads the entire buffer not just the first
>>> byte.
>> I don't understand why constraint describes the first byte of
>> buffer,and the compilation result seems to be ok.
>>
>> 1811 1afc: a9400a61 ldp x1, x2, [x19]
>> 1812 1b00: a9000801 stp x1, x2, [x0]
>> 1813 1b04: d50332bf dmb oshst
>> Maybe I'm wrong about 'Q', could you explain it or where can I learn
>> more about this?
>
> The "Q" is not the problem here, it's the cast to (char *), which
> tells the compiler that only the first byte is used here, and
> allows it to not actually store the rest of the buffer into
> memory.
>
> It's not a problem on the __iomem pointer side, since gcc never
> accesses those directly, and for the version taking a __u128 literal
> or two __u64 registers it is also ok.

Thanks for your reply, I have got it.

>
>>>> unsigned long tmp0 = 0, tmp1 = 0;
>>>>
>>>> asm volatile("ldp %0, %1, %3\n"
>>>> "stp %0, %1, %2\n"
>>>> "dmb oshst\n"
>>>
>>> Is this the right barrier for a read?
>> Should be "dmb oshld\n".
>
> As I said, this needs to be __io_ar(), which might be
> defined in a different way.
>
>>>
>>> Have you tried using __uint128_t accesses instead?
>>>
>>> E.g., something like
>>>
>>> static void qm_write128(void __iomem *addr, const void *buffer)
>>> {
>>> volatile __uint128_t *dst = (volatile __uint128_t __force *)addr;
>>> const __uint128_t *src __aligned(1) = buffer;
>>>
>>> WRITE_ONCE(*dst, *src);
>>> dma_wmb();
>>> }
>>>
>>> should produce the right instruction sequence, and works on all
>>> architectures that have CONFIG_ARCH_SUPPORTS_INT128=y
>>>
>>
>> I tried this, but WRITE_ONCE does not support type __uint128_t.
>> ->WRITE_ONCE
>> ->compiletime_assert_rwonce_type
>> ->compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
>> "Unsupported access size for {READ,WRITE}_ONCE().")
>
> On top of that, WRITE_ONCE() does not guarantee atomicity, and
> dma_wmb() might not be the correct barrier.
>
>> So can we define generic IO helpers based on patchset
>> https://lore.kernel.org/all/[email protected]/
>> Part of the implementation is as follows:
>>
>> add writeo() in include/asm-generic/io.h
>>
>> #ifdef CONFIG_ARCH_SUPPORTS_INT128
>> #ifndef writeo
>> #define writeo writeo
>> static inline void writeo(__uint128_t value, volatile void __iomem
>> *addr)
>> {
>> __io_bw();
>> __raw_writeo((__uint128_t __force)__cpu_to_le128(value), addr);
>> //__cpu_to_le128 will implement.
>> __io_aw();
>> }
>> #endif
>> #endif /* CONFIG_ARCH_SUPPORTS_INT128 */
>
> Right, this is fairly close to what we need. The 'o' notation
> might be slightly controversial, which is why I suggested
> definining only iowrite128() to avoid having to agree on
> the correct letter for 16-byte stores.

Okay, I'll just define iowrite128() and ioread128().

Thanks,
Weili

>
>> in arch/arm64/include/asm/io.h
>>
>> #ifdef CONFIG_ARCH_SUPPORTS_INT128
>> #define __raw_writeo __raw_writeo
>> static __always_inline void __raw_writeo(__uint128_t val, volatile
>> void __iomem *addr)
>> {
>> u64 hi, lo;
>>
>> lo = (u64)val;
>> hi = (u64)(val >> 64);
>>
>> asm volatile ("stp %x0, %x1, [%2]\n" :: "rZ"(lo), "rZ"(hi), "r"(addr));
>> }
>> #endif /* CONFIG_ARCH_SUPPORTS_INT128 */
>
> This definition looks fine.
>
>> And add io{read|write}128bits in include/linux/io-64-nonatomic-{hi-lo,lo-hi}.h.
>> static inline void lo_hi_writeo(__uint128_t val, volatile void __iomem *addr)
>> {
>> writeq(val, addr);
>> writeq(val >> 64, addr);
>> }
>
> This also looks fine.
>
> Arnd
> .
>