2020-03-02 06:19:24

by Xu Zaibo

Subject: [PATCH v2 5/5] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver

From: liulongfang <[email protected]>

When SMMU translation is enabled, SEC performance for short messages
(<512 bytes) cannot meet our expectations. To avoid the per-request mapping
overhead, we reserve plat buffer (PBUF) memory for small packets when the
TFM is created.

Signed-off-by: liulongfang <[email protected]>
Signed-off-by: Zaibo Xu <[email protected]>
---
drivers/crypto/hisilicon/sec2/sec.h | 4 +
drivers/crypto/hisilicon/sec2/sec_crypto.c | 173 ++++++++++++++++++++++++++++-
2 files changed, 172 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index e67b416..a73d82c 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -11,6 +11,8 @@

/* Algorithm resource per hardware SEC queue */
struct sec_alg_res {
+ u8 *pbuf;
+ dma_addr_t pbuf_dma;
u8 *c_ivin;
dma_addr_t c_ivin_dma;
u8 *out_mac;
@@ -50,6 +52,7 @@ struct sec_req {

/* Status of the SEC request */
bool fake_busy;
+ bool use_pbuf;
};

/**
@@ -130,6 +133,7 @@ struct sec_ctx {
atomic_t dec_qcyclic;

enum sec_alg_type alg_type;
+ bool pbuf_supported;
struct sec_cipher_ctx c_ctx;
struct sec_auth_ctx a_ctx;
};
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 1eeaa74..3136ada 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -48,6 +48,19 @@
#define SEC_MAX_MAC_LEN 64
#define SEC_MAX_AAD_LEN 65535
#define SEC_TOTAL_MAC_SZ (SEC_MAX_MAC_LEN * QM_Q_DEPTH)
+
+#define SEC_PBUF_SZ 512
+#define SEC_PBUF_IV_OFFSET SEC_PBUF_SZ
+#define SEC_PBUF_MAC_OFFSET (SEC_PBUF_SZ + SEC_IV_SIZE)
+#define SEC_PBUF_PKG (SEC_PBUF_SZ + SEC_IV_SIZE + \
+ SEC_MAX_MAC_LEN * 2)
+#define SEC_PBUF_NUM (PAGE_SIZE / SEC_PBUF_PKG)
+#define SEC_PBUF_PAGE_NUM (QM_Q_DEPTH / SEC_PBUF_NUM)
+#define SEC_PBUF_LEFT_SZ (SEC_PBUF_PKG * (QM_Q_DEPTH - \
+ SEC_PBUF_PAGE_NUM * SEC_PBUF_NUM))
+#define SEC_TOTAL_PBUF_SZ (PAGE_SIZE * SEC_PBUF_PAGE_NUM + \
+ SEC_PBUF_LEFT_SZ)
+
#define SEC_SQE_LEN_RATE 4
#define SEC_SQE_CFLAG 2
#define SEC_SQE_AEAD_FLAG 3
@@ -246,6 +259,50 @@ static void sec_free_mac_resource(struct device *dev, struct sec_alg_res *res)
res->out_mac, res->out_mac_dma);
}

+static void sec_free_pbuf_resource(struct device *dev, struct sec_alg_res *res)
+{
+ if (res->pbuf)
+ dma_free_coherent(dev, SEC_TOTAL_PBUF_SZ,
+ res->pbuf, res->pbuf_dma);
+}
+
+/*
+ * To improve performance, pbuf is used for
+ * small packets (< 512 bytes) when IOMMU translation is in use.
+ */
+static int sec_alloc_pbuf_resource(struct device *dev, struct sec_alg_res *res)
+{
+ int pbuf_page_offset;
+ int i, j, k;
+
+ res->pbuf = dma_alloc_coherent(dev, SEC_TOTAL_PBUF_SZ,
+ &res->pbuf_dma, GFP_KERNEL);
+ if (!res->pbuf)
+ return -ENOMEM;
+
+ /*
+ * Each SEC_PBUF_PKG holds the data pbuf, the IV and the
+ * out_mac: <SEC_PBUF|SEC_IV|SEC_MAC>.
+ * Every page holds SEC_PBUF_NUM (six, for 4K pages) packages,
+ * and each sec_qp_ctx needs QM_Q_DEPTH packages, so
+ * SEC_PBUF_PAGE_NUM full pages plus SEC_PBUF_LEFT_SZ
+ * leftover bytes add up to SEC_TOTAL_PBUF_SZ.
+ */
+ for (i = 0; i <= SEC_PBUF_PAGE_NUM; i++) {
+ pbuf_page_offset = PAGE_SIZE * i;
+ for (j = 0; j < SEC_PBUF_NUM; j++) {
+ k = i * SEC_PBUF_NUM + j;
+ if (k == QM_Q_DEPTH)
+ break;
+ res[k].pbuf = res->pbuf +
+ j * SEC_PBUF_PKG + pbuf_page_offset;
+ res[k].pbuf_dma = res->pbuf_dma +
+ j * SEC_PBUF_PKG + pbuf_page_offset;
+ }
+ }
+ return 0;
+}
+
static int sec_alg_resource_alloc(struct sec_ctx *ctx,
struct sec_qp_ctx *qp_ctx)
{
@@ -262,6 +319,13 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
if (ret)
goto alloc_fail;
}
+ if (ctx->pbuf_supported) {
+ ret = sec_alloc_pbuf_resource(dev, res);
+ if (ret) {
+ dev_err(dev, "fail to alloc pbuf dma resource!\n");
+ goto alloc_fail;
+ }
+ }

return 0;
alloc_fail:
@@ -279,6 +343,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx,

if (ctx->alg_type == SEC_AEAD)
sec_free_mac_resource(dev, qp_ctx->res);
+ if (ctx->pbuf_supported)
+ sec_free_pbuf_resource(dev, qp_ctx->res);
}

static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
@@ -369,6 +435,8 @@ static int sec_ctx_base_init(struct sec_ctx *ctx)
ctx->sec = sec;
ctx->hlf_q_num = sec->ctx_q_num >> 1;

+ ctx->pbuf_supported = ctx->sec->iommu_used;
+
/* Half of queue depth is taken as fake requests limit in the queue. */
ctx->fake_req_limit = QM_Q_DEPTH >> 1;
ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx),
@@ -591,6 +659,66 @@ GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)

+static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
+ struct scatterlist *src)
+{
+ struct aead_request *aead_req = req->aead_req.aead_req;
+ struct sec_cipher_req *c_req = &req->c_req;
+ struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+ struct device *dev = SEC_CTX_DEV(ctx);
+ int copy_size, pbuf_length;
+ int req_id = req->req_id;
+
+ if (ctx->alg_type == SEC_AEAD)
+ copy_size = aead_req->cryptlen + aead_req->assoclen;
+ else
+ copy_size = c_req->c_len;
+
+ pbuf_length = sg_copy_to_buffer(src, sg_nents(src),
+ qp_ctx->res[req_id].pbuf,
+ copy_size);
+
+ if (unlikely(pbuf_length != copy_size)) {
+ dev_err(dev, "copy src data to pbuf error!\n");
+ return -EINVAL;
+ }
+
+ c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma;
+
+ if (!c_req->c_in_dma) {
+ dev_err(dev, "fail to set pbuffer address!\n");
+ return -ENOMEM;
+ }
+
+ c_req->c_out_dma = c_req->c_in_dma;
+
+ return 0;
+}
+
+static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req,
+ struct scatterlist *dst)
+{
+ struct aead_request *aead_req = req->aead_req.aead_req;
+ struct sec_cipher_req *c_req = &req->c_req;
+ struct sec_qp_ctx *qp_ctx = req->qp_ctx;
+ struct device *dev = SEC_CTX_DEV(ctx);
+ int copy_size, pbuf_length;
+ int req_id = req->req_id;
+
+ if (ctx->alg_type == SEC_AEAD)
+ copy_size = c_req->c_len + aead_req->assoclen;
+ else
+ copy_size = c_req->c_len;
+
+ pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst),
+ qp_ctx->res[req_id].pbuf,
+ copy_size);
+
+ if (unlikely(pbuf_length != copy_size))
+ dev_err(dev, "copy pbuf data to dst error!\n");
+
+}
+
static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
struct scatterlist *src, struct scatterlist *dst)
{
@@ -599,7 +727,20 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
struct sec_qp_ctx *qp_ctx = req->qp_ctx;
struct sec_alg_res *res = &qp_ctx->res[req->req_id];
struct device *dev = SEC_CTX_DEV(ctx);
+ int ret;
+
+ if (req->use_pbuf) {
+ ret = sec_cipher_pbuf_map(ctx, req, src);
+ c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET;
+ c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET;
+ if (ctx->alg_type == SEC_AEAD) {
+ a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET;
+ a_req->out_mac_dma = res->pbuf_dma +
+ SEC_PBUF_MAC_OFFSET;
+ }

+ return ret;
+ }
c_req->c_ivin = res->c_ivin;
c_req->c_ivin_dma = res->c_ivin_dma;
if (ctx->alg_type == SEC_AEAD) {
@@ -642,10 +783,14 @@ static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req,
struct sec_cipher_req *c_req = &req->c_req;
struct device *dev = SEC_CTX_DEV(ctx);

- if (dst != src)
- hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
+ if (req->use_pbuf) {
+ sec_cipher_pbuf_unmap(ctx, req, dst);
+ } else {
+ if (dst != src)
+ hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);

- hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
+ hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
+ }
}

static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
@@ -844,7 +989,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET;
sec_sqe->type_cipher_auth = bd_type | cipher;

- sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
+ if (req->use_pbuf)
+ sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET;
+ else
+ sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET;
if (c_req->c_in_dma != c_req->c_out_dma)
de = 0x1 << SEC_DE_OFFSET;
@@ -852,7 +1000,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
sec_sqe->sds_sa_type = (de | scene | sa_type);

/* Just set DST address type */
- da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
+ if (req->use_pbuf)
+ da_type = SEC_PBUF << SEC_DST_SGL_OFFSET;
+ else
+ da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
sec_sqe->sdm_addr_type |= da_type;

sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len);
@@ -1215,6 +1366,12 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
return -EINVAL;
}
sreq->c_req.c_len = sk_req->cryptlen;
+
+ if (ctx->pbuf_supported && sk_req->cryptlen <= SEC_PBUF_SZ)
+ sreq->use_pbuf = true;
+ else
+ sreq->use_pbuf = false;
+
if (c_alg == SEC_CALG_3DES) {
if (unlikely(sk_req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) {
dev_err(dev, "skcipher 3des input length error!\n");
@@ -1334,6 +1491,12 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
return -EINVAL;
}

+ if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <=
+ SEC_PBUF_SZ)
+ sreq->use_pbuf = true;
+ else
+ sreq->use_pbuf = false;
+
/* Support AES only */
if (unlikely(c_alg != SEC_CALG_AES)) {
dev_err(SEC_CTX_DEV(ctx), "aead crypto alg error!\n");
--
2.8.1
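
For anyone checking the SEC_PBUF_* arithmetic above, here is a minimal
user-space sketch (not part of the patch; QM_Q_DEPTH = 1024, SEC_IV_SIZE = 24
and PAGE_SIZE = 4096 are assumptions taken from the qm/sec headers of this
period):

#include <stdio.h>

#define PAGE_SIZE          4096
#define QM_Q_DEPTH         1024
#define SEC_IV_SIZE        24
#define SEC_MAX_MAC_LEN    64

#define SEC_PBUF_SZ        512
#define SEC_PBUF_PKG       (SEC_PBUF_SZ + SEC_IV_SIZE + SEC_MAX_MAC_LEN * 2)
#define SEC_PBUF_NUM       (PAGE_SIZE / SEC_PBUF_PKG)
#define SEC_PBUF_PAGE_NUM  (QM_Q_DEPTH / SEC_PBUF_NUM)
#define SEC_PBUF_LEFT_SZ   (SEC_PBUF_PKG * (QM_Q_DEPTH - \
                            SEC_PBUF_PAGE_NUM * SEC_PBUF_NUM))
#define SEC_TOTAL_PBUF_SZ  (PAGE_SIZE * SEC_PBUF_PAGE_NUM + SEC_PBUF_LEFT_SZ)

int main(void)
{
	/* 664-byte packages, six per 4K page, 170 full pages + 4 leftover */
	printf("pkg=%d per_page=%d pages=%d left=%d total=%d\n",
	       SEC_PBUF_PKG, SEC_PBUF_NUM, SEC_PBUF_PAGE_NUM,
	       SEC_PBUF_LEFT_SZ, SEC_TOTAL_PBUF_SZ);
	return 0;
}

With those values, SEC_PBUF_PKG is 664 bytes, six packages fit per page, and
each qp_ctx reserves 170 full pages plus 2656 leftover bytes: 698976 bytes,
roughly 683 KiB of coherent memory.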


2020-03-02 12:50:12

by Jonathan Cameron

Subject: Re: [PATCH v2 5/5] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver

On Mon, 2 Mar 2020 14:15:16 +0800
Zaibo Xu <[email protected]> wrote:

> From: liulongfang <[email protected]>
>
> When SMMU translation is enabled, SEC performance for short messages
> (<512 bytes) cannot meet our expectations. To avoid the per-request mapping
> overhead, we reserve plat buffer (PBUF) memory for small packets when the
> TFM is created.
>
> Signed-off-by: liulongfang <[email protected]>
> Signed-off-by: Zaibo Xu <[email protected]>

Hmm. I guess we pay the price for this performance trick in memory usage.
Oh well, if it becomes a problem we can always look at more clever solutions.

Looks good to me. Fix up liulongfang's sign-off.

Reviewed-by: Jonathan Cameron <[email protected]>
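
To put a rough number on that price, combining the layout arithmetic worked
out earlier in the thread with the driver's default ctx_q_num of 24 (the
default value is an assumption here; it is a module parameter):

    698976 bytes per qp_ctx * 24 qp_ctx per TFM ~= 16 MiB of coherent
    DMA memory reserved per TFM when the SMMU is in use.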

> @@ -262,6 +319,13 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
> if (ret)
> goto alloc_fail;
> }
> + if (ctx->pbuf_supported) {
> + ret = sec_alloc_pbuf_resource(dev, res);
> + if (ret) {
> + dev_err(dev, "fail to alloc pbuf dma resource!\n");
> + goto alloc_fail;
> + }
> + }
>
> return 0;
> alloc_fail:
> @@ -279,6 +343,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx,
>
> if (ctx->alg_type == SEC_AEAD)
> sec_free_mac_resource(dev, qp_ctx->res);
> + if (ctx->pbuf_supported)
> + sec_free_pbuf_resource(dev, qp_ctx->res);
Is the ordering right here? Seems like this is allocated after the mac_resource,
so it should be freed before it.

I would prefer specific gotos for the resource_alloc failure cases, to make it
clear that failing this allocation needs different cleanup than failing the
mac_resource allocation.


> }
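
A sketch of the reverse-order free being suggested (assuming
sec_free_civ_resource() is the existing helper that undoes the c_ivin
allocation):

static void sec_alg_resource_free(struct sec_ctx *ctx,
				  struct sec_qp_ctx *qp_ctx)
{
	struct device *dev = SEC_CTX_DEV(ctx);

	/* Undo in the reverse order of sec_alg_resource_alloc() */
	if (ctx->pbuf_supported)
		sec_free_pbuf_resource(dev, qp_ctx->res);
	if (ctx->alg_type == SEC_AEAD)
		sec_free_mac_resource(dev, qp_ctx->res);
	sec_free_civ_resource(dev, qp_ctx->res);
}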
2020-03-03 01:23:22

by Xu Zaibo

Subject: Re: [PATCH v2 5/5] crypto: hisilicon/sec2 - Add pbuffer mode for SEC driver

Hi,
On 2020/3/2 20:49, Jonathan Cameron wrote:
> On Mon, 2 Mar 2020 14:15:16 +0800
> Zaibo Xu <[email protected]> wrote:
>
>> From: liulongfang <[email protected]>
>>
>> When SMMU translation is enabled, SEC performance for short messages
>> (<512 bytes) cannot meet our expectations. To avoid the per-request mapping
>> overhead, we reserve plat buffer (PBUF) memory for small packets when the
>> TFM is created.
>>
>> Signed-off-by: liulongfang <[email protected]>
>> Signed-off-by: Zaibo Xu <[email protected]>
> Hmm. I guess we pay the price for this performance trick in memory usage.
> Oh well, if it becomes a problem we can always look at more clever solutions.
>
> Looks good to me. Fix up liulongfang's sign-off.
>
> Reviewed-by: Jonathan Cameron <[email protected]>
Okay. We also ran the performance test with a DMA pool, which showed worse
performance: about a 5.5% throughput loss for 512-byte packets with 16
kthreads.
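
For context, the DMA-pool variant would look roughly like this (a sketch for
illustration only, not code from any posted patch; the function name and the
64-byte alignment are made up). The per-request alloc/free on the pool is
where the extra cost comes from, compared with the pre-carved pages of this
patch:

#include <linux/dmapool.h>

static int sec_pbuf_pool_demo(struct device *dev)
{
	struct dma_pool *pool;
	dma_addr_t pbuf_dma;
	u8 *pbuf;

	/* One pool of SEC_PBUF_PKG-sized blocks per queue context */
	pool = dma_pool_create("sec_pbuf", dev, SEC_PBUF_PKG, 64, 0);
	if (!pool)
		return -ENOMEM;

	/* Allocated and freed on every request, unlike the patch's
	 * statically carved pages.
	 */
	pbuf = dma_pool_alloc(pool, GFP_ATOMIC, &pbuf_dma);
	if (!pbuf) {
		dma_pool_destroy(pool);
		return -ENOMEM;
	}

	dma_pool_free(pool, pbuf, pbuf_dma);
	dma_pool_destroy(pool);
	return 0;
}
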
>
>> @@ -262,6 +319,13 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
>> if (ret)
>> goto alloc_fail;
>> }
>> + if (ctx->pbuf_supported) {
>> + ret = sec_alloc_pbuf_resource(dev, res);
>> + if (ret) {
>> + dev_err(dev, "fail to alloc pbuf dma resource!\n");
>> + goto alloc_fail;
>> + }
>> + }
>>
>> return 0;
>> alloc_fail:
>> @@ -279,6 +343,8 @@ static void sec_alg_resource_free(struct sec_ctx *ctx,
>>
>> if (ctx->alg_type == SEC_AEAD)
>> sec_free_mac_resource(dev, qp_ctx->res);
>> + if (ctx->pbuf_supported)
>> + sec_free_pbuf_resource(dev, qp_ctx->res);
> Is the ordering right here? Seems like this is allocated after the mac_resource,
> so it should be freed before it.
>
> I would prefer specific gotos for the resource_alloc failure cases, to make it
> clear that failing this allocation needs different cleanup than failing the
> mac_resource allocation.
Yes, agreed. 'sec_alg_resource_alloc' has the same problem; a possible rework
with specific gotos is sketched below.
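
Something like this, perhaps (a sketch only; sec_alloc_civ_resource() and
sec_alloc_mac_resource() are the helpers already in sec_crypto.c, while the
label names are made up):

static int sec_alg_resource_alloc(struct sec_ctx *ctx,
				  struct sec_qp_ctx *qp_ctx)
{
	struct device *dev = SEC_CTX_DEV(ctx);
	struct sec_alg_res *res = qp_ctx->res;
	int ret;

	ret = sec_alloc_civ_resource(dev, res);
	if (ret)
		return ret;

	if (ctx->alg_type == SEC_AEAD) {
		ret = sec_alloc_mac_resource(dev, res);
		if (ret)
			goto alloc_mac_fail;
	}
	if (ctx->pbuf_supported) {
		ret = sec_alloc_pbuf_resource(dev, res);
		if (ret) {
			dev_err(dev, "fail to alloc pbuf dma resource!\n");
			goto alloc_pbuf_fail;
		}
	}

	return 0;

alloc_pbuf_fail:
	if (ctx->alg_type == SEC_AEAD)
		sec_free_mac_resource(dev, res);
alloc_mac_fail:
	sec_free_civ_resource(dev, res);
	return ret;
}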

Cheers,
Zaibo
