From: Barry Song <[email protected]>
This patchset removes a couple of memcpy calls in zswap and crypto
to improve zswap's performance.
Thanks to Chengming Zhou for the testing and perf data.
Quoting Chengming:
I just tested these three patches on my server, found improvement in the
kernel build testcase on a tmpfs with zswap (lz4 + zsmalloc) enabled.
        mm-stable 501a06fe8e4c    patched
real    1m38.028s                 1m32.317s
user    19m11.482s                18m39.439s
sys     19m26.445s                17m5.646s
-v3:
* collect Acked-by of Yosry, Reviewed-by of Chengming
* enhance commit message with respect to Yosry's comment
Hi Andrew, Herbert,
As zswap is the direct user of this patchset and benefits from this
series, it is probably better for this patchset to go through
Andrew's mm tree rather than Herbert's crypto tree, if there is no
objection from Herbert.
Barry Song (3):
crypto: introduce acomp_is_sleepable to expose if comp drivers might
sleep
mm/zswap: remove the memcpy if acomp is not sleepable
crypto: scompress: remove memcpy if sg_nents is 1
crypto/acompress.c | 8 ++++++++
crypto/scompress.c | 36 +++++++++++++++++++++++++++++-------
include/crypto/acompress.h | 9 +++++++++
mm/zswap.c | 6 ++++--
4 files changed, 50 insertions(+), 9 deletions(-)
--
2.34.1
From: Barry Song <[email protected]>
Almost all CPU-based compressors/decompressors are actually synchronous,
even though they support the acomp APIs. Some hardware, however, has
accelerators that offload work from the CPU, such as hisilicon and
intel/qat/, and the drivers for these work in async mode.
Letting acomp's users know exactly if the acomp is really async will
help users know if the compression and decompression procedure can
sleep.
Generally speaking, async and sleepable are semantically similar but
not identical. For compression drivers, however, they are effectively
equivalent, for at least the following reasons.
Firstly, the scompress drivers - crypto/deflate.c, lz4.c, zstd.c, lzo.c
etc. - do not sleep. Secondly, zRAM has been using these scompress
drivers for years in atomic contexts without ever having to worry about
them going to sleep.
Signed-off-by: Barry Song <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
---
crypto/acompress.c | 8 ++++++++
include/crypto/acompress.h | 9 +++++++++
2 files changed, 17 insertions(+)
diff --git a/crypto/acompress.c b/crypto/acompress.c
index 1c682810a484..fa15df394a4c 100644
--- a/crypto/acompress.c
+++ b/crypto/acompress.c
@@ -152,6 +152,14 @@ struct crypto_acomp *crypto_alloc_acomp_node(const char *alg_name, u32 type,
}
EXPORT_SYMBOL_GPL(crypto_alloc_acomp_node);
+bool acomp_is_sleepable(struct crypto_acomp *acomp)
+{
+ struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+
+ return tfm->__crt_alg->cra_type == &crypto_acomp_type;
+}
+EXPORT_SYMBOL_GPL(acomp_is_sleepable);
+
struct acomp_req *acomp_request_alloc(struct crypto_acomp *acomp)
{
struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index 574cffc90730..88ca33532313 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -204,6 +204,15 @@ struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm);
*/
void acomp_request_free(struct acomp_req *req);
+/**
+ * acomp_is_sleepable() -- check if an acomp is sleepable
+ *
+ * @tfm: ACOMPRESS tfm handle allocated with crypto_alloc_acomp()
+ *
+ * Return: true if the acomp is sleepable, otherwise, false
+ */
+bool acomp_is_sleepable(struct crypto_acomp *tfm);
+
/**
* acomp_request_set_callback() -- Sets an asynchronous callback
*
--
2.34.1
From: Barry Song <[email protected]>
Most compressors are actually CPU-based and won't sleep during
compression and decompression. We should remove the redundant
memcpy for them.
Signed-off-by: Barry Song <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
Reviewed-by: Nhat Pham <[email protected]>
Acked-by: Yosry Ahmed <[email protected]>
Reviewed-by: Chengming Zhou <[email protected]>
---
mm/zswap.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index 350dd2fc8159..6319d2281020 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -168,6 +168,7 @@ struct crypto_acomp_ctx {
struct crypto_wait wait;
u8 *buffer;
struct mutex mutex;
+ bool is_sleepable;
};
/*
@@ -716,6 +717,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
goto acomp_fail;
}
acomp_ctx->acomp = acomp;
+ acomp_ctx->is_sleepable = acomp_is_sleepable(acomp);
req = acomp_request_alloc(acomp_ctx->acomp);
if (!req) {
@@ -1368,7 +1370,7 @@ static void __zswap_load(struct zswap_entry *entry, struct page *page)
mutex_lock(&acomp_ctx->mutex);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
- if (!zpool_can_sleep_mapped(zpool)) {
+ if (acomp_ctx->is_sleepable && !zpool_can_sleep_mapped(zpool)) {
memcpy(acomp_ctx->buffer, src, entry->length);
src = acomp_ctx->buffer;
zpool_unmap_handle(zpool, entry->handle);
@@ -1382,7 +1384,7 @@ static void __zswap_load(struct zswap_entry *entry, struct page *page)
BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
mutex_unlock(&acomp_ctx->mutex);
- if (zpool_can_sleep_mapped(zpool))
+ if (!acomp_ctx->is_sleepable || zpool_can_sleep_mapped(zpool))
zpool_unmap_handle(zpool, entry->handle);
}
--
2.34.1
From: Barry Song <[email protected]>
When sg_nents is 1 - which is always the case in the current kernel,
since the only user is zswap - we should remove the two big memcpy
calls.
Signed-off-by: Barry Song <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
---
crypto/scompress.c | 36 +++++++++++++++++++++++++++++-------
1 file changed, 29 insertions(+), 7 deletions(-)
diff --git a/crypto/scompress.c b/crypto/scompress.c
index b108a30a7600..50a487eac792 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -117,6 +117,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
struct crypto_scomp *scomp = *tfm_ctx;
void **ctx = acomp_request_ctx(req);
struct scomp_scratch *scratch;
+ void *src, *dst;
unsigned int dlen;
int ret;
@@ -134,13 +135,25 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
scratch = raw_cpu_ptr(&scomp_scratch);
spin_lock(&scratch->lock);
- scatterwalk_map_and_copy(scratch->src, req->src, 0, req->slen, 0);
+ if (sg_nents(req->src) == 1) {
+ src = kmap_local_page(sg_page(req->src)) + req->src->offset;
+ } else {
+ scatterwalk_map_and_copy(scratch->src, req->src, 0,
+ req->slen, 0);
+ src = scratch->src;
+ }
+
+ if (req->dst && sg_nents(req->dst) == 1)
+ dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset;
+ else
+ dst = scratch->dst;
+
if (dir)
- ret = crypto_scomp_compress(scomp, scratch->src, req->slen,
- scratch->dst, &req->dlen, *ctx);
+ ret = crypto_scomp_compress(scomp, src, req->slen,
+ dst, &req->dlen, *ctx);
else
- ret = crypto_scomp_decompress(scomp, scratch->src, req->slen,
- scratch->dst, &req->dlen, *ctx);
+ ret = crypto_scomp_decompress(scomp, src, req->slen,
+ dst, &req->dlen, *ctx);
if (!ret) {
if (!req->dst) {
req->dst = sgl_alloc(req->dlen, GFP_ATOMIC, NULL);
@@ -152,10 +165,19 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
ret = -ENOSPC;
goto out;
}
- scatterwalk_map_and_copy(scratch->dst, req->dst, 0, req->dlen,
- 1);
+ if (dst == scratch->dst) {
+ scatterwalk_map_and_copy(scratch->dst, req->dst, 0,
+ req->dlen, 1);
+ } else {
+ flush_dcache_page(sg_page(req->dst));
+ }
}
out:
+ if (src != scratch->src)
+ kunmap_local(src);
+ if (dst != scratch->dst)
+ kunmap_local(dst);
+
spin_unlock(&scratch->lock);
return ret;
}
--
2.34.1
On Mon, Feb 19, 2024 at 5:25 PM Herbert Xu <[email protected]> wrote:
>
> On Sat, Feb 17, 2024 at 05:51:00PM +1300, Barry Song wrote:
> .
> > diff --git a/crypto/acompress.c b/crypto/acompress.c
> > index 1c682810a484..fa15df394a4c 100644
> > --- a/crypto/acompress.c
> > +++ b/crypto/acompress.c
> > @@ -152,6 +152,14 @@ struct crypto_acomp *crypto_alloc_acomp_node(const char *alg_name, u32 type,
> > }
> > EXPORT_SYMBOL_GPL(crypto_alloc_acomp_node);
> >
> > +bool acomp_is_sleepable(struct crypto_acomp *acomp)
> > +{
> > + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
> > +
> > + return tfm->__crt_alg->cra_type == &crypto_acomp_type;
> > +}
> > +EXPORT_SYMBOL_GPL(acomp_is_sleepable);
>
> Just because something is of acomp_type it doesn't mean that it's
> async. You should be testing the algorithm flags.
I guess I got your point: drivers using the acomp framework might
actually be SYNC if they don't set CRYPTO_ALG_ASYNC.
>
> So introduce a helper crypto_acomp_get_flags (see the similar
> helper crypto_skcipher_get_flags) and test it against CRYPTO_ALG_ASYNC.
On the other hand, some drivers that are actually ASYNC are missing
CRYPTO_ALG_ASYNC, for example:
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c
b/drivers/crypto/hisilicon/zip/zip_crypto.c
index c650c741a18d..94e2d66b04b6 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -591,6 +591,7 @@ static struct acomp_alg hisi_zip_acomp_deflate = {
.base = {
.cra_name = "deflate",
.cra_driver_name = "hisi-deflate-acomp",
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_module = THIS_MODULE,
.cra_priority = HZIP_ALG_PRIORITY,
.cra_ctxsize = sizeof(struct hisi_zip_ctx),
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c
b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index dfd3baf0a8d8..91adf9d76a2e 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -1916,6 +1916,7 @@ static struct acomp_alg iaa_acomp_fixed_deflate = {
.base = {
.cra_name = "deflate",
.cra_driver_name = "deflate-iaa",
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_ctxsize = sizeof(struct iaa_compression_ctx),
.cra_module = THIS_MODULE,
.cra_priority = IAA_ALG_PRIORITY,
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index 574cffc90730..5b10bd075a07 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -160,6 +160,11 @@ static inline void acomp_request_set_tfm(struct
acomp_req *req,
req->base.tfm = crypto_acomp_tfm(tfm);
}
+static inline u32 crypto_acomp_get_flags(struct crypto_acomp *tfm)
+{
+ return crypto_tfm_get_flags(crypto_acomp_tfm(tfm));
+}
+
static inline struct crypto_acomp *crypto_acomp_reqtfm(struct acomp_req *req)
{
return __crypto_acomp_tfm(req->base.tfm);
Herbert, is the above code what you would prefer?
>
> Cheers,
> --
> Email: Herbert Xu <[email protected]>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Thanks
Barry
On Tue, Feb 20, 2024 at 12:07 PM Barry Song <[email protected]> wrote:
>
> On Mon, Feb 19, 2024 at 5:25 PM Herbert Xu <[email protected]> wrote:
> >
> > On Sat, Feb 17, 2024 at 05:51:00PM +1300, Barry Song wrote:
> > .
> > > diff --git a/crypto/acompress.c b/crypto/acompress.c
> > > index 1c682810a484..fa15df394a4c 100644
> > > --- a/crypto/acompress.c
> > > +++ b/crypto/acompress.c
> > > @@ -152,6 +152,14 @@ struct crypto_acomp *crypto_alloc_acomp_node(const char *alg_name, u32 type,
> > > }
> > > EXPORT_SYMBOL_GPL(crypto_alloc_acomp_node);
> > >
> > > +bool acomp_is_sleepable(struct crypto_acomp *acomp)
> > > +{
> > > + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
> > > +
> > > + return tfm->__crt_alg->cra_type == &crypto_acomp_type;
> > > +}
> > > +EXPORT_SYMBOL_GPL(acomp_is_sleepable);
> >
> > Just because something is of acomp_type it doesn't mean that it's
> > async. You should be testing the algorithm flags.
>
> I guess I got your point, drivers using acomp framework might actually
> be SYNC if they don't set CRYPTO_ALG_ASYNC.
>
> >
> > So introduce a helper crypto_acomp_get_flags (see the similar
> > helper crypto_skcipher_get_flags) and test it against CRYPTO_ALG_ASYNC.
>
> On the other hand, some drivers which are actually ASYNC, are lacking
> CRYPTO_ALG_ASYNC for example:
>
> diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c
> b/drivers/crypto/hisilicon/zip/zip_crypto.c
> index c650c741a18d..94e2d66b04b6 100644
> --- a/drivers/crypto/hisilicon/zip/zip_crypto.c
> +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
> @@ -591,6 +591,7 @@ static struct acomp_alg hisi_zip_acomp_deflate = {
> .base = {
> .cra_name = "deflate",
> .cra_driver_name = "hisi-deflate-acomp",
> + .cra_flags = CRYPTO_ALG_ASYNC,
> .cra_module = THIS_MODULE,
> .cra_priority = HZIP_ALG_PRIORITY,
> .cra_ctxsize = sizeof(struct hisi_zip_ctx),
> diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c
> b/drivers/crypto/intel/iaa/iaa_crypto_main.c
> index dfd3baf0a8d8..91adf9d76a2e 100644
> --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
> +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
> @@ -1916,6 +1916,7 @@ static struct acomp_alg iaa_acomp_fixed_deflate = {
> .base = {
> .cra_name = "deflate",
> .cra_driver_name = "deflate-iaa",
> + .cra_flags = CRYPTO_ALG_ASYNC,
> .cra_ctxsize = sizeof(struct iaa_compression_ctx),
> .cra_module = THIS_MODULE,
> .cra_priority = IAA_ALG_PRIORITY,
> diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
> index 574cffc90730..5b10bd075a07 100644
> --- a/include/crypto/acompress.h
> +++ b/include/crypto/acompress.h
> @@ -160,6 +160,11 @@ static inline void acomp_request_set_tfm(struct
> acomp_req *req,
> req->base.tfm = crypto_acomp_tfm(tfm);
> }
>
> +static inline u32 crypto_acomp_get_flags(struct crypto_acomp *tfm)
> +{
> + return crypto_tfm_get_flags(crypto_acomp_tfm(tfm));
> +}
This seems to be wrong: we are expecting cra_flags, not crt_flags. Should
it be the below instead?
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index 574cffc90730..07bd8f6bc79a 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -160,6 +160,11 @@ static inline void acomp_request_set_tfm(struct
acomp_req *req,
req->base.tfm = crypto_acomp_tfm(tfm);
}
+static inline u32 crypto_acomp_get_alg_flags(struct crypto_acomp *tfm)
+{
+ return crypto_tfm_alg_flags(crypto_acomp_tfm(tfm));
+}
+
static inline struct crypto_acomp *crypto_acomp_reqtfm(struct acomp_req *req)
{
return __crypto_acomp_tfm(req->base.tfm);
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index b164da5e129e..811bfaf8b6f8 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -467,6 +467,11 @@ static inline unsigned int
crypto_tfm_alg_blocksize(struct crypto_tfm *tfm)
return tfm->__crt_alg->cra_blocksize;
}
+static inline unsigned int crypto_tfm_alg_flags(struct crypto_tfm *tfm)
+{
+ return tfm->__crt_alg->cra_flags;
+}
+
static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm)
{
return tfm->__crt_alg->cra_alignmask;
> +
> static inline struct crypto_acomp *crypto_acomp_reqtfm(struct acomp_req *req)
> {
> return __crypto_acomp_tfm(req->base.tfm);
>
>
> Herbert, Is the above code what you would prefer?
>
> >
> > Cheers,
> > --
> > Email: Herbert Xu <[email protected]>
> > Home Page: http://gondor.apana.org.au/~herbert/
> > PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
>
Thanks
Barry