From: Barry Song <[email protected]>
This patchset removes a couple of memcpy calls in zswap and crypto
to improve zswap's performance.
Thanks to Chengming Zhou for the testing and perf data.
Quote from Chengming,
I just tested these three patches on my server, found improvement in the
kernel build testcase on a tmpfs with zswap (lz4 + zsmalloc) enabled.
        mm-stable 501a06fe8e4c    patched
real    1m38.028s                 1m32.317s
user    19m11.482s                18m39.439s
sys     19m26.445s                17m5.646s
The patchset is based on mm-stable.
Barry Song (3):
crypto: introduce acomp_is_async to expose if a acomp has a scomp
backend
mm/zswap: remove the memcpy if acomp is not asynchronous
crypto: scompress: remove memcpy if sg_nents is 1
crypto/acompress.c | 8 ++++++++
crypto/scompress.c | 35 ++++++++++++++++++++++++++++-------
include/crypto/acompress.h | 9 +++++++++
mm/zswap.c | 6 ++++--
4 files changed, 49 insertions(+), 9 deletions(-)
--
2.34.1
From: Barry Song <[email protected]>
Almost all CPU-based compressors/decompressors are actually synchronous
though they support acomp APIs. While some hardware has hardware-based
accelerators to offload CPU's work such as hisilicon and intel/qat/,
their drivers are working in async mode.
Letting acomp's users know exactly if the acomp is really async will
help users know if the compression and decompression procedure can
sleep.
Signed-off-by: Barry Song <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
---
crypto/acompress.c | 8 ++++++++
include/crypto/acompress.h | 9 +++++++++
2 files changed, 17 insertions(+)
diff --git a/crypto/acompress.c b/crypto/acompress.c
index 1c682810a484..99118e879a4a 100644
--- a/crypto/acompress.c
+++ b/crypto/acompress.c
@@ -152,6 +152,14 @@ struct crypto_acomp *crypto_alloc_acomp_node(const char *alg_name, u32 type,
}
EXPORT_SYMBOL_GPL(crypto_alloc_acomp_node);
+bool acomp_is_async(struct crypto_acomp *acomp)
+{
+ struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+
+ return tfm->__crt_alg->cra_type == &crypto_acomp_type;
+}
+EXPORT_SYMBOL_GPL(acomp_is_async);
+
struct acomp_req *acomp_request_alloc(struct crypto_acomp *acomp)
{
struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index 574cffc90730..d91830c2d442 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -204,6 +204,15 @@ struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm);
*/
void acomp_request_free(struct acomp_req *req);
+/**
+ * acomp_is_async() -- check if an acomp is asynchronous (can sleep)
+ *
+ * @tfm: ACOMPRESS tfm handle allocated with crypto_alloc_acomp()
+ *
+ * Return: true if the acomp is asynchronous, otherwise, false
+ */
+bool acomp_is_async(struct crypto_acomp *tfm);
+
/**
* acomp_request_set_callback() -- Sets an asynchronous callback
*
--
2.34.1
From: Barry Song <[email protected]>
Most compressors are actually CPU-based and won't sleep during
compression and decompression. We should remove the redundant
memcpy for them.
Signed-off-by: Barry Song <[email protected]>
Tested-by: Chengming Zhou <[email protected]>
---
mm/zswap.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index ca25b676048e..36898614ebcc 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -168,6 +168,7 @@ struct crypto_acomp_ctx {
struct crypto_wait wait;
u8 *buffer;
struct mutex mutex;
+ bool is_async; /* if acomp can sleep */
};
/*
@@ -716,6 +717,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
goto acomp_fail;
}
acomp_ctx->acomp = acomp;
+ acomp_ctx->is_async = acomp_is_async(acomp);
req = acomp_request_alloc(acomp_ctx->acomp);
if (!req) {
@@ -1370,7 +1372,7 @@ static void __zswap_load(struct zswap_entry *entry, struct page *page)
mutex_lock(&acomp_ctx->mutex);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
- if (!zpool_can_sleep_mapped(zpool)) {
+ if (acomp_ctx->is_async && !zpool_can_sleep_mapped(zpool)) {
memcpy(acomp_ctx->buffer, src, entry->length);
src = acomp_ctx->buffer;
zpool_unmap_handle(zpool, entry->handle);
@@ -1384,7 +1386,7 @@ static void __zswap_load(struct zswap_entry *entry, struct page *page)
BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
mutex_unlock(&acomp_ctx->mutex);
- if (zpool_can_sleep_mapped(zpool))
+ if (!acomp_ctx->is_async || zpool_can_sleep_mapped(zpool))
zpool_unmap_handle(zpool, entry->handle);
}
--
2.34.1
On Wed, Jan 3, 2024 at 1:50 AM Barry Song <[email protected]> wrote:
>
> From: Barry Song <[email protected]>
>
> Most compressors are actually CPU-based and won't sleep during
> compression and decompression. We should remove the redundant
> memcpy for them.
>
> Signed-off-by: Barry Song <[email protected]>
> Tested-by: Chengming Zhou <[email protected]>
nit: it might help to include the test numbers in the changelog in
this patch here too. Save a couple of clicks to dig out the original
patch cover for the numbers :)
> ---
> mm/zswap.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index ca25b676048e..36898614ebcc 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -168,6 +168,7 @@ struct crypto_acomp_ctx {
> struct crypto_wait wait;
> u8 *buffer;
> struct mutex mutex;
> + bool is_async; /* if acomp can sleep */
nit: seems like this comment isn't necessary. is_async is pretty
self-explanatory to me. But definitely not a show stopper tho :)
> };
>
> /*
> @@ -716,6 +717,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
> goto acomp_fail;
> }
> acomp_ctx->acomp = acomp;
> + acomp_ctx->is_async = acomp_is_async(acomp);
>
> req = acomp_request_alloc(acomp_ctx->acomp);
> if (!req) {
> @@ -1370,7 +1372,7 @@ static void __zswap_load(struct zswap_entry *entry, struct page *page)
> mutex_lock(&acomp_ctx->mutex);
>
> src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
> - if (!zpool_can_sleep_mapped(zpool)) {
> + if (acomp_ctx->is_async && !zpool_can_sleep_mapped(zpool)) {
> memcpy(acomp_ctx->buffer, src, entry->length);
> src = acomp_ctx->buffer;
> zpool_unmap_handle(zpool, entry->handle);
> @@ -1384,7 +1386,7 @@ static void __zswap_load(struct zswap_entry *entry, struct page *page)
> BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
> mutex_unlock(&acomp_ctx->mutex);
>
> - if (zpool_can_sleep_mapped(zpool))
> + if (!acomp_ctx->is_async || zpool_can_sleep_mapped(zpool))
> zpool_unmap_handle(zpool, entry->handle);
> }
>
> --
> 2.34.1
>
The zswap side looks good to me. I don't have expertise/authority to
ack the crypto API change (but FWIW it LGTM too based on a cursory
code read).
Reviewed-by: Nhat Pham <[email protected]>
On Wed, Jan 3, 2024 at 1:50 AM Barry Song <[email protected]> wrote:
>
> From: Barry Song <[email protected]>
>
> Most compressors are actually CPU-based and won't sleep during
> compression and decompression. We should remove the redundant
> memcpy for them.
>
> Signed-off-by: Barry Song <[email protected]>
> Tested-by: Chengming Zhou <[email protected]>
> ---
> mm/zswap.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index ca25b676048e..36898614ebcc 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -168,6 +168,7 @@ struct crypto_acomp_ctx {
> struct crypto_wait wait;
> u8 *buffer;
> struct mutex mutex;
> + bool is_async; /* if acomp can sleep */
As pointed out in patch 1, I think we should name this explicitly to
be about sleep-ability (e.g. sleepable or can_sleep).
On Tue, Jan 9, 2024 at 6:36 AM Yosry Ahmed <[email protected]> wrote:
>
> On Wed, Jan 3, 2024 at 1:50 AM Barry Song <[email protected]> wrote:
> >
> > From: Barry Song <[email protected]>
> >
> > Almost all CPU-based compressors/decompressors are actually synchronous
> > though they support acomp APIs. While some hardware has hardware-based
> > accelerators to offload CPU's work such as hisilicon and intel/qat/,
> > their drivers are working in async mode.
> > Letting acomp's users know exactly if the acomp is really async will
> > help users know if the compression and decompression procedure can
> > sleep.
> >
> > Signed-off-by: Barry Song <[email protected]>
> > Tested-by: Chengming Zhou <[email protected]>
> > ---
> > crypto/acompress.c | 8 ++++++++
> > include/crypto/acompress.h | 9 +++++++++
> > 2 files changed, 17 insertions(+)
> >
> > diff --git a/crypto/acompress.c b/crypto/acompress.c
> > index 1c682810a484..99118e879a4a 100644
> > --- a/crypto/acompress.c
> > +++ b/crypto/acompress.c
> > @@ -152,6 +152,14 @@ struct crypto_acomp *crypto_alloc_acomp_node(const char *alg_name, u32 type,
> > }
> > EXPORT_SYMBOL_GPL(crypto_alloc_acomp_node);
> >
> > +bool acomp_is_async(struct crypto_acomp *acomp)
>
> Is synchronous semantically the same as sleepable? IIUC synchronous
> code may still sleep, at least generally. The purpose of this change
> is to know whether we will sleep or not in the zswap code, so I
> suggest the code should be explicit about sleep-ability instead (e.g.
> acomp_is_sleepable or acomp_may_sleep).
Thanks, Yosry. Sounds reasonable.
I'd like to ask for Herbert's comment: do we have a better way to know
whether an acomp can sleep than the check below?
return tfm->__crt_alg->cra_type == &crypto_acomp_type;
Thanks
Barry