2019-09-20 17:18:48

by Hui Zhu

[permalink] [raw]
Subject: [RFC v2] zswap: Add CONFIG_ZSWAP_IO_SWITCH to handle swap IO issue

This is the second version of this patch. The previous version is in
https://lkml.org/lkml/2019/9/11/935
I updated the commit introduction and Kconfig because it is not clear.

Currently, I use a VM that has 2 CPUs, 4G of memory and a 4G swap file.
I found that swapping hurts IO performance while it is running.
So I enabled zswap to handle it, because it only uses CPU cycles and
not disk IO.

It works OK, but I found that zswap is slower than normal swap in this
VM: zswap is about 300M/s and normal swap is about 500M/s. (The reason
is that the disk inside the VM has fscache on the host machine.)
So enabling zswap makes the memory shrinker slower, but it is good for
IO performance in this VM.
So I want zswap to work only when the disk of the swap file is under
high IO load.

This commit implements this idea.
It adds two parameters, read_in_flight_limit and write_in_flight_limit,
to zswap.
When zswap is enabled and at least one of these limits is set, pages
are stored to zswap in zswap_frontswap_store only when the number of
in-flight IOs of the swap device has reached
zswap_read_in_flight_limit (for reads) or zswap_write_in_flight_limit
(for writes); otherwise the store is rejected and counted in reject_io.
So zswap only works when the number of in-flight IOs of the swap device
is high.

Signed-off-by: Hui Zhu <[email protected]>
---
include/linux/swap.h | 3 +++
mm/Kconfig | 18 +++++++++++++++++
mm/page_io.c | 16 +++++++++++++++
mm/zswap.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 92 insertions(+)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index de2c67a..82b621f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -389,6 +389,9 @@ extern void end_swap_bio_write(struct bio *bio);
extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
bio_end_io_t end_write_func);
extern int swap_set_page_dirty(struct page *page);
+#ifdef CONFIG_ZSWAP_IO_SWITCH
+extern void swap_io_in_flight(struct page *page, unsigned int inflight[2]);
+#endif

int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
unsigned long nr_pages, sector_t start_block);
diff --git a/mm/Kconfig b/mm/Kconfig
index 56cec63..5408d65 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -546,6 +546,24 @@ config ZSWAP
they have not be fully explored on the large set of potential
configurations and workloads that exist.

+config ZSWAP_IO_SWITCH
+ bool "Compressed cache for swap pages according to the IO status"
+ depends on ZSWAP
+ def_bool n
+ help
+ This function help the system that normal swap speed is higher
+ than zswap speed to handle the swap IO issue.
+ For example, a VM that is disk device is not set cache config or
+ set cache=writeback.
+
+ This function make zswap just work when the disk of the swap file
+ is under high IO load.
+ It add two parameters read_in_flight_limit and write_in_flight_limit to
+ zswap. When zswap is enabled, pages will be stored to zswap only
+ when the IO in flight number of swap device is bigger than
+ zswap_read_in_flight_limit or zswap_write_in_flight_limit.
+ If unsure, say "n".
+
config ZPOOL
tristate "Common API for compressed memory storage"
help
diff --git a/mm/page_io.c b/mm/page_io.c
index 24ee600..e66b050 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -434,3 +434,19 @@ int swap_set_page_dirty(struct page *page)
return __set_page_dirty_no_writeback(page);
}
}
+
+#ifdef CONFIG_ZSWAP_IO_SWITCH
+void swap_io_in_flight(struct page *page, unsigned int inflight[2])
+{
+ struct swap_info_struct *sis = page_swap_info(page);
+
+ if (!sis->bdev) {
+ inflight[0] = 0;
+ inflight[1] = 0;
+ return;
+ }
+
+ part_in_flight_rw(bdev_get_queue(sis->bdev), sis->bdev->bd_part,
+ inflight);
+}
+#endif
diff --git a/mm/zswap.c b/mm/zswap.c
index 0e22744..1255645 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -62,6 +62,13 @@ static u64 zswap_reject_compress_poor;
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
+#ifdef CONFIG_ZSWAP_IO_SWITCH
+/* Store failed because zswap_read_in_flight_limit or
+ * zswap_write_in_flight_limit is bigger than IO in flight number of
+ * swap device
+ */
+static u64 zswap_reject_io;
+#endif
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

@@ -114,6 +121,22 @@ static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
bool, 0644);

+#ifdef CONFIG_ZSWAP_IO_SWITCH
+/* zswap will not try to store the page if zswap_read_in_flight_limit is
+ * bigger than IO read in flight number of swap device
+ */
+static unsigned int zswap_read_in_flight_limit;
+module_param_named(read_in_flight_limit, zswap_read_in_flight_limit,
+ uint, 0644);
+
+/* zswap will not try to store the page if zswap_write_in_flight_limit is
+ * bigger than IO write in flight number of swap device
+ */
+static unsigned int zswap_write_in_flight_limit;
+module_param_named(write_in_flight_limit, zswap_write_in_flight_limit,
+ uint, 0644);
+#endif
+
/*********************************
* data structures
**********************************/
@@ -1009,6 +1032,34 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
goto reject;
}

+#ifdef CONFIG_ZSWAP_IO_SWITCH
+ if (zswap_read_in_flight_limit || zswap_write_in_flight_limit) {
+ unsigned int inflight[2];
+ bool should_swap = false;
+
+ swap_io_in_flight(page, inflight);
+
+ if (zswap_write_in_flight_limit &&
+ inflight[1] < zswap_write_in_flight_limit)
+ should_swap = true;
+
+ if (zswap_read_in_flight_limit &&
+ (should_swap ||
+ (!should_swap && !zswap_write_in_flight_limit))) {
+ if (inflight[0] < zswap_read_in_flight_limit)
+ should_swap = true;
+ else
+ should_swap = false;
+ }
+
+ if (should_swap) {
+ zswap_reject_io++;
+ ret = -EIO;
+ goto reject;
+ }
+ }
+#endif
+
/* reclaim space if needed */
if (zswap_is_full()) {
zswap_pool_limit_hit++;
@@ -1264,6 +1315,10 @@ static int __init zswap_debugfs_init(void)
zswap_debugfs_root, &zswap_reject_kmemcache_fail);
debugfs_create_u64("reject_compress_poor", 0444,
zswap_debugfs_root, &zswap_reject_compress_poor);
+#ifdef CONFIG_ZSWAP_IO_SWITCH
+ debugfs_create_u64("reject_io", 0444,
+ zswap_debugfs_root, &zswap_reject_io);
+#endif
debugfs_create_u64("written_back_pages", 0444,
zswap_debugfs_root, &zswap_written_back_pages);
debugfs_create_u64("duplicate_entry", 0444,
--
2.7.4


2019-09-23 13:34:35

by Randy Dunlap

[permalink] [raw]
Subject: Re: [RFC v2] zswap: Add CONFIG_ZSWAP_IO_SWITCH to handle swap IO issue

On 9/19/19 11:35 PM, Hui Zhu wrote:
> This is the second version of this patch. The previous version is in
> https://lkml.org/lkml/2019/9/11/935
> I updated the commit introduction and Kconfig because it is not clear.
>
Hi,
Just a few minor fixes (below):

>
> Signed-off-by: Hui Zhu <[email protected]>
> ---
> include/linux/swap.h | 3 +++
> mm/Kconfig | 18 +++++++++++++++++
> mm/page_io.c | 16 +++++++++++++++
> mm/zswap.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 92 insertions(+)
>
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 56cec63..5408d65 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -546,6 +546,24 @@ config ZSWAP
> they have not be fully explored on the large set of potential
> configurations and workloads that exist.
>
> +config ZSWAP_IO_SWITCH
> + bool "Compressed cache for swap pages according to the IO status"
> + depends on ZSWAP
> + def_bool n

Just drop the "def_bool n". It's already a "bool" and 'n' is the default value for it.

> + help
> + This function help the system that normal swap speed is higher

helps the system in which normal swap speed is higher

> + than zswap speed to handle the swap IO issue.
> + For example, a VM that is disk device is not set cache config or

possibly:
For example, a VM where the disk device is not set for cache config or

> + set cache=writeback.
> +
> + This function make zswap just work when the disk of the swap file

This function makes

> + is under high IO load.
> + It add two parameters read_in_flight_limit and write_in_flight_limit to

It adds two parameters (read_in_flight_limit and write_in_flight_limit) to

> + zswap. When zswap is enabled, pages will be stored to zswap only
> + when the IO in flight number of swap device is bigger than

of the swap device

> + zswap_read_in_flight_limit or zswap_write_in_flight_limit.
> + If unsure, say "n".
> +
> config ZPOOL
> tristate "Common API for compressed memory storage"
> help

> diff --git a/mm/zswap.c b/mm/zswap.c
> index 0e22744..1255645 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -62,6 +62,13 @@ static u64 zswap_reject_compress_poor;
> static u64 zswap_reject_alloc_fail;
> /* Store failed because the entry metadata could not be allocated (rare) */
> static u64 zswap_reject_kmemcache_fail;
> +#ifdef CONFIG_ZSWAP_IO_SWITCH
> +/* Store failed because zswap_read_in_flight_limit or
> + * zswap_write_in_flight_limit is bigger than IO in flight number of
> + * swap device
> + */

Please use the documented multi-line comment format. E.g.:

/*
* Store failed because zswap_read_in_flight_limit or
* zswap_write_in_flight_limit is bigger than IO in flight number of
* swap device.
*/

> +static u64 zswap_reject_io;
> +#endif
> /* Duplicate store was encountered (rare) */
> static u64 zswap_duplicate_entry;
>
> @@ -114,6 +121,22 @@ static bool zswap_same_filled_pages_enabled = true;
> module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
> bool, 0644);
>
> +#ifdef CONFIG_ZSWAP_IO_SWITCH
> +/* zswap will not try to store the page if zswap_read_in_flight_limit is
> + * bigger than IO read in flight number of swap device
> + */

Use documented multi-line comment format.

> +static unsigned int zswap_read_in_flight_limit;
> +module_param_named(read_in_flight_limit, zswap_read_in_flight_limit,
> + uint, 0644);
> +
> +/* zswap will not try to store the page if zswap_write_in_flight_limit is
> + * bigger than IO write in flight number of swap device
> + */

ditto.

thanks.
--
~Randy