2021-09-18 09:23:10

by Brian Geffon

[permalink] [raw]
Subject: [PATCH] zram: Introduce an aged idle interface

This change introduces a new sysfs file for the zram idle interface.
The current idle interface has only a single mode "all." This is
severely limiting in that it requires that you have the foresight
to know that you'll want to have pages idle at some point in the future
and forces the user to mark everything as idle.

This new proposed file idle_aged takes a single integer argument which
represents the age of the pages (in seconds since access) to mark as idle.
Because it requires accessed tracking it is only enabled when
CONFIG_ZRAM_MEMORY_TRACKING is enabled. There are a few
reasons why this is being proposed as a new file rather than extending
the existing file. The first reason is that it wouldn't allow more code
sharing as this change already refactors the existing code to do so.
Secondly, having a standalone file allows a caller to quickly check if this
idle_aged interface is supported. Finally, it simplifies the parsing
logic because now you only need to parse an int rather than more complex
logic which would need to parse things like "aged 50."

Signed-off-by: Brian Geffon <[email protected]>
---
Documentation/admin-guide/blockdev/zram.rst | 11 ++-
drivers/block/zram/zram_drv.c | 75 +++++++++++++++++----
2 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 700329d25f57..ecd1c4916a1c 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -209,6 +209,8 @@ compact WO trigger memory compaction
debug_stat RO this file is used for zram debugging purposes
backing_dev RW set up backend storage for zram to write out
idle WO mark allocated slot as idle
+idle_aged WO mark allocated slot older than 'age' seconds
+ as idle (see later)
====================== ====== ===============================================


@@ -325,8 +327,13 @@ as idle::

echo all > /sys/block/zramX/idle

-From now on, any pages on zram are idle pages. The idle mark
-will be removed until someone requests access of the block.
+Alternatively if the config option CONFIG_ZRAM_MEMORY_TRACKING is enabled
+the idle_aged interface can be used to mark only pages older than 'age'
+seconds as idle::
+
+ echo 86400 > /sys/block/zramX/idle_aged
+
+The idle mark will be removed until someone requests access of the block.
IOW, unless there is access request, those pages are still idle pages.

Admin can request writeback of those idle pages at right timing via::
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fcaf2750f68f..a371dc0edf9d 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}

-static ssize_t idle_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+/*
+ * Mark all pages which are older than or equal to cutoff as IDLE.
+ * Callers should hold the zram init lock in read mode
+ **/
+static void mark_idle(struct zram *zram, ktime_t cutoff)
{
- struct zram *zram = dev_to_zram(dev);
+ int is_idle = 1;
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
int index;

- if (!sysfs_streq(buf, "all"))
- return -EINVAL;
-
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
- return -EINVAL;
- }
-
for (index = 0; index < nr_pages; index++) {
/*
* Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
@@ -314,16 +308,63 @@ static ssize_t idle_store(struct device *dev,
*/
zram_slot_lock(zram, index);
if (zram_allocated(zram, index) &&
- !zram_test_flag(zram, index, ZRAM_UNDER_WB))
- zram_set_flag(zram, index, ZRAM_IDLE);
+ !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ is_idle = (!cutoff || cutoff >= zram->table[index].ac_time);
+#endif
+ if (is_idle)
+ zram_set_flag(zram, index, ZRAM_IDLE);
+ }
zram_slot_unlock(zram, index);
}
+}
+
+static ssize_t idle_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ if (!sysfs_streq(buf, "all"))
+ return -EINVAL;
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram)) {
+ up_read(&zram->init_lock);
+ return -EINVAL;
+ }

+ /* Mark everything as idle */
+ mark_idle(zram, 0);
up_read(&zram->init_lock);

return len;
}

+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+static ssize_t idle_aged_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ ktime_t cutoff_time;
+ u64 age_sec;
+ ssize_t rv = -EINVAL;
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram))
+ goto out;
+
+ if (kstrtoull(buf, 10, &age_sec))
+ goto out;
+
+ rv = len;
+ cutoff_time = ktime_sub(ktime_get_boottime(), ns_to_ktime(age_sec * NSEC_PER_SEC));
+ mark_idle(zram, cutoff_time);
+out:
+ up_read(&zram->init_lock);
+ return rv;
+}
+#endif
+
#ifdef CONFIG_ZRAM_WRITEBACK
static ssize_t writeback_limit_enable_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
@@ -1840,6 +1881,9 @@ static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+static DEVICE_ATTR_WO(idle_aged);
+#endif
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
@@ -1857,6 +1901,9 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_mem_limit.attr,
&dev_attr_mem_used_max.attr,
&dev_attr_idle.attr,
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ &dev_attr_idle_aged.attr,
+#endif
&dev_attr_max_comp_streams.attr,
&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
--
2.33.0.464.g1972c5931b-goog


2021-09-21 03:57:57

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH] zram: Introduce an aged idle interface

On Fri, Sep 17, 2021 at 02:06:40PM -0700, Brian Geffon wrote:
> This change introduces a new sysfs file for the zram idle interface.
> The current idle interface has only a single mode "all." This is
> severely limiting in that it requires that you have the foresight
> to know that you'll want to have pages idle at some point in the future
> and forces the user to mark everything as idle.
>
> This new proposed file idle_aged takes a single integer argument which
> represents the age of the pages (in seconds since access) to mark as idle.
> Because it requires accessed tracking it is only enabled when
> CONFIG_ZRAM_MEMORY_TRACKING is enabled. There are a few
> reasons why this is being proposed as a new file rather than extending
> the existing file. The first reason is that it wouldn't allow more code
> sharing as this change already refactors the existing code to do so.
> Secondly, having a standalone file allows a caller to quickly check if this
> idle_aged interface is supported. Finally, it simplifies the parsing
> logic because now you only need to parse an int rather than more complex
> logic which would need to parse things like "aged 50."

I am not convinced with create new sysfs node to support the aged time.
The reason I used the "all" was to have the room to support the aged
time as you suggested for the future. IOW, If the value is not "all" but
"only numeric", we could take it as "aged" value.

And then, we can write up in the doc "The age-based idle" supported by
kernel version 5.XX with CONFIG_ZRAM_MEMORY_TRACKING.
I understand the separate interface will make code simple but not sure
it's worth to create another sysfs knob.

Any thoughts?

>
> Signed-off-by: Brian Geffon <[email protected]>
> ---
> Documentation/admin-guide/blockdev/zram.rst | 11 ++-
> drivers/block/zram/zram_drv.c | 75 +++++++++++++++++----
> 2 files changed, 70 insertions(+), 16 deletions(-)
>
> diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
> index 700329d25f57..ecd1c4916a1c 100644
> --- a/Documentation/admin-guide/blockdev/zram.rst
> +++ b/Documentation/admin-guide/blockdev/zram.rst
> @@ -209,6 +209,8 @@ compact WO trigger memory compaction
> debug_stat RO this file is used for zram debugging purposes
> backing_dev RW set up backend storage for zram to write out
> idle WO mark allocated slot as idle
> +idle_aged WO mark allocated slot older than 'age' seconds
> + as idle (see later)
> ====================== ====== ===============================================
>
>
> @@ -325,8 +327,13 @@ as idle::
>
> echo all > /sys/block/zramX/idle
>
> -From now on, any pages on zram are idle pages. The idle mark
> -will be removed until someone requests access of the block.
> +Alternatively if the config option CONFIG_ZRAM_MEMORY_TRACKING is enabled
> +the idle_aged interface can be used to mark only pages older than 'age'
> +seconds as idle::
> +
> + echo 86400 > /sys/block/zramX/idle_aged
> +
> +The idle mark will be removed until someone requests access of the block.
> IOW, unless there is access request, those pages are still idle pages.
>
> Admin can request writeback of those idle pages at right timing via::
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index fcaf2750f68f..a371dc0edf9d 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
> return len;
> }
>
> -static ssize_t idle_store(struct device *dev,
> - struct device_attribute *attr, const char *buf, size_t len)
> +/*
> + * Mark all pages which are older than or equal to cutoff as IDLE.
> + * Callers should hold the zram init lock in read mode
> + **/
> +static void mark_idle(struct zram *zram, ktime_t cutoff)
> {
> - struct zram *zram = dev_to_zram(dev);
> + int is_idle = 1;
> unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
> int index;
>
> - if (!sysfs_streq(buf, "all"))
> - return -EINVAL;
> -
> - down_read(&zram->init_lock);
> - if (!init_done(zram)) {
> - up_read(&zram->init_lock);
> - return -EINVAL;
> - }
> -
> for (index = 0; index < nr_pages; index++) {
> /*
> * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
> @@ -314,16 +308,63 @@ static ssize_t idle_store(struct device *dev,
> */
> zram_slot_lock(zram, index);
> if (zram_allocated(zram, index) &&
> - !zram_test_flag(zram, index, ZRAM_UNDER_WB))
> - zram_set_flag(zram, index, ZRAM_IDLE);
> + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> + is_idle = (!cutoff || cutoff >= zram->table[index].ac_time);

I am not sure it's a good idea to compare those ktime_t directly
without using the ktime API(e.g., ktime_after or ktime_before).
Maybe, we can get age_sec ktime_t directly as a argument and then
use the ktime API with ktime_get_boottime to compare them.

> +#endif
> + if (is_idle)
> + zram_set_flag(zram, index, ZRAM_IDLE);
> + }
> zram_slot_unlock(zram, index);
> }
> +}
> +
> +static ssize_t idle_store(struct device *dev,
> + struct device_attribute *attr, const char *buf, size_t len)
> +{
> + struct zram *zram = dev_to_zram(dev);
> +
> + if (!sysfs_streq(buf, "all"))
> + return -EINVAL;
> +
> + down_read(&zram->init_lock);
> + if (!init_done(zram)) {
> + up_read(&zram->init_lock);
> + return -EINVAL;
> + }
>
> + /* Mark everything as idle */
> + mark_idle(zram, 0);
> up_read(&zram->init_lock);
>
> return len;
> }
>
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> +static ssize_t idle_aged_store(struct device *dev,
> + struct device_attribute *attr, const char *buf, size_t len)
> +{
> + struct zram *zram = dev_to_zram(dev);
> + ktime_t cutoff_time;
> + u64 age_sec;
> + ssize_t rv = -EINVAL;
> +
> + down_read(&zram->init_lock);
> + if (!init_done(zram))
> + goto out;
> +
> + if (kstrtoull(buf, 10, &age_sec))
> + goto out;
> +
> + rv = len;
> + cutoff_time = ktime_sub(ktime_get_boottime(), ns_to_ktime(age_sec * NSEC_PER_SEC));
> + mark_idle(zram, cutoff_time);
> +out:
> + up_read(&zram->init_lock);
> + return rv;
> +}
> +#endif
> +
> #ifdef CONFIG_ZRAM_WRITEBACK
> static ssize_t writeback_limit_enable_store(struct device *dev,
> struct device_attribute *attr, const char *buf, size_t len)
> @@ -1840,6 +1881,9 @@ static DEVICE_ATTR_WO(reset);
> static DEVICE_ATTR_WO(mem_limit);
> static DEVICE_ATTR_WO(mem_used_max);
> static DEVICE_ATTR_WO(idle);
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> +static DEVICE_ATTR_WO(idle_aged);
> +#endif
> static DEVICE_ATTR_RW(max_comp_streams);
> static DEVICE_ATTR_RW(comp_algorithm);
> #ifdef CONFIG_ZRAM_WRITEBACK
> @@ -1857,6 +1901,9 @@ static struct attribute *zram_disk_attrs[] = {
> &dev_attr_mem_limit.attr,
> &dev_attr_mem_used_max.attr,
> &dev_attr_idle.attr,
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> + &dev_attr_idle_aged.attr,
> +#endif
> &dev_attr_max_comp_streams.attr,
> &dev_attr_comp_algorithm.attr,
> #ifdef CONFIG_ZRAM_WRITEBACK
> --
> 2.33.0.464.g1972c5931b-goog
>

2021-09-21 04:01:13

by Brian Geffon

[permalink] [raw]
Subject: Re: [PATCH] zram: Introduce an aged idle interface

On Mon, Sep 20, 2021 at 5:15 PM Minchan Kim <[email protected]> wrote:
>
> On Fri, Sep 17, 2021 at 02:06:40PM -0700, Brian Geffon wrote:
> > This change introduces a new sysfs file for the zram idle interface.
> > The current idle interface has only a single mode "all." This is
> > severely limiting in that it requires that you have the foresight
> > to know that you'll want to have pages idle at some point in the future
> > and forces the user to mark everything as idle.
> >
> > This new proposed file idle_aged takes a single integer argument which
> > represents the age of the pages (in seconds since access) to mark as idle.
> > Because it requires accessed tracking it is only enabled when
> > CONFIG_ZRAM_MEMORY_TRACKING is enabled. There are a few
> > reasons why this is being proposed as a new file rather than extending
> > the existing file. The first reason is that it wouldn't allow more code
> > sharing as this change already refactors the existing code to do so.
> > Secondly, having a standalone file allows a caller to quickly check if this
> > idle_aged interface is supported. Finally, it simplifies the parsing
> > logic because now you only need to parse an int rather than more complex
> > logic which would need to parse things like "aged 50."
>
> I am not convinced with create new sysfs node to support the aged time.
> The reason I used the "all" was to have the room to support the aged
> time as you suggested for the future. IOW, If the value is not "all" but
> "only numeric", we could take it as "aged" value.
>
> And then, we can write up in the doc "The age-based idle" supported by
> kernel version 5.XX with CONFIG_ZRAM_MEMORY_TRACKING.
> I understand the separate interface will make code simple but not sure
> it's worth to create another sysfs knob.
>
> Any thoughts?

Sure, Sergey had similar feedback on that. I'll mail a new patch that
uses the existing knob but attempts to parse the input as an integer.

>
> >
> > Signed-off-by: Brian Geffon <[email protected]>
> > ---
> > Documentation/admin-guide/blockdev/zram.rst | 11 ++-
> > drivers/block/zram/zram_drv.c | 75 +++++++++++++++++----
> > 2 files changed, 70 insertions(+), 16 deletions(-)
> >
> > diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
> > index 700329d25f57..ecd1c4916a1c 100644
> > --- a/Documentation/admin-guide/blockdev/zram.rst
> > +++ b/Documentation/admin-guide/blockdev/zram.rst
> > @@ -209,6 +209,8 @@ compact WO trigger memory compaction
> > debug_stat RO this file is used for zram debugging purposes
> > backing_dev RW set up backend storage for zram to write out
> > idle WO mark allocated slot as idle
> > +idle_aged WO mark allocated slot older than 'age' seconds
> > + as idle (see later)
> > ====================== ====== ===============================================
> >
> >
> > @@ -325,8 +327,13 @@ as idle::
> >
> > echo all > /sys/block/zramX/idle
> >
> > -From now on, any pages on zram are idle pages. The idle mark
> > -will be removed until someone requests access of the block.
> > +Alternatively if the config option CONFIG_ZRAM_MEMORY_TRACKING is enabled
> > +the idle_aged interface can be used to mark only pages older than 'age'
> > +seconds as idle::
> > +
> > + echo 86400 > /sys/block/zramX/idle_aged
> > +
> > +The idle mark will be removed until someone requests access of the block.
> > IOW, unless there is access request, those pages are still idle pages.
> >
> > Admin can request writeback of those idle pages at right timing via::
> > diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> > index fcaf2750f68f..a371dc0edf9d 100644
> > --- a/drivers/block/zram/zram_drv.c
> > +++ b/drivers/block/zram/zram_drv.c
> > @@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
> > return len;
> > }
> >
> > -static ssize_t idle_store(struct device *dev,
> > - struct device_attribute *attr, const char *buf, size_t len)
> > +/*
> > + * Mark all pages which are older than or equal to cutoff as IDLE.
> > + * Callers should hold the zram init lock in read mode
> > + **/
> > +static void mark_idle(struct zram *zram, ktime_t cutoff)
> > {
> > - struct zram *zram = dev_to_zram(dev);
> > + int is_idle = 1;
> > unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
> > int index;
> >
> > - if (!sysfs_streq(buf, "all"))
> > - return -EINVAL;
> > -
> > - down_read(&zram->init_lock);
> > - if (!init_done(zram)) {
> > - up_read(&zram->init_lock);
> > - return -EINVAL;
> > - }
> > -
> > for (index = 0; index < nr_pages; index++) {
> > /*
> > * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
> > @@ -314,16 +308,63 @@ static ssize_t idle_store(struct device *dev,
> > */
> > zram_slot_lock(zram, index);
> > if (zram_allocated(zram, index) &&
> > - !zram_test_flag(zram, index, ZRAM_UNDER_WB))
> > - zram_set_flag(zram, index, ZRAM_IDLE);
> > + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > + is_idle = (!cutoff || cutoff >= zram->table[index].ac_time);
>
> I am not sure it's a good idea to compare those ktime_t directly
> without using the ktime API(e.g., ktime_after or ktime_before).
> Maybe, we can get age_sec ktime_t directly as a argument and then
> use the ktime API with ktime_get_boottime to compare them.

I will make this change in the next version of the patch.

>
> > +#endif
> > + if (is_idle)
> > + zram_set_flag(zram, index, ZRAM_IDLE);
> > + }
> > zram_slot_unlock(zram, index);
> > }
> > +}
> > +
> > +static ssize_t idle_store(struct device *dev,
> > + struct device_attribute *attr, const char *buf, size_t len)
> > +{
> > + struct zram *zram = dev_to_zram(dev);
> > +
> > + if (!sysfs_streq(buf, "all"))
> > + return -EINVAL;
> > +
> > + down_read(&zram->init_lock);
> > + if (!init_done(zram)) {
> > + up_read(&zram->init_lock);
> > + return -EINVAL;
> > + }
> >
> > + /* Mark everything as idle */
> > + mark_idle(zram, 0);
> > up_read(&zram->init_lock);
> >
> > return len;
> > }
> >
> > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > +static ssize_t idle_aged_store(struct device *dev,
> > + struct device_attribute *attr, const char *buf, size_t len)
> > +{
> > + struct zram *zram = dev_to_zram(dev);
> > + ktime_t cutoff_time;
> > + u64 age_sec;
> > + ssize_t rv = -EINVAL;
> > +
> > + down_read(&zram->init_lock);
> > + if (!init_done(zram))
> > + goto out;
> > +
> > + if (kstrtoull(buf, 10, &age_sec))
> > + goto out;
> > +
> > + rv = len;
> > + cutoff_time = ktime_sub(ktime_get_boottime(), ns_to_ktime(age_sec * NSEC_PER_SEC));
> > + mark_idle(zram, cutoff_time);
> > +out:
> > + up_read(&zram->init_lock);
> > + return rv;
> > +}
> > +#endif
> > +
> > #ifdef CONFIG_ZRAM_WRITEBACK
> > static ssize_t writeback_limit_enable_store(struct device *dev,
> > struct device_attribute *attr, const char *buf, size_t len)
> > @@ -1840,6 +1881,9 @@ static DEVICE_ATTR_WO(reset);
> > static DEVICE_ATTR_WO(mem_limit);
> > static DEVICE_ATTR_WO(mem_used_max);
> > static DEVICE_ATTR_WO(idle);
> > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > +static DEVICE_ATTR_WO(idle_aged);
> > +#endif
> > static DEVICE_ATTR_RW(max_comp_streams);
> > static DEVICE_ATTR_RW(comp_algorithm);
> > #ifdef CONFIG_ZRAM_WRITEBACK
> > @@ -1857,6 +1901,9 @@ static struct attribute *zram_disk_attrs[] = {
> > &dev_attr_mem_limit.attr,
> > &dev_attr_mem_used_max.attr,
> > &dev_attr_idle.attr,
> > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > + &dev_attr_idle_aged.attr,
> > +#endif
> > &dev_attr_max_comp_streams.attr,
> > &dev_attr_comp_algorithm.attr,
> > #ifdef CONFIG_ZRAM_WRITEBACK
> > --
> > 2.33.0.464.g1972c5931b-goog
> >

Thank you for looking,
Brian

2021-09-21 20:23:42

by Brian Geffon

[permalink] [raw]
Subject: [PATCH v2] zram: Introduce an aged idle interface

This change introduces an aged idle interface to the existing
idle sysfs file for zram.

When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
now also accepts an integer argument. This integer is the
age (in seconds) of pages to mark as idle. The idle file
still supports 'all' as it always has. This new approach
allows for much more control over which pages get marked
as idle.

Signed-off-by: Brian Geffon <[email protected]>
---
Documentation/admin-guide/blockdev/zram.rst | 8 +++
drivers/block/zram/zram_drv.c | 60 +++++++++++++++------
2 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 700329d25f57..8c8a92e5c00c 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -328,6 +328,14 @@ as idle::
From now on, any pages on zram are idle pages. The idle mark
will be removed until someone requests access of the block.
IOW, unless there is access request, those pages are still idle pages.
+Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
+marked as idle based on how long (in seconds) it's been since they were
+last accessed, in seconds::
+
+ echo 86400 > /sys/block/zramX/idle
+
+In this example all pages which haven't been accessed in more than 86400
+seconds (one day) will be marked idle.

Admin can request writeback of those idle pages at right timing via::

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fcaf2750f68f..1d1472fe4094 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}

-static ssize_t idle_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+/*
+ * Mark all pages which are older than or equal to cutoff as IDLE.
+ * Callers should hold the zram init lock in read mode
+ **/
+static void mark_idle(struct zram *zram, ktime_t cutoff)
{
- struct zram *zram = dev_to_zram(dev);
+ int is_idle = 1;
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
int index;

- if (!sysfs_streq(buf, "all"))
- return -EINVAL;
-
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
- return -EINVAL;
- }
-
for (index = 0; index < nr_pages; index++) {
/*
* Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
@@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
*/
zram_slot_lock(zram, index);
if (zram_allocated(zram, index) &&
- !zram_test_flag(zram, index, ZRAM_UNDER_WB))
- zram_set_flag(zram, index, ZRAM_IDLE);
+ !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
+#endif
+ if (is_idle)
+ zram_set_flag(zram, index, ZRAM_IDLE);
+ }
zram_slot_unlock(zram, index);
}
+}

- up_read(&zram->init_lock);
+static ssize_t idle_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ u64 age_sec;
+ ktime_t cutoff_time = 0;
+ ssize_t rv = -EINVAL;

- return len;
+ if (!sysfs_streq(buf, "all")) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ /* If it did not parse as 'all' try to treat it as an integer */
+ if (!kstrtoull(buf, 10, &age_sec))
+ cutoff_time = ktime_sub(ktime_get_boottime(),
+ ns_to_ktime(age_sec * NSEC_PER_SEC));
+ else
+#endif
+ goto out;
+ }
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram))
+ goto out_unlock;
+
+ /* A age_sec of 0 marks everything as idle, this is the "all" behavior */
+ mark_idle(zram, cutoff_time);
+ rv = len;
+
+out_unlock:
+ up_read(&zram->init_lock);
+out:
+ return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
--
2.33.0.464.g1972c5931b-goog

2021-09-21 22:09:09

by Brian Geffon

[permalink] [raw]
Subject: [PATCH v3] zram: Introduce an aged idle interface

This change introduces an aged idle interface to the existing
idle sysfs file for zram.

When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
now also accepts an integer argument. This integer is the
age (in seconds) of pages to mark as idle. The idle file
still supports 'all' as it always has. This new approach
allows for much more control over which pages get marked
as idle.

v2 -> v3:
- Correct unused variable warning when
CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
v1 -> v2:
- Switch to using existing idle file.
- Dont compare ktime directly.

Signed-off-by: Brian Geffon <[email protected]>
---
Documentation/admin-guide/blockdev/zram.rst | 8 +++
drivers/block/zram/zram_drv.c | 60 +++++++++++++++------
2 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 700329d25f57..8c8a92e5c00c 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -328,6 +328,14 @@ as idle::
From now on, any pages on zram are idle pages. The idle mark
will be removed until someone requests access of the block.
IOW, unless there is access request, those pages are still idle pages.
+Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
+marked as idle based on how long (in seconds) it's been since they were
+last accessed, in seconds::
+
+ echo 86400 > /sys/block/zramX/idle
+
+In this example all pages which haven't been accessed in more than 86400
+seconds (one day) will be marked idle.

Admin can request writeback of those idle pages at right timing via::

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fcaf2750f68f..2af5cdb8da1a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}

-static ssize_t idle_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+/*
+ * Mark all pages which are older than or equal to cutoff as IDLE.
+ * Callers should hold the zram init lock in read mode
+ **/
+static void mark_idle(struct zram *zram, ktime_t cutoff)
{
- struct zram *zram = dev_to_zram(dev);
+ int is_idle = 1;
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
int index;

- if (!sysfs_streq(buf, "all"))
- return -EINVAL;
-
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
- return -EINVAL;
- }
-
for (index = 0; index < nr_pages; index++) {
/*
* Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
@@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
*/
zram_slot_lock(zram, index);
if (zram_allocated(zram, index) &&
- !zram_test_flag(zram, index, ZRAM_UNDER_WB))
- zram_set_flag(zram, index, ZRAM_IDLE);
+ !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
+#endif
+ if (is_idle)
+ zram_set_flag(zram, index, ZRAM_IDLE);
+ }
zram_slot_unlock(zram, index);
}
+}

- up_read(&zram->init_lock);
+static ssize_t idle_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ ktime_t cutoff_time = 0;
+ ssize_t rv = -EINVAL;

- return len;
+ if (!sysfs_streq(buf, "all")) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ u64 age_sec;
+ /* If it did not parse as 'all' try to treat it as an integer */
+ if (!kstrtoull(buf, 10, &age_sec))
+ cutoff_time = ktime_sub(ktime_get_boottime(),
+ ns_to_ktime(age_sec * NSEC_PER_SEC));
+ else
+#endif
+ goto out;
+ }
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram))
+ goto out_unlock;
+
+ /* A age_sec of 0 marks everything as idle, this is the "all" behavior */
+ mark_idle(zram, cutoff_time);
+ rv = len;
+
+out_unlock:
+ up_read(&zram->init_lock);
+out:
+ return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
--
2.33.0.464.g1972c5931b-goog

2021-09-23 00:11:39

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH v3] zram: Introduce an aged idle interface

On Tue, Sep 21, 2021 at 12:43:36PM -0700, Brian Geffon wrote:
> This change introduces an aged idle interface to the existing
> idle sysfs file for zram.
>
> When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
> now also accepts an integer argument. This integer is the
> age (in seconds) of pages to mark as idle. The idle file
> still supports 'all' as it always has. This new approach
> allows for much more control over which pages get marked
> as idle.
>
> v2 -> v3:
> - Correct unused variable warning when
> CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
> v1 -> v2:
> - Switch to using existing idle file.
> - Dont compare ktime directly.
>
> Signed-off-by: Brian Geffon <[email protected]>
> ---
> Documentation/admin-guide/blockdev/zram.rst | 8 +++
> drivers/block/zram/zram_drv.c | 60 +++++++++++++++------
> 2 files changed, 52 insertions(+), 16 deletions(-)
>
> diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
> index 700329d25f57..8c8a92e5c00c 100644
> --- a/Documentation/admin-guide/blockdev/zram.rst
> +++ b/Documentation/admin-guide/blockdev/zram.rst
> @@ -328,6 +328,14 @@ as idle::
> From now on, any pages on zram are idle pages. The idle mark
> will be removed until someone requests access of the block.
> IOW, unless there is access request, those pages are still idle pages.
> +Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
> +marked as idle based on how long (in seconds) it's been since they were
> +last accessed, in seconds::
> +
> + echo 86400 > /sys/block/zramX/idle
> +
> +In this example all pages which haven't been accessed in more than 86400
> +seconds (one day) will be marked idle.
>
> Admin can request writeback of those idle pages at right timing via::
>
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index fcaf2750f68f..2af5cdb8da1a 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
> return len;
> }
>
> -static ssize_t idle_store(struct device *dev,
> - struct device_attribute *attr, const char *buf, size_t len)
> +/*
> + * Mark all pages which are older than or equal to cutoff as IDLE.
> + * Callers should hold the zram init lock in read mode
> + **/
> +static void mark_idle(struct zram *zram, ktime_t cutoff)
> {
> - struct zram *zram = dev_to_zram(dev);
> + int is_idle = 1;
> unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
> int index;
>
> - if (!sysfs_streq(buf, "all"))
> - return -EINVAL;
> -
> - down_read(&zram->init_lock);
> - if (!init_done(zram)) {
> - up_read(&zram->init_lock);
> - return -EINVAL;
> - }
> -
> for (index = 0; index < nr_pages; index++) {
> /*
> * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
> @@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
> */
> zram_slot_lock(zram, index);
> if (zram_allocated(zram, index) &&
> - !zram_test_flag(zram, index, ZRAM_UNDER_WB))
> - zram_set_flag(zram, index, ZRAM_IDLE);
> + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> + is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
> +#endif
> + if (is_idle)
> + zram_set_flag(zram, index, ZRAM_IDLE);
> + }
> zram_slot_unlock(zram, index);
> }
> +}
>
> - up_read(&zram->init_lock);
> +static ssize_t idle_store(struct device *dev,
> + struct device_attribute *attr, const char *buf, size_t len)
> +{
> + struct zram *zram = dev_to_zram(dev);
> + ktime_t cutoff_time = 0;
> + ssize_t rv = -EINVAL;
>
> - return len;
> + if (!sysfs_streq(buf, "all")) {
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> + u64 age_sec;
> + /* If it did not parse as 'all' try to treat it as an integer */
> + if (!kstrtoull(buf, 10, &age_sec))

nit:
Do we need such limit base which work with only 10 base?
Passing 0 would give more flexibility.

Otherwise, looks good to me.

Thanks, Brian.

> + cutoff_time = ktime_sub(ktime_get_boottime(),
> + ns_to_ktime(age_sec * NSEC_PER_SEC));
> + else
> +#endif
> + goto out;
> + }
> +
> + down_read(&zram->init_lock);
> + if (!init_done(zram))
> + goto out_unlock;
> +
> + /* A age_sec of 0 marks everything as idle, this is the "all" behavior */
> + mark_idle(zram, cutoff_time);
> + rv = len;
> +
> +out_unlock:
> + up_read(&zram->init_lock);
> +out:
> + return rv;
> }
>
> #ifdef CONFIG_ZRAM_WRITEBACK
> --
> 2.33.0.464.g1972c5931b-goog
>

2021-09-23 00:47:23

by Brian Geffon

[permalink] [raw]
Subject: Re: [PATCH v3] zram: Introduce an aged idle interface

Hi Minchan,
Thank you for taking a look. I'm happy to make that change, but I
personally cannot see why userspace would want to do something like
idle pages older than "0x3C seconds" or "0o250600 seconds," it just
seems like a strange way to represent seconds. What do you think?

Brian

On Wed, Sep 22, 2021 at 8:09 PM Minchan Kim <[email protected]> wrote:
>
> On Tue, Sep 21, 2021 at 12:43:36PM -0700, Brian Geffon wrote:
> > This change introduces an aged idle interface to the existing
> > idle sysfs file for zram.
> >
> > When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
> > now also accepts an integer argument. This integer is the
> > age (in seconds) of pages to mark as idle. The idle file
> > still supports 'all' as it always has. This new approach
> > allows for much more control over which pages get marked
> > as idle.
> >
> > v2 -> v3:
> > - Correct unused variable warning when
> > CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
> > v1 -> v2:
> > - Switch to using existing idle file.
> > - Dont compare ktime directly.
> >
> > Signed-off-by: Brian Geffon <[email protected]>
> > ---
> > Documentation/admin-guide/blockdev/zram.rst | 8 +++
> > drivers/block/zram/zram_drv.c | 60 +++++++++++++++------
> > 2 files changed, 52 insertions(+), 16 deletions(-)
> >
> > diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
> > index 700329d25f57..8c8a92e5c00c 100644
> > --- a/Documentation/admin-guide/blockdev/zram.rst
> > +++ b/Documentation/admin-guide/blockdev/zram.rst
> > @@ -328,6 +328,14 @@ as idle::
> > From now on, any pages on zram are idle pages. The idle mark
> > will be removed until someone requests access of the block.
> > IOW, unless there is access request, those pages are still idle pages.
> > +Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
> > +marked as idle based on how long (in seconds) it's been since they were
> > +last accessed, in seconds::
> > +
> > + echo 86400 > /sys/block/zramX/idle
> > +
> > +In this example all pages which haven't been accessed in more than 86400
> > +seconds (one day) will be marked idle.
> >
> > Admin can request writeback of those idle pages at right timing via::
> >
> > diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> > index fcaf2750f68f..2af5cdb8da1a 100644
> > --- a/drivers/block/zram/zram_drv.c
> > +++ b/drivers/block/zram/zram_drv.c
> > @@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
> > return len;
> > }
> >
> > -static ssize_t idle_store(struct device *dev,
> > - struct device_attribute *attr, const char *buf, size_t len)
> > +/*
> > + * Mark all pages which are older than or equal to cutoff as IDLE.
> > + * Callers should hold the zram init lock in read mode
> > + **/
> > +static void mark_idle(struct zram *zram, ktime_t cutoff)
> > {
> > - struct zram *zram = dev_to_zram(dev);
> > + int is_idle = 1;
> > unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
> > int index;
> >
> > - if (!sysfs_streq(buf, "all"))
> > - return -EINVAL;
> > -
> > - down_read(&zram->init_lock);
> > - if (!init_done(zram)) {
> > - up_read(&zram->init_lock);
> > - return -EINVAL;
> > - }
> > -
> > for (index = 0; index < nr_pages; index++) {
> > /*
> > * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
> > @@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
> > */
> > zram_slot_lock(zram, index);
> > if (zram_allocated(zram, index) &&
> > - !zram_test_flag(zram, index, ZRAM_UNDER_WB))
> > - zram_set_flag(zram, index, ZRAM_IDLE);
> > + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > + is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
> > +#endif
> > + if (is_idle)
> > + zram_set_flag(zram, index, ZRAM_IDLE);
> > + }
> > zram_slot_unlock(zram, index);
> > }
> > +}
> >
> > - up_read(&zram->init_lock);
> > +static ssize_t idle_store(struct device *dev,
> > + struct device_attribute *attr, const char *buf, size_t len)
> > +{
> > + struct zram *zram = dev_to_zram(dev);
> > + ktime_t cutoff_time = 0;
> > + ssize_t rv = -EINVAL;
> >
> > - return len;
> > + if (!sysfs_streq(buf, "all")) {
> > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > + u64 age_sec;
> > + /* If it did not parse as 'all' try to treat it as an integer */
> > + if (!kstrtoull(buf, 10, &age_sec))
>
> nit:
> Do we need such limit base which work with only 10 base?
> Passing 0 would give more flexibility.
>
> Otherwise, looks good to me.
>
> Thanks, Brian.
>
> > + cutoff_time = ktime_sub(ktime_get_boottime(),
> > + ns_to_ktime(age_sec * NSEC_PER_SEC));
> > + else
> > +#endif
> > + goto out;
> > + }
> > +
> > + down_read(&zram->init_lock);
> > + if (!init_done(zram))
> > + goto out_unlock;
> > +
> > + /* A age_sec of 0 marks everything as idle, this is the "all" behavior */
> > + mark_idle(zram, cutoff_time);
> > + rv = len;
> > +
> > +out_unlock:
> > + up_read(&zram->init_lock);
> > +out:
> > + return rv;
> > }
> >
> > #ifdef CONFIG_ZRAM_WRITEBACK
> > --
> > 2.33.0.464.g1972c5931b-goog
> >

2021-09-23 06:03:40

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH v3] zram: Introduce an aged idle interface

Hey Brian,

On Wed, Sep 22, 2021 at 08:42:44PM -0400, Brian Geffon wrote:
> Hi Minchan,
> Thank you for taking a look. I'm happy to make that change, but I
> personally cannot see why userspace would want to do something like
> idle pages older than "0x3C seconds" or "0o250600 seconds," it just
> seems like a strange way to represent seconds. What do you think?

Kernel communty loves inline reply instead of top posting. ;-)

I am not strong opinion about mutiple base support. The question
just started from "what's the benefit with only 10-base support?"
if we can support multiple bases with almost zero maintainace
overhead.

>
> Brian
>
> On Wed, Sep 22, 2021 at 8:09 PM Minchan Kim <[email protected]> wrote:
> >
> > On Tue, Sep 21, 2021 at 12:43:36PM -0700, Brian Geffon wrote:
> > > This change introduces an aged idle interface to the existing
> > > idle sysfs file for zram.
> > >
> > > When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
> > > now also accepts an integer argument. This integer is the
> > > age (in seconds) of pages to mark as idle. The idle file
> > > still supports 'all' as it always has. This new approach
> > > allows for much more control over which pages get marked
> > > as idle.
> > >
> > > v2 -> v3:
> > > - Correct unused variable warning when
> > > CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
> > > v1 -> v2:
> > > - Switch to using existing idle file.
> > > - Dont compare ktime directly.
> > >
> > > Signed-off-by: Brian Geffon <[email protected]>
> > > ---
> > > Documentation/admin-guide/blockdev/zram.rst | 8 +++
> > > drivers/block/zram/zram_drv.c | 60 +++++++++++++++------
> > > 2 files changed, 52 insertions(+), 16 deletions(-)
> > >
> > > diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
> > > index 700329d25f57..8c8a92e5c00c 100644
> > > --- a/Documentation/admin-guide/blockdev/zram.rst
> > > +++ b/Documentation/admin-guide/blockdev/zram.rst
> > > @@ -328,6 +328,14 @@ as idle::
> > > From now on, any pages on zram are idle pages. The idle mark
> > > will be removed until someone requests access of the block.
> > > IOW, unless there is access request, those pages are still idle pages.
> > > +Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
> > > +marked as idle based on how long (in seconds) it's been since they were
> > > +last accessed, in seconds::
> > > +
> > > + echo 86400 > /sys/block/zramX/idle
> > > +
> > > +In this example all pages which haven't been accessed in more than 86400
> > > +seconds (one day) will be marked idle.
> > >
> > > Admin can request writeback of those idle pages at right timing via::
> > >
> > > diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> > > index fcaf2750f68f..2af5cdb8da1a 100644
> > > --- a/drivers/block/zram/zram_drv.c
> > > +++ b/drivers/block/zram/zram_drv.c
> > > @@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
> > > return len;
> > > }
> > >
> > > -static ssize_t idle_store(struct device *dev,
> > > - struct device_attribute *attr, const char *buf, size_t len)
> > > +/*
> > > + * Mark all pages which are older than or equal to cutoff as IDLE.
> > > + * Callers should hold the zram init lock in read mode
> > > + **/
> > > +static void mark_idle(struct zram *zram, ktime_t cutoff)
> > > {
> > > - struct zram *zram = dev_to_zram(dev);
> > > + int is_idle = 1;
> > > unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
> > > int index;
> > >
> > > - if (!sysfs_streq(buf, "all"))
> > > - return -EINVAL;
> > > -
> > > - down_read(&zram->init_lock);
> > > - if (!init_done(zram)) {
> > > - up_read(&zram->init_lock);
> > > - return -EINVAL;
> > > - }
> > > -
> > > for (index = 0; index < nr_pages; index++) {
> > > /*
> > > * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
> > > @@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
> > > */
> > > zram_slot_lock(zram, index);
> > > if (zram_allocated(zram, index) &&
> > > - !zram_test_flag(zram, index, ZRAM_UNDER_WB))
> > > - zram_set_flag(zram, index, ZRAM_IDLE);
> > > + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> > > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > > + is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
> > > +#endif
> > > + if (is_idle)
> > > + zram_set_flag(zram, index, ZRAM_IDLE);
> > > + }
> > > zram_slot_unlock(zram, index);
> > > }
> > > +}
> > >
> > > - up_read(&zram->init_lock);
> > > +static ssize_t idle_store(struct device *dev,
> > > + struct device_attribute *attr, const char *buf, size_t len)
> > > +{
> > > + struct zram *zram = dev_to_zram(dev);
> > > + ktime_t cutoff_time = 0;
> > > + ssize_t rv = -EINVAL;
> > >
> > > - return len;
> > > + if (!sysfs_streq(buf, "all")) {
> > > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > > + u64 age_sec;
> > > + /* If it did not parse as 'all' try to treat it as an integer */
> > > + if (!kstrtoull(buf, 10, &age_sec))
> >
> > nit:
> > Do we need such limit base which work with only 10 base?
> > Passing 0 would give more flexibility.
> >
> > Otherwise, looks good to me.
> >
> > Thanks, Brian.
> >
> > > + cutoff_time = ktime_sub(ktime_get_boottime(),
> > > + ns_to_ktime(age_sec * NSEC_PER_SEC));
> > > + else
> > > +#endif
> > > + goto out;
> > > + }
> > > +
> > > + down_read(&zram->init_lock);
> > > + if (!init_done(zram))
> > > + goto out_unlock;
> > > +
> > > + /* A age_sec of 0 marks everything as idle, this is the "all" behavior */
> > > + mark_idle(zram, cutoff_time);
> > > + rv = len;
> > > +
> > > +out_unlock:
> > > + up_read(&zram->init_lock);
> > > +out:
> > > + return rv;
> > > }
> > >
> > > #ifdef CONFIG_ZRAM_WRITEBACK
> > > --
> > > 2.33.0.464.g1972c5931b-goog
> > >

2021-09-23 13:05:50

by Brian Geffon

[permalink] [raw]
Subject: [PATCH v4] zram: Introduce an aged idle interface

This change introduces an aged idle interface to the existing
idle sysfs file for zram.

When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
now also accepts an integer argument. This integer is the
age (in seconds) of pages to mark as idle. The idle file
still supports 'all' as it always has. This new approach
allows for much more control over which pages get marked
as idle.

v3 -> v4:
- Remove base10 restriction.

v2 -> v3:
- Correct unused variable warning when
CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
v1 -> v2:
- Switch to using existing idle file.
- Dont compare ktime directly.

Signed-off-by: Brian Geffon <[email protected]>
---
Documentation/admin-guide/blockdev/zram.rst | 8 +++
drivers/block/zram/zram_drv.c | 60 +++++++++++++++------
2 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 700329d25f57..3e11926a4df9 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -328,6 +328,14 @@ as idle::
From now on, any pages on zram are idle pages. The idle mark
will be removed until someone requests access of the block.
IOW, unless there is access request, those pages are still idle pages.
+Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
+marked as idle based on how long (in seconds) it's been since they were
+last accessed::
+
+ echo 86400 > /sys/block/zramX/idle
+
+In this example all pages which haven't been accessed in more than 86400
+seconds (one day) will be marked idle.

Admin can request writeback of those idle pages at right timing via::

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fcaf2750f68f..ca15d60262fa 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}

-static ssize_t idle_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+/*
+ * Mark all pages which are older than or equal to cutoff as IDLE.
+ * Callers should hold the zram init lock in read mode
+ **/
+static void mark_idle(struct zram *zram, ktime_t cutoff)
{
- struct zram *zram = dev_to_zram(dev);
+ int is_idle = 1;
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
int index;

- if (!sysfs_streq(buf, "all"))
- return -EINVAL;
-
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
- return -EINVAL;
- }
-
for (index = 0; index < nr_pages; index++) {
/*
* Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
@@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
*/
zram_slot_lock(zram, index);
if (zram_allocated(zram, index) &&
- !zram_test_flag(zram, index, ZRAM_UNDER_WB))
- zram_set_flag(zram, index, ZRAM_IDLE);
+ !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
+#endif
+ if (is_idle)
+ zram_set_flag(zram, index, ZRAM_IDLE);
+ }
zram_slot_unlock(zram, index);
}
+}

- up_read(&zram->init_lock);
+static ssize_t idle_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ ktime_t cutoff_time = 0;
+ ssize_t rv = -EINVAL;

- return len;
+ if (!sysfs_streq(buf, "all")) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ u64 age_sec;
+ /* If it did not parse as 'all' try to treat it as an integer */
+ if (!kstrtoull(buf, 0, &age_sec))
+ cutoff_time = ktime_sub(ktime_get_boottime(),
+ ns_to_ktime(age_sec * NSEC_PER_SEC));
+ else
+#endif
+ goto out;
+ }
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram))
+ goto out_unlock;
+
+ /* A age_sec of 0 marks everything as idle, this is the "all" behavior */
+ mark_idle(zram, cutoff_time);
+ rv = len;
+
+out_unlock:
+ up_read(&zram->init_lock);
+out:
+ return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
--
2.33.0.464.g1972c5931b-goog

2021-09-23 16:49:15

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH v4] zram: Introduce an aged idle interface

On Thu, Sep 23, 2021 at 06:01:15AM -0700, Brian Geffon wrote:
> This change introduces an aged idle interface to the existing
> idle sysfs file for zram.
>
> When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
> now also accepts an integer argument. This integer is the
> age (in seconds) of pages to mark as idle. The idle file
> still supports 'all' as it always has. This new approach
> allows for much more control over which pages get marked
> as idle.
>
> v3 -> v4:
> - Remove base10 restriction.
>
> v2 -> v3:
> - Correct unused variable warning when
> CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
> v1 -> v2:
> - Switch to using existing idle file.
> - Dont compare ktime directly.
>
> Signed-off-by: Brian Geffon <[email protected]>
Acked-by: Minchan Kim <[email protected]>

2021-09-23 22:54:08

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v4] zram: Introduce an aged idle interface

On Thu, 23 Sep 2021 06:01:15 -0700 Brian Geffon <[email protected]> wrote:

> This change introduces an aged idle interface to the existing
> idle sysfs file for zram.
>
> When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
> now also accepts an integer argument. This integer is the
> age (in seconds) of pages to mark as idle. The idle file
> still supports 'all' as it always has. This new approach
> allows for much more control over which pages get marked
> as idle.
>
> @@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
> return len;
> }
>
> -static ssize_t idle_store(struct device *dev,
> - struct device_attribute *attr, const char *buf, size_t len)
> +/*
> + * Mark all pages which are older than or equal to cutoff as IDLE.
> + * Callers should hold the zram init lock in read mode
> + **/

A simple "*/" is conventional.

> +static void mark_idle(struct zram *zram, ktime_t cutoff)
> {
> - struct zram *zram = dev_to_zram(dev);
> + int is_idle = 1;
> unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
> int index;
>
> - if (!sysfs_streq(buf, "all"))
> - return -EINVAL;
> -
> - down_read(&zram->init_lock);
> - if (!init_done(zram)) {
> - up_read(&zram->init_lock);
> - return -EINVAL;
> - }
> -
> for (index = 0; index < nr_pages; index++) {
> /*
> * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
> @@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
> */
> zram_slot_lock(zram, index);
> if (zram_allocated(zram, index) &&
> - !zram_test_flag(zram, index, ZRAM_UNDER_WB))
> - zram_set_flag(zram, index, ZRAM_IDLE);
> + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> + is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
> +#endif
> + if (is_idle)
> + zram_set_flag(zram, index, ZRAM_IDLE);
> + }
> zram_slot_unlock(zram, index);
> }
> +}
>
> - up_read(&zram->init_lock);
> +static ssize_t idle_store(struct device *dev,
> + struct device_attribute *attr, const char *buf, size_t len)
> +{
> + struct zram *zram = dev_to_zram(dev);
> + ktime_t cutoff_time = 0;
> + ssize_t rv = -EINVAL;
>
> - return len;
> + if (!sysfs_streq(buf, "all")) {
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> + u64 age_sec;
> + /* If it did not parse as 'all' try to treat it as an integer */
> + if (!kstrtoull(buf, 0, &age_sec))
> + cutoff_time = ktime_sub(ktime_get_boottime(),
> + ns_to_ktime(age_sec * NSEC_PER_SEC));
> + else
> +#endif
> + goto out;
> + }

The ifdef tricks are pretty ugly. Can things be improved with IS_ENABLED()?


2021-09-24 21:13:10

by Brian Geffon

[permalink] [raw]
Subject: [PATCH v5] zram: Introduce an aged idle interface

This change introduces an aged idle interface to the existing
idle sysfs file for zram.

When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
now also accepts an integer argument. This integer is the
age (in seconds) of pages to mark as idle. The idle file
still supports 'all' as it always has. This new approach
allows for much more control over which pages get marked
as idle.

v4 -> v5:
- Andrew's suggestions to use IS_ENABLED and
cleanup comment.

v3 -> v4:
- Remove base10 restriction.

v2 -> v3:
- Correct unused variable warning when
CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
v1 -> v2:
- Switch to using existing idle file.
- Dont compare ktime directly.

Signed-off-by: Brian Geffon <[email protected]>
Acked-by: Minchan Kim <[email protected]>
---
Documentation/admin-guide/blockdev/zram.rst | 8 +++
drivers/block/zram/zram_drv.c | 61 +++++++++++++++------
2 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index a6fd1f9b5faf..c66efb2eeac3 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -327,6 +327,14 @@ as idle::
From now on, any pages on zram are idle pages. The idle mark
will be removed until someone requests access of the block.
IOW, unless there is access request, those pages are still idle pages.
+Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
+marked as idle based on how long (in seconds) it's been since they were
+last accessed::
+
+ echo 86400 > /sys/block/zramX/idle
+
+In this example all pages which haven't been accessed in more than 86400
+seconds (one day) will be marked idle.

Admin can request writeback of those idle pages at right timing via::

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d291bedeef8e..33282f04ea32 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}

-static ssize_t idle_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+/*
+ * Mark all pages which are older than or equal to cutoff as IDLE.
+ * Callers should hold the zram init lock in read mode
+ */
+static void mark_idle(struct zram *zram, ktime_t cutoff)
{
- struct zram *zram = dev_to_zram(dev);
+ int is_idle = 1;
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
int index;

- if (!sysfs_streq(buf, "all"))
- return -EINVAL;
-
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
- return -EINVAL;
- }
-
for (index = 0; index < nr_pages; index++) {
/*
* Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
@@ -314,14 +308,49 @@ static ssize_t idle_store(struct device *dev,
*/
zram_slot_lock(zram, index);
if (zram_allocated(zram, index) &&
- !zram_test_flag(zram, index, ZRAM_UNDER_WB))
- zram_set_flag(zram, index, ZRAM_IDLE);
+ !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
+#endif
+ if (is_idle)
+ zram_set_flag(zram, index, ZRAM_IDLE);
+ }
zram_slot_unlock(zram, index);
}
+}

- up_read(&zram->init_lock);
+static ssize_t idle_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ ktime_t cutoff_time = 0;
+ ssize_t rv = -EINVAL;

- return len;
+ if (!sysfs_streq(buf, "all")) {
+ /*
+ * If it did not parse as 'all' try to treat it as an integer when
+ * we have memory tracking enabled.
+ */
+ u64 age_sec;
+ if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
+ cutoff_time = ktime_sub(ktime_get_boottime(),
+ ns_to_ktime(age_sec * NSEC_PER_SEC));
+ else
+ goto out;
+ }
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram))
+ goto out_unlock;
+
+ /* A age_sec of 0 marks everything as idle, this is the "all" behavior */
+ mark_idle(zram, cutoff_time);
+ rv = len;
+
+out_unlock:
+ up_read(&zram->init_lock);
+out:
+ return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
--
2.33.0.685.g46640cef36-goog

2021-09-24 22:18:01

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v5] zram: Introduce an aged idle interface

On Fri, 24 Sep 2021 09:11:28 -0700 Brian Geffon <[email protected]> wrote:

> This change introduces an aged idle interface to the existing
> idle sysfs file for zram.
>
> When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
> now also accepts an integer argument. This integer is the
> age (in seconds) of pages to mark as idle. The idle file
> still supports 'all' as it always has. This new approach
> allows for much more control over which pages get marked
> as idle.
>
> v4 -> v5:
> - Andrew's suggestions to use IS_ENABLED and
> cleanup comment.

Also this?

--- a/drivers/block/zram/zram_drv.c~zram-introduce-an-aged-idle-interface-v5-fix
+++ a/drivers/block/zram/zram_drv.c
@@ -309,9 +309,8 @@ static void mark_idle(struct zram *zram,
zram_slot_lock(zram, index);
if (zram_allocated(zram, index) &&
!zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
-#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING))
is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
-#endif
if (is_idle)
zram_set_flag(zram, index, ZRAM_IDLE);
}
_

2021-09-25 06:25:00

by Brian Geffon

[permalink] [raw]
Subject: Re: [PATCH v5] zram: Introduce an aged idle interface

> Also this?
>
> --- a/drivers/block/zram/zram_drv.c~zram-introduce-an-aged-idle-interface-v5-fix
> +++ a/drivers/block/zram/zram_drv.c
> @@ -309,9 +309,8 @@ static void mark_idle(struct zram *zram,
> zram_slot_lock(zram, index);
> if (zram_allocated(zram, index) &&
> !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> -#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> + if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING))
> is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
> -#endif
> if (is_idle)
> zram_set_flag(zram, index, ZRAM_IDLE);
> }
> _
>

Hi Andrew,
As written that patch won't compile when
CONFIG_ZRAM_MEMORY_TRACKING=n, my guess is that the compiler pass that
removes the dead branch only happens after it attempts to compile the
branch itself. So it appears that even though IS_ENABLED(..) always
evaluates to 0, the compile will fail because table[index].ac_time
does not exist. You should get an error like this:

drivers/block/zram/zram_drv.c:314:57: error: no member named 'ac_time'
in 'struct zram_table_entry'
(!cutoff
|| ktime_after(cutoff, zram->table[index].ac_time)))

Brian

2021-09-28 02:24:32

by Sergey Senozhatsky

[permalink] [raw]
Subject: Re: [PATCH v5] zram: Introduce an aged idle interface

On (21/09/24 09:11), Brian Geffon wrote:
[..]

Some silly nits:

> +static void mark_idle(struct zram *zram, ktime_t cutoff)
> {
> - struct zram *zram = dev_to_zram(dev);
> + int is_idle = 1;
> unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
> int index;
>
> - if (!sysfs_streq(buf, "all"))
> - return -EINVAL;
> -
> - down_read(&zram->init_lock);
> - if (!init_done(zram)) {
> - up_read(&zram->init_lock);
> - return -EINVAL;
> - }
> -
> for (index = 0; index < nr_pages; index++) {
> /*
> * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
> @@ -314,14 +308,49 @@ static ssize_t idle_store(struct device *dev,
> */
> zram_slot_lock(zram, index);
> if (zram_allocated(zram, index) &&
> - !zram_test_flag(zram, index, ZRAM_UNDER_WB))
> - zram_set_flag(zram, index, ZRAM_IDLE);
> + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> + is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));

checkpatch "WARNING: suspect code indent for conditional statements (16, 32)"

Looks like `is_idle` is at one extra indent level.

> +#endif
> + if (is_idle)
> + zram_set_flag(zram, index, ZRAM_IDLE);
> + }
> zram_slot_unlock(zram, index);
> }
> +}
>
> - up_read(&zram->init_lock);
> +static ssize_t idle_store(struct device *dev,
> + struct device_attribute *attr, const char *buf, size_t len)
> +{
> + struct zram *zram = dev_to_zram(dev);
> + ktime_t cutoff_time = 0;
> + ssize_t rv = -EINVAL;
>
> - return len;
> + if (!sysfs_streq(buf, "all")) {
> + /*
> + * If it did not parse as 'all' try to treat it as an integer when
> + * we have memory tracking enabled.
> + */
> + u64 age_sec;

checkpatch "WARNING: Missing a blank line after declarations"

> + if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
> + cutoff_time = ktime_sub(ktime_get_boottime(),
> + ns_to_ktime(age_sec * NSEC_PER_SEC));
> + else
> + goto out;
> + }
> +
> + down_read(&zram->init_lock);
> + if (!init_done(zram))
> + goto out_unlock;
> +
> + /* A age_sec of 0 marks everything as idle, this is the "all" behavior */

s/age_sec/cutoff_time/

> + mark_idle(zram, cutoff_time);
> + rv = len;

2021-09-29 14:38:55

by Brian Geffon

[permalink] [raw]
Subject: [PATCH v6] zram: Introduce an aged idle interface

This change introduces an aged idle interface to the existing
idle sysfs file for zram.

When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
now also accepts an integer argument. This integer is the
age (in seconds) of pages to mark as idle. The idle file
still supports 'all' as it always has. This new approach
allows for much more control over which pages get marked
as idle.

v5 -> v6:
- Sergey's cleanup suggestions.

v4 -> v5:
- Andrew's suggestions to use IS_ENABLED and
cleanup comment.

v3 -> v4:
- Remove base10 restriction.

v2 -> v3:
- Correct unused variable warning when
CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
v1 -> v2:
- Switch to using existing idle file.
- Dont compare ktime directly.

Signed-off-by: Brian Geffon <[email protected]>
Acked-by: Minchan Kim <[email protected]>
---
Documentation/admin-guide/blockdev/zram.rst | 8 +++
drivers/block/zram/zram_drv.c | 62 +++++++++++++++------
2 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 700329d25f57..3e11926a4df9 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -328,6 +328,14 @@ as idle::
From now on, any pages on zram are idle pages. The idle mark
will be removed until someone requests access of the block.
IOW, unless there is access request, those pages are still idle pages.
+Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
+marked as idle based on how long (in seconds) it's been since they were
+last accessed::
+
+ echo 86400 > /sys/block/zramX/idle
+
+In this example all pages which haven't been accessed in more than 86400
+seconds (one day) will be marked idle.

Admin can request writeback of those idle pages at right timing via::

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fcaf2750f68f..4e76a75a7840 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}

-static ssize_t idle_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+/*
+ * Mark all pages which are older than or equal to cutoff as IDLE.
+ * Callers should hold the zram init lock in read mode
+ */
+static void mark_idle(struct zram *zram, ktime_t cutoff)
{
- struct zram *zram = dev_to_zram(dev);
+ int is_idle = 1;
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
int index;

- if (!sysfs_streq(buf, "all"))
- return -EINVAL;
-
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
- return -EINVAL;
- }
-
for (index = 0; index < nr_pages; index++) {
/*
* Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
@@ -314,14 +308,50 @@ static ssize_t idle_store(struct device *dev,
*/
zram_slot_lock(zram, index);
if (zram_allocated(zram, index) &&
- !zram_test_flag(zram, index, ZRAM_UNDER_WB))
- zram_set_flag(zram, index, ZRAM_IDLE);
+ !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time);
+#endif
+ if (is_idle)
+ zram_set_flag(zram, index, ZRAM_IDLE);
+ }
zram_slot_unlock(zram, index);
}
+}

- up_read(&zram->init_lock);
+static ssize_t idle_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ ktime_t cutoff_time = 0;
+ ssize_t rv = -EINVAL;

- return len;
+ if (!sysfs_streq(buf, "all")) {
+ /*
+ * If it did not parse as 'all' try to treat it as an integer when
+ * we have memory tracking enabled.
+ */
+ u64 age_sec;
+
+ if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
+ cutoff_time = ktime_sub(ktime_get_boottime(),
+ ns_to_ktime(age_sec * NSEC_PER_SEC));
+ else
+ goto out;
+ }
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram))
+ goto out_unlock;
+
+ /* A cutoff_time of 0 marks everything as idle, this is the "all" behavior */
+ mark_idle(zram, cutoff_time);
+ rv = len;
+
+out_unlock:
+ up_read(&zram->init_lock);
+out:
+ return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
--
2.33.0.685.g46640cef36-goog