2020-04-12 09:17:40

by Liang Li

[permalink] [raw]
Subject: [RFC PATCH 3/4] mm: add sys fs configuration for page reporting

This patch adds the 'delay_millisecs', 'mini_order' and 'batch_size'
attributes under '/sys/kernel/mm/page_report/'. Usage:

"delay_millisecs":
Delay, in milliseconds, between a page being freed and the reporting
work starting to run.

"mini_order":
Only pages whose order is equal to or greater than mini_order will be
reported.

"batch_size"
Wake up the worker only when the total size of the freed pages is
greater than 'batch_size'.

Cc: Alexander Duyck <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Alex Williamson <[email protected]>
Signed-off-by: liliangleo <[email protected]>
---
mm/page_reporting.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++--
mm/page_reporting.h | 4 +-
2 files changed, 141 insertions(+), 7 deletions(-)

diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index dc7a22a4b752..cc6a42596560 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -7,15 +7,19 @@
#include <linux/delay.h>
#include <linux/scatterlist.h>
#include <linux/sched.h>
+#include <linux/kobject.h>

#include "page_reporting.h"
#include "internal.h"

-#define PAGE_REPORTING_DELAY (2 * HZ)
#define MAX_SCAN_NUM 1024

unsigned long page_report_batch_size __read_mostly = 4 * 1024 * 1024UL;

+static unsigned long page_report_delay_millisecs __read_mostly = 2000;
+
+unsigned int page_report_mini_order __read_mostly = 8;
+
static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;

enum {
@@ -48,7 +52,8 @@ __page_reporting_request(struct page_reporting_dev_info *prdev)
* now we are limiting this to running no more than once every
* couple of seconds.
*/
- schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
+ schedule_delayed_work(&prdev->work,
+ msecs_to_jiffies(page_report_delay_millisecs));
}

/* notify prdev of free page reporting request */
@@ -260,7 +265,7 @@ page_reporting_process_zone(struct page_reporting_dev_info *prdev,

/* Generate minimum watermark to be able to guarantee progress */
watermark = low_wmark_pages(zone) +
- (PAGE_REPORTING_CAPACITY << PAGE_REPORTING_MIN_ORDER);
+ (PAGE_REPORTING_CAPACITY << page_report_mini_order);

/*
* Cancel request if insufficient free memory or if we failed
@@ -270,7 +275,7 @@ page_reporting_process_zone(struct page_reporting_dev_info *prdev,
return err;

/* Process each free list starting from lowest order/mt */
- for (order = PAGE_REPORTING_MIN_ORDER; order < MAX_ORDER; order++) {
+ for (order = page_report_mini_order; order < MAX_ORDER; order++) {
for (mt = 0; mt < MIGRATE_TYPES; mt++) {
/* We do not pull pages from the isolate free list */
if (is_migrate_isolate(mt))
@@ -337,7 +342,8 @@ static void page_reporting_process(struct work_struct *work)
*/
state = atomic_cmpxchg(&prdev->state, state, PAGE_REPORTING_IDLE);
if (state == PAGE_REPORTING_REQUESTED)
- schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY);
+ schedule_delayed_work(&prdev->work,
+ msecs_to_jiffies(page_report_delay_millisecs));
}

static DEFINE_MUTEX(page_reporting_mutex);
@@ -393,3 +399,131 @@ void page_reporting_unregister(struct page_reporting_dev_info *prdev)
mutex_unlock(&page_reporting_mutex);
}
EXPORT_SYMBOL_GPL(page_reporting_unregister);
+
+static ssize_t batch_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lu\n", page_report_batch_size);
+}
+
+static ssize_t batch_size_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long size;
+ int err;
+
+ err = kstrtoul(buf, 10, &size);
+ if (err || size >= UINT_MAX)
+ return -EINVAL;
+
+ page_report_batch_size = size;
+
+ return count;
+}
+
+static struct kobj_attribute batch_size_attr =
+ __ATTR(batch_size, 0644, batch_size_show, batch_size_store);
+
+static ssize_t delay_millisecs_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lu\n", page_report_delay_millisecs);
+}
+
+static ssize_t delay_millisecs_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long msecs;
+ int err;
+
+ err = kstrtoul(buf, 10, &msecs);
+ if (err || msecs >= UINT_MAX)
+ return -EINVAL;
+
+ page_report_delay_millisecs = msecs;
+
+ return count;
+}
+
+static struct kobj_attribute wake_delay_millisecs_attr =
+ __ATTR(delay_millisecs, 0644, delay_millisecs_show,
+ delay_millisecs_store);
+
+static ssize_t mini_order_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", page_report_mini_order);
+}
+
+static ssize_t mini_order_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int order;
+ int err;
+
+ err = kstrtouint(buf, 10, &order);
+ if (err || order >= MAX_ORDER)
+ return -EINVAL;
+
+ if (page_report_mini_order != order) {
+ mutex_lock(&page_reporting_mutex);
+ page_report_mini_order = order;
+ mutex_unlock(&page_reporting_mutex);
+ }
+
+ return count;
+}
+
+static struct kobj_attribute mini_order_attr =
+ __ATTR(mini_order, 0644, mini_order_show, mini_order_store);
+
+static struct attribute *page_report_attr[] = {
+ &mini_order_attr.attr,
+ &wake_delay_millisecs_attr.attr,
+ &batch_size_attr.attr,
+ NULL,
+};
+
+static struct attribute_group page_report_attr_group = {
+ .attrs = page_report_attr,
+};
+
+static int __init page_report_init_sysfs(struct kobject **page_report_kobj)
+{
+ int err;
+
+ *page_report_kobj = kobject_create_and_add("page_report", mm_kobj);
+ if (unlikely(!*page_report_kobj)) {
+ pr_err("page_report: failed to create page_report kobject\n");
+ return -ENOMEM;
+ }
+
+ err = sysfs_create_group(*page_report_kobj, &page_report_attr_group);
+ if (err) {
+ pr_err("page_report: failed to register page_report group\n");
+ goto delete_obj;
+ }
+
+ return 0;
+
+delete_obj:
+ kobject_put(*page_report_kobj);
+ return err;
+}
+
+static int __init page_report_init(void)
+{
+ int err;
+ struct kobject *page_report_kobj;
+
+ msecs_to_jiffies(page_report_delay_millisecs);
+ err = page_report_init_sysfs(&page_report_kobj);
+ if (err)
+ return err;
+
+ return 0;
+}
+subsys_initcall(page_report_init);
diff --git a/mm/page_reporting.h b/mm/page_reporting.h
index f18c85ecdfe0..5e52777c934d 100644
--- a/mm/page_reporting.h
+++ b/mm/page_reporting.h
@@ -10,7 +10,7 @@
#include <asm/pgtable.h>
#include <linux/scatterlist.h>

-#define PAGE_REPORTING_MIN_ORDER pageblock_order
+extern unsigned int page_report_mini_order;

extern unsigned long page_report_batch_size;

@@ -42,7 +42,7 @@ static inline void page_reporting_notify_free(unsigned int order)
return;

/* Determine if we have crossed reporting threshold */
- if (order < PAGE_REPORTING_MIN_ORDER)
+ if (order < page_report_mini_order)
return;

batch_size += (1 << order) << PAGE_SHIFT;
--
2.14.1


2020-04-14 17:54:33

by Alexander Duyck

[permalink] [raw]
Subject: Re: [RFC PATCH 3/4] mm: add sys fs configuration for page reporting

On 4/12/2020 2:09 AM, liliangleo wrote:
> This patch add 'delay_millisecs', 'mini_order', 'batch_size',
> in '/sys/kernel/mm/page_report/'. Usage:
>
> "delay_millisecs":
> Time delay interval between page free and work start to run.
>
> "mini_order":
> Only pages with order equal or greater than mini_order will be
> reported.
>
> "batch_size"
> Wake up the worker only when free pages total size are greater
> than 'batch_size'.
>
> Cc: Alexander Duyck <[email protected]>
> Cc: Mel Gorman <[email protected]>
> Cc: Andrea Arcangeli <[email protected]>
> Cc: Dan Williams <[email protected]>
> Cc: Dave Hansen <[email protected]>
> Cc: David Hildenbrand <[email protected]>
> Cc: Michal Hocko <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Alex Williamson <[email protected]>
> Signed-off-by: liliangleo <[email protected]>

I am not really a fan of making these configurable globally. Especially
since the existing virtio-balloon is relying on some of this being
configured the way it is.

It would make much more sense to push these configuration options out to
the registration interface so that the thing that is registering for
page reporting can configure them when it is registered.

2020-04-16 01:34:35

by Liang Li

[permalink] [raw]
Subject: Re: [RFC PATCH 3/4] mm: add sys fs configuration for page reporting

On Mon, Apr 13, 2020 at 11:02 PM Alexander Duyck
<[email protected]> wrote:
>
> On 4/12/2020 2:09 AM, liliangleo wrote:
> > This patch add 'delay_millisecs', 'mini_order', 'batch_size',
> > in '/sys/kernel/mm/page_report/'. Usage:
> >
> > "delay_millisecs":
> > Time delay interval between page free and work start to run.
> >
> > "mini_order":
> > Only pages with order equal or greater than mini_order will be
> > reported.
> >
> > "batch_size"
> > Wake up the worker only when free pages total size are greater
> > than 'batch_size'.
> >
> > Cc: Alexander Duyck <[email protected]>
> > Cc: Mel Gorman <[email protected]>
> > Cc: Andrea Arcangeli <[email protected]>
> > Cc: Dan Williams <[email protected]>
> > Cc: Dave Hansen <[email protected]>
> > Cc: David Hildenbrand <[email protected]>
> > Cc: Michal Hocko <[email protected]>
> > Cc: Andrew Morton <[email protected]>
> > Cc: Alex Williamson <[email protected]>
> > Signed-off-by: liliangleo <[email protected]>
>
> I am not really a fan of making these configurable globally. Especially
> since the existing virtio-balloon is relying on some of this being
> configured the way it is.
>
> It would make much more sense to push these configuration options out to
> the registration interface so that the thing that is registering for
> page reporting can configure them when it is registered.
>

Agree, that's better. Thanks!