2022-02-18 10:54:51

by Jonghyeon Kim

Subject: [RFC PATCH v1 0/3] Rebase DAMON_RECLAIM for NUMA systems


The current DAMON_RECLAIM does not work well on NUMA memory systems. To
proactively reclaim memory, the DAMON_RECLAIM kernel thread (kdamond) has to
wake up before kswapd starts reclaiming memory. However, the current
watermarks for proactive reclamation are based on the free memory of the
entire system. So, even when one memory node is fully used, kdamond is not
woken up.

These patches rebase the DAMOS watermarks for NUMA systems and enable
per-NUMA-node monitoring and proactive reclamation in DAMON_RECLAIM.
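
As a rough illustration of the problem (not part of this series): a
system-wide free memory check can look healthy while a single node is nearly
exhausted. The helper names and the per-mille threshold below are made up for
the example; si_meminfo() and si_meminfo_node() are existing kernel helpers.

#include <linux/mm.h>
#include <linux/sysinfo.h>

/* Free memory check against the whole system (what the current watermarks use). */
static bool system_is_low_on_memory(unsigned long min_free_permille)
{
	struct sysinfo si;

	si_meminfo(&si);
	return si.freeram * 1000 < si.totalram * min_free_permille;
}

/* The same check against a single NUMA node (what this series moves toward). */
static bool node_is_low_on_memory(int nid, unsigned long min_free_permille)
{
	struct sysinfo si;

	si_meminfo_node(&si, nid);
	return si.freeram * 1000 < si.totalram * min_free_permille;
}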

Jonghyeon Kim (3):
mm/damon: Rebase damos watermarks for NUMA systems
mm/damon/core: Add damon_start_one()
mm/damon/reclaim: Add per NUMA node proactive reclamation by
DAMON_RECLAIM.

include/linux/damon.h | 3 +
mm/damon/core.c | 39 +++++++++--
mm/damon/reclaim.c | 147 ++++++++++++++++++++++++++++++------------
3 files changed, 140 insertions(+), 49 deletions(-)

--
2.17.1


2022-02-18 11:12:56

by Jonghyeon Kim

Subject: [RFC PATCH v1 2/3] mm/damon/core: Add damon_start_one()

The damon_start() function is designed to start multiple DAMON monitoring
contexts, but sometimes we need to start monitoring for just one context.
Although __damon_start() could be used to start a single monitoring context,
it seems reasonable to add a new function so that callers do not have to
handle 'damon_lock' themselves.

Signed-off-by: Jonghyeon Kim <[email protected]>
---
include/linux/damon.h | 1 +
mm/damon/core.c | 25 +++++++++++++++++++++++++
2 files changed, 26 insertions(+)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index c0adf1566603..069577477662 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -511,6 +511,7 @@ int damon_register_ops(struct damon_operations *ops);
int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id);

int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
+int damon_start_one(struct damon_ctx *ctx);
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);

#endif /* CONFIG_DAMON */
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 290c9c0535ee..e43f138a3489 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -466,6 +466,31 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
return err;
}

+/**
+ * damon_start_one() - Start monitoring for one context.
+ * @ctx: monitoring context
+ *
+ * This function starts a monitoring thread for a single monitoring context,
+ * handling 'damon_lock' internally.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_start_one(struct damon_ctx *ctx)
+{
+ int err = 0;
+
+ mutex_lock(&damon_lock);
+ err = __damon_start(ctx);
+ if (err) {
+ mutex_unlock(&damon_lock);
+ return err;
+ }
+ nr_running_ctxs++;
+ mutex_unlock(&damon_lock);
+
+ return err;
+}
+
/*
* __damon_stop() - Stops monitoring of given context.
* @ctx: monitoring context
--
2.17.1
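
For illustration only: the helper above is meant to save callers from wrapping
a single context in a one-element array. Both functions below are hypothetical
callers, not part of the patch; they assume the declarations from
include/linux/damon.h shown above.

#include <linux/damon.h>

/* Existing API: pass a one-element array of contexts. */
static int start_with_array(struct damon_ctx *ctx)
{
	return damon_start(&ctx, 1);
}

/* With this patch: no array needed; 'damon_lock' is taken internally. */
static int start_directly(struct damon_ctx *ctx)
{
	return damon_start_one(ctx);
}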

2022-02-18 15:02:45

by Jonghyeon Kim

Subject: [RFC PATCH v1 3/3] mm/damon/reclaim: Add per NUMA node proactive reclamation by DAMON_RECLAIM.

To run DAMON_RECLAIM worker threads (kdamond) that do proactive reclamation
per NUMA node, each node must have its own monitoring context. A new
'per_node' parameter is added to enable this.

If 'per_node' is true, one kdamond per online NUMA node is woken up and
starts monitoring to proactively reclaim that node's memory. If 'per_node'
is false, only one kdamond thread starts monitoring all system memory.

Signed-off-by: Jonghyeon Kim <[email protected]>
---
mm/damon/reclaim.c | 147 ++++++++++++++++++++++++++++++++-------------
1 file changed, 104 insertions(+), 43 deletions(-)

diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index b53d9c22fad1..85e8f97dd599 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -177,13 +177,27 @@ static unsigned long monitor_region_end __read_mostly;
module_param(monitor_region_end, ulong, 0600);

/*
- * PID of the DAMON thread
+ * Enable monitoring memory regions per NUMA node.
*
- * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
+ * By default, the watermarks are based on the total system memory.
+ */
+static bool per_node __read_mostly;
+module_param(per_node, bool, 0600);
+
+/*
+ * Number of currently running DAMON worker threads
+ */
+static unsigned long nr_kdamond __read_mostly;
+module_param(nr_kdamond, ulong, 0400);
+
+/*
+ * First PID of the DAMON threads
+ *
+ * If DAMON_RECLAIM is enabled, this becomes the first PID of the worker threads.
* Else, -1.
*/
-static int kdamond_pid __read_mostly = -1;
-module_param(kdamond_pid, int, 0400);
+static int kdamond_start_pid __read_mostly = -1;
+module_param(kdamond_start_pid, int, 0400);

/*
* Number of memory regions that tried to be reclaimed.
@@ -215,8 +229,8 @@ module_param(bytes_reclaimed_regions, ulong, 0400);
static unsigned long nr_quota_exceeds __read_mostly;
module_param(nr_quota_exceeds, ulong, 0400);

-static struct damon_ctx *ctx;
-static struct damon_target *target;
+static struct damon_ctx *ctxs[MAX_NUMNODES];
+static struct damon_target *targets[MAX_NUMNODES];

struct damon_reclaim_ram_walk_arg {
unsigned long start;
@@ -251,7 +265,7 @@ static bool get_monitoring_region(unsigned long *start, unsigned long *end)
return true;
}

-static struct damos *damon_reclaim_new_scheme(void)
+static struct damos *damon_reclaim_new_scheme(int node)
{
struct damos_watermarks wmarks = {
.metric = DAMOS_WMARK_FREE_MEM_RATE,
@@ -259,6 +273,7 @@ static struct damos *damon_reclaim_new_scheme(void)
.high = wmarks_high,
.mid = wmarks_mid,
.low = wmarks_low,
+ .node = node,
};
struct damos_quota quota = {
/*
@@ -290,56 +305,99 @@ static struct damos *damon_reclaim_new_scheme(void)
return scheme;
}

-static int damon_reclaim_turn(bool on)
+static int damon_reclaim_start(int nid)
{
struct damon_region *region;
struct damos *scheme;
int err;
+ unsigned long start, end;

- if (!on) {
- err = damon_stop(&ctx, 1);
- if (!err)
- kdamond_pid = -1;
- return err;
- }
-
- err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0,
+ err = damon_set_attrs(ctxs[nid], sample_interval, aggr_interval, 0,
min_nr_regions, max_nr_regions);
if (err)
return err;

- if (monitor_region_start > monitor_region_end)
- return -EINVAL;
- if (!monitor_region_start && !monitor_region_end &&
- !get_monitoring_region(&monitor_region_start,
- &monitor_region_end))
- return -EINVAL;
+ if (per_node) {
+ monitor_region_start = monitor_region_end = 0;
+
+ start = PFN_PHYS(node_start_pfn(nid));
+ end = PFN_PHYS(node_start_pfn(nid) + node_present_pages(nid) - 1);
+ if (end <= start)
+ return -EINVAL;
+ } else {
+ if (!monitor_region_start && !monitor_region_end &&
+ !get_monitoring_region(&monitor_region_start,
+ &monitor_region_end))
+ return -EINVAL;
+ start = monitor_region_start;
+ end = monitor_region_end;
+ }
+
/* DAMON will free this on its own when finish monitoring */
- region = damon_new_region(monitor_region_start, monitor_region_end);
+ region = damon_new_region(start, end);
if (!region)
return -ENOMEM;
- damon_add_region(region, target);
+ damon_add_region(region, targets[nid]);

/* Will be freed by 'damon_set_schemes()' below */
- scheme = damon_reclaim_new_scheme();
+ scheme = damon_reclaim_new_scheme(nid);
if (!scheme) {
err = -ENOMEM;
goto free_region_out;
}
- err = damon_set_schemes(ctx, &scheme, 1);
+
+ err = damon_set_schemes(ctxs[nid], &scheme, 1);
if (err)
goto free_scheme_out;

- err = damon_start(&ctx, 1);
+ err = damon_start_one(ctxs[nid]);
if (!err) {
- kdamond_pid = ctx->kdamond->pid;
+ if (kdamond_start_pid == -1)
+ kdamond_start_pid = ctxs[nid]->kdamond->pid;
+ nr_kdamond++;
return 0;
}

free_scheme_out:
damon_destroy_scheme(scheme);
free_region_out:
- damon_destroy_region(region, target);
+ damon_destroy_region(region, targets[nid]);
+
+ return err;
+}
+
+static int damon_reclaim_start_all(void)
+{
+ int nid, err;
+
+ if (!per_node)
+ return damon_reclaim_start(0);
+
+ for_each_online_node(nid) {
+ err = damon_reclaim_start(nid);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int damon_reclaim_turn(bool on)
+{
+ int err;
+
+ if (!on) {
+ err = damon_stop(ctxs, nr_kdamond);
+ if (!err) {
+ kdamond_start_pid = -1;
+ nr_kdamond = 0;
+ monitor_region_start = 0;
+ monitor_region_end = 0;
+ }
+ return err;
+ }
+
+ err = damon_reclaim_start_all();
return err;
}

@@ -380,21 +438,24 @@ static int damon_reclaim_after_aggregation(struct damon_ctx *c)

static int __init damon_reclaim_init(void)
{
- ctx = damon_new_ctx();
- if (!ctx)
- return -ENOMEM;
-
- if (damon_select_ops(ctx, DAMON_OPS_PADDR))
- return -EINVAL;
-
- ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
-
- target = damon_new_target();
- if (!target) {
- damon_destroy_ctx(ctx);
- return -ENOMEM;
+ int nid;
+
+ for_each_node(nid) {
+ ctxs[nid] = damon_new_ctx();
+ if (!ctxs[nid])
+ return -ENOMEM;
+
+ if (damon_select_ops(ctxs[nid], DAMON_OPS_PADDR))
+ return -EINVAL;
+ ctxs[nid]->callback.after_aggregation = damon_reclaim_after_aggregation;
+
+ targets[nid] = damon_new_target();
+ if (!targets[nid]) {
+ damon_destroy_ctx(ctxs[nid]);
+ return -ENOMEM;
+ }
+ damon_add_target(ctxs[nid], targets[nid]);
}
- damon_add_target(ctx, target);

schedule_delayed_work(&damon_reclaim_timer, 0);
return 0;
--
2.17.1
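
For reference, a minimal sketch of how each online node's physical address
range is derived when per_node is set, mirroring the per_node branch of
damon_reclaim_start() in the patch above; the function name is made up and the
DAMON plumbing and error handling are omitted.

#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <linux/pfn.h>
#include <linux/printk.h>

static void show_per_node_monitor_ranges(void)
{
	int nid;

	for_each_online_node(nid) {
		/* Assumes the node's memory is one contiguous range, as the patch does. */
		unsigned long start = PFN_PHYS(node_start_pfn(nid));
		unsigned long end = PFN_PHYS(node_start_pfn(nid) +
					     node_present_pages(nid) - 1);

		if (end <= start)	/* e.g. a memoryless node */
			continue;
		pr_info("node %d: monitor [%#lx, %#lx]\n", nid, start, end);
	}
}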

2022-02-22 09:57:23

by SeongJae Park

Subject: Re: [RFC PATCH v1 3/3] mm/damon/reclaim: Add per NUMA node proactive reclamation by DAMON_RECLAIM.

On Fri, 18 Feb 2022 19:26:11 +0900 Jonghyeon Kim <[email protected]> wrote:

> To run DAMON_RECLAIM worker threads (kdamond) that do proactive reclamation
> per NUMA node, each node must have its own monitoring context. A new
> 'per_node' parameter is added to enable this.
>
> If 'per_node' is true, one kdamond per online NUMA node is woken up and
> starts monitoring to proactively reclaim that node's memory. If 'per_node'
> is false, only one kdamond thread starts monitoring all system memory.
>
> Signed-off-by: Jonghyeon Kim <[email protected]>
> ---
> mm/damon/reclaim.c | 147 ++++++++++++++++++++++++++++++++-------------
> 1 file changed, 104 insertions(+), 43 deletions(-)
>
> diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
> index b53d9c22fad1..85e8f97dd599 100644
> --- a/mm/damon/reclaim.c
> +++ b/mm/damon/reclaim.c
> @@ -177,13 +177,27 @@ static unsigned long monitor_region_end __read_mostly;
> module_param(monitor_region_end, ulong, 0600);
>
> /*
> - * PID of the DAMON thread
> + * Enable monitoring memory regions per NUMA node.
> *
> - * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
> + * By default, watermarks consist of based on total system memory.
> + */
> +static bool per_node __read_mostly;
> +module_param(per_node, bool, 0600);
> +
> +/*
> + * Number of currently running DAMON worker threads
> + */
> +static unsigned long nr_kdamond __read_mostly;
> +module_param(nr_kdamond, ulong, 0400);

I'd prefer to call this nr_kdamond*s*

> +
> +/*
> + * First PID of the DAMON threads
> + *
> + * If DAMON_RECLAIM is enabled, this becomes the first PID of the worker threads.
> * Else, -1.
> */
> -static int kdamond_pid __read_mostly = -1;
> -module_param(kdamond_pid, int, 0400);
> +static int kdamond_start_pid __read_mostly = -1;
> +module_param(kdamond_start_pid, int, 0400);

This change could break old users. Let's keep the name as is and clarify in
the document that it only holds the first kdamond's PID.

As long as DAMON_RECLAIM works in the exclusive manner, users will still be
able to know all pids of kdamonds for DAMON_RECLAIM, as nr_kdamonds is also
provided.

>
> /*
> * Number of memory regions that tried to be reclaimed.
> @@ -215,8 +229,8 @@ module_param(bytes_reclaimed_regions, ulong, 0400);
> static unsigned long nr_quota_exceeds __read_mostly;
> module_param(nr_quota_exceeds, ulong, 0400);
>
> -static struct damon_ctx *ctx;
> -static struct damon_target *target;
> +static struct damon_ctx *ctxs[MAX_NUMNODES];
> +static struct damon_target *targets[MAX_NUMNODES];
>
> struct damon_reclaim_ram_walk_arg {
> unsigned long start;
> @@ -251,7 +265,7 @@ static bool get_monitoring_region(unsigned long *start, unsigned long *end)
> return true;
> }
>
> -static struct damos *damon_reclaim_new_scheme(void)
> +static struct damos *damon_reclaim_new_scheme(int node)
> {
> struct damos_watermarks wmarks = {
> .metric = DAMOS_WMARK_FREE_MEM_RATE,
> @@ -259,6 +273,7 @@ static struct damos *damon_reclaim_new_scheme(void)
> .high = wmarks_high,
> .mid = wmarks_mid,
> .low = wmarks_low,
> + .node = node,
> };
> struct damos_quota quota = {
> /*
> @@ -290,56 +305,99 @@ static struct damos *damon_reclaim_new_scheme(void)
> return scheme;
> }
>
> -static int damon_reclaim_turn(bool on)
> +static int damon_reclaim_start(int nid)
> {
> struct damon_region *region;
> struct damos *scheme;
> int err;
> + unsigned long start, end;
>
> - if (!on) {
> - err = damon_stop(&ctx, 1);
> - if (!err)
> - kdamond_pid = -1;
> - return err;
> - }
> -
> - err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0,
> + err = damon_set_attrs(ctxs[nid], sample_interval, aggr_interval, 0,
> min_nr_regions, max_nr_regions);
> if (err)
> return err;
>
> - if (monitor_region_start > monitor_region_end)
> - return -EINVAL;
> - if (!monitor_region_start && !monitor_region_end &&
> - !get_monitoring_region(&monitor_region_start,
> - &monitor_region_end))
> - return -EINVAL;
> + if (per_node) {
> + monitor_region_start = monitor_region_end = 0;
> +
> + start = PFN_PHYS(node_start_pfn(nid));
> + end = PFN_PHYS(node_start_pfn(nid) + node_present_pages(nid) - 1);
> + if (end <= start)
> + return -EINVAL;
> + } else {
> + if (!monitor_region_start && !monitor_region_end &&
> + !get_monitoring_region(&monitor_region_start,
> + &monitor_region_end))
> + return -EINVAL;
> + start = monitor_region_start;
> + end = monitor_region_end;
> + }
> +
> /* DAMON will free this on its own when finish monitoring */
> - region = damon_new_region(monitor_region_start, monitor_region_end);
> + region = damon_new_region(start, end);
> if (!region)
> return -ENOMEM;
> - damon_add_region(region, target);
> + damon_add_region(region, targets[nid]);
>
> /* Will be freed by 'damon_set_schemes()' below */
> - scheme = damon_reclaim_new_scheme();
> + scheme = damon_reclaim_new_scheme(nid);
> if (!scheme) {
> err = -ENOMEM;
> goto free_region_out;
> }
> - err = damon_set_schemes(ctx, &scheme, 1);
> +
> + err = damon_set_schemes(ctxs[nid], &scheme, 1);
> if (err)
> goto free_scheme_out;
>
> - err = damon_start(&ctx, 1);
> + err = damon_start_one(ctxs[nid]);

This could surprise users who assume DAMON_RECLAIM works in an exclusive
manner, as it did before.

> if (!err) {
> - kdamond_pid = ctx->kdamond->pid;
> + if (kdamond_start_pid == -1)
> + kdamond_start_pid = ctxs[nid]->kdamond->pid;
> + nr_kdamond++;
> return 0;
> }
>
> free_scheme_out:
> damon_destroy_scheme(scheme);
> free_region_out:
> - damon_destroy_region(region, target);
> + damon_destroy_region(region, targets[nid]);
> +
> + return err;
> +}
> +
> +static int damon_reclaim_start_all(void)
> +{
> + int nid, err;
> +
> + if (!per_node)
> + return damon_reclaim_start(0);
> +
> + for_each_online_node(nid) {
> + err = damon_reclaim_start(nid);
> + if (err)
> + break;

I'd prefer making contexts first and starting them at once in the exclusive
manner using damon_start().

> + }
> +
> + return err;
> +}
> +
> +static int damon_reclaim_turn(bool on)
> +{
> + int err;
> +
> + if (!on) {
> + err = damon_stop(ctxs, nr_kdamond);
> + if (!err) {
> + kdamond_start_pid = -1;
> + nr_kdamond = 0;
> + monitor_region_start = 0;
> + monitor_region_end = 0;
> + }
> + return err;
> + }
> +
> + err = damon_reclaim_start_all();
> return err;
> }
>
> @@ -380,21 +438,24 @@ static int damon_reclaim_after_aggregation(struct damon_ctx *c)
>
> static int __init damon_reclaim_init(void)
> {
> - ctx = damon_new_ctx();
> - if (!ctx)
> - return -ENOMEM;
> -
> - if (damon_select_ops(ctx, DAMON_OPS_PADDR))
> - return -EINVAL;
> -
> - ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
> -
> - target = damon_new_target();
> - if (!target) {
> - damon_destroy_ctx(ctx);
> - return -ENOMEM;
> + int nid;
> +
> + for_each_node(nid) {
> + ctxs[nid] = damon_new_ctx();
> + if (!ctxs[nid])
> + return -ENOMEM;
> +
> + if (damon_select_ops(ctxs[nid], DAMON_OPS_PADDR))
> + return -EINVAL;
> + ctxs[nid]->callback.after_aggregation = damon_reclaim_after_aggregation;
> +
> + targets[nid] = damon_new_target();
> + if (!targets[nid]) {
> + damon_destroy_ctx(ctxs[nid]);

Shouldn't we also destroy previously allocated contexts?

> + return -ENOMEM;
> + }
> + damon_add_target(ctxs[nid], targets[nid]);
> }
> - damon_add_target(ctx, target);
>
> schedule_delayed_work(&damon_reclaim_timer, 0);
> return 0;
> --
> 2.17.1


Thanks,
SJ
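
A sketch of the two suggestions above, under some assumptions: it reuses the
ctxs[] and per_node declarations from the patch, assumes the suggested
nr_kdamonds rename, and uses a hypothetical damon_reclaim_prepare() that
stands for the attrs/region/scheme setup currently done in
damon_reclaim_start().

/* Prepare every context first, then start them all with one exclusive
 * damon_start() call instead of per-node damon_start_one() calls. */
static int damon_reclaim_start_all(void)
{
	static struct damon_ctx *running[MAX_NUMNODES];
	int nid, nr = 0, err;

	if (!per_node) {
		err = damon_reclaim_prepare(0);
		if (err)
			return err;
		running[nr++] = ctxs[0];
	} else {
		for_each_online_node(nid) {
			err = damon_reclaim_prepare(nid);
			if (err)
				return err;
			running[nr++] = ctxs[nid];
		}
	}

	err = damon_start(running, nr);
	if (!err)
		nr_kdamonds = nr;
	return err;
}

/* Error path helper for damon_reclaim_init(): also drop the contexts that
 * were already allocated for earlier nodes before bailing out. */
static void damon_reclaim_destroy_ctxs(void)
{
	int nid;

	for_each_node(nid) {
		if (!ctxs[nid])
			continue;
		damon_destroy_ctx(ctxs[nid]);
		ctxs[nid] = NULL;
	}
}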

2022-02-22 10:02:09

by SeongJae Park

Subject: Re: [RFC PATCH v1 2/3] mm/damon/core: Add damon_start_one()

Hello Jonghyeon,

On Fri, 18 Feb 2022 19:26:10 +0900 Jonghyeon Kim <[email protected]> wrote:

> The damon_start() function is designed to start multiple DAMON monitoring
> contexts, but sometimes we need to start monitoring for just one context.
> Although __damon_start() could be used to start a single monitoring context,
> it seems reasonable to add a new function so that callers do not have to
> handle 'damon_lock' themselves.
>
> Signed-off-by: Jonghyeon Kim <[email protected]>
> ---
> include/linux/damon.h | 1 +
> mm/damon/core.c | 25 +++++++++++++++++++++++++
> 2 files changed, 26 insertions(+)
>
> diff --git a/include/linux/damon.h b/include/linux/damon.h
> index c0adf1566603..069577477662 100644
> --- a/include/linux/damon.h
> +++ b/include/linux/damon.h
> @@ -511,6 +511,7 @@ int damon_register_ops(struct damon_operations *ops);
> int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id);
>
> int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
> +int damon_start_one(struct damon_ctx *ctx);
> int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
>
> #endif /* CONFIG_DAMON */
> diff --git a/mm/damon/core.c b/mm/damon/core.c
> index 290c9c0535ee..e43f138a3489 100644
> --- a/mm/damon/core.c
> +++ b/mm/damon/core.c
> @@ -466,6 +466,31 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
> return err;
> }
>
> +/**
> + * damon_start_one() - Starts the monitorings for one context.
> + * @ctx: monitoring context
> + *
> + * This function starts one monitoring thread for only one monitoring context
> + * handling damon_lock.
> + *
> + * Return: 0 on success, negative error code otherwise.
> + */
> +int damon_start_one(struct damon_ctx *ctx)
> +{
> + int err = 0;
> +
> + mutex_lock(&damon_lock);
> + err = __damon_start(ctx);
> + if (err) {
> + mutex_unlock(&damon_lock);
> + return err;
> + }
> + nr_running_ctxs++;
> + mutex_unlock(&damon_lock);
> +
> + return err;
> +}
> +

IMHO, this looks like an unnecessary duplication of code. Unless it is
needed for a real use case, this change would only make the code a little
more complicated. And to my understanding of the next patch, it is not
really needed for this patchset. I will leave comments on that patch. If
I'm missing something, please clarify why this is really needed.

Furthermore, damon_start() starts a set of DAMON contexts in an exclusive
manner, to ensure there will be no interference. This patch breaks that
assumption. That is, contexts started with damon_start() could be interfered
with by other contexts started with damon_start_one(). I have a plan to make
damon_start() also work for non-exclusive context groups[1], though.

[1] https://lore.kernel.org/linux-mm/[email protected]/


Thanks,
SJ

> /*
> * __damon_stop() - Stops monitoring of given context.
> * @ctx: monitoring context
> --
> 2.17.1
>
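
For context, the exclusiveness mentioned above comes from damon_start()
refusing to start anything while other contexts are already running. Roughly,
and only as a paraphrase of the core logic rather than the exact upstream
code:

int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
{
	int i, err = 0;

	mutex_lock(&damon_lock);
	if (nr_running_ctxs) {
		/* Another context is already running: refuse to start more. */
		mutex_unlock(&damon_lock);
		return -EBUSY;
	}
	for (i = 0; i < nr_ctxs; i++) {
		err = __damon_start(ctxs[i]);
		if (err)
			break;
		nr_running_ctxs++;
	}
	mutex_unlock(&damon_lock);

	return err;
}

damon_start_one() in the patch above skips that nr_running_ctxs check, so a
context started through it can run alongside contexts that were started via
damon_start().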

2022-02-22 10:35:45

by SeongJae Park

Subject: Re: [RFC PATCH v1 0/3] Rebase DAMON_RECLAIM for NUMA systems

Hello Jonghyeon,

On Fri, 18 Feb 2022 19:26:08 +0900 Jonghyeon Kim <[email protected]> wrote:

>
> The current DAMON_RECLAIM does not work well on NUMA memory systems. To
> proactively reclaim memory, the DAMON_RECLAIM kernel thread (kdamond) has to
> wake up before kswapd starts reclaiming memory. However, the current
> watermarks for proactive reclamation are based on the free memory of the
> entire system. So, even when one memory node is fully used, kdamond is not
> woken up.
>
> These patches rebase the DAMOS watermarks for NUMA systems and enable
> per-NUMA-node monitoring and proactive reclamation in DAMON_RECLAIM.

The high-level concept of this patchset looks good to me. Nevertheless, there
may be some room for improvement. I left some comments on the patches.


Thanks,
SJ

>
> Jonghyeon Kim (3):
> mm/damon: Rebase damos watermarks for NUMA systems
> mm/damon/core: Add damon_start_one()
> mm/damon/reclaim: Add per NUMA node proactive reclamation by
> DAMON_RECLAIM.
>
> include/linux/damon.h | 3 +
> mm/damon/core.c | 39 +++++++++--
> mm/damon/reclaim.c | 147 ++++++++++++++++++++++++++++++------------
> 3 files changed, 140 insertions(+), 49 deletions(-)
>
> --
> 2.17.1

2022-02-23 11:26:59

by Jonghyeon Kim

Subject: Re: [RFC PATCH v1 2/3] mm/damon/core: Add damon_start_one()

On Wed, Feb 23, 2022 at 02:11:13PM +0900, Jonghyeon Kim wrote:
> On Tue, Feb 22, 2022 at 09:53:28AM +0000, SeongJae Park wrote:
> > Hello Jonghyeon,
> >
> > On Fri, 18 Feb 2022 19:26:10 +0900 Jonghyeon Kim <[email protected]> wrote:
> >
> > > The damon_start() function is designed to start multiple DAMON monitoring
> > > contexts, but sometimes we need to start monitoring for just one context.
> > > Although __damon_start() could be used to start a single monitoring context,
> > > it seems reasonable to add a new function so that callers do not have to
> > > handle 'damon_lock' themselves.
> > >
> > > Signed-off-by: Jonghyeon Kim <[email protected]>
> > > ---
> > > include/linux/damon.h | 1 +
> > > mm/damon/core.c | 25 +++++++++++++++++++++++++
> > > 2 files changed, 26 insertions(+)
> > >
> > > diff --git a/include/linux/damon.h b/include/linux/damon.h
> > > index c0adf1566603..069577477662 100644
> > > --- a/include/linux/damon.h
> > > +++ b/include/linux/damon.h
> > > @@ -511,6 +511,7 @@ int damon_register_ops(struct damon_operations *ops);
> > > int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id);
> > >
> > > int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
> > > +int damon_start_one(struct damon_ctx *ctx);
> > > int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
> > >
> > > #endif /* CONFIG_DAMON */
> > > diff --git a/mm/damon/core.c b/mm/damon/core.c
> > > index 290c9c0535ee..e43f138a3489 100644
> > > --- a/mm/damon/core.c
> > > +++ b/mm/damon/core.c
> > > @@ -466,6 +466,31 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
> > > return err;
> > > }
> > >
> > > +/**
> > > + * damon_start_one() - Starts the monitorings for one context.
> > > + * @ctx: monitoring context
> > > + *
> > > + * This function starts one monitoring thread for only one monitoring context
> > > + * handling damon_lock.
> > > + *
> > > + * Return: 0 on success, negative error code otherwise.
> > > + */
> > > +int damon_start_one(struct damon_ctx *ctx)
> > > +{
> > > + int err = 0;
> > > +
> > > + mutex_lock(&damon_lock);
> > > + err = __damon_start(ctx);
> > > + if (err) {
> > > + mutex_unlock(&damon_lock);
> > > + return err;
> > > + }
> > > + nr_running_ctxs++;
> > > + mutex_unlock(&damon_lock);
> > > +
> > > + return err;
> > > +}
> > > +
> >
> > IMHO, this looks like an unnecessary duplication of code. Unless it is
> > needed for a real use case, this change would only make the code a little
> > more complicated. And to my understanding of the next patch, it is not
> > really needed for this patchset. I will leave comments on that patch. If
> > I'm missing something, please clarify why this is really needed.
> >
> > Furthermore, damon_start() starts a set of DAMON contexts in an exclusive
> > manner, to ensure there will be no interference. This patch breaks that
> > assumption. That is, contexts started with damon_start() could be interfered
> > with by other contexts started with damon_start_one(). I have a plan to make
> > damon_start() also work for non-exclusive context groups[1], though.
> >
> > [1] https://lore.kernel.org/linux-mm/[email protected]/
> >
> >
> > Thanks,
> > SJ
> >
>
> I understand your opinion, and it makes sense. I will drop this patch in the
> next version.
>
>
> Thanks,
> Jonghyeon

2022-02-23 12:08:01

by Jonghyeon Kim

Subject: Re: [RFC PATCH v1 0/3] Rebase DAMON_RECLAIM for NUMA systems

On Wed, Feb 23, 2022 at 02:11:46PM +0900, Jonghyeon Kim wrote:
> On Tue, Feb 22, 2022 at 09:53:02AM +0000, SeongJae Park wrote:
> > Hello Jonghyeon,
> >
> > On Fri, 18 Feb 2022 19:26:08 +0900 Jonghyeon Kim <[email protected]> wrote:
> >
> > >
> > > The current DAMON_RECLAIM does not work well on NUMA memory systems. To
> > > proactively reclaim memory, the DAMON_RECLAIM kernel thread (kdamond) has to
> > > wake up before kswapd starts reclaiming memory. However, the current
> > > watermarks for proactive reclamation are based on the free memory of the
> > > entire system. So, even when one memory node is fully used, kdamond is not
> > > woken up.
> > >
> > > These patches rebase the DAMOS watermarks for NUMA systems and enable
> > > per-NUMA-node monitoring and proactive reclamation in DAMON_RECLAIM.
> >
> > The high-level concept of this patchset looks good to me. Nevertheless, there
> > may be some room for improvement. I left some comments on the patches.
> >
> >
> > Thanks,
> > SJ
> >
>
> Hello SeongJae,
>
> Thanks for your patient comments on these patches. I will clean them up and
> improve them based on your suggestions and corrections in the next version.
>
>
> Jonghyeon

2022-02-23 23:41:12

by Jonghyeon Kim

Subject: Re: [RFC PATCH v1 3/3] mm/damon/reclaim: Add per NUMA node proactive reclamation by DAMON_RECLAIM.

On Wed, Feb 23, 2022 at 02:11:27PM +0900, Jonghyeon Kim wrote:
> On Tue, Feb 22, 2022 at 09:53:38AM +0000, SeongJae Park wrote:
> > On Fri, 18 Feb 2022 19:26:11 +0900 Jonghyeon Kim <[email protected]> wrote:
> >
> > > To run DAMON_RECLAIM worker threads (kdamond) that do proactive reclamation
> > > per NUMA node, each node must have its own monitoring context. A new
> > > 'per_node' parameter is added to enable this.
> > >
> > > If 'per_node' is true, one kdamond per online NUMA node is woken up and
> > > starts monitoring to proactively reclaim that node's memory. If 'per_node'
> > > is false, only one kdamond thread starts monitoring all system memory.
> > >
> > > Signed-off-by: Jonghyeon Kim <[email protected]>
> > > ---
> > > mm/damon/reclaim.c | 147 ++++++++++++++++++++++++++++++++-------------
> > > 1 file changed, 104 insertions(+), 43 deletions(-)
> > >
> > > diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
> > > index b53d9c22fad1..85e8f97dd599 100644
> > > --- a/mm/damon/reclaim.c
> > > +++ b/mm/damon/reclaim.c
> > > @@ -177,13 +177,27 @@ static unsigned long monitor_region_end __read_mostly;
> > > module_param(monitor_region_end, ulong, 0600);
> > >
> > > /*
> > > - * PID of the DAMON thread
> > > + * Enable monitoring memory regions per NUMA node.
> > > *
> > > - * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
> > > + * By default, watermarks consist of based on total system memory.
> > > + */
> > > +static bool per_node __read_mostly;
> > > +module_param(per_node, bool, 0600);
> > > +
> > > +/*
> > > + * Number of currently running DAMON worker threads
> > > + */
> > > +static unsigned long nr_kdamond __read_mostly;
> > > +module_param(nr_kdamond, ulong, 0400);
> >
> > I'd prefer to call this nr_kdamond*s*
> >
>
> I see.
>
> > > +
> > > +/*
> > > + * First PID of the DAMON threads
> > > + *
> > > + * If DAMON_RECLAIM is enabled, this becomes the first PID of the worker threads.
> > > * Else, -1.
> > > */
> > > -static int kdamond_pid __read_mostly = -1;
> > > -module_param(kdamond_pid, int, 0400);
> > > +static int kdamond_start_pid __read_mostly = -1;
> > > +module_param(kdamond_start_pid, int, 0400);
> >
> > This change could break old users. Let's keep the name as is and clarify in
> > the document that it only holds the first kdamond's PID.
>
> Got it, I will keep that name and update the DAMON document.
>
> >
> > As long as DAMON_RECLAIM works in the exclusive manner, users will still be
> > able to know all pids of kdamonds for DAMON_RECLAIM, as nr_kdamonds is also
> > provided.
>
> I will find a way to show all the PIDs of the kdamonds.
>
> >
> > >
> > > /*
> > > * Number of memory regions that tried to be reclaimed.
> > > @@ -215,8 +229,8 @@ module_param(bytes_reclaimed_regions, ulong, 0400);
> > > static unsigned long nr_quota_exceeds __read_mostly;
> > > module_param(nr_quota_exceeds, ulong, 0400);
> > >
> > > -static struct damon_ctx *ctx;
> > > -static struct damon_target *target;
> > > +static struct damon_ctx *ctxs[MAX_NUMNODES];
> > > +static struct damon_target *targets[MAX_NUMNODES];
> > >
> > > struct damon_reclaim_ram_walk_arg {
> > > unsigned long start;
> > > @@ -251,7 +265,7 @@ static bool get_monitoring_region(unsigned long *start, unsigned long *end)
> > > return true;
> > > }
> > >
> > > -static struct damos *damon_reclaim_new_scheme(void)
> > > +static struct damos *damon_reclaim_new_scheme(int node)
> > > {
> > > struct damos_watermarks wmarks = {
> > > .metric = DAMOS_WMARK_FREE_MEM_RATE,
> > > @@ -259,6 +273,7 @@ static struct damos *damon_reclaim_new_scheme(void)
> > > .high = wmarks_high,
> > > .mid = wmarks_mid,
> > > .low = wmarks_low,
> > > + .node = node,
> > > };
> > > struct damos_quota quota = {
> > > /*
> > > @@ -290,56 +305,99 @@ static struct damos *damon_reclaim_new_scheme(void)
> > > return scheme;
> > > }
> > >
> > > -static int damon_reclaim_turn(bool on)
> > > +static int damon_reclaim_start(int nid)
> > > {
> > > struct damon_region *region;
> > > struct damos *scheme;
> > > int err;
> > > + unsigned long start, end;
> > >
> > > - if (!on) {
> > > - err = damon_stop(&ctx, 1);
> > > - if (!err)
> > > - kdamond_pid = -1;
> > > - return err;
> > > - }
> > > -
> > > - err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0,
> > > + err = damon_set_attrs(ctxs[nid], sample_interval, aggr_interval, 0,
> > > min_nr_regions, max_nr_regions);
> > > if (err)
> > > return err;
> > >
> > > - if (monitor_region_start > monitor_region_end)
> > > - return -EINVAL;
> > > - if (!monitor_region_start && !monitor_region_end &&
> > > - !get_monitoring_region(&monitor_region_start,
> > > - &monitor_region_end))
> > > - return -EINVAL;
> > > + if (per_node) {
> > > + monitor_region_start = monitor_region_end = 0;
> > > +
> > > + start = PFN_PHYS(node_start_pfn(nid));
> > > + end = PFN_PHYS(node_start_pfn(nid) + node_present_pages(nid) - 1);
> > > + if (end <= start)
> > > + return -EINVAL;
> > > + } else {
> > > + if (!monitor_region_start && !monitor_region_end &&
> > > + !get_monitoring_region(&monitor_region_start,
> > > + &monitor_region_end))
> > > + return -EINVAL;
> > > + start = monitor_region_start;
> > > + end = monitor_region_end;
> > > + }
> > > +
> > > /* DAMON will free this on its own when finish monitoring */
> > > - region = damon_new_region(monitor_region_start, monitor_region_end);
> > > + region = damon_new_region(start, end);
> > > if (!region)
> > > return -ENOMEM;
> > > - damon_add_region(region, target);
> > > + damon_add_region(region, targets[nid]);
> > >
> > > /* Will be freed by 'damon_set_schemes()' below */
> > > - scheme = damon_reclaim_new_scheme();
> > > + scheme = damon_reclaim_new_scheme(nid);
> > > if (!scheme) {
> > > err = -ENOMEM;
> > > goto free_region_out;
> > > }
> > > - err = damon_set_schemes(ctx, &scheme, 1);
> > > +
> > > + err = damon_set_schemes(ctxs[nid], &scheme, 1);
> > > if (err)
> > > goto free_scheme_out;
> > >
> > > - err = damon_start(&ctx, 1);
> > > + err = damon_start_one(ctxs[nid]);
> >
> > This could surprise users who assume DAMON_RECLAIM works in an exclusive
> > manner, as it did before.
>
> Yes, I will drop this function in the next version.
>
> >
> > > if (!err) {
> > > - kdamond_pid = ctx->kdamond->pid;
> > > + if (kdamond_start_pid == -1)
> > > + kdamond_start_pid = ctxs[nid]->kdamond->pid;
> > > + nr_kdamond++;
> > > return 0;
> > > }
> > >
> > > free_scheme_out:
> > > damon_destroy_scheme(scheme);
> > > free_region_out:
> > > - damon_destroy_region(region, target);
> > > + damon_destroy_region(region, targets[nid]);
> > > +
> > > + return err;
> > > +}
> > > +
> > > +static int damon_reclaim_start_all(void)
> > > +{
> > > + int nid, err;
> > > +
> > > + if (!per_node)
> > > + return damon_reclaim_start(0);
> > > +
> > > + for_each_online_node(nid) {
> > > + err = damon_reclaim_start(nid);
> > > + if (err)
> > > + break;
> >
> > I'd prefer making contexts first and starting them at once in the exclusive
> > manner using damon_start().
> >
>
> Agree.
>
> > > + }
> > > +
> > > + return err;
> > > +}
> > > +
> > > +static int damon_reclaim_turn(bool on)
> > > +{
> > > + int err;
> > > +
> > > + if (!on) {
> > > + err = damon_stop(ctxs, nr_kdamond);
> > > + if (!err) {
> > > + kdamond_start_pid = -1;
> > > + nr_kdamond = 0;
> > > + monitor_region_start = 0;
> > > + monitor_region_end = 0;
> > > + }
> > > + return err;
> > > + }
> > > +
> > > + err = damon_reclaim_start_all();
> > > return err;
> > > }
> > >
> > > @@ -380,21 +438,24 @@ static int damon_reclaim_after_aggregation(struct damon_ctx *c)
> > >
> > > static int __init damon_reclaim_init(void)
> > > {
> > > - ctx = damon_new_ctx();
> > > - if (!ctx)
> > > - return -ENOMEM;
> > > -
> > > - if (damon_select_ops(ctx, DAMON_OPS_PADDR))
> > > - return -EINVAL;
> > > -
> > > - ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
> > > -
> > > - target = damon_new_target();
> > > - if (!target) {
> > > - damon_destroy_ctx(ctx);
> > > - return -ENOMEM;
> > > + int nid;
> > > +
> > > + for_each_node(nid) {
> > > + ctxs[nid] = damon_new_ctx();
> > > + if (!ctxs[nid])
> > > + return -ENOMEM;
> > > +
> > > + if (damon_select_ops(ctxs[nid], DAMON_OPS_PADDR))
> > > + return -EINVAL;
> > > + ctxs[nid]->callback.after_aggregation = damon_reclaim_after_aggregation;
> > > +
> > > + targets[nid] = damon_new_target();
> > > + if (!targets[nid]) {
> > > + damon_destroy_ctx(ctxs[nid]);
> >
> > Shouldn't we also destroy previously allocated contexts?
>
> Yes, I think so. I will fix.
>
> >
> > > + return -ENOMEM;
> > > + }
> > > + damon_add_target(ctxs[nid], targets[nid]);
> > > }
> > > - damon_add_target(ctx, target);
> > >
> > > schedule_delayed_work(&damon_reclaim_timer, 0);
> > > return 0;
> > > --
> > > 2.17.1
> >
> >
> > Thanks,
> > SJ
>
>
> Thanks for the comments!
> Jonghyeon