2022-04-16 05:56:05

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 5/6] mm/migration: export demotion_path of a node via sysfs

Add a /sys/devices/system/node/nodeX/demotion_path file
to export the possible target(s) in node_demotion[node].

Signed-off-by: Davidlohr Bueso <[email protected]>
---
Documentation/ABI/stable/sysfs-devices-node | 6 ++++
drivers/base/node.c | 39 +++++++++++++++++++++
include/linux/migrate.h | 15 ++++++++
mm/migrate.c | 15 +-------
4 files changed, 61 insertions(+), 14 deletions(-)

diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 3c935e1334f7..f620c6ae013c 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -192,3 +192,9 @@ Description:
When it completes successfully, the specified amount or more memory
will have been reclaimed, and -EAGAIN if less bytes are reclaimed
than the specified amount.
+
+What: /sys/devices/system/node/nodeX/demotion_path
+Date: April 2022
+Contact: Davidlohr Bueso <[email protected]>
+Description:
+ Shows nodes within the next tier of slower memory below this node.
diff --git a/drivers/base/node.c b/drivers/base/node.c
index d80c478e2a6e..ab4bae777535 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -17,6 +17,7 @@
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/device.h>
+#include <linux/migrate.h>
#include <linux/pm_runtime.h>
#include <linux/swap.h>
#include <linux/slab.h>
@@ -560,11 +561,49 @@ static ssize_t node_read_distance(struct device *dev,
}
static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);

+static ssize_t node_read_demotion_path(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int nid = dev->id;
+ int len = 0;
+ int i;
+ struct demotion_nodes *nd;
+
+ /*
+ * buf is currently PAGE_SIZE in length and each node needs 4 chars
+ * at the most (target + space or newline).
+ */
+ BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
+
+ if (!node_demotion) {
+ len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
+ goto done;
+ }
+
+ nd = &node_demotion[nid];
+
+ rcu_read_lock();
+ if (nd->nr == 0)
+ len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
+ else {
+ for (i = 0; i < nd->nr; i++) {
+ len += sysfs_emit_at(buf, len, "%s%d",
+ i ? " " : "", nd->nodes[i]);
+ }
+ }
+ rcu_read_unlock();
+done:
+ len += sysfs_emit_at(buf, len, "\n");
+ return len;
+}
+static DEVICE_ATTR(demotion_path, 0444, node_read_demotion_path, NULL);
+
static struct attribute *node_dev_attrs[] = {
&dev_attr_meminfo.attr,
&dev_attr_numastat.attr,
&dev_attr_distance.attr,
&dev_attr_vmstat.attr,
+ &dev_attr_demotion_path.attr,
NULL
};

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 90e75d5a54d6..b0ac6a717e44 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -111,6 +111,21 @@ static inline int migrate_misplaced_page(struct page *page,
}
#endif /* CONFIG_NUMA_BALANCING */

+#define DEFAULT_DEMOTION_TARGET_NODES 15
+
+#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
+#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1)
+#else
+#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES
+#endif
+
+struct demotion_nodes {
+ unsigned short nr;
+ short nodes[DEMOTION_TARGET_NODES];
+};
+
+extern struct demotion_nodes *node_demotion __read_mostly;
+
#ifdef CONFIG_MIGRATION

/*
diff --git a/mm/migrate.c b/mm/migrate.c
index 6c31ee1e1c9b..e47ea25fcfe8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2172,20 +2172,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
* must be held over all reads to ensure that no cycles are
* observed.
*/
-#define DEFAULT_DEMOTION_TARGET_NODES 15
-
-#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
-#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1)
-#else
-#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES
-#endif
-
-struct demotion_nodes {
- unsigned short nr;
- short nodes[DEMOTION_TARGET_NODES];
-};
-
-static struct demotion_nodes *node_demotion __read_mostly;
+struct demotion_nodes *node_demotion __read_mostly;

/**
* next_demotion_node() - Get the next node in the demotion path
--
2.26.2


2022-04-22 22:28:28

by Davidlohr Bueso

[permalink] [raw]
Subject: Re: [PATCH 5/6] mm/migration: export demotion_path of a node via sysfs

On Fri, 22 Apr 2022, Yang Shi wrote:

>On Fri, Apr 22, 2022 at 10:31 AM Yang Shi <[email protected]> wrote:
>>
>> On Fri, Apr 15, 2022 at 10:39 PM Davidlohr Bueso <[email protected]> wrote:
>> >
>> > Add a /sys/devices/system/node/nodeX/demotion_path file
>> > to export the possible target(s) in node_demotion[node].
>>
>> I'm not sure if you noticed that Jagdish Gediya is working on a
>> similar patch, please see
>> https://lore.kernel.org/linux-mm/[email protected]/
>
>Loop in Jagdish Gediya, Ying Huang and Wei Xu.
>

Hmm, I had missed this thread; I'll go have a look.

>>
>> It would be better to combine the two to avoid duplicate effort.

Indeed - and even more reason for LSF/MM discussions defining the
future ABI for tiering.

Thanks,
Davidlohr

2022-04-22 22:45:47

by Yang Shi

[permalink] [raw]
Subject: Re: [PATCH 5/6] mm/migration: export demotion_path of a node via sysfs

On Fri, Apr 15, 2022 at 10:39 PM Davidlohr Bueso <[email protected]> wrote:
>
> Add a /sys/devices/system/node/nodeX/demotion_path file
> to export the possible target(s) in node_demotion[node].

I'm not sure if you noticed that Jagdish Gediya is working on a
similar patch, please see
https://lore.kernel.org/linux-mm/[email protected]/

It would be better to combine the two to avoid duplicate effort.

>
> Signed-off-by: Davidlohr Bueso <[email protected]>
> ---
> Documentation/ABI/stable/sysfs-devices-node | 6 ++++
> drivers/base/node.c | 39 +++++++++++++++++++++
> include/linux/migrate.h | 15 ++++++++
> mm/migrate.c | 15 +-------
> 4 files changed, 61 insertions(+), 14 deletions(-)
>
> diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
> index 3c935e1334f7..f620c6ae013c 100644
> --- a/Documentation/ABI/stable/sysfs-devices-node
> +++ b/Documentation/ABI/stable/sysfs-devices-node
> @@ -192,3 +192,9 @@ Description:
> When it completes successfully, the specified amount or more memory
> will have been reclaimed, and -EAGAIN if less bytes are reclaimed
> than the specified amount.
> +
> +What: /sys/devices/system/node/nodeX/demotion_path
> +Date: April 2022
> +Contact: Davidlohr Bueso <[email protected]>
> +Description:
> + Shows nodes within the next tier of slower memory below this node.
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index d80c478e2a6e..ab4bae777535 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -17,6 +17,7 @@
> #include <linux/nodemask.h>
> #include <linux/cpu.h>
> #include <linux/device.h>
> +#include <linux/migrate.h>
> #include <linux/pm_runtime.h>
> #include <linux/swap.h>
> #include <linux/slab.h>
> @@ -560,11 +561,49 @@ static ssize_t node_read_distance(struct device *dev,
> }
> static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);
>
> +static ssize_t node_read_demotion_path(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + int nid = dev->id;
> + int len = 0;
> + int i;
> + struct demotion_nodes *nd;
> +
> + /*
> + * buf is currently PAGE_SIZE in length and each node needs 4 chars
> + * at the most (target + space or newline).
> + */
> + BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
> +
> + if (!node_demotion) {
> + len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
> + goto done;
> + }
> +
> + nd = &node_demotion[nid];
> +
> + rcu_read_lock();
> + if (nd->nr == 0)
> + len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
> + else {
> + for (i = 0; i < nd->nr; i++) {
> + len += sysfs_emit_at(buf, len, "%s%d",
> + i ? " " : "", nd->nodes[i]);
> + }
> + }
> + rcu_read_unlock();
> +done:
> + len += sysfs_emit_at(buf, len, "\n");
> + return len;
> +}
> +static DEVICE_ATTR(demotion_path, 0444, node_read_demotion_path, NULL);
> +
> static struct attribute *node_dev_attrs[] = {
> &dev_attr_meminfo.attr,
> &dev_attr_numastat.attr,
> &dev_attr_distance.attr,
> &dev_attr_vmstat.attr,
> + &dev_attr_demotion_path.attr,
> NULL
> };
>
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index 90e75d5a54d6..b0ac6a717e44 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -111,6 +111,21 @@ static inline int migrate_misplaced_page(struct page *page,
> }
> #endif /* CONFIG_NUMA_BALANCING */
>
> +#define DEFAULT_DEMOTION_TARGET_NODES 15
> +
> +#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
> +#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1)
> +#else
> +#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES
> +#endif
> +
> +struct demotion_nodes {
> + unsigned short nr;
> + short nodes[DEMOTION_TARGET_NODES];
> +};
> +
> +extern struct demotion_nodes *node_demotion __read_mostly;
> +
> #ifdef CONFIG_MIGRATION
>
> /*
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 6c31ee1e1c9b..e47ea25fcfe8 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2172,20 +2172,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
> * must be held over all reads to ensure that no cycles are
> * observed.
> */
> -#define DEFAULT_DEMOTION_TARGET_NODES 15
> -
> -#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
> -#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1)
> -#else
> -#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES
> -#endif
> -
> -struct demotion_nodes {
> - unsigned short nr;
> - short nodes[DEMOTION_TARGET_NODES];
> -};
> -
> -static struct demotion_nodes *node_demotion __read_mostly;
> +struct demotion_nodes *node_demotion __read_mostly;
>
> /**
> * next_demotion_node() - Get the next node in the demotion path
> --
> 2.26.2
>
>

2022-04-22 22:46:06

by Yang Shi

[permalink] [raw]
Subject: Re: [PATCH 5/6] mm/migration: export demotion_path of a node via sysfs

On Fri, Apr 22, 2022 at 10:31 AM Yang Shi <[email protected]> wrote:
>
> On Fri, Apr 15, 2022 at 10:39 PM Davidlohr Bueso <[email protected]> wrote:
> >
> > Add a /sys/devices/system/node/nodeX/demotion_path file
> > to export the possible target(s) in node_demotion[node].
>
> I'm not sure if you noticed that Jagdish Gediya is working on a
> similar patch, please see
> https://lore.kernel.org/linux-mm/[email protected]/

Loop in Jagdish Gediya, Ying Huang and Wei Xu.

>
> It would be better to combine the two to avoid duplicate effort.
>
> >
> > Signed-off-by: Davidlohr Bueso <[email protected]>
> > ---
> > Documentation/ABI/stable/sysfs-devices-node | 6 ++++
> > drivers/base/node.c | 39 +++++++++++++++++++++
> > include/linux/migrate.h | 15 ++++++++
> > mm/migrate.c | 15 +-------
> > 4 files changed, 61 insertions(+), 14 deletions(-)
> >
> > diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
> > index 3c935e1334f7..f620c6ae013c 100644
> > --- a/Documentation/ABI/stable/sysfs-devices-node
> > +++ b/Documentation/ABI/stable/sysfs-devices-node
> > @@ -192,3 +192,9 @@ Description:
> > When it completes successfully, the specified amount or more memory
> > will have been reclaimed, and -EAGAIN if less bytes are reclaimed
> > than the specified amount.
> > +
> > +What: /sys/devices/system/node/nodeX/demotion_path
> > +Date: April 2022
> > +Contact: Davidlohr Bueso <[email protected]>
> > +Description:
> > + Shows nodes within the next tier of slower memory below this node.
> > diff --git a/drivers/base/node.c b/drivers/base/node.c
> > index d80c478e2a6e..ab4bae777535 100644
> > --- a/drivers/base/node.c
> > +++ b/drivers/base/node.c
> > @@ -17,6 +17,7 @@
> > #include <linux/nodemask.h>
> > #include <linux/cpu.h>
> > #include <linux/device.h>
> > +#include <linux/migrate.h>
> > #include <linux/pm_runtime.h>
> > #include <linux/swap.h>
> > #include <linux/slab.h>
> > @@ -560,11 +561,49 @@ static ssize_t node_read_distance(struct device *dev,
> > }
> > static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);
> >
> > +static ssize_t node_read_demotion_path(struct device *dev,
> > + struct device_attribute *attr, char *buf)
> > +{
> > + int nid = dev->id;
> > + int len = 0;
> > + int i;
> > + struct demotion_nodes *nd;
> > +
> > + /*
> > + * buf is currently PAGE_SIZE in length and each node needs 4 chars
> > + * at the most (target + space or newline).
> > + */
> > + BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
> > +
> > + if (!node_demotion) {
> > + len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
> > + goto done;
> > + }
> > +
> > + nd = &node_demotion[nid];
> > +
> > + rcu_read_lock();
> > + if (nd->nr == 0)
> > + len += sysfs_emit_at(buf, len, "%d", NUMA_NO_NODE);
> > + else {
> > + for (i = 0; i < nd->nr; i++) {
> > + len += sysfs_emit_at(buf, len, "%s%d",
> > + i ? " " : "", nd->nodes[i]);
> > + }
> > + }
> > + rcu_read_unlock();
> > +done:
> > + len += sysfs_emit_at(buf, len, "\n");
> > + return len;
> > +}
> > +static DEVICE_ATTR(demotion_path, 0444, node_read_demotion_path, NULL);
> > +
> > static struct attribute *node_dev_attrs[] = {
> > &dev_attr_meminfo.attr,
> > &dev_attr_numastat.attr,
> > &dev_attr_distance.attr,
> > &dev_attr_vmstat.attr,
> > + &dev_attr_demotion_path.attr,
> > NULL
> > };
> >
> > diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> > index 90e75d5a54d6..b0ac6a717e44 100644
> > --- a/include/linux/migrate.h
> > +++ b/include/linux/migrate.h
> > @@ -111,6 +111,21 @@ static inline int migrate_misplaced_page(struct page *page,
> > }
> > #endif /* CONFIG_NUMA_BALANCING */
> >
> > +#define DEFAULT_DEMOTION_TARGET_NODES 15
> > +
> > +#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
> > +#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1)
> > +#else
> > +#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES
> > +#endif
> > +
> > +struct demotion_nodes {
> > + unsigned short nr;
> > + short nodes[DEMOTION_TARGET_NODES];
> > +};
> > +
> > +extern struct demotion_nodes *node_demotion __read_mostly;
> > +
> > #ifdef CONFIG_MIGRATION
> >
> > /*
> > diff --git a/mm/migrate.c b/mm/migrate.c
> > index 6c31ee1e1c9b..e47ea25fcfe8 100644
> > --- a/mm/migrate.c
> > +++ b/mm/migrate.c
> > @@ -2172,20 +2172,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
> > * must be held over all reads to ensure that no cycles are
> > * observed.
> > */
> > -#define DEFAULT_DEMOTION_TARGET_NODES 15
> > -
> > -#if MAX_NUMNODES < DEFAULT_DEMOTION_TARGET_NODES
> > -#define DEMOTION_TARGET_NODES (MAX_NUMNODES - 1)
> > -#else
> > -#define DEMOTION_TARGET_NODES DEFAULT_DEMOTION_TARGET_NODES
> > -#endif
> > -
> > -struct demotion_nodes {
> > - unsigned short nr;
> > - short nodes[DEMOTION_TARGET_NODES];
> > -};
> > -
> > -static struct demotion_nodes *node_demotion __read_mostly;
> > +struct demotion_nodes *node_demotion __read_mostly;
> >
> > /**
> > * next_demotion_node() - Get the next node in the demotion path
> > --
> > 2.26.2
> >
> >