2022-01-09 13:15:25

by Xiongwei Song

[permalink] [raw]
Subject: [PATCH 0/2] Add support for getting page info of ZONE_DEVICE by /proc/kpage*

From: Xiongwei Song <[email protected]>

Patch 1 is adding pfn_to_devmap_page() function to get page of ZONE_DEVICE
by pfn. It checks the sections that is offline but valid or is online but
parts of pfns is offline. Then checks if dev_pagemap is valid, if yes,
return page pointer.

Patch 2 is finishing supporting /proc/kpage* in exposing pages info of
ZONE_DEVICE to userspace.

The unit test has been done by "page-types -r", which ran in qemu with the
below arguments:
-object memory-backend-file,id=mem2,share,mem-path=./virtio_pmem.img,size=2G
-device virtio-pmem-pci,memdev=mem2,id=nv1
, which is used to emulate pmem device with 2G memory space.

As we know, the pages in ZONE_DEVICE are only set PG_reserved flag. So
before the serires,
run "page-types -r", the result is:
flags page-count MB symbolic-flags long-symbolic-flags
0x0000000100000000 24377 95 ___________________________r________________ reserved
, which means the only PG_reserved set of pages in system wide have 24377.

run "cat /proc/zoneinfo" to get the ZONE_DEVICE info:
Node 1, zone Device
pages free 0
boost 0
min 0
low 0
high 0
spanned 0
present 0
managed 0
cma 0
protection: (0, 0, 0, 0, 0)

After this series,
run "page-types -r", the result is:
flags page-count MB symbolic-flags long-symbolic-flags
0x0000000100000000 548665 2143 ___________________________r________________ reserved
, which means the only PG_reserved set of pages in system wide have 548665.

Run "cat /proc/zoneinfo" to get the ZONE_DEVICE info:
Node 1, zone Device
pages free 0
boost 0
min 0
low 0
high 0
spanned 524288
present 0
managed 0
cma 0
protection: (0, 0, 0, 0, 0)

, these added pages number is 524288 in ZONE_DEVICE as spanned field
showed. Meanwhile, we can do 548665 - 24377 = 524288 that is increment
of the reserved pages, it equals to the spanned field of ZONE_DEVICE.
Hence it looks like the patchset works well.

Xiongwei Song (2):
mm/memremap.c: Add pfn_to_devmap_page() to get page in ZONE_DEVICE
proc: Add getting pages info of ZONE_DEVICE support

fs/proc/page.c | 35 ++++++++++++++++++++-------------
include/linux/memremap.h | 8 ++++++++
mm/memremap.c | 42 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 72 insertions(+), 13 deletions(-)

--
2.30.2



2022-01-09 13:15:25

by Xiongwei Song

[permalink] [raw]
Subject: [PATCH 2/2] proc: Add getting pages info of ZONE_DEVICE support

From: Xiongwei Song <[email protected]>

When requesting pages info by /proc/kpage*, the pages in ZONE_DEVICE were
missed.

The pfn_to_devmap_page() function can help to get page that belongs to
ZONE_DEVICE.

Signed-off-by: Xiongwei Song <[email protected]>
---
fs/proc/page.c | 35 ++++++++++++++++++++++-------------
1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/fs/proc/page.c b/fs/proc/page.c
index 9f1077d94cde..2cdc2b315ff8 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -15,6 +15,7 @@
#include <linux/page_idle.h>
#include <linux/kernel-page-flags.h>
#include <linux/uaccess.h>
+#include <linux/memremap.h>
#include "internal.h"

#define KPMSIZE sizeof(u64)
@@ -46,6 +47,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
{
const unsigned long max_dump_pfn = get_max_dump_pfn();
u64 __user *out = (u64 __user *)buf;
+ struct dev_pagemap *pgmap = NULL;
struct page *ppage;
unsigned long src = *ppos;
unsigned long pfn;
@@ -60,17 +62,18 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);

while (count > 0) {
- /*
- * TODO: ZONE_DEVICE support requires to identify
- * memmaps that were actually initialized.
- */
ppage = pfn_to_online_page(pfn);
+ if (!ppage)
+ ppage = pfn_to_devmap_page(pfn, &pgmap);

if (!ppage || PageSlab(ppage) || page_has_type(ppage))
pcount = 0;
else
pcount = page_mapcount(ppage);

+ if (pgmap)
+ put_dev_pagemap(pgmap);
+
if (put_user(pcount, out)) {
ret = -EFAULT;
break;
@@ -229,10 +232,12 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
{
const unsigned long max_dump_pfn = get_max_dump_pfn();
u64 __user *out = (u64 __user *)buf;
+ struct dev_pagemap *pgmap = NULL;
struct page *ppage;
unsigned long src = *ppos;
unsigned long pfn;
ssize_t ret = 0;
+ u64 flags;

pfn = src / KPMSIZE;
if (src & KPMMASK || count & KPMMASK)
@@ -242,13 +247,15 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);

while (count > 0) {
- /*
- * TODO: ZONE_DEVICE support requires to identify
- * memmaps that were actually initialized.
- */
ppage = pfn_to_online_page(pfn);
+ if (!ppage)
+ ppage = pfn_to_devmap_page(pfn, &pgmap);
+
+ flags = stable_page_flags(ppage);
+ if (pgmap)
+ put_dev_pagemap(pgmap);

- if (put_user(stable_page_flags(ppage), out)) {
+ if (put_user(flags, out)) {
ret = -EFAULT;
break;
}
@@ -277,6 +284,7 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
{
const unsigned long max_dump_pfn = get_max_dump_pfn();
u64 __user *out = (u64 __user *)buf;
+ struct dev_pagemap *pgmap = NULL;
struct page *ppage;
unsigned long src = *ppos;
unsigned long pfn;
@@ -291,17 +299,18 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
count = min_t(unsigned long, count, (max_dump_pfn * KPMSIZE) - src);

while (count > 0) {
- /*
- * TODO: ZONE_DEVICE support requires to identify
- * memmaps that were actually initialized.
- */
ppage = pfn_to_online_page(pfn);
+ if (!ppage)
+ ppage = pfn_to_devmap_page(pfn, &pgmap);

if (ppage)
ino = page_cgroup_ino(ppage);
else
ino = 0;

+ if (pgmap)
+ put_dev_pagemap(pgmap);
+
if (put_user(ino, out)) {
ret = -EFAULT;
break;
--
2.30.2


2022-01-09 13:15:25

by Xiongwei Song

[permalink] [raw]
Subject: [PATCH 1/2] mm/memremap.c: Add pfn_to_devmap_page() to get page in ZONE_DEVICE

From: Xiongwei Song <[email protected]>

when requesting page information by /proc/kpage*, the pages in ZONE_DEVICE
were missed. We need a function to help on this.

The pfn_to_devmap_page() function like pfn_to_online_page(), but only
concerns the pages in ZONE_DEVICE.

Signed-off-by: Xiongwei Song <[email protected]>
---
include/linux/memremap.h | 8 ++++++++
mm/memremap.c | 42 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index c0e9d35889e8..621723e9c4a5 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -137,6 +137,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
+struct page *pfn_to_devmap_page(unsigned long pfn,
+ struct dev_pagemap **pgmap);
bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);

unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
@@ -166,6 +168,12 @@ static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
return NULL;
}

+static inline struct page *pfn_to_devmap_page(unsigned long pfn,
+ struct dev_pagemap **pgmap)
+{
+ return NULL;
+}
+
static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
{
return false;
diff --git a/mm/memremap.c b/mm/memremap.c
index 5a66a71ab591..072dbe6ab81c 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -494,6 +494,48 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
}
EXPORT_SYMBOL_GPL(get_dev_pagemap);

+/**
+ * pfn_to_devmap_page - get page pointer which belongs to dev_pagemap by @pfn
+ * @pfn: page frame number to lookup page_map
+ * @pgmap: to save pgmap address which is for putting reference
+ *
+ * If @pgmap is non-NULL, then pfn is on ZONE_DEVICE and return page pointer.
+ */
+struct page *pfn_to_devmap_page(unsigned long pfn, struct dev_pagemap **pgmap)
+{
+ unsigned long nr = pfn_to_section_nr(pfn);
+ struct mem_section *ms;
+ struct page *page = NULL;
+
+ if (nr >= NR_MEM_SECTIONS)
+ return NULL;
+
+ if (IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) && !pfn_valid(pfn))
+ return NULL;
+
+ ms = __nr_to_section(nr);
+ if (!valid_section(ms))
+ return NULL;
+ if (!pfn_section_valid(ms, pfn))
+ return NULL;
+
+ /*
+ * Two types of sections may include valid pfns:
+ * - The pfns of section belong to ZONE_DEVICE and ZONE_{NORMAL,MOVABLE}
+ * at the same time.
+ * - All pfns in one section are offline but valid.
+ */
+ if (!online_device_section(ms) && online_section(ms))
+ return NULL;
+
+ *pgmap = get_dev_pagemap(pfn, NULL);
+ if (*pgmap)
+ page = pfn_to_page(pfn);
+
+ return page;
+}
+EXPORT_SYMBOL_GPL(pfn_to_devmap_page);
+
#ifdef CONFIG_DEV_PAGEMAP_OPS
void free_devmap_managed_page(struct page *page)
{
--
2.30.2


2022-01-10 08:16:36

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm/memremap.c: Add pfn_to_devmap_page() to get page in ZONE_DEVICE

On 09.01.22 14:05, [email protected] wrote:
> From: Xiongwei Song <[email protected]>
>
> when requesting page information by /proc/kpage*, the pages in ZONE_DEVICE
> were missed. We need a function to help on this.
>
> The pfn_to_devmap_page() function like pfn_to_online_page(), but only
> concerns the pages in ZONE_DEVICE.
>
> Signed-off-by: Xiongwei Song <[email protected]>
> ---
> include/linux/memremap.h | 8 ++++++++
> mm/memremap.c | 42 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 50 insertions(+)
>
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index c0e9d35889e8..621723e9c4a5 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -137,6 +137,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
> void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
> struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
> struct dev_pagemap *pgmap);
> +struct page *pfn_to_devmap_page(unsigned long pfn,
> + struct dev_pagemap **pgmap);
> bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
>
> unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
> @@ -166,6 +168,12 @@ static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
> return NULL;
> }
>
> +static inline struct page *pfn_to_devmap_page(unsigned long pfn,
> + struct dev_pagemap **pgmap)
> +{
> + return NULL;
> +}
> +
> static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
> {
> return false;
> diff --git a/mm/memremap.c b/mm/memremap.c
> index 5a66a71ab591..072dbe6ab81c 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -494,6 +494,48 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
> }
> EXPORT_SYMBOL_GPL(get_dev_pagemap);
>
> +/**
> + * pfn_to_devmap_page - get page pointer which belongs to dev_pagemap by @pfn
> + * @pfn: page frame number to lookup page_map
> + * @pgmap: to save pgmap address which is for putting reference
> + *
> + * If @pgmap is non-NULL, then pfn is on ZONE_DEVICE and return page pointer.
> + */
> +struct page *pfn_to_devmap_page(unsigned long pfn, struct dev_pagemap **pgmap)
> +{
> + unsigned long nr = pfn_to_section_nr(pfn);
> + struct mem_section *ms;
> + struct page *page = NULL;
> +
> + if (nr >= NR_MEM_SECTIONS)
> + return NULL;
> +
> + if (IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) && !pfn_valid(pfn))
> + return NULL;
> +
> + ms = __nr_to_section(nr);
> + if (!valid_section(ms))
> + return NULL;
> + if (!pfn_section_valid(ms, pfn))
> + return NULL;
> +
> + /*
> + * Two types of sections may include valid pfns:
> + * - The pfns of section belong to ZONE_DEVICE and ZONE_{NORMAL,MOVABLE}
> + * at the same time.
> + * - All pfns in one section are offline but valid.
> + */
> + if (!online_device_section(ms) && online_section(ms))
> + return NULL;
> +
> + *pgmap = get_dev_pagemap(pfn, NULL);
> + if (*pgmap)
> + page = pfn_to_page(pfn);
> +
> + return page;
> +}
> +EXPORT_SYMBOL_GPL(pfn_to_devmap_page);

Is this complexity really required?

Take a look at mm/memory-failure.c

p = pfn_to_online_page(pfn);
if (!p) {
if (pfn_valid(pfn)) {
pgmap = get_dev_pagemap(pfn, NULL);
if (pgmap)
// success
// error
}
// error
}


Also, why do we need the export?

--
Thanks,

David / dhildenb


2022-01-10 09:31:11

by Xiongwei Song

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm/memremap.c: Add pfn_to_devmap_page() to get page in ZONE_DEVICE

Hi David,

On Mon, Jan 10, 2022 at 4:16 PM David Hildenbrand <[email protected]> wrote:
>
> On 09.01.22 14:05, [email protected] wrote:
> > From: Xiongwei Song <[email protected]>
> >
> > when requesting page information by /proc/kpage*, the pages in ZONE_DEVICE
> > were missed. We need a function to help on this.
> >
> > The pfn_to_devmap_page() function like pfn_to_online_page(), but only
> > concerns the pages in ZONE_DEVICE.
> >
> > Signed-off-by: Xiongwei Song <[email protected]>
> > ---
> > include/linux/memremap.h | 8 ++++++++
> > mm/memremap.c | 42 ++++++++++++++++++++++++++++++++++++++++
> > 2 files changed, 50 insertions(+)
> >
> > diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> > index c0e9d35889e8..621723e9c4a5 100644
> > --- a/include/linux/memremap.h
> > +++ b/include/linux/memremap.h
> > @@ -137,6 +137,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
> > void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
> > struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
> > struct dev_pagemap *pgmap);
> > +struct page *pfn_to_devmap_page(unsigned long pfn,
> > + struct dev_pagemap **pgmap);
> > bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
> >
> > unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
> > @@ -166,6 +168,12 @@ static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
> > return NULL;
> > }
> >
> > +static inline struct page *pfn_to_devmap_page(unsigned long pfn,
> > + struct dev_pagemap **pgmap)
> > +{
> > + return NULL;
> > +}
> > +
> > static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
> > {
> > return false;
> > diff --git a/mm/memremap.c b/mm/memremap.c
> > index 5a66a71ab591..072dbe6ab81c 100644
> > --- a/mm/memremap.c
> > +++ b/mm/memremap.c
> > @@ -494,6 +494,48 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
> > }
> > EXPORT_SYMBOL_GPL(get_dev_pagemap);
> >
> > +/**
> > + * pfn_to_devmap_page - get page pointer which belongs to dev_pagemap by @pfn
> > + * @pfn: page frame number to lookup page_map
> > + * @pgmap: to save pgmap address which is for putting reference
> > + *
> > + * If @pgmap is non-NULL, then pfn is on ZONE_DEVICE and return page pointer.
> > + */
> > +struct page *pfn_to_devmap_page(unsigned long pfn, struct dev_pagemap **pgmap)
> > +{
> > + unsigned long nr = pfn_to_section_nr(pfn);
> > + struct mem_section *ms;
> > + struct page *page = NULL;
> > +
> > + if (nr >= NR_MEM_SECTIONS)
> > + return NULL;
> > +
> > + if (IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) && !pfn_valid(pfn))
> > + return NULL;
> > +
> > + ms = __nr_to_section(nr);
> > + if (!valid_section(ms))
> > + return NULL;
> > + if (!pfn_section_valid(ms, pfn))
> > + return NULL;
> > +
> > + /*
> > + * Two types of sections may include valid pfns:
> > + * - The pfns of section belong to ZONE_DEVICE and ZONE_{NORMAL,MOVABLE}
> > + * at the same time.
> > + * - All pfns in one section are offline but valid.
> > + */
> > + if (!online_device_section(ms) && online_section(ms))
> > + return NULL;
> > +
> > + *pgmap = get_dev_pagemap(pfn, NULL);
> > + if (*pgmap)
> > + page = pfn_to_page(pfn);
> > +
> > + return page;
> > +}
> > +EXPORT_SYMBOL_GPL(pfn_to_devmap_page);
>
> Is this complexity really required?
>
> Take a look at mm/memory-failure.c
>
> p = pfn_to_online_page(pfn);
> if (!p) {
> if (pfn_valid(pfn)) {
> pgmap = get_dev_pagemap(pfn, NULL);
> if (pgmap)
> // success
> // error
> }
> // error
> }

Yes, this method is much simpler than mine. Will do this in v2.

>
>
> Also, why do we need the export?

Ah, no strong reason. Will remove that in v2.

Would you mind adding "Suggested-by" for this patch?

Thank you for your time.

Regards,
Xiongwei