2024-01-15 09:35:12

by Vern Hao

Subject: [PATCH RFC v1 0/2] mm, pcp: add high order page info in /proc/zoneinfo

From: Xin Hao <[email protected]>

With /proc/zoneinfo we can easily get the number of pages held on each
CPU's pcplists, but we cannot see how those pages are distributed across
orders, such as the count of high order pages. With these patches we can
see the usage of each order in detail, which is helpful when analyzing
the pcp memory usage of an application on the related CPUs.

By the way, on my Intel 32-core machine, I found that the size of
'struct per_cpu_pages' increases by 64 bytes, which seems like bad news.
I ran some tests such as stress-ng and did not see any performance
degradation. Walking 'pcp->lists' is also a possible way to get the
high order page counts, but it would increase the /proc/zoneinfo query
time, so any meaningful suggestions are welcome!
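
For illustration, the list-walking alternative could look roughly like
the sketch below (untested, meant to sit in mm/page_alloc.c). Note that
each pcp list index combines order and migratetype, so folding the
lists back into per-order counts needs something like the existing
pindex_to_order() helper:

static void pcp_count_orders(struct per_cpu_pages *pcp,
                             unsigned long counts[NR_PCP_ORDERS])
{
        struct page *page;
        unsigned int pindex;

        memset(counts, 0, NR_PCP_ORDERS * sizeof(counts[0]));
        spin_lock(&pcp->lock);
        for (pindex = 0; pindex < NR_PCP_LISTS; pindex++) {
                int order = pindex_to_order(pindex);

                /* several migratetype lists share the same order */
                list_for_each_entry(page, &pcp->lists[pindex], pcp_list)
                        counts[order]++;
        }
        spin_unlock(&pcp->lock);
}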

Xin Hao (2):
mm, pcp: rename pcp->count to pcp->total_count
mm, pcp: add more detail info about high order page count

include/linux/mmzone.h | 3 ++-
mm/page_alloc.c | 46 +++++++++++++++++++++++-------------------
mm/show_mem.c | 6 +++---
mm/vmstat.c | 22 ++++++++++++--------
4 files changed, 44 insertions(+), 33 deletions(-)

--
2.31.1



2024-01-15 09:35:25

by Vern Hao

Subject: [PATCH RFC v1 1/2] mm, pcp: rename pcp->count to pcp->total_count

From: Xin Hao <[email protected]>

Just a rename to avoid a name conflict in the next patch; no functional
change.

Signed-off-by: Xin Hao <[email protected]>
---
include/linux/mmzone.h | 2 +-
mm/page_alloc.c | 42 +++++++++++++++++++++---------------------
mm/show_mem.c | 6 +++---
mm/vmstat.c | 6 +++---
4 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4ed33b127821..883168776fea 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -683,7 +683,7 @@ enum zone_watermarks {

struct per_cpu_pages {
spinlock_t lock; /* Protects lists field */
- int count; /* number of pages in the list */
+ int total_count; /* total number of pages in the list */
int high; /* high watermark, emptying needed */
int high_min; /* min high watermark */
int high_max; /* max high watermark */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5be4cd8f6b5a..4e91e429b8d1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1197,7 +1197,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
* Ensure proper count is passed which otherwise would stuck in the
* below while (list_empty(list)) loop.
*/
- count = min(pcp->count, count);
+ count = min(pcp->total_count, count);

/* Ensure requested pindex is drained first. */
pindex = pindex - 1;
@@ -1227,7 +1227,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
/* must delete to avoid corrupting pcp list */
list_del(&page->pcp_list);
count -= nr_pages;
- pcp->count -= nr_pages;
+ pcp->total_count -= nr_pages;

/* MIGRATE_ISOLATE page should not go to pcplists */
VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
@@ -2209,13 +2209,13 @@ int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
* control latency. This caps pcp->high decrement too.
*/
if (pcp->high > high_min) {
- pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
+ pcp->high = max3(pcp->total_count - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
pcp->high - (pcp->high >> 3), high_min);
if (pcp->high > high_min)
todo++;
}

- to_drain = pcp->count - pcp->high;
+ to_drain = pcp->total_count - pcp->high;
if (to_drain > 0) {
spin_lock(&pcp->lock);
free_pcppages_bulk(zone, to_drain, pcp, 0);
@@ -2237,7 +2237,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
int to_drain, batch;

batch = READ_ONCE(pcp->batch);
- to_drain = min(pcp->count, batch);
+ to_drain = min(pcp->total_count, batch);
if (to_drain > 0) {
spin_lock(&pcp->lock);
free_pcppages_bulk(zone, to_drain, pcp, 0);
@@ -2254,9 +2254,9 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
struct per_cpu_pages *pcp;

pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
- if (pcp->count) {
+ if (pcp->total_count) {
spin_lock(&pcp->lock);
- free_pcppages_bulk(zone, pcp->count, pcp, 0);
+ free_pcppages_bulk(zone, pcp->total_count, pcp, 0);
spin_unlock(&pcp->lock);
}
}
@@ -2292,7 +2292,7 @@ void drain_local_pages(struct zone *zone)
*
* drain_all_pages() is optimized to only execute on cpus where pcplists are
* not empty. The check for non-emptiness can however race with a free to
- * pcplist that has not yet increased the pcp->count from 0 to 1. Callers
+ * pcplist that has not yet increased the pcp->total_count from 0 to 1. Callers
* that need the guarantee that every CPU has drained can disable the
* optimizing racy check.
*/
@@ -2336,12 +2336,12 @@ static void __drain_all_pages(struct zone *zone, bool force_all_cpus)
has_pcps = true;
} else if (zone) {
pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
- if (pcp->count)
+ if (pcp->total_count)
has_pcps = true;
} else {
for_each_populated_zone(z) {
pcp = per_cpu_ptr(z->per_cpu_pageset, cpu);
- if (pcp->count) {
+ if (pcp->total_count) {
has_pcps = true;
break;
}
@@ -2393,7 +2393,7 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free

/* Free as much as possible if batch freeing high-order pages. */
if (unlikely(free_high))
- return min(pcp->count, batch << CONFIG_PCP_BATCH_SCALE_MAX);
+ return min(pcp->total_count, batch << CONFIG_PCP_BATCH_SCALE_MAX);

/* Check for PCP disabled or boot pageset */
if (unlikely(high < batch))
@@ -2448,8 +2448,8 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
int free_count = max_t(int, pcp->free_count, batch);

pcp->high = max(high - free_count, high_min);
- high = max(pcp->count, high_min);
- } else if (pcp->count >= high) {
+ high = max(pcp->total_count, high_min);
+ } else if (pcp->total_count >= high) {
int need_high = pcp->free_count + batch;

/* pcp->high should be large enough to hold batch freed pages */
@@ -2477,7 +2477,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
__count_vm_events(PGFREE, 1 << order);
pindex = order_to_pindex(migratetype, order);
list_add(&page->pcp_list, &pcp->lists[pindex]);
- pcp->count += 1 << order;
+ pcp->total_count += 1 << order;

batch = READ_ONCE(pcp->batch);
/*
@@ -2490,7 +2490,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
free_high = (pcp->free_count >= batch &&
(pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) &&
(!(pcp->flags & PCPF_FREE_HIGH_BATCH) ||
- pcp->count >= READ_ONCE(batch)));
+ pcp->total_count >= READ_ONCE(batch)));
pcp->flags |= PCPF_PREV_FREE_HIGH_ORDER;
} else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) {
pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER;
@@ -2498,7 +2498,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX))
pcp->free_count += (1 << order);
high = nr_pcp_high(pcp, zone, batch, free_high);
- if (pcp->count >= high) {
+ if (pcp->total_count >= high) {
free_pcppages_bulk(zone, nr_pcp_free(pcp, batch, high, free_high),
pcp, pindex);
if (test_bit(ZONE_BELOW_HIGH, &zone->flags) &&
@@ -2815,7 +2815,7 @@ static int nr_pcp_alloc(struct per_cpu_pages *pcp, struct zone *zone, int order)
high = pcp->high = min(high + batch, high_max);

if (!order) {
- max_nr_alloc = max(high - pcp->count - base_batch, base_batch);
+ max_nr_alloc = max(high - pcp->total_count - base_batch, base_batch);
/*
* Double the number of pages allocated each time there is
* subsequent allocation of order-0 pages without any freeing.
@@ -2857,14 +2857,14 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
batch, list,
migratetype, alloc_flags);

- pcp->count += alloced << order;
+ pcp->total_count += alloced << order;
if (unlikely(list_empty(list)))
return NULL;
}

page = list_first_entry(list, struct page, pcp_list);
list_del(&page->pcp_list);
- pcp->count -= 1 << order;
+ pcp->total_count -= 1 << order;
} while (check_new_pages(page, order));

return page;
@@ -5482,7 +5482,7 @@ static int zone_highsize(struct zone *zone, int batch, int cpu_online,

/*
* pcp->high and pcp->batch values are related and generally batch is lower
- * than high. They are also related to pcp->count such that count is lower
+ * than high. They are also related to pcp->total_count such that count is lower
* than high, and as soon as it reaches high, the pcplist is flushed.
*
* However, guaranteeing these relations at all times would require e.g. write
@@ -5490,7 +5490,7 @@ static int zone_highsize(struct zone *zone, int batch, int cpu_online,
* thus be prone to error and bad for performance. Thus the update only prevents
* store tearing. Any new users of pcp->batch, pcp->high_min and pcp->high_max
* should ensure they can cope with those fields changing asynchronously, and
- * fully trust only the pcp->count field on the local CPU with interrupts
+ * fully trust only the pcp->total_count field on the local CPU with interrupts
* disabled.
*
* mutex_is_locked(&pcp_batch_high_lock) required when calling this function
diff --git a/mm/show_mem.c b/mm/show_mem.c
index 8dcfafbd283c..6fcb2c771613 100644
--- a/mm/show_mem.c
+++ b/mm/show_mem.c
@@ -197,7 +197,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
continue;

for_each_online_cpu(cpu)
- free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+ free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->total_count;
}

printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
@@ -299,7 +299,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z

free_pcp = 0;
for_each_online_cpu(cpu)
- free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+ free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->total_count;

show_node(zone);
printk(KERN_CONT
@@ -342,7 +342,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
K(zone_page_state(zone, NR_MLOCK)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
- K(this_cpu_read(zone->per_cpu_pageset->count)),
+ K(this_cpu_read(zone->per_cpu_pageset->total_count)),
K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
printk("lowmem_reserve[]:");
for (i = 0; i < MAX_NR_ZONES; i++)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index db79935e4a54..c1e8096ff0a6 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -846,7 +846,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
* if not then there is nothing to expire.
*/
if (!__this_cpu_read(pcp->expire) ||
- !__this_cpu_read(pcp->count))
+ !__this_cpu_read(pcp->total_count))
continue;

/*
@@ -862,7 +862,7 @@ static int refresh_cpu_vm_stats(bool do_pagesets)
continue;
}

- if (__this_cpu_read(pcp->count)) {
+ if (__this_cpu_read(pcp->total_count)) {
drain_zone_pages(zone, this_cpu_ptr(pcp));
changes++;
}
@@ -1745,7 +1745,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
"\n high: %i"
"\n batch: %i",
i,
- pcp->count,
+ pcp->total_count,
pcp->high,
pcp->batch);
#ifdef CONFIG_SMP
--
2.31.1


2024-01-15 09:35:31

by Vern Hao

Subject: [PATCH RFC v1 2/2] mm, pcp: add more detail info about high order page count

From: Xin Hao <[email protected]>

With this patch, we can see the distribution of pages of different
orders on each CPU, as shown below.
#cat /proc/zoneinfo
....
cpu: 2
total_count: 14286
order0 : 1260
order1 : 13
order2 : 42
order3 : 4
order4 : 0
order5 : 0
order6 : 0
order7 : 0
order8 : 0
order9 : 25
order10: 0
order11: 0
order12: 0
high: 14541
batch: 63
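
For what it's worth, a hypothetical consumer of the new per-order lines
(illustration only, not part of this patch) could be as simple as:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/zoneinfo", "r");
        char line[256];
        int cpu = -1, order, pages;

        if (!f)
                return 1;
        while (fgets(line, sizeof(line), f)) {
                if (sscanf(line, " cpu: %d", &cpu) == 1)
                        continue;
                /* matches both "order0 : 1260" and "order10: 0" */
                if (sscanf(line, " order%d : %d", &order, &pages) == 2 && pages)
                        printf("cpu %d, order %d: %d pages\n",
                               cpu, order, pages);
        }
        fclose(f);
        return 0;
}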

Signed-off-by: Xin Hao <[email protected]>
---
include/linux/mmzone.h | 1 +
mm/page_alloc.c | 4 ++++
mm/vmstat.c | 18 ++++++++++++------
3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 883168776fea..55d25b4f51e5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -684,6 +684,7 @@ enum zone_watermarks {
struct per_cpu_pages {
spinlock_t lock; /* Protects lists field */
int total_count; /* total number of pages in the list */
+ int count[NR_PCP_LISTS]; /* per-order page counts */
int high; /* high watermark, emptying needed */
int high_min; /* min high watermark */
int high_max; /* max high watermark */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4e91e429b8d1..7ec2dc5c5ea5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1228,6 +1228,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
list_del(&page->pcp_list);
count -= nr_pages;
pcp->total_count -= nr_pages;
+ pcp->count[order] -= 1;

/* MIGRATE_ISOLATE page should not go to pcplists */
VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
@@ -2478,6 +2479,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
pindex = order_to_pindex(migratetype, order);
list_add(&page->pcp_list, &pcp->lists[pindex]);
pcp->total_count += 1 << order;
+ pcp->count[order] += 1;

batch = READ_ONCE(pcp->batch);
/*
@@ -2858,6 +2860,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
migratetype, alloc_flags);

pcp->total_count += alloced << order;
+ pcp->count[order] += alloced;
if (unlikely(list_empty(list)))
return NULL;
}
@@ -2865,6 +2868,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
page = list_first_entry(list, struct page, pcp_list);
list_del(&page->pcp_list);
pcp->total_count -= 1 << order;
+ pcp->count[order] -= 1;
} while (check_new_pages(page, order));

return page;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c1e8096ff0a6..e04300ec450f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1735,19 +1735,25 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,

seq_printf(m, "\n pagesets");
for_each_online_cpu(i) {
+ int j;
struct per_cpu_pages *pcp;
struct per_cpu_zonestat __maybe_unused *pzstats;

pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
seq_printf(m,
"\n cpu: %i"
- "\n count: %i"
- "\n high: %i"
- "\n batch: %i",
+ "\n total_count: %i",
i,
- pcp->total_count,
- pcp->high,
- pcp->batch);
+ pcp->total_count);
+ for (j = 0; j < NR_PCP_LISTS; j++)
+ seq_printf(m,
+ "\n order%-2i: %-3i",
+ j, pcp->count[j]);
+ seq_printf(m,
+ "\n high: %i"
+ "\n batch: %i",
+ pcp->high,
+ pcp->batch);
#ifdef CONFIG_SMP
pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
seq_printf(m, "\n vm stats threshold: %d",
--
2.31.1


2024-01-15 20:32:15

by David Rientjes

Subject: Re: [PATCH RFC v1 0/2] mm, pcp: add high order page info in /proc/zoneinfo

On Mon, 15 Jan 2024, Vern Hao wrote:

> From: Xin Hao <[email protected]>
>
> With /proc/zoneinfo we can easily get the number of pages held on each
> CPU's pcplists, but we cannot see how those pages are distributed across
> orders, such as the count of high order pages. With these patches we can
> see the usage of each order in detail, which is helpful when analyzing
> the pcp memory usage of an application on the related CPUs.
>

Could you elaborate on the use case for this?

I'm not exactly sure what is meant by analyzing the pcp memory usage of an
application; this would be a property of page allocations and freeing on
individual cpus for orders that have pcp lists. That's traditionally not
a property of an individual application.

> By the way, on my Intel 32-core machine, I found that the size of
> 'struct per_cpu_pages' increases by 64 bytes, which seems like bad news.
> I ran some tests such as stress-ng and did not see any performance
> degradation. Walking 'pcp->lists' is also a possible way to get the
> high order page counts, but it would increase the /proc/zoneinfo query
> time, so any meaningful suggestions are welcome!
>
> Xin Hao (2):
> mm, pcp: rename pcp->count to pcp->total_count
> mm, pcp: add more detail info about high order page count
>
> include/linux/mmzone.h | 3 ++-
> mm/page_alloc.c | 46 +++++++++++++++++++++++-------------------
> mm/show_mem.c | 6 +++---
> mm/vmstat.c | 22 ++++++++++++--------
> 4 files changed, 44 insertions(+), 33 deletions(-)
>
> --
> 2.31.1
>
>
>

2024-01-15 20:34:39

by David Rientjes

Subject: Re: [PATCH RFC v1 2/2] mm, pcp: add more detail info about high order page count

On Mon, 15 Jan 2024, Vern Hao wrote:

> From: Xin Hao <[email protected]>
>
> With this patch, we can see the distribution of pages of different
> orders on each CPU, as shown below.
> #cat /proc/zoneinfo
> ....
> cpu: 2
> total_count: 14286

I don't think we should be changing the naming of the field if there are
existing users that parse /proc/zoneinfo.

> order0 : 1260
> order1 : 13
> order2 : 42
> order3 : 4
> order4 : 0
> order5 : 0
> order6 : 0
> order7 : 0
> order8 : 0
> order9 : 25
> order10: 0
> order11: 0
> order12: 0
> high: 14541
> batch: 63
>
> Signed-off-by: Xin Hao <[email protected]>
> ---
> include/linux/mmzone.h | 1 +
> mm/page_alloc.c | 4 ++++
> mm/vmstat.c | 18 ++++++++++++------
> 3 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 883168776fea..55d25b4f51e5 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -684,6 +684,7 @@ enum zone_watermarks {
> struct per_cpu_pages {
> spinlock_t lock; /* Protects lists field */
> int total_count; /* total number of pages in the list */
> + int count[NR_PCP_LISTS]; /* per-order page counts */
> int high; /* high watermark, emptying needed */
> int high_min; /* min high watermark */
> int high_max; /* max high watermark */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 4e91e429b8d1..7ec2dc5c5ea5 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1228,6 +1228,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
> list_del(&page->pcp_list);
> count -= nr_pages;
> pcp->total_count -= nr_pages;
> + pcp->count[order] -= 1;
>
> /* MIGRATE_ISOLATE page should not go to pcplists */
> VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
> @@ -2478,6 +2479,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
> pindex = order_to_pindex(migratetype, order);
> list_add(&page->pcp_list, &pcp->lists[pindex]);
> pcp->total_count += 1 << order;
> + pcp->count[order] += 1;
>
> batch = READ_ONCE(pcp->batch);
> /*
> @@ -2858,6 +2860,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
> migratetype, alloc_flags);
>
> pcp->total_count += alloced << order;
> + pcp->count[order] += alloced;
> if (unlikely(list_empty(list)))
> return NULL;
> }
> @@ -2865,6 +2868,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
> page = list_first_entry(list, struct page, pcp_list);
> list_del(&page->pcp_list);
> pcp->total_count -= 1 << order;
> + pcp->count[order] -= 1;
> } while (check_new_pages(page, order));
>
> return page;
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index c1e8096ff0a6..e04300ec450f 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1735,19 +1735,25 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
>
> seq_printf(m, "\n pagesets");
> for_each_online_cpu(i) {
> + int j;
> struct per_cpu_pages *pcp;
> struct per_cpu_zonestat __maybe_unused *pzstats;
>
> pcp = per_cpu_ptr(zone->per_cpu_pageset, i);
> seq_printf(m,
> "\n cpu: %i"
> - "\n count: %i"
> - "\n high: %i"
> - "\n batch: %i",
> + "\n total_count: %i",
> i,
> - pcp->total_count,
> - pcp->high,
> - pcp->batch);
> + pcp->total_count);
> + for (j = 0; j < NR_PCP_LISTS; j++)
> + seq_printf(m,
> + "\n order%-2i: %-3i",
> + j, pcp->count[j]);
> + seq_printf(m,
> + "\n high: %i"
> + "\n batch: %i",
> + pcp->high,
> + pcp->batch);
> #ifdef CONFIG_SMP
> pzstats = per_cpu_ptr(zone->per_cpu_zonestats, i);
> seq_printf(m, "\n vm stats threshold: %d",
> --
> 2.31.1
>
>
>

2024-01-16 14:40:16

by Mel Gorman

Subject: Re: [PATCH RFC v1 2/2] mm, pcp: add more detail info about high order page count

On Mon, Jan 15, 2024 at 05:34:36PM +0800, Vern Hao wrote:
> From: Xin Hao <[email protected]>
>
> With this patch, we can see the distribution of pages of different
> orders on each CPU, as shown below.
> #cat /proc/zoneinfo
> ....
> cpu: 2
> total_count: 14286
> order0 : 1260
> order1 : 13
> order2 : 42
> order3 : 4
> order4 : 0
> order5 : 0
> order6 : 0
> order7 : 0
> order8 : 0
> order9 : 25
> order10: 0
> order11: 0
> order12: 0
> high: 14541
> batch: 63
>
> Signed-off-by: Xin Hao <[email protected]>

I am not a major fan because increasing the size of a per-cpu structure for
debugging purposes incurs a cost for everyone while only a tiny minority
may care. There is a mild risk it would break existing parsers of that file
although maybe that's not a big deal. However, the same information could be
extracted by locking the pcp structures and counting the items per list. It
would increase the cost of reading zoneinfo but it's unlikely the file is
read at high frequency. If that was a concern, a separate proc file could be
used. Finally, the same information likely can be extracted via a systemtap
script, a BPF script (if it can get to the right symbols and locking, I
didn't check) or via a kernel probe. Even with that information, it's not
clear what meaningful action a user can take, so this is really a
developer-only feature, with a cost incurred for everybody.
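
For example, computing the counts at read time from a separate file
could be as small as the following untested sketch (names and file
location arbitrary, locking kept as naive as in the drain paths):

static int pcp_lists_show(struct seq_file *m, void *v)
{
        struct zone *zone;
        int cpu;

        for_each_populated_zone(zone) {
                seq_printf(m, "zone %s\n", zone->name);
                for_each_online_cpu(cpu) {
                        struct per_cpu_pages *pcp;
                        struct page *page;
                        unsigned int pindex;

                        pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
                        seq_printf(m, "  cpu %d:", cpu);
                        spin_lock(&pcp->lock);
                        for (pindex = 0; pindex < NR_PCP_LISTS; pindex++) {
                                int nr = 0;

                                /* count items on this pcp list directly */
                                list_for_each_entry(page,
                                                    &pcp->lists[pindex],
                                                    pcp_list)
                                        nr++;
                                seq_printf(m, " %d", nr);
                        }
                        spin_unlock(&pcp->lock);
                        seq_putc(m, '\n');
                }
        }
        return 0;
}

static int __init pcp_lists_init(void)
{
        proc_create_single("pcp_lists", 0444, NULL, pcp_lists_show);
        return 0;
}
late_initcall(pcp_lists_init);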

--
Mel Gorman
SUSE Labs