2023-05-08 06:59:15

by Kefeng Wang

Subject: [PATCH -next 00/12] mm: page_alloc: misc cleanup and refactor

This series aims to reduce the amount of code in page_alloc.c and to do
some cleanup; no functional changes are intended.

This is based on next-20230508.

Kefeng Wang (12):
mm: page_alloc: move mirrored_kernelcore into mm_init.c
mm: page_alloc: move init_on_alloc/free() into mm_init.c
mm: page_alloc: move set_zone_contiguous() into mm_init.c
mm: page_alloc: collect mem statistics into show_mem.c
mm: page_alloc: squash page_is_consistent()
mm: page_alloc: remove alloc_contig_dump_pages() stub
mm: page_alloc: split out FAIL_PAGE_ALLOC
mm: page_alloc: split out DEBUG_PAGEALLOC
mm: page_alloc: move mark_free_page() into snapshot.c
mm: page_alloc: move pm_* function into power
mm: vmscan: use gfp_has_io_fs()
mm: page_alloc: move sysctls into their own file

include/linux/fault-inject.h | 9 +
include/linux/gfp.h | 15 +-
include/linux/memory_hotplug.h | 3 -
include/linux/mm.h | 87 ++--
include/linux/mmzone.h | 21 -
include/linux/suspend.h | 9 +-
kernel/power/main.c | 27 ++
kernel/power/power.h | 5 +
kernel/power/snapshot.c | 52 ++
kernel/sysctl.c | 67 ---
lib/Makefile | 2 +-
lib/show_mem.c | 37 --
mm/Makefile | 4 +-
mm/debug_page_alloc.c | 59 +++
mm/fail_page_alloc.c | 66 +++
mm/internal.h | 16 +
mm/mm_init.c | 84 ++++
mm/page_alloc.c | 844 ++++-----------------------------
mm/show_mem.c | 429 +++++++++++++++++
mm/swapfile.c | 1 +
mm/vmscan.c | 2 +-
21 files changed, 902 insertions(+), 937 deletions(-)
delete mode 100644 lib/show_mem.c
create mode 100644 mm/debug_page_alloc.c
create mode 100644 mm/fail_page_alloc.c
create mode 100644 mm/show_mem.c

--
2.35.3


2023-05-08 07:01:13

by Kefeng Wang

Subject: [PATCH 04/12] mm: page_alloc: collect mem statistics into show_mem.c

Let's move show_mem.c from lib to mm, as it belongs to the memory
subsystem; also split some memory-statistics-related functions out of
page_alloc.c into show_mem.c, and clean up some unneeded includes.

There is no functional change.
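
For reference, the interface itself is unchanged by the move; a hedged
usage sketch (the argument values below are illustrative and not taken
from this series):

	/* Dump a system-wide memory overview covering every zone. */
	__show_mem(0, NULL, MAX_NR_ZONES - 1);

	/* Restrict the report to nodes allowed by the current cpuset. */
	__show_mem(SHOW_MEM_FILTER_NODES, NULL, MAX_NR_ZONES - 1);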

Signed-off-by: Kefeng Wang <[email protected]>
---
lib/Makefile | 2 +-
lib/show_mem.c | 37 -----
mm/Makefile | 2 +-
mm/page_alloc.c | 402 ---------------------------------------------
mm/show_mem.c | 429 ++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 431 insertions(+), 441 deletions(-)
delete mode 100644 lib/show_mem.c
create mode 100644 mm/show_mem.c

diff --git a/lib/Makefile b/lib/Makefile
index 876fcdeae34e..38f23f352736 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -30,7 +30,7 @@ endif
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o timerqueue.o xarray.o \
maple_tree.o idr.o extable.o irq_regs.o argv_split.o \
- flex_proportions.o ratelimit.o show_mem.o \
+ flex_proportions.o ratelimit.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
nmi_backtrace.o win_minmax.o memcat_p.o \
diff --git a/lib/show_mem.c b/lib/show_mem.c
deleted file mode 100644
index 1485c87be935..000000000000
--- a/lib/show_mem.c
+++ /dev/null
@@ -1,37 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Generic show_mem() implementation
- *
- * Copyright (C) 2008 Johannes Weiner <[email protected]>
- */
-
-#include <linux/mm.h>
-#include <linux/cma.h>
-
-void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
-{
- unsigned long total = 0, reserved = 0, highmem = 0;
- struct zone *zone;
-
- printk("Mem-Info:\n");
- __show_free_areas(filter, nodemask, max_zone_idx);
-
- for_each_populated_zone(zone) {
-
- total += zone->present_pages;
- reserved += zone->present_pages - zone_managed_pages(zone);
-
- if (is_highmem(zone))
- highmem += zone->present_pages;
- }
-
- printk("%lu pages RAM\n", total);
- printk("%lu pages HighMem/MovableOnly\n", highmem);
- printk("%lu pages reserved\n", reserved);
-#ifdef CONFIG_CMA
- printk("%lu pages cma reserved\n", totalcma_pages);
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
- printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
-#endif
-}
diff --git a/mm/Makefile b/mm/Makefile
index e29afc890cde..5262ce5baa28 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -51,7 +51,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \
mm_init.o percpu.o slab_common.o \
- compaction.o \
+ compaction.o show_mem.o\
interval_tree.o list_lru.o workingset.o \
debug.o gup.o mmap_lock.o $(mmu-y)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe7c1ee5becd..9a85238f1140 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -18,10 +18,7 @@
#include <linux/stddef.h>
#include <linux/mm.h>
#include <linux/highmem.h>
-#include <linux/swap.h>
-#include <linux/swapops.h>
#include <linux/interrupt.h>
-#include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
@@ -30,8 +27,6 @@
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
-#include <linux/blkdev.h>
-#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/oom.h>
#include <linux/topology.h>
@@ -40,19 +35,10 @@
#include <linux/cpuset.h>
#include <linux/memory_hotplug.h>
#include <linux/nodemask.h>
-#include <linux/vmalloc.h>
#include <linux/vmstat.h>
-#include <linux/mempolicy.h>
-#include <linux/memremap.h>
-#include <linux/stop_machine.h>
-#include <linux/random.h>
#include <linux/sort.h>
#include <linux/pfn.h>
-#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
-#include <linux/page-isolation.h>
-#include <linux/debugobjects.h>
-#include <linux/kmemleak.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
#include <trace/events/oom.h>
@@ -60,12 +46,9 @@
#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
-#include <linux/hugetlb.h>
-#include <linux/sched/rt.h>
#include <linux/sched/mm.h>
#include <linux/page_owner.h>
#include <linux/page_table_check.h>
-#include <linux/kthread.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
#include <linux/lockdep.h>
@@ -73,13 +56,10 @@
#include <linux/psi.h>
#include <linux/khugepaged.h>
#include <linux/delayacct.h>
-#include <asm/sections.h>
-#include <asm/tlbflush.h>
#include <asm/div64.h>
#include "internal.h"
#include "shuffle.h"
#include "page_reporting.h"
-#include "swap.h"

/* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */
typedef int __bitwise fpi_t;
@@ -226,11 +206,6 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
};
EXPORT_SYMBOL(node_states);

-atomic_long_t _totalram_pages __read_mostly;
-EXPORT_SYMBOL(_totalram_pages);
-unsigned long totalreserve_pages __read_mostly;
-unsigned long totalcma_pages __read_mostly;
-
int percpu_pagelist_high_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;

@@ -5050,383 +5025,6 @@ unsigned long nr_free_buffer_pages(void)
}
EXPORT_SYMBOL_GPL(nr_free_buffer_pages);

-static inline void show_node(struct zone *zone)
-{
- if (IS_ENABLED(CONFIG_NUMA))
- printk("Node %d ", zone_to_nid(zone));
-}
-
-long si_mem_available(void)
-{
- long available;
- unsigned long pagecache;
- unsigned long wmark_low = 0;
- unsigned long pages[NR_LRU_LISTS];
- unsigned long reclaimable;
- struct zone *zone;
- int lru;
-
- for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
- pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
-
- for_each_zone(zone)
- wmark_low += low_wmark_pages(zone);
-
- /*
- * Estimate the amount of memory available for userspace allocations,
- * without causing swapping or OOM.
- */
- available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages;
-
- /*
- * Not all the page cache can be freed, otherwise the system will
- * start swapping or thrashing. Assume at least half of the page
- * cache, or the low watermark worth of cache, needs to stay.
- */
- pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
- pagecache -= min(pagecache / 2, wmark_low);
- available += pagecache;
-
- /*
- * Part of the reclaimable slab and other kernel memory consists of
- * items that are in use, and cannot be freed. Cap this estimate at the
- * low watermark.
- */
- reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) +
- global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
- available += reclaimable - min(reclaimable / 2, wmark_low);
-
- if (available < 0)
- available = 0;
- return available;
-}
-EXPORT_SYMBOL_GPL(si_mem_available);
-
-void si_meminfo(struct sysinfo *val)
-{
- val->totalram = totalram_pages();
- val->sharedram = global_node_page_state(NR_SHMEM);
- val->freeram = global_zone_page_state(NR_FREE_PAGES);
- val->bufferram = nr_blockdev_pages();
- val->totalhigh = totalhigh_pages();
- val->freehigh = nr_free_highpages();
- val->mem_unit = PAGE_SIZE;
-}
-
-EXPORT_SYMBOL(si_meminfo);
-
-#ifdef CONFIG_NUMA
-void si_meminfo_node(struct sysinfo *val, int nid)
-{
- int zone_type; /* needs to be signed */
- unsigned long managed_pages = 0;
- unsigned long managed_highpages = 0;
- unsigned long free_highpages = 0;
- pg_data_t *pgdat = NODE_DATA(nid);
-
- for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
- managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
- val->totalram = managed_pages;
- val->sharedram = node_page_state(pgdat, NR_SHMEM);
- val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
-#ifdef CONFIG_HIGHMEM
- for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
- struct zone *zone = &pgdat->node_zones[zone_type];
-
- if (is_highmem(zone)) {
- managed_highpages += zone_managed_pages(zone);
- free_highpages += zone_page_state(zone, NR_FREE_PAGES);
- }
- }
- val->totalhigh = managed_highpages;
- val->freehigh = free_highpages;
-#else
- val->totalhigh = managed_highpages;
- val->freehigh = free_highpages;
-#endif
- val->mem_unit = PAGE_SIZE;
-}
-#endif
-
-/*
- * Determine whether the node should be displayed or not, depending on whether
- * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
- */
-static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask)
-{
- if (!(flags & SHOW_MEM_FILTER_NODES))
- return false;
-
- /*
- * no node mask - aka implicit memory numa policy. Do not bother with
- * the synchronization - read_mems_allowed_begin - because we do not
- * have to be precise here.
- */
- if (!nodemask)
- nodemask = &cpuset_current_mems_allowed;
-
- return !node_isset(nid, *nodemask);
-}
-
-static void show_migration_types(unsigned char type)
-{
- static const char types[MIGRATE_TYPES] = {
- [MIGRATE_UNMOVABLE] = 'U',
- [MIGRATE_MOVABLE] = 'M',
- [MIGRATE_RECLAIMABLE] = 'E',
- [MIGRATE_HIGHATOMIC] = 'H',
-#ifdef CONFIG_CMA
- [MIGRATE_CMA] = 'C',
-#endif
-#ifdef CONFIG_MEMORY_ISOLATION
- [MIGRATE_ISOLATE] = 'I',
-#endif
- };
- char tmp[MIGRATE_TYPES + 1];
- char *p = tmp;
- int i;
-
- for (i = 0; i < MIGRATE_TYPES; i++) {
- if (type & (1 << i))
- *p++ = types[i];
- }
-
- *p = '\0';
- printk(KERN_CONT "(%s) ", tmp);
-}
-
-static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx)
-{
- int zone_idx;
- for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++)
- if (zone_managed_pages(pgdat->node_zones + zone_idx))
- return true;
- return false;
-}
-
-/*
- * Show free area list (used inside shift_scroll-lock stuff)
- * We also calculate the percentage fragmentation. We do this by counting the
- * memory on each free list with the exception of the first item on the list.
- *
- * Bits in @filter:
- * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
- * cpuset.
- */
-void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
-{
- unsigned long free_pcp = 0;
- int cpu, nid;
- struct zone *zone;
- pg_data_t *pgdat;
-
- for_each_populated_zone(zone) {
- if (zone_idx(zone) > max_zone_idx)
- continue;
- if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
- continue;
-
- for_each_online_cpu(cpu)
- free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
- }
-
- printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
- " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
- " unevictable:%lu dirty:%lu writeback:%lu\n"
- " slab_reclaimable:%lu slab_unreclaimable:%lu\n"
- " mapped:%lu shmem:%lu pagetables:%lu\n"
- " sec_pagetables:%lu bounce:%lu\n"
- " kernel_misc_reclaimable:%lu\n"
- " free:%lu free_pcp:%lu free_cma:%lu\n",
- global_node_page_state(NR_ACTIVE_ANON),
- global_node_page_state(NR_INACTIVE_ANON),
- global_node_page_state(NR_ISOLATED_ANON),
- global_node_page_state(NR_ACTIVE_FILE),
- global_node_page_state(NR_INACTIVE_FILE),
- global_node_page_state(NR_ISOLATED_FILE),
- global_node_page_state(NR_UNEVICTABLE),
- global_node_page_state(NR_FILE_DIRTY),
- global_node_page_state(NR_WRITEBACK),
- global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
- global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
- global_node_page_state(NR_FILE_MAPPED),
- global_node_page_state(NR_SHMEM),
- global_node_page_state(NR_PAGETABLE),
- global_node_page_state(NR_SECONDARY_PAGETABLE),
- global_zone_page_state(NR_BOUNCE),
- global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
- global_zone_page_state(NR_FREE_PAGES),
- free_pcp,
- global_zone_page_state(NR_FREE_CMA_PAGES));
-
- for_each_online_pgdat(pgdat) {
- if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
- continue;
- if (!node_has_managed_zones(pgdat, max_zone_idx))
- continue;
-
- printk("Node %d"
- " active_anon:%lukB"
- " inactive_anon:%lukB"
- " active_file:%lukB"
- " inactive_file:%lukB"
- " unevictable:%lukB"
- " isolated(anon):%lukB"
- " isolated(file):%lukB"
- " mapped:%lukB"
- " dirty:%lukB"
- " writeback:%lukB"
- " shmem:%lukB"
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- " shmem_thp: %lukB"
- " shmem_pmdmapped: %lukB"
- " anon_thp: %lukB"
-#endif
- " writeback_tmp:%lukB"
- " kernel_stack:%lukB"
-#ifdef CONFIG_SHADOW_CALL_STACK
- " shadow_call_stack:%lukB"
-#endif
- " pagetables:%lukB"
- " sec_pagetables:%lukB"
- " all_unreclaimable? %s"
- "\n",
- pgdat->node_id,
- K(node_page_state(pgdat, NR_ACTIVE_ANON)),
- K(node_page_state(pgdat, NR_INACTIVE_ANON)),
- K(node_page_state(pgdat, NR_ACTIVE_FILE)),
- K(node_page_state(pgdat, NR_INACTIVE_FILE)),
- K(node_page_state(pgdat, NR_UNEVICTABLE)),
- K(node_page_state(pgdat, NR_ISOLATED_ANON)),
- K(node_page_state(pgdat, NR_ISOLATED_FILE)),
- K(node_page_state(pgdat, NR_FILE_MAPPED)),
- K(node_page_state(pgdat, NR_FILE_DIRTY)),
- K(node_page_state(pgdat, NR_WRITEBACK)),
- K(node_page_state(pgdat, NR_SHMEM)),
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- K(node_page_state(pgdat, NR_SHMEM_THPS)),
- K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
- K(node_page_state(pgdat, NR_ANON_THPS)),
-#endif
- K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
- node_page_state(pgdat, NR_KERNEL_STACK_KB),
-#ifdef CONFIG_SHADOW_CALL_STACK
- node_page_state(pgdat, NR_KERNEL_SCS_KB),
-#endif
- K(node_page_state(pgdat, NR_PAGETABLE)),
- K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
- pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
- "yes" : "no");
- }
-
- for_each_populated_zone(zone) {
- int i;
-
- if (zone_idx(zone) > max_zone_idx)
- continue;
- if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
- continue;
-
- free_pcp = 0;
- for_each_online_cpu(cpu)
- free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
-
- show_node(zone);
- printk(KERN_CONT
- "%s"
- " free:%lukB"
- " boost:%lukB"
- " min:%lukB"
- " low:%lukB"
- " high:%lukB"
- " reserved_highatomic:%luKB"
- " active_anon:%lukB"
- " inactive_anon:%lukB"
- " active_file:%lukB"
- " inactive_file:%lukB"
- " unevictable:%lukB"
- " writepending:%lukB"
- " present:%lukB"
- " managed:%lukB"
- " mlocked:%lukB"
- " bounce:%lukB"
- " free_pcp:%lukB"
- " local_pcp:%ukB"
- " free_cma:%lukB"
- "\n",
- zone->name,
- K(zone_page_state(zone, NR_FREE_PAGES)),
- K(zone->watermark_boost),
- K(min_wmark_pages(zone)),
- K(low_wmark_pages(zone)),
- K(high_wmark_pages(zone)),
- K(zone->nr_reserved_highatomic),
- K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
- K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
- K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
- K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
- K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
- K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
- K(zone->present_pages),
- K(zone_managed_pages(zone)),
- K(zone_page_state(zone, NR_MLOCK)),
- K(zone_page_state(zone, NR_BOUNCE)),
- K(free_pcp),
- K(this_cpu_read(zone->per_cpu_pageset->count)),
- K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
- printk("lowmem_reserve[]:");
- for (i = 0; i < MAX_NR_ZONES; i++)
- printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
- printk(KERN_CONT "\n");
- }
-
- for_each_populated_zone(zone) {
- unsigned int order;
- unsigned long nr[MAX_ORDER + 1], flags, total = 0;
- unsigned char types[MAX_ORDER + 1];
-
- if (zone_idx(zone) > max_zone_idx)
- continue;
- if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
- continue;
- show_node(zone);
- printk(KERN_CONT "%s: ", zone->name);
-
- spin_lock_irqsave(&zone->lock, flags);
- for (order = 0; order <= MAX_ORDER; order++) {
- struct free_area *area = &zone->free_area[order];
- int type;
-
- nr[order] = area->nr_free;
- total += nr[order] << order;
-
- types[order] = 0;
- for (type = 0; type < MIGRATE_TYPES; type++) {
- if (!free_area_empty(area, type))
- types[order] |= 1 << type;
- }
- }
- spin_unlock_irqrestore(&zone->lock, flags);
- for (order = 0; order <= MAX_ORDER; order++) {
- printk(KERN_CONT "%lu*%lukB ",
- nr[order], K(1UL) << order);
- if (nr[order])
- show_migration_types(types[order]);
- }
- printk(KERN_CONT "= %lukB\n", K(total));
- }
-
- for_each_online_node(nid) {
- if (show_mem_node_skip(filter, nid, nodemask))
- continue;
- hugetlb_show_meminfo_node(nid);
- }
-
- printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES));
-
- show_swap_cache_info();
-}
-
static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
{
zoneref->zone = zone;
diff --git a/mm/show_mem.c b/mm/show_mem.c
new file mode 100644
index 000000000000..9f1a5d8b03d1
--- /dev/null
+++ b/mm/show_mem.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic show_mem() implementation
+ *
+ * Copyright (C) 2008 Johannes Weiner <[email protected]>
+ */
+
+#include <linux/blkdev.h>
+#include <linux/cma.h>
+#include <linux/cpuset.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/swap.h>
+#include <linux/vmstat.h>
+
+#include "internal.h"
+#include "swap.h"
+
+atomic_long_t _totalram_pages __read_mostly;
+EXPORT_SYMBOL(_totalram_pages);
+unsigned long totalreserve_pages __read_mostly;
+unsigned long totalcma_pages __read_mostly;
+
+void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
+{
+ unsigned long total = 0, reserved = 0, highmem = 0;
+ struct zone *zone;
+
+ printk("Mem-Info:\n");
+ __show_free_areas(filter, nodemask, max_zone_idx);
+
+ for_each_populated_zone(zone) {
+
+ total += zone->present_pages;
+ reserved += zone->present_pages - zone_managed_pages(zone);
+
+ if (is_highmem(zone))
+ highmem += zone->present_pages;
+ }
+
+ printk("%lu pages RAM\n", total);
+ printk("%lu pages HighMem/MovableOnly\n", highmem);
+ printk("%lu pages reserved\n", reserved);
+#ifdef CONFIG_CMA
+ printk("%lu pages cma reserved\n", totalcma_pages);
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+ printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
+#endif
+}
+
+static inline void show_node(struct zone *zone)
+{
+ if (IS_ENABLED(CONFIG_NUMA))
+ printk("Node %d ", zone_to_nid(zone));
+}
+
+long si_mem_available(void)
+{
+ long available;
+ unsigned long pagecache;
+ unsigned long wmark_low = 0;
+ unsigned long pages[NR_LRU_LISTS];
+ unsigned long reclaimable;
+ struct zone *zone;
+ int lru;
+
+ for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
+ pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
+
+ for_each_zone(zone)
+ wmark_low += low_wmark_pages(zone);
+
+ /*
+ * Estimate the amount of memory available for userspace allocations,
+ * without causing swapping or OOM.
+ */
+ available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages;
+
+ /*
+ * Not all the page cache can be freed, otherwise the system will
+ * start swapping or thrashing. Assume at least half of the page
+ * cache, or the low watermark worth of cache, needs to stay.
+ */
+ pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
+ pagecache -= min(pagecache / 2, wmark_low);
+ available += pagecache;
+
+ /*
+ * Part of the reclaimable slab and other kernel memory consists of
+ * items that are in use, and cannot be freed. Cap this estimate at the
+ * low watermark.
+ */
+ reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) +
+ global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
+ available += reclaimable - min(reclaimable / 2, wmark_low);
+
+ if (available < 0)
+ available = 0;
+ return available;
+}
+EXPORT_SYMBOL_GPL(si_mem_available);
+
+void si_meminfo(struct sysinfo *val)
+{
+ val->totalram = totalram_pages();
+ val->sharedram = global_node_page_state(NR_SHMEM);
+ val->freeram = global_zone_page_state(NR_FREE_PAGES);
+ val->bufferram = nr_blockdev_pages();
+ val->totalhigh = totalhigh_pages();
+ val->freehigh = nr_free_highpages();
+ val->mem_unit = PAGE_SIZE;
+}
+
+EXPORT_SYMBOL(si_meminfo);
+
+#ifdef CONFIG_NUMA
+void si_meminfo_node(struct sysinfo *val, int nid)
+{
+ int zone_type; /* needs to be signed */
+ unsigned long managed_pages = 0;
+ unsigned long managed_highpages = 0;
+ unsigned long free_highpages = 0;
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+ managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]);
+ val->totalram = managed_pages;
+ val->sharedram = node_page_state(pgdat, NR_SHMEM);
+ val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES);
+#ifdef CONFIG_HIGHMEM
+ for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
+ struct zone *zone = &pgdat->node_zones[zone_type];
+
+ if (is_highmem(zone)) {
+ managed_highpages += zone_managed_pages(zone);
+ free_highpages += zone_page_state(zone, NR_FREE_PAGES);
+ }
+ }
+ val->totalhigh = managed_highpages;
+ val->freehigh = free_highpages;
+#else
+ val->totalhigh = managed_highpages;
+ val->freehigh = free_highpages;
+#endif
+ val->mem_unit = PAGE_SIZE;
+}
+#endif
+
+/*
+ * Determine whether the node should be displayed or not, depending on whether
+ * SHOW_MEM_FILTER_NODES was passed to show_free_areas().
+ */
+static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask)
+{
+ if (!(flags & SHOW_MEM_FILTER_NODES))
+ return false;
+
+ /*
+ * no node mask - aka implicit memory numa policy. Do not bother with
+ * the synchronization - read_mems_allowed_begin - because we do not
+ * have to be precise here.
+ */
+ if (!nodemask)
+ nodemask = &cpuset_current_mems_allowed;
+
+ return !node_isset(nid, *nodemask);
+}
+
+static void show_migration_types(unsigned char type)
+{
+ static const char types[MIGRATE_TYPES] = {
+ [MIGRATE_UNMOVABLE] = 'U',
+ [MIGRATE_MOVABLE] = 'M',
+ [MIGRATE_RECLAIMABLE] = 'E',
+ [MIGRATE_HIGHATOMIC] = 'H',
+#ifdef CONFIG_CMA
+ [MIGRATE_CMA] = 'C',
+#endif
+#ifdef CONFIG_MEMORY_ISOLATION
+ [MIGRATE_ISOLATE] = 'I',
+#endif
+ };
+ char tmp[MIGRATE_TYPES + 1];
+ char *p = tmp;
+ int i;
+
+ for (i = 0; i < MIGRATE_TYPES; i++) {
+ if (type & (1 << i))
+ *p++ = types[i];
+ }
+
+ *p = '\0';
+ printk(KERN_CONT "(%s) ", tmp);
+}
+
+static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx)
+{
+ int zone_idx;
+ for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++)
+ if (zone_managed_pages(pgdat->node_zones + zone_idx))
+ return true;
+ return false;
+}
+
+/*
+ * Show free area list (used inside shift_scroll-lock stuff)
+ * We also calculate the percentage fragmentation. We do this by counting the
+ * memory on each free list with the exception of the first item on the list.
+ *
+ * Bits in @filter:
+ * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
+ * cpuset.
+ */
+void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
+{
+ unsigned long free_pcp = 0;
+ int cpu, nid;
+ struct zone *zone;
+ pg_data_t *pgdat;
+
+ for_each_populated_zone(zone) {
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
+ continue;
+
+ for_each_online_cpu(cpu)
+ free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+ }
+
+ printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
+ " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
+ " unevictable:%lu dirty:%lu writeback:%lu\n"
+ " slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+ " mapped:%lu shmem:%lu pagetables:%lu\n"
+ " sec_pagetables:%lu bounce:%lu\n"
+ " kernel_misc_reclaimable:%lu\n"
+ " free:%lu free_pcp:%lu free_cma:%lu\n",
+ global_node_page_state(NR_ACTIVE_ANON),
+ global_node_page_state(NR_INACTIVE_ANON),
+ global_node_page_state(NR_ISOLATED_ANON),
+ global_node_page_state(NR_ACTIVE_FILE),
+ global_node_page_state(NR_INACTIVE_FILE),
+ global_node_page_state(NR_ISOLATED_FILE),
+ global_node_page_state(NR_UNEVICTABLE),
+ global_node_page_state(NR_FILE_DIRTY),
+ global_node_page_state(NR_WRITEBACK),
+ global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
+ global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
+ global_node_page_state(NR_FILE_MAPPED),
+ global_node_page_state(NR_SHMEM),
+ global_node_page_state(NR_PAGETABLE),
+ global_node_page_state(NR_SECONDARY_PAGETABLE),
+ global_zone_page_state(NR_BOUNCE),
+ global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
+ global_zone_page_state(NR_FREE_PAGES),
+ free_pcp,
+ global_zone_page_state(NR_FREE_CMA_PAGES));
+
+ for_each_online_pgdat(pgdat) {
+ if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
+ continue;
+ if (!node_has_managed_zones(pgdat, max_zone_idx))
+ continue;
+
+ printk("Node %d"
+ " active_anon:%lukB"
+ " inactive_anon:%lukB"
+ " active_file:%lukB"
+ " inactive_file:%lukB"
+ " unevictable:%lukB"
+ " isolated(anon):%lukB"
+ " isolated(file):%lukB"
+ " mapped:%lukB"
+ " dirty:%lukB"
+ " writeback:%lukB"
+ " shmem:%lukB"
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ " shmem_thp: %lukB"
+ " shmem_pmdmapped: %lukB"
+ " anon_thp: %lukB"
+#endif
+ " writeback_tmp:%lukB"
+ " kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
+ " pagetables:%lukB"
+ " sec_pagetables:%lukB"
+ " all_unreclaimable? %s"
+ "\n",
+ pgdat->node_id,
+ K(node_page_state(pgdat, NR_ACTIVE_ANON)),
+ K(node_page_state(pgdat, NR_INACTIVE_ANON)),
+ K(node_page_state(pgdat, NR_ACTIVE_FILE)),
+ K(node_page_state(pgdat, NR_INACTIVE_FILE)),
+ K(node_page_state(pgdat, NR_UNEVICTABLE)),
+ K(node_page_state(pgdat, NR_ISOLATED_ANON)),
+ K(node_page_state(pgdat, NR_ISOLATED_FILE)),
+ K(node_page_state(pgdat, NR_FILE_MAPPED)),
+ K(node_page_state(pgdat, NR_FILE_DIRTY)),
+ K(node_page_state(pgdat, NR_WRITEBACK)),
+ K(node_page_state(pgdat, NR_SHMEM)),
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ K(node_page_state(pgdat, NR_SHMEM_THPS)),
+ K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
+ K(node_page_state(pgdat, NR_ANON_THPS)),
+#endif
+ K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
+ node_page_state(pgdat, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ node_page_state(pgdat, NR_KERNEL_SCS_KB),
+#endif
+ K(node_page_state(pgdat, NR_PAGETABLE)),
+ K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
+ pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
+ "yes" : "no");
+ }
+
+ for_each_populated_zone(zone) {
+ int i;
+
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
+ continue;
+
+ free_pcp = 0;
+ for_each_online_cpu(cpu)
+ free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
+
+ show_node(zone);
+ printk(KERN_CONT
+ "%s"
+ " free:%lukB"
+ " boost:%lukB"
+ " min:%lukB"
+ " low:%lukB"
+ " high:%lukB"
+ " reserved_highatomic:%luKB"
+ " active_anon:%lukB"
+ " inactive_anon:%lukB"
+ " active_file:%lukB"
+ " inactive_file:%lukB"
+ " unevictable:%lukB"
+ " writepending:%lukB"
+ " present:%lukB"
+ " managed:%lukB"
+ " mlocked:%lukB"
+ " bounce:%lukB"
+ " free_pcp:%lukB"
+ " local_pcp:%ukB"
+ " free_cma:%lukB"
+ "\n",
+ zone->name,
+ K(zone_page_state(zone, NR_FREE_PAGES)),
+ K(zone->watermark_boost),
+ K(min_wmark_pages(zone)),
+ K(low_wmark_pages(zone)),
+ K(high_wmark_pages(zone)),
+ K(zone->nr_reserved_highatomic),
+ K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)),
+ K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)),
+ K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
+ K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
+ K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
+ K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
+ K(zone->present_pages),
+ K(zone_managed_pages(zone)),
+ K(zone_page_state(zone, NR_MLOCK)),
+ K(zone_page_state(zone, NR_BOUNCE)),
+ K(free_pcp),
+ K(this_cpu_read(zone->per_cpu_pageset->count)),
+ K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
+ printk("lowmem_reserve[]:");
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
+ printk(KERN_CONT "\n");
+ }
+
+ for_each_populated_zone(zone) {
+ unsigned int order;
+ unsigned long nr[MAX_ORDER + 1], flags, total = 0;
+ unsigned char types[MAX_ORDER + 1];
+
+ if (zone_idx(zone) > max_zone_idx)
+ continue;
+ if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
+ continue;
+ show_node(zone);
+ printk(KERN_CONT "%s: ", zone->name);
+
+ spin_lock_irqsave(&zone->lock, flags);
+ for (order = 0; order <= MAX_ORDER; order++) {
+ struct free_area *area = &zone->free_area[order];
+ int type;
+
+ nr[order] = area->nr_free;
+ total += nr[order] << order;
+
+ types[order] = 0;
+ for (type = 0; type < MIGRATE_TYPES; type++) {
+ if (!free_area_empty(area, type))
+ types[order] |= 1 << type;
+ }
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+ for (order = 0; order <= MAX_ORDER; order++) {
+ printk(KERN_CONT "%lu*%lukB ",
+ nr[order], K(1UL) << order);
+ if (nr[order])
+ show_migration_types(types[order]);
+ }
+ printk(KERN_CONT "= %lukB\n", K(total));
+ }
+
+ for_each_online_node(nid) {
+ if (show_mem_node_skip(filter, nid, nodemask))
+ continue;
+ hugetlb_show_meminfo_node(nid);
+ }
+
+ printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES));
+
+ show_swap_cache_info();
+}
--
2.35.3

2023-05-08 07:02:58

by Kefeng Wang

Subject: [PATCH 05/12] mm: page_alloc: squash page_is_consistent()

Squash page_is_consistent() into bad_range(), as there is only one
caller.

Signed-off-by: Kefeng Wang <[email protected]>
---
mm/page_alloc.c | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9a85238f1140..348dcbaca757 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -517,13 +517,6 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
return ret;
}

-static int page_is_consistent(struct zone *zone, struct page *page)
-{
- if (zone != page_zone(page))
- return 0;
-
- return 1;
-}
/*
* Temporary debugging check for pages not lying within a given zone.
*/
@@ -531,7 +524,7 @@ static int __maybe_unused bad_range(struct zone *zone, struct page *page)
{
if (page_outside_zone_boundaries(zone, page))
return 1;
- if (!page_is_consistent(zone, page))
+ if (zone != page_zone(page))
return 1;

return 0;
--
2.35.3

2023-05-08 07:03:14

by Kefeng Wang

Subject: [PATCH 03/12] mm: page_alloc: move set_zone_contiguous() into mm_init.c

set_zone_contiguous() is only used in mm init/hotplug, and
clear_zone_contiguous() is only used in hotplug; move them from
page_alloc.c to the more appropriate file.

Signed-off-by: Kefeng Wang <[email protected]>
---
include/linux/memory_hotplug.h | 3 --
mm/internal.h | 7 +++
mm/mm_init.c | 74 +++++++++++++++++++++++++++++++
mm/page_alloc.c | 79 ----------------------------------
4 files changed, 81 insertions(+), 82 deletions(-)

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 9fcbf5706595..04bc286eed42 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -326,9 +326,6 @@ static inline int remove_memory(u64 start, u64 size)
static inline void __remove_memory(u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */

-extern void set_zone_contiguous(struct zone *zone);
-extern void clear_zone_contiguous(struct zone *zone);
-
#ifdef CONFIG_MEMORY_HOTPLUG
extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat);
extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
diff --git a/mm/internal.h b/mm/internal.h
index e28442c0858a..9482862b28cc 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -371,6 +371,13 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
}

+void set_zone_contiguous(struct zone *zone);
+
+static inline void clear_zone_contiguous(struct zone *zone)
+{
+ zone->contiguous = false;
+}
+
extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __putback_isolated_page(struct page *page, unsigned int order,
int mt);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 15201887f8e0..1f30b9e16577 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2330,6 +2330,80 @@ void __init init_cma_reserved_pageblock(struct page *page)
}
#endif

+/*
+ * Check that the whole (or subset of) a pageblock given by the interval of
+ * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
+ * with the migration of free compaction scanner.
+ *
+ * Return struct page pointer of start_pfn, or NULL if checks were not passed.
+ *
+ * It's possible on some configurations to have a setup like node0 node1 node0
+ * i.e. it's possible that all pages within a zones range of pages do not
+ * belong to a single zone. We assume that a border between node0 and node1
+ * can occur within a single pageblock, but not a node0 node1 node0
+ * interleaving within a single pageblock. It is therefore sufficient to check
+ * the first and last page of a pageblock and avoid checking each individual
+ * page in a pageblock.
+ *
+ * Note: the function may return non-NULL struct page even for a page block
+ * which contains a memory hole (i.e. there is no physical memory for a subset
+ * of the pfn range). For example, if the pageblock order is MAX_ORDER, which
+ * will fall into 2 sub-sections, and the end pfn of the pageblock may be hole
+ * even though the start pfn is online and valid. This should be safe most of
+ * the time because struct pages are still initialized via init_unavailable_range()
+ * and pfn walkers shouldn't touch any physical memory range for which they do
+ * not recognize any specific metadata in struct pages.
+ */
+struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
+ unsigned long end_pfn, struct zone *zone)
+{
+ struct page *start_page;
+ struct page *end_page;
+
+ /* end_pfn is one past the range we are checking */
+ end_pfn--;
+
+ if (!pfn_valid(end_pfn))
+ return NULL;
+
+ start_page = pfn_to_online_page(start_pfn);
+ if (!start_page)
+ return NULL;
+
+ if (page_zone(start_page) != zone)
+ return NULL;
+
+ end_page = pfn_to_page(end_pfn);
+
+ /* This gives a shorter code than deriving page_zone(end_page) */
+ if (page_zone_id(start_page) != page_zone_id(end_page))
+ return NULL;
+
+ return start_page;
+}
+
+void set_zone_contiguous(struct zone *zone)
+{
+ unsigned long block_start_pfn = zone->zone_start_pfn;
+ unsigned long block_end_pfn;
+
+ block_end_pfn = pageblock_end_pfn(block_start_pfn);
+ for (; block_start_pfn < zone_end_pfn(zone);
+ block_start_pfn = block_end_pfn,
+ block_end_pfn += pageblock_nr_pages) {
+
+ block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
+
+ if (!__pageblock_pfn_to_page(block_start_pfn,
+ block_end_pfn, zone))
+ return;
+ cond_resched();
+ }
+
+ /* We confirm that there is no hole */
+ zone->contiguous = true;
+}
+
void __init page_alloc_init_late(void)
{
struct zone *zone;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4f094ba7c8fb..fe7c1ee5becd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1480,85 +1480,6 @@ void __free_pages_core(struct page *page, unsigned int order)
__free_pages_ok(page, order, FPI_TO_TAIL);
}

-/*
- * Check that the whole (or subset of) a pageblock given by the interval of
- * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
- * with the migration of free compaction scanner.
- *
- * Return struct page pointer of start_pfn, or NULL if checks were not passed.
- *
- * It's possible on some configurations to have a setup like node0 node1 node0
- * i.e. it's possible that all pages within a zones range of pages do not
- * belong to a single zone. We assume that a border between node0 and node1
- * can occur within a single pageblock, but not a node0 node1 node0
- * interleaving within a single pageblock. It is therefore sufficient to check
- * the first and last page of a pageblock and avoid checking each individual
- * page in a pageblock.
- *
- * Note: the function may return non-NULL struct page even for a page block
- * which contains a memory hole (i.e. there is no physical memory for a subset
- * of the pfn range). For example, if the pageblock order is MAX_ORDER, which
- * will fall into 2 sub-sections, and the end pfn of the pageblock may be hole
- * even though the start pfn is online and valid. This should be safe most of
- * the time because struct pages are still initialized via init_unavailable_range()
- * and pfn walkers shouldn't touch any physical memory range for which they do
- * not recognize any specific metadata in struct pages.
- */
-struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
- unsigned long end_pfn, struct zone *zone)
-{
- struct page *start_page;
- struct page *end_page;
-
- /* end_pfn is one past the range we are checking */
- end_pfn--;
-
- if (!pfn_valid(end_pfn))
- return NULL;
-
- start_page = pfn_to_online_page(start_pfn);
- if (!start_page)
- return NULL;
-
- if (page_zone(start_page) != zone)
- return NULL;
-
- end_page = pfn_to_page(end_pfn);
-
- /* This gives a shorter code than deriving page_zone(end_page) */
- if (page_zone_id(start_page) != page_zone_id(end_page))
- return NULL;
-
- return start_page;
-}
-
-void set_zone_contiguous(struct zone *zone)
-{
- unsigned long block_start_pfn = zone->zone_start_pfn;
- unsigned long block_end_pfn;
-
- block_end_pfn = pageblock_end_pfn(block_start_pfn);
- for (; block_start_pfn < zone_end_pfn(zone);
- block_start_pfn = block_end_pfn,
- block_end_pfn += pageblock_nr_pages) {
-
- block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
-
- if (!__pageblock_pfn_to_page(block_start_pfn,
- block_end_pfn, zone))
- return;
- cond_resched();
- }
-
- /* We confirm that there is no hole */
- zone->contiguous = true;
-}
-
-void clear_zone_contiguous(struct zone *zone)
-{
- zone->contiguous = false;
-}
-
/*
* The order of subdivision here is critical for the IO subsystem.
* Please do not alter this order without good reasons and regression
--
2.35.3

2023-05-08 07:03:57

by Kefeng Wang

Subject: [PATCH 12/12] mm: page_alloc: move sysctls into their own file

This moves all page-alloc-related sysctls to their own file, as part of
the kernel/sysctl.c spring cleaning; it also moves some function
declarations from mm.h into internal.h.
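
As a minimal sketch of the pattern (the "demo_*" names below are
illustrative and not taken from this patch): the subsystem keeps its own
ctl_table and registers it under "vm" with register_sysctl_init() from
an init path, instead of listing the entries in kernel/sysctl.c:

	static int demo_value;

	static struct ctl_table demo_sysctl_table[] = {
		{
			.procname	= "demo_value",
			.data		= &demo_value,
			.maxlen		= sizeof(demo_value),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_minmax,
			.extra1		= SYSCTL_ZERO,
		},
		{}
	};

	void __init demo_sysctl_init(void)
	{
		/* Creates /proc/sys/vm/demo_value. */
		register_sysctl_init("vm", demo_sysctl_table);
	}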

Signed-off-by: Kefeng Wang <[email protected]>
---
include/linux/mm.h | 11 -----
include/linux/mmzone.h | 21 ---------
kernel/sysctl.c | 67 ---------------------------
mm/internal.h | 9 ++++
mm/mm_init.c | 2 +
mm/page_alloc.c | 103 +++++++++++++++++++++++++++++++++++------
6 files changed, 100 insertions(+), 113 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index fc8732a119cf..d533ef955dd0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3045,12 +3045,6 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn);
#endif

extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_range(unsigned long, int, unsigned long,
- unsigned long, unsigned long, enum meminit_context,
- struct vmem_altmap *, int migratetype);
-extern void setup_per_zone_wmarks(void);
-extern void calculate_min_free_kbytes(void);
-extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
extern void __init mmap_init(void);

@@ -3071,11 +3065,6 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);

extern void setup_per_cpu_pageset(void);

-/* page_alloc.c */
-extern int min_free_kbytes;
-extern int watermark_boost_factor;
-extern int watermark_scale_factor;
-
/* nommu.c */
extern atomic_long_t mmap_pages_allocated;
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a4889c9d4055..3a68326c9989 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1512,27 +1512,6 @@ static inline bool has_managed_dma(void)
}
#endif

-/* These two functions are used to setup the per zone pages min values */
-struct ctl_table;
-
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *,
- loff_t *);
-int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
- size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
- size_t *, loff_t *);
-int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-int numa_zonelist_order_handler(struct ctl_table *, int,
- void *, size_t *, loff_t *);
-extern int percpu_pagelist_high_fraction;
-extern char numa_zonelist_order[];
-#define NUMA_ZONELIST_ORDER_LEN 16

#ifndef CONFIG_NUMA

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bfe53e835524..a57de67f032f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2119,13 +2119,6 @@ static struct ctl_table vm_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
- {
- .procname = "lowmem_reserve_ratio",
- .data = &sysctl_lowmem_reserve_ratio,
- .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
- .mode = 0644,
- .proc_handler = lowmem_reserve_ratio_sysctl_handler,
- },
{
.procname = "drop_caches",
.data = &sysctl_drop_caches,
@@ -2135,39 +2128,6 @@ static struct ctl_table vm_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = SYSCTL_FOUR,
},
- {
- .procname = "min_free_kbytes",
- .data = &min_free_kbytes,
- .maxlen = sizeof(min_free_kbytes),
- .mode = 0644,
- .proc_handler = min_free_kbytes_sysctl_handler,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "watermark_boost_factor",
- .data = &watermark_boost_factor,
- .maxlen = sizeof(watermark_boost_factor),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "watermark_scale_factor",
- .data = &watermark_scale_factor,
- .maxlen = sizeof(watermark_scale_factor),
- .mode = 0644,
- .proc_handler = watermark_scale_factor_sysctl_handler,
- .extra1 = SYSCTL_ONE,
- .extra2 = SYSCTL_THREE_THOUSAND,
- },
- {
- .procname = "percpu_pagelist_high_fraction",
- .data = &percpu_pagelist_high_fraction,
- .maxlen = sizeof(percpu_pagelist_high_fraction),
- .mode = 0644,
- .proc_handler = percpu_pagelist_high_fraction_sysctl_handler,
- .extra1 = SYSCTL_ZERO,
- },
{
.procname = "page_lock_unfairness",
.data = &sysctl_page_lock_unfairness,
@@ -2223,24 +2183,6 @@ static struct ctl_table vm_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
- {
- .procname = "min_unmapped_ratio",
- .data = &sysctl_min_unmapped_ratio,
- .maxlen = sizeof(sysctl_min_unmapped_ratio),
- .mode = 0644,
- .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE_HUNDRED,
- },
- {
- .procname = "min_slab_ratio",
- .data = &sysctl_min_slab_ratio,
- .maxlen = sizeof(sysctl_min_slab_ratio),
- .mode = 0644,
- .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE_HUNDRED,
- },
#endif
#ifdef CONFIG_SMP
{
@@ -2267,15 +2209,6 @@ static struct ctl_table vm_table[] = {
.proc_handler = mmap_min_addr_handler,
},
#endif
-#ifdef CONFIG_NUMA
- {
- .procname = "numa_zonelist_order",
- .data = &numa_zonelist_order,
- .maxlen = NUMA_ZONELIST_ORDER_LEN,
- .mode = 0644,
- .proc_handler = numa_zonelist_order_handler,
- },
-#endif
#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
(defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
{
diff --git a/mm/internal.h b/mm/internal.h
index 9482862b28cc..8d8b2faebc89 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -213,6 +213,15 @@ static inline bool is_check_pages_enabled(void)
return static_branch_unlikely(&check_pages_enabled);
}

+extern int min_free_kbytes;
+
+void page_alloc_sysctl_init(void);
+void setup_per_zone_wmarks(void);
+void calculate_min_free_kbytes(void);
+int __meminit init_per_zone_wmark_min(void);
+void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
+ unsigned long, enum meminit_context, struct vmem_altmap *, int);
+
/*
* Structure for holding the mostly immutable allocation parameters passed
* between functions involved in allocations, including the alloc_pages*
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 1f30b9e16577..afa56cd50ca4 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2444,6 +2444,8 @@ void __init page_alloc_init_late(void)
/* Initialize page ext after all struct pages are initialized. */
if (deferred_struct_pages)
page_ext_init();
+
+ page_alloc_sysctl_init();
}

#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index aa4e4af9fc88..880f08575d59 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -206,7 +206,6 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
};
EXPORT_SYMBOL(node_states);

-int percpu_pagelist_high_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;

/*
@@ -302,8 +301,8 @@ compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = {

int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
-int watermark_boost_factor __read_mostly = 15000;
-int watermark_scale_factor = 10;
+static int watermark_boost_factor __read_mostly = 15000;
+static int watermark_scale_factor = 10;

/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
int movable_zone;
@@ -4828,12 +4827,12 @@ static int __parse_numa_zonelist_order(char *s)
return 0;
}

-char numa_zonelist_order[] = "Node";
-
+static char numa_zonelist_order[] = "Node";
+#define NUMA_ZONELIST_ORDER_LEN 16
/*
* sysctl handler for numa_zonelist_order
*/
-int numa_zonelist_order_handler(struct ctl_table *table, int write,
+static int numa_zonelist_order_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
if (write)
@@ -4841,7 +4840,6 @@ int numa_zonelist_order_handler(struct ctl_table *table, int write,
return proc_dostring(table, write, buffer, length, ppos);
}

-
static int node_load[MAX_NUMNODES];

/**
@@ -5244,6 +5242,7 @@ static int zone_batchsize(struct zone *zone)
#endif
}

+static int percpu_pagelist_high_fraction;
static int zone_highsize(struct zone *zone, int batch, int cpu_online)
{
#ifdef CONFIG_MMU
@@ -5773,7 +5772,7 @@ postcore_initcall(init_per_zone_wmark_min)
* that we can call two helper functions whenever min_free_kbytes
* changes.
*/
-int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
+static int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -5789,7 +5788,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
return 0;
}

-int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
+static int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -5819,7 +5818,7 @@ static void setup_min_unmapped_ratio(void)
}


-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
+static int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -5846,7 +5845,7 @@ static void setup_min_slab_ratio(void)
sysctl_min_slab_ratio) / 100;
}

-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+static int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
int rc;
@@ -5870,8 +5869,8 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
* minimum watermarks. The lowmem reserve ratio can only make sense
* if in function of the boot time zone sizes.
*/
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
- void *buffer, size_t *length, loff_t *ppos)
+static int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table,
+ int write, void *buffer, size_t *length, loff_t *ppos)
{
int i;

@@ -5891,7 +5890,7 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
* cpu. It is the fraction of total pages in each zone that a hot per cpu
* pagelist can have before it gets flushed back to buddy allocator.
*/
-int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table,
+static int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table,
int write, void *buffer, size_t *length, loff_t *ppos)
{
struct zone *zone;
@@ -5924,6 +5923,82 @@ int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *table,
return ret;
}

+static struct ctl_table page_alloc_sysctl_table[] = {
+ {
+ .procname = "min_free_kbytes",
+ .data = &min_free_kbytes,
+ .maxlen = sizeof(min_free_kbytes),
+ .mode = 0644,
+ .proc_handler = min_free_kbytes_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "watermark_boost_factor",
+ .data = &watermark_boost_factor,
+ .maxlen = sizeof(watermark_boost_factor),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "watermark_scale_factor",
+ .data = &watermark_scale_factor,
+ .maxlen = sizeof(watermark_scale_factor),
+ .mode = 0644,
+ .proc_handler = watermark_scale_factor_sysctl_handler,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_THREE_THOUSAND,
+ },
+ {
+ .procname = "percpu_pagelist_high_fraction",
+ .data = &percpu_pagelist_high_fraction,
+ .maxlen = sizeof(percpu_pagelist_high_fraction),
+ .mode = 0644,
+ .proc_handler = percpu_pagelist_high_fraction_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "lowmem_reserve_ratio",
+ .data = &sysctl_lowmem_reserve_ratio,
+ .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
+ .mode = 0644,
+ .proc_handler = lowmem_reserve_ratio_sysctl_handler,
+ },
+#ifdef CONFIG_NUMA
+ {
+ .procname = "numa_zonelist_order",
+ .data = &numa_zonelist_order,
+ .maxlen = NUMA_ZONELIST_ORDER_LEN,
+ .mode = 0644,
+ .proc_handler = numa_zonelist_order_handler,
+ },
+ {
+ .procname = "min_unmapped_ratio",
+ .data = &sysctl_min_unmapped_ratio,
+ .maxlen = sizeof(sysctl_min_unmapped_ratio),
+ .mode = 0644,
+ .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+ {
+ .procname = "min_slab_ratio",
+ .data = &sysctl_min_slab_ratio,
+ .maxlen = sizeof(sysctl_min_slab_ratio),
+ .mode = 0644,
+ .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE_HUNDRED,
+ },
+#endif
+ {}
+};
+
+void __init page_alloc_sysctl_init(void)
+{
+ register_sysctl_init("vm", page_alloc_sysctl_table);
+}
+
#ifdef CONFIG_CONTIG_ALLOC
/* Usage: See admin-guide/dynamic-debug-howto.rst */
static void alloc_contig_dump_pages(struct list_head *page_list)
--
2.35.3

2023-05-08 07:57:00

by Kefeng Wang

Subject: Re: [PATCH 03/12] mm: page_alloc: move set_zone_contiguous() into mm_init.c



On 2023/5/8 15:12, Huang, Ying wrote:
> Kefeng Wang <[email protected]> writes:
>
>> set_zone_contiguous() is only used in mm init/hotplug, and
>> clear_zone_contiguous() is only used in hotplug; move them from
>> page_alloc.c to the more appropriate file.
>>
>> Signed-off-by: Kefeng Wang <[email protected]>
>> ---
>> include/linux/memory_hotplug.h | 3 --
>> mm/internal.h | 7 +++
>> mm/mm_init.c | 74 +++++++++++++++++++++++++++++++
>> mm/page_alloc.c | 79 ----------------------------------
>> 4 files changed, 81 insertions(+), 82 deletions(-)
>>
...
>>
>> +/*
>> + * Check that the whole (or subset of) a pageblock given by the interval of
>> + * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
>> + * with the migration of free compaction scanner.
>> + *
>> + * Return struct page pointer of start_pfn, or NULL if checks were not passed.
>> + *
>> + * It's possible on some configurations to have a setup like node0 node1 node0
>> + * i.e. it's possible that all pages within a zones range of pages do not
>> + * belong to a single zone. We assume that a border between node0 and node1
>> + * can occur within a single pageblock, but not a node0 node1 node0
>> + * interleaving within a single pageblock. It is therefore sufficient to check
>> + * the first and last page of a pageblock and avoid checking each individual
>> + * page in a pageblock.
>> + *
>> + * Note: the function may return non-NULL struct page even for a page block
>> + * which contains a memory hole (i.e. there is no physical memory for a subset
>> + * of the pfn range). For example, if the pageblock order is MAX_ORDER, which
>> + * will fall into 2 sub-sections, and the end pfn of the pageblock may be hole
>> + * even though the start pfn is online and valid. This should be safe most of
>> + * the time because struct pages are still initialized via init_unavailable_range()
>> + * and pfn walkers shouldn't touch any physical memory range for which they do
>> + * not recognize any specific metadata in struct pages.
>> + */
>> +struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
>> + unsigned long end_pfn, struct zone *zone)
>
> __pageblock_pfn_to_page() is also called by compaction code (e.g.,
> isolate_freepages_range() -> pageblock_pfn_to_page() ->
> __pageblock_pfn_to_page()).
>
> So, it is used not only by initialization and hotplug?
>

I should drop the move of this function, thanks for the reminder.
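
For context, the path that compaction hits is the inline wrapper in
mm/internal.h, which only falls back to __pageblock_pfn_to_page() when
the zone is not known to be contiguous; roughly (a sketch from the
surrounding tree, not part of this patch):

	static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
				unsigned long end_pfn, struct zone *zone)
	{
		/* Fast path: a fully contiguous zone needs no per-block checks. */
		if (zone->contiguous)
			return pfn_to_page(start_pfn);

		return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
	}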

> Best Regards,
> Huang, Ying

2023-05-08 07:59:14

by Huang, Ying

Subject: Re: [PATCH 03/12] mm: page_alloc: move set_zone_contiguous() into mm_init.c

Kefeng Wang <[email protected]> writes:

> set_zone_contiguous() is only used in mm init/hotplug, and
> clear_zone_contiguous() is only used in hotplug; move them from
> page_alloc.c to the more appropriate file.
>
> Signed-off-by: Kefeng Wang <[email protected]>
> ---
> include/linux/memory_hotplug.h | 3 --
> mm/internal.h | 7 +++
> mm/mm_init.c | 74 +++++++++++++++++++++++++++++++
> mm/page_alloc.c | 79 ----------------------------------
> 4 files changed, 81 insertions(+), 82 deletions(-)
>
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 9fcbf5706595..04bc286eed42 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -326,9 +326,6 @@ static inline int remove_memory(u64 start, u64 size)
> static inline void __remove_memory(u64 start, u64 size) {}
> #endif /* CONFIG_MEMORY_HOTREMOVE */
>
> -extern void set_zone_contiguous(struct zone *zone);
> -extern void clear_zone_contiguous(struct zone *zone);
> -
> #ifdef CONFIG_MEMORY_HOTPLUG
> extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat);
> extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
> diff --git a/mm/internal.h b/mm/internal.h
> index e28442c0858a..9482862b28cc 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -371,6 +371,13 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
> return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
> }
>
> +void set_zone_contiguous(struct zone *zone);
> +
> +static inline void clear_zone_contiguous(struct zone *zone)
> +{
> + zone->contiguous = false;
> +}
> +
> extern int __isolate_free_page(struct page *page, unsigned int order);
> extern void __putback_isolated_page(struct page *page, unsigned int order,
> int mt);
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 15201887f8e0..1f30b9e16577 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -2330,6 +2330,80 @@ void __init init_cma_reserved_pageblock(struct page *page)
> }
> #endif
>
> +/*
> + * Check that the whole (or subset of) a pageblock given by the interval of
> + * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
> + * with the migration of free compaction scanner.
> + *
> + * Return struct page pointer of start_pfn, or NULL if checks were not passed.
> + *
> + * It's possible on some configurations to have a setup like node0 node1 node0
> + * i.e. it's possible that all pages within a zones range of pages do not
> + * belong to a single zone. We assume that a border between node0 and node1
> + * can occur within a single pageblock, but not a node0 node1 node0
> + * interleaving within a single pageblock. It is therefore sufficient to check
> + * the first and last page of a pageblock and avoid checking each individual
> + * page in a pageblock.
> + *
> + * Note: the function may return non-NULL struct page even for a page block
> + * which contains a memory hole (i.e. there is no physical memory for a subset
> + * of the pfn range). For example, if the pageblock order is MAX_ORDER, which
> + * will fall into 2 sub-sections, and the end pfn of the pageblock may be hole
> + * even though the start pfn is online and valid. This should be safe most of
> + * the time because struct pages are still initialized via init_unavailable_range()
> + * and pfn walkers shouldn't touch any physical memory range for which they do
> + * not recognize any specific metadata in struct pages.
> + */
> +struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
> + unsigned long end_pfn, struct zone *zone)

__pageblock_pfn_to_page() is also called by the compaction code (e.g.,
isolate_freepages_range() -> pageblock_pfn_to_page() ->
__pageblock_pfn_to_page()).

So, it is used not only by initialization and hotplug?

Best Regards,
Huang, Ying

> +{
> + struct page *start_page;
> + struct page *end_page;
> +
> + /* end_pfn is one past the range we are checking */
> + end_pfn--;
> +
> + if (!pfn_valid(end_pfn))
> + return NULL;
> +
> + start_page = pfn_to_online_page(start_pfn);
> + if (!start_page)
> + return NULL;
> +
> + if (page_zone(start_page) != zone)
> + return NULL;
> +
> + end_page = pfn_to_page(end_pfn);
> +
> + /* This gives a shorter code than deriving page_zone(end_page) */
> + if (page_zone_id(start_page) != page_zone_id(end_page))
> + return NULL;
> +
> + return start_page;
> +}
> +
> +void set_zone_contiguous(struct zone *zone)
> +{
> + unsigned long block_start_pfn = zone->zone_start_pfn;
> + unsigned long block_end_pfn;
> +
> + block_end_pfn = pageblock_end_pfn(block_start_pfn);
> + for (; block_start_pfn < zone_end_pfn(zone);
> + block_start_pfn = block_end_pfn,
> + block_end_pfn += pageblock_nr_pages) {
> +
> + block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
> +
> + if (!__pageblock_pfn_to_page(block_start_pfn,
> + block_end_pfn, zone))
> + return;
> + cond_resched();
> + }
> +
> + /* We confirm that there is no hole */
> + zone->contiguous = true;
> +}
> +
> void __init page_alloc_init_late(void)
> {
> struct zone *zone;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 4f094ba7c8fb..fe7c1ee5becd 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1480,85 +1480,6 @@ void __free_pages_core(struct page *page, unsigned int order)
> __free_pages_ok(page, order, FPI_TO_TAIL);
> }
>
> -/*
> - * Check that the whole (or subset of) a pageblock given by the interval of
> - * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
> - * with the migration of free compaction scanner.
> - *
> - * Return struct page pointer of start_pfn, or NULL if checks were not passed.
> - *
> - * It's possible on some configurations to have a setup like node0 node1 node0
> - * i.e. it's possible that all pages within a zones range of pages do not
> - * belong to a single zone. We assume that a border between node0 and node1
> - * can occur within a single pageblock, but not a node0 node1 node0
> - * interleaving within a single pageblock. It is therefore sufficient to check
> - * the first and last page of a pageblock and avoid checking each individual
> - * page in a pageblock.
> - *
> - * Note: the function may return non-NULL struct page even for a page block
> - * which contains a memory hole (i.e. there is no physical memory for a subset
> - * of the pfn range). For example, if the pageblock order is MAX_ORDER, which
> - * will fall into 2 sub-sections, and the end pfn of the pageblock may be hole
> - * even though the start pfn is online and valid. This should be safe most of
> - * the time because struct pages are still initialized via init_unavailable_range()
> - * and pfn walkers shouldn't touch any physical memory range for which they do
> - * not recognize any specific metadata in struct pages.
> - */
> -struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
> - unsigned long end_pfn, struct zone *zone)
> -{
> - struct page *start_page;
> - struct page *end_page;
> -
> - /* end_pfn is one past the range we are checking */
> - end_pfn--;
> -
> - if (!pfn_valid(end_pfn))
> - return NULL;
> -
> - start_page = pfn_to_online_page(start_pfn);
> - if (!start_page)
> - return NULL;
> -
> - if (page_zone(start_page) != zone)
> - return NULL;
> -
> - end_page = pfn_to_page(end_pfn);
> -
> - /* This gives a shorter code than deriving page_zone(end_page) */
> - if (page_zone_id(start_page) != page_zone_id(end_page))
> - return NULL;
> -
> - return start_page;
> -}
> -
> -void set_zone_contiguous(struct zone *zone)
> -{
> - unsigned long block_start_pfn = zone->zone_start_pfn;
> - unsigned long block_end_pfn;
> -
> - block_end_pfn = pageblock_end_pfn(block_start_pfn);
> - for (; block_start_pfn < zone_end_pfn(zone);
> - block_start_pfn = block_end_pfn,
> - block_end_pfn += pageblock_nr_pages) {
> -
> - block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
> -
> - if (!__pageblock_pfn_to_page(block_start_pfn,
> - block_end_pfn, zone))
> - return;
> - cond_resched();
> - }
> -
> - /* We confirm that there is no hole */
> - zone->contiguous = true;
> -}
> -
> -void clear_zone_contiguous(struct zone *zone)
> -{
> - zone->contiguous = false;
> -}
> -
> /*
> * The order of subdivision here is critical for the IO subsystem.
> * Please do not alter this order without good reasons and regression

2023-05-09 16:59:03

by Mike Rapoport

[permalink] [raw]
Subject: Re: [PATCH 05/12] mm: page_alloc: squash page_is_consistent()

On Mon, May 08, 2023 at 03:11:53PM +0800, Kefeng Wang wrote:
> Squash page_is_consistent() into bad_range() as there is
> only one caller.
>
> Signed-off-by: Kefeng Wang <[email protected]>

Reviewed-by: Mike Rapoport (IBM) <[email protected]>

> ---
> mm/page_alloc.c | 9 +--------
> 1 file changed, 1 insertion(+), 8 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 9a85238f1140..348dcbaca757 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -517,13 +517,6 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
> return ret;
> }
>
> -static int page_is_consistent(struct zone *zone, struct page *page)
> -{
> - if (zone != page_zone(page))
> - return 0;
> -
> - return 1;
> -}
> /*
> * Temporary debugging check for pages not lying within a given zone.
> */
> @@ -531,7 +524,7 @@ static int __maybe_unused bad_range(struct zone *zone, struct page *page)
> {
> if (page_outside_zone_boundaries(zone, page))
> return 1;
> - if (!page_is_consistent(zone, page))
> + if (zone != page_zone(page))
> return 1;
>
> return 0;
> --
> 2.35.3
>

2023-05-10 08:06:04

by Kefeng Wang

[permalink] [raw]
Subject: [PATCH v2 03/12] mm: page_alloc: move set_zone_contiguous() into mm_init.c

set_zone_contiguous() is only used in mm init/hotplug, and
clear_zone_contiguous() is only used in hotplug; move them from
page_alloc.c to more appropriate files.

Signed-off-by: Kefeng Wang <[email protected]>
---
v2: drop the move of __pageblock_pfn_to_page(), as suggested by Huang Ying

include/linux/memory_hotplug.h | 3 ---
mm/internal.h | 7 +++++++
mm/mm_init.c | 22 ++++++++++++++++++++++
mm/page_alloc.c | 27 ---------------------------
4 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 9fcbf5706595..04bc286eed42 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -326,9 +326,6 @@ static inline int remove_memory(u64 start, u64 size)
static inline void __remove_memory(u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */

-extern void set_zone_contiguous(struct zone *zone);
-extern void clear_zone_contiguous(struct zone *zone);
-
#ifdef CONFIG_MEMORY_HOTPLUG
extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat);
extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
diff --git a/mm/internal.h b/mm/internal.h
index e28442c0858a..9482862b28cc 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -371,6 +371,13 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
}

+void set_zone_contiguous(struct zone *zone);
+
+static inline void clear_zone_contiguous(struct zone *zone)
+{
+ zone->contiguous = false;
+}
+
extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __putback_isolated_page(struct page *page, unsigned int order,
int mt);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 15201887f8e0..0fd4ddfdfb2e 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2330,6 +2330,28 @@ void __init init_cma_reserved_pageblock(struct page *page)
}
#endif

+void set_zone_contiguous(struct zone *zone)
+{
+ unsigned long block_start_pfn = zone->zone_start_pfn;
+ unsigned long block_end_pfn;
+
+ block_end_pfn = pageblock_end_pfn(block_start_pfn);
+ for (; block_start_pfn < zone_end_pfn(zone);
+ block_start_pfn = block_end_pfn,
+ block_end_pfn += pageblock_nr_pages) {
+
+ block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
+
+ if (!__pageblock_pfn_to_page(block_start_pfn,
+ block_end_pfn, zone))
+ return;
+ cond_resched();
+ }
+
+ /* We confirm that there is no hole */
+ zone->contiguous = true;
+}
+
void __init page_alloc_init_late(void)
{
struct zone *zone;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4f094ba7c8fb..7bb0d6abfe3d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1532,33 +1532,6 @@ struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
return start_page;
}

-void set_zone_contiguous(struct zone *zone)
-{
- unsigned long block_start_pfn = zone->zone_start_pfn;
- unsigned long block_end_pfn;
-
- block_end_pfn = pageblock_end_pfn(block_start_pfn);
- for (; block_start_pfn < zone_end_pfn(zone);
- block_start_pfn = block_end_pfn,
- block_end_pfn += pageblock_nr_pages) {
-
- block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
-
- if (!__pageblock_pfn_to_page(block_start_pfn,
- block_end_pfn, zone))
- return;
- cond_resched();
- }
-
- /* We confirm that there is no hole */
- zone->contiguous = true;
-}
-
-void clear_zone_contiguous(struct zone *zone)
-{
- zone->contiguous = false;
-}
-
/*
* The order of subdivision here is critical for the IO subsystem.
* Please do not alter this order without good reasons and regression
--
2.35.3


2023-05-11 00:28:33

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 04/12] mm: page_alloc: collect mem statistic into show_mem.c

Hi Kefeng,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]

url: https://github.com/intel-lab-lkp/linux/commits/Kefeng-Wang/mm-page_alloc-move-mirrored_kernelcore-into-mm_init-c/20230508-145724
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20230508071200.123962-5-wangkefeng.wang%40huawei.com
patch subject: [PATCH 04/12] mm: page_alloc: collect mem statistic into show_mem.c
config: loongarch-randconfig-s051-20230509 (https://download.01.org/0day-ci/archive/20230511/[email protected]/config)
compiler: loongarch64-linux-gcc (GCC) 12.1.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.4-39-gce1a6720-dirty
# https://github.com/intel-lab-lkp/linux/commit/be69df472e4d9a6b09a17b854d3aeb9722fc2675
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Kefeng-Wang/mm-page_alloc-move-mirrored_kernelcore-into-mm_init-c/20230508-145724
git checkout be69df472e4d9a6b09a17b854d3aeb9722fc2675
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=loongarch olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=loongarch SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>
| Link: https://lore.kernel.org/oe-kbuild-all/[email protected]/

sparse warnings: (new ones prefixed by >>)
>> mm/show_mem.c:336:17: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *ptr @@ got int [noderef] __percpu * @@
mm/show_mem.c:336:17: sparse: expected void *ptr
mm/show_mem.c:336:17: sparse: got int [noderef] __percpu *
>> mm/show_mem.c:336:17: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *ptr @@ got int [noderef] __percpu * @@
mm/show_mem.c:336:17: sparse: expected void *ptr
mm/show_mem.c:336:17: sparse: got int [noderef] __percpu *
>> mm/show_mem.c:336:17: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *ptr @@ got int [noderef] __percpu * @@
mm/show_mem.c:336:17: sparse: expected void *ptr
mm/show_mem.c:336:17: sparse: got int [noderef] __percpu *
>> mm/show_mem.c:336:17: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *ptr @@ got int [noderef] __percpu * @@
mm/show_mem.c:336:17: sparse: expected void *ptr
mm/show_mem.c:336:17: sparse: got int [noderef] __percpu *

vim +336 mm/show_mem.c

207
208 /*
209 * Show free area list (used inside shift_scroll-lock stuff)
210 * We also calculate the percentage fragmentation. We do this by counting the
211 * memory on each free list with the exception of the first item on the list.
212 *
213 * Bits in @filter:
214 * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
215 * cpuset.
216 */
217 void __show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx)
218 {
219 unsigned long free_pcp = 0;
220 int cpu, nid;
221 struct zone *zone;
222 pg_data_t *pgdat;
223
224 for_each_populated_zone(zone) {
225 if (zone_idx(zone) > max_zone_idx)
226 continue;
227 if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
228 continue;
229
230 for_each_online_cpu(cpu)
231 free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
232 }
233
234 printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
235 " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
236 " unevictable:%lu dirty:%lu writeback:%lu\n"
237 " slab_reclaimable:%lu slab_unreclaimable:%lu\n"
238 " mapped:%lu shmem:%lu pagetables:%lu\n"
239 " sec_pagetables:%lu bounce:%lu\n"
240 " kernel_misc_reclaimable:%lu\n"
241 " free:%lu free_pcp:%lu free_cma:%lu\n",
242 global_node_page_state(NR_ACTIVE_ANON),
243 global_node_page_state(NR_INACTIVE_ANON),
244 global_node_page_state(NR_ISOLATED_ANON),
245 global_node_page_state(NR_ACTIVE_FILE),
246 global_node_page_state(NR_INACTIVE_FILE),
247 global_node_page_state(NR_ISOLATED_FILE),
248 global_node_page_state(NR_UNEVICTABLE),
249 global_node_page_state(NR_FILE_DIRTY),
250 global_node_page_state(NR_WRITEBACK),
251 global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B),
252 global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B),
253 global_node_page_state(NR_FILE_MAPPED),
254 global_node_page_state(NR_SHMEM),
255 global_node_page_state(NR_PAGETABLE),
256 global_node_page_state(NR_SECONDARY_PAGETABLE),
257 global_zone_page_state(NR_BOUNCE),
258 global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
259 global_zone_page_state(NR_FREE_PAGES),
260 free_pcp,
261 global_zone_page_state(NR_FREE_CMA_PAGES));
262
263 for_each_online_pgdat(pgdat) {
264 if (show_mem_node_skip(filter, pgdat->node_id, nodemask))
265 continue;
266 if (!node_has_managed_zones(pgdat, max_zone_idx))
267 continue;
268
269 printk("Node %d"
270 " active_anon:%lukB"
271 " inactive_anon:%lukB"
272 " active_file:%lukB"
273 " inactive_file:%lukB"
274 " unevictable:%lukB"
275 " isolated(anon):%lukB"
276 " isolated(file):%lukB"
277 " mapped:%lukB"
278 " dirty:%lukB"
279 " writeback:%lukB"
280 " shmem:%lukB"
281 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
282 " shmem_thp: %lukB"
283 " shmem_pmdmapped: %lukB"
284 " anon_thp: %lukB"
285 #endif
286 " writeback_tmp:%lukB"
287 " kernel_stack:%lukB"
288 #ifdef CONFIG_SHADOW_CALL_STACK
289 " shadow_call_stack:%lukB"
290 #endif
291 " pagetables:%lukB"
292 " sec_pagetables:%lukB"
293 " all_unreclaimable? %s"
294 "\n",
295 pgdat->node_id,
296 K(node_page_state(pgdat, NR_ACTIVE_ANON)),
297 K(node_page_state(pgdat, NR_INACTIVE_ANON)),
298 K(node_page_state(pgdat, NR_ACTIVE_FILE)),
299 K(node_page_state(pgdat, NR_INACTIVE_FILE)),
300 K(node_page_state(pgdat, NR_UNEVICTABLE)),
301 K(node_page_state(pgdat, NR_ISOLATED_ANON)),
302 K(node_page_state(pgdat, NR_ISOLATED_FILE)),
303 K(node_page_state(pgdat, NR_FILE_MAPPED)),
304 K(node_page_state(pgdat, NR_FILE_DIRTY)),
305 K(node_page_state(pgdat, NR_WRITEBACK)),
306 K(node_page_state(pgdat, NR_SHMEM)),
307 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
308 K(node_page_state(pgdat, NR_SHMEM_THPS)),
309 K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
310 K(node_page_state(pgdat, NR_ANON_THPS)),
311 #endif
312 K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
313 node_page_state(pgdat, NR_KERNEL_STACK_KB),
314 #ifdef CONFIG_SHADOW_CALL_STACK
315 node_page_state(pgdat, NR_KERNEL_SCS_KB),
316 #endif
317 K(node_page_state(pgdat, NR_PAGETABLE)),
318 K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
319 pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
320 "yes" : "no");
321 }
322
323 for_each_populated_zone(zone) {
324 int i;
325
326 if (zone_idx(zone) > max_zone_idx)
327 continue;
328 if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask))
329 continue;
330
331 free_pcp = 0;
332 for_each_online_cpu(cpu)
333 free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count;
334
335 show_node(zone);
> 336 printk(KERN_CONT

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests

2023-05-16 05:59:04

by Kefeng Wang

[permalink] [raw]
Subject: Re: [PATCH 04/12] mm: page_alloc: collect mem statistic into show_mem.c



On 2023/5/11 8:04, kernel test robot wrote:
> Hi Kefeng,
>
> kernel test robot noticed the following build warnings:
>
> [auto build test WARNING on akpm-mm/mm-everything]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Kefeng-Wang/mm-page_alloc-move-mirrored_kernelcore-into-mm_init-c/20230508-145724
> base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> patch link: https://lore.kernel.org/r/20230508071200.123962-5-wangkefeng.wang%40huawei.com
> patch subject: [PATCH 04/12] mm: page_alloc: collect mem statistic into show_mem.c
> config: loongarch-randconfig-s051-20230509 (https://download.01.org/0day-ci/archive/20230511/[email protected]/config)
> compiler: loongarch64-linux-gcc (GCC) 12.1.0
> reproduce:
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # apt-get install sparse
> # sparse version: v0.6.4-39-gce1a6720-dirty
> # https://github.com/intel-lab-lkp/linux/commit/be69df472e4d9a6b09a17b854d3aeb9722fc2675
> git remote add linux-review https://github.com/intel-lab-lkp/linux
> git fetch --no-tags linux-review Kefeng-Wang/mm-page_alloc-move-mirrored_kernelcore-into-mm_init-c/20230508-145724
> git checkout be69df472e4d9a6b09a17b854d3aeb9722fc2675
> # save the config file
> mkdir build_dir && cp config build_dir/.config
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=loongarch olddefconfig
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=loongarch SHELL=/bin/bash
>
> If you fix the issue, kindly add following tag where applicable
> | Reported-by: kernel test robot <[email protected]>
> | Link: https://lore.kernel.org/oe-kbuild-all/[email protected]/
>
> sparse warnings: (new ones prefixed by >>)
>>> mm/show_mem.c:336:17: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *ptr @@ got int [noderef] __percpu * @@
> mm/show_mem.c:336:17: sparse: expected void *ptr
> mm/show_mem.c:336:17: sparse: got int [noderef] __percpu *
>>> mm/show_mem.c:336:17: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *ptr @@ got int [noderef] __percpu * @@
> mm/show_mem.c:336:17: sparse: expected void *ptr
> mm/show_mem.c:336:17: sparse: got int [noderef] __percpu *
>>> mm/show_mem.c:336:17: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *ptr @@ got int [noderef] __percpu * @@
> mm/show_mem.c:336:17: sparse: expected void *ptr
> mm/show_mem.c:336:17: sparse: got int [noderef] __percpu *
>>> mm/show_mem.c:336:17: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *ptr @@ got int [noderef] __percpu * @@
> mm/show_mem.c:336:17: sparse: expected void *ptr
> mm/show_mem.c:336:17: sparse: got int [noderef] __percpu *
>
> vim +336 mm/show_mem.c
>

Thanks. I won't make any changes to the __show_free_areas() function here;
it's better not to fix it in this patch, since this patch only moves some
functions around. The sparse warning is caused by
K(this_cpu_read(zone->per_cpu_pageset->count)); maybe change it to
__this_cpu_read()?
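
Something like the following, perhaps (illustration only, untested, and not
part of this series; the line is the one inside the big printk() in
__show_free_areas() that the robot flags at mm/show_mem.c:336):

-			K(this_cpu_read(zone->per_cpu_pageset->count)),
+			K(__this_cpu_read(zone->per_cpu_pageset->count)),

Whether that actually satisfies sparse's __percpu address-space check would
still need to be verified with the same C=1 build the robot used.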




>