This series of patches fixes a problem that occurs when memory is added in a
bad (out-of-order) manner. For example: for an x86_64 machine booted with
"mem=400M" and with 2GiB memory installed, the following commands cause a problem:
# echo 0x40000000 > /sys/devices/system/memory/probe
[ 28.613895] init_memory_mapping: [mem 0x40000000-0x47ffffff]
# echo 0x48000000 > /sys/devices/system/memory/probe
[ 28.693675] init_memory_mapping: [mem 0x48000000-0x4fffffff]
# echo online_movable > /sys/devices/system/memory/memory9/state
# echo 0x50000000 > /sys/devices/system/memory/probe
[ 29.084090] init_memory_mapping: [mem 0x50000000-0x57ffffff]
# echo 0x58000000 > /sys/devices/system/memory/probe
[ 29.151880] init_memory_mapping: [mem 0x58000000-0x5fffffff]
# echo online_movable > /sys/devices/system/memory/memory11/state
# echo online> /sys/devices/system/memory/memory8/state
# echo online> /sys/devices/system/memory/memory10/state
# echo offline> /sys/devices/system/memory/memory9/state
[ 30.558819] Offlined Pages 32768
# free
total used free shared buffers cached
Mem: 780588 18014398509432020 830552 0 0 51180
-/+ buffers/cache: 18014398509380840 881732
Swap: 0 0 0
This is because the above commands probe higher memory after onlining a
section with online_movable, which causes ZONE_HIGHMEM (or ZONE_NORMAL
for systems without ZONE_HIGHMEM) to overlap ZONE_MOVABLE.
After the second online_movable, the problem can be observed from
zoneinfo:
# cat /proc/zoneinfo
...
Node 0, zone Movable
pages free 65491
min 250
low 312
high 375
scanned 0
spanned 18446744073709518848
present 65536
managed 65536
...
This series of patches solves the problem by checking ZONE_MOVABLE when
choosing a zone for new memory. If the new memory is inside or higher than
ZONE_MOVABLE, it is made to go there instead.
Wang Nan (5):
memory-hotplug: x86_64: suitable memory should go to ZONE_MOVABLE
memory-hotplug: x86_32: suitable memory should go to ZONE_MOVABLE
memory-hotplug: ia64: suitable memory should go to ZONE_MOVABLE
memory-hotplug: sh: suitable memory should go to ZONE_MOVABLE
memory-hotplug: powerpc: suitable memory should go to ZONE_MOVABLE
arch/ia64/mm/init.c | 7 +++++++
arch/powerpc/mm/mem.c | 6 ++++++
arch/sh/mm/init.c | 13 ++++++++-----
arch/x86/mm/init_32.c | 6 ++++++
arch/x86/mm/init_64.c | 10 ++++++++--
5 files changed, 35 insertions(+), 7 deletions(-)
--
1.8.4
This patch adds new memory to ZONE_MOVABLE if the movable zone is set up
and lower than the newly added memory, for sh.
Signed-off-by: Wang Nan <[email protected]>
---
arch/sh/mm/init.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 2d089fe..ff9decc 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -487,16 +487,19 @@ void free_initrd_mem(unsigned long start, unsigned long end)
#ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size)
{
- pg_data_t *pgdat;
+ pg_data_t *pgdat = NODE_DATA(nid);
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
+ struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
int ret;
- pgdat = NODE_DATA(nid);
+ if (!zone_is_empty(movable_zone))
+ if (zone_spans_pfn(movable_zone, start_pfn) ||
+ (zone_end_pfn(movable_zone) <= start_pfn))
+ zone = movable_zone;
- /* We only have ZONE_NORMAL, so this is easy.. */
- ret = __add_pages(nid, pgdat->node_zones + ZONE_NORMAL,
- start_pfn, nr_pages);
+ ret = __add_pages(nid, zone, start_pfn, nr_pages);
if (unlikely(ret))
printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
--
1.8.4
This patch adds new memory to ZONE_MOVABLE if the movable zone is set up
and lower than the newly added memory, for powerpc.
Signed-off-by: Wang Nan <[email protected]>
---
arch/powerpc/mm/mem.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2c8e90f..2d869ef 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -118,6 +118,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdata;
struct zone *zone;
+ struct zone *movable_zone;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -129,6 +130,11 @@ int arch_add_memory(int nid, u64 start, u64 size)
/* this should work for most non-highmem platforms */
zone = pgdata->node_zones;
+	movable_zone = pgdata->node_zones + ZONE_MOVABLE;
+ if (!zone_is_empty(movable_zone))
+ if (zone_spans_pfn(movable_zone, start_pfn) ||
+ (zone_end_pfn(movable_zone) <= start_pfn))
+ zone = movable_zone;
return __add_pages(nid, zone, start_pfn, nr_pages);
}
--
1.8.4
This patch adds new memory to ZONE_MOVABLE if the movable zone is set up
and lower than the newly added memory, for x86_64.
Signed-off-by: Wang Nan <[email protected]>
---
arch/x86/mm/init_64.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index df1a992..825915e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -685,17 +685,23 @@ static void update_end_of_memory_vars(u64 start, u64 size)
}
/*
- * Memory is added always to NORMAL zone. This means you will never get
- * additional DMA/DMA32 memory.
+ * Memory is added always to NORMAL or MOVABLE zone. This means you
+ * will never get additional DMA/DMA32 memory.
*/
int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdat = NODE_DATA(nid);
struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
+ struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
+ if (!zone_is_empty(movable_zone))
+ if (zone_spans_pfn(movable_zone, start_pfn) ||
+ (zone_end_pfn(movable_zone) <= start_pfn))
+ zone = movable_zone;
+
init_memory_mapping(start, start + size);
ret = __add_pages(nid, zone, start_pfn, nr_pages);
--
1.8.4
This patch adds new memory to ZONE_MOVABLE if the movable zone is set up
and lower than the newly added memory, for x86_32.
Signed-off-by: Wang Nan <[email protected]>
---
arch/x86/mm/init_32.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e395048..dd69833 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -826,9 +826,15 @@ int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdata = NODE_DATA(nid);
struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
+ struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ if (!zone_is_empty(movable_zone))
+ if (zone_spans_pfn(movable_zone, start_pfn) ||
+ (zone_end_pfn(movable_zone) <= start_pfn))
+ zone = movable_zone;
+
return __add_pages(nid, zone, start_pfn, nr_pages);
}
--
1.8.4
This patch adds new memory to ZONE_MOVABLE if the movable zone is set up
and lower than the newly added memory, for ia64.
Signed-off-by: Wang Nan <[email protected]>
---
arch/ia64/mm/init.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 25c3502..d81c916 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -625,6 +625,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
{
pg_data_t *pgdat;
struct zone *zone;
+ struct zone *movable_zone;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
@@ -632,6 +633,12 @@ int arch_add_memory(int nid, u64 start, u64 size)
pgdat = NODE_DATA(nid);
zone = pgdat->node_zones + ZONE_NORMAL;
+ movable_zone = pgdat->node_zones + ZONE_MOVABLE;
+ if (!zone_is_empty(movable_zone))
+ if (zone_spans_pfn(movable_zone, start_pfn) ||
+ (zone_end_pfn(movable_zone) <= start_pfn))
+ zone = movable_zone;
+
ret = __add_pages(nid, zone, start_pfn, nr_pages);
if (ret)
--
1.8.4
Hello,
On 07/18/2014 03:55 PM, Wang Nan wrote:
> This series of patches fix a problem when adding memory in bad manner.
> For example: for a x86_64 machine booted with "mem=400M" and with 2GiB
> memory installed, following commands cause problem:
>
> # echo 0x40000000 > /sys/devices/system/memory/probe
> [ 28.613895] init_memory_mapping: [mem 0x40000000-0x47ffffff]
> # echo 0x48000000 > /sys/devices/system/memory/probe
> [ 28.693675] init_memory_mapping: [mem 0x48000000-0x4fffffff]
> # echo online_movable > /sys/devices/system/memory/memory9/state
> # echo 0x50000000 > /sys/devices/system/memory/probe
> [ 29.084090] init_memory_mapping: [mem 0x50000000-0x57ffffff]
> # echo 0x58000000 > /sys/devices/system/memory/probe
> [ 29.151880] init_memory_mapping: [mem 0x58000000-0x5fffffff]
> # echo online_movable > /sys/devices/system/memory/memory11/state
> # echo online> /sys/devices/system/memory/memory8/state
> # echo online> /sys/devices/system/memory/memory10/state
> # echo offline> /sys/devices/system/memory/memory9/state
> [ 30.558819] Offlined Pages 32768
> # free
> total used free shared buffers cached
> Mem: 780588 18014398509432020 830552 0 0 51180
> -/+ buffers/cache: 18014398509380840 881732
> Swap: 0 0 0
>
> This is because the above commands probe higher memory after online a
> section with online_movable, which causes ZONE_HIGHMEM (or ZONE_NORMAL
> for systems without ZONE_HIGHMEM) overlaps ZONE_MOVABLE.
Yeah, this is rare in reality but can happen. Could you please also
include the free result and zoneinfo after applying your patch?
Thanks.
>
> After the second online_movable, the problem can be observed from
> zoneinfo:
>
> # cat /proc/zoneinfo
> ...
> Node 0, zone Movable
> pages free 65491
> min 250
> low 312
> high 375
> scanned 0
> spanned 18446744073709518848
> present 65536
> managed 65536
> ...
>
> This series of patches solve the problem by checking ZONE_MOVABLE when
> choosing zone for new memory. If new memory is inside or higher than
> ZONE_MOVABLE, makes it go there instead.
>
>
> Wang Nan (5):
> memory-hotplug: x86_64: suitable memory should go to ZONE_MOVABLE
> memory-hotplug: x86_32: suitable memory should go to ZONE_MOVABLE
> memory-hotplug: ia64: suitable memory should go to ZONE_MOVABLE
> memory-hotplug: sh: suitable memory should go to ZONE_MOVABLE
> memory-hotplug: powerpc: suitable memory should go to ZONE_MOVABLE
>
> arch/ia64/mm/init.c | 7 +++++++
> arch/powerpc/mm/mem.c | 6 ++++++
> arch/sh/mm/init.c | 13 ++++++++-----
> arch/x86/mm/init_32.c | 6 ++++++
> arch/x86/mm/init_64.c | 10 ++++++++--
> 5 files changed, 35 insertions(+), 7 deletions(-)
>
--
Thanks.
Zhang Yanfei
On 2014/7/18 15:56, Wang Nan wrote:
> This patch add new memory to ZONE_MOVABLE if movable zone is setup
> and lower than newly added memory for x86_32.
>
> Signed-off-by: Wang Nan <[email protected]>
> ---
> arch/x86/mm/init_32.c | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
> index e395048..dd69833 100644
> --- a/arch/x86/mm/init_32.c
> +++ b/arch/x86/mm/init_32.c
> @@ -826,9 +826,15 @@ int arch_add_memory(int nid, u64 start, u64 size)
> {
> struct pglist_data *pgdata = NODE_DATA(nid);
> struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
> + struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
Sorry. pgdat should be pgdata.
> unsigned long start_pfn = start >> PAGE_SHIFT;
> unsigned long nr_pages = size >> PAGE_SHIFT;
>
> + if (!zone_is_empty(movable_zone))
> + if (zone_spans_pfn(movable_zone, start_pfn) ||
> + (zone_end_pfn(movable_zone) <= start_pfn))
> + zone = movable_zone;
> +
> return __add_pages(nid, zone, start_pfn, nr_pages);
> }
>
>
On 2014/7/18 17:16, Zhang Yanfei wrote:
> Hello,
>
> On 07/18/2014 03:55 PM, Wang Nan wrote:
>> This series of patches fix a problem when adding memory in bad manner.
>> For example: for a x86_64 machine booted with "mem=400M" and with 2GiB
>> memory installed, following commands cause problem:
>>
>> # echo 0x40000000 > /sys/devices/system/memory/probe
>> [ 28.613895] init_memory_mapping: [mem 0x40000000-0x47ffffff]
>> # echo 0x48000000 > /sys/devices/system/memory/probe
>> [ 28.693675] init_memory_mapping: [mem 0x48000000-0x4fffffff]
>> # echo online_movable > /sys/devices/system/memory/memory9/state
>> # echo 0x50000000 > /sys/devices/system/memory/probe
>> [ 29.084090] init_memory_mapping: [mem 0x50000000-0x57ffffff]
>> # echo 0x58000000 > /sys/devices/system/memory/probe
>> [ 29.151880] init_memory_mapping: [mem 0x58000000-0x5fffffff]
>> # echo online_movable > /sys/devices/system/memory/memory11/state
>> # echo online> /sys/devices/system/memory/memory8/state
>> # echo online> /sys/devices/system/memory/memory10/state
>> # echo offline> /sys/devices/system/memory/memory9/state
>> [ 30.558819] Offlined Pages 32768
>> # free
>> total used free shared buffers cached
>> Mem: 780588 18014398509432020 830552 0 0 51180
>> -/+ buffers/cache: 18014398509380840 881732
>> Swap: 0 0 0
>>
>> This is because the above commands probe higher memory after online a
>> section with online_movable, which causes ZONE_HIGHMEM (or ZONE_NORMAL
>> for systems without ZONE_HIGHMEM) overlaps ZONE_MOVABLE.
>
> Yeah, this is rare in reality but can happen. Could you please also
> include the free result and zoneinfo after applying your patch?
>
> Thanks.
>
OK.
I paste the free result and zoneinfo at the end of this mail. This is a x86_64 result,
generated on qemu, after applying my patches.
I only tested x86_64 and x86_32 (and found a typo). Could anyone help me verify
the other platforms?
Thanks.
>>
>> After the second online_movable, the problem can be observed from
>> zoneinfo:
>>
>> # cat /proc/zoneinfo
>> ...
>> Node 0, zone Movable
>> pages free 65491
>> min 250
>> low 312
>> high 375
>> scanned 0
>> spanned 18446744073709518848
>> present 65536
>> managed 65536
>> ...
>>
>> This series of patches solve the problem by checking ZONE_MOVABLE when
>> choosing zone for new memory. If new memory is inside or higher than
>> ZONE_MOVABLE, makes it go there instead.
>>
>>
>> Wang Nan (5):
>> memory-hotplug: x86_64: suitable memory should go to ZONE_MOVABLE
>> memory-hotplug: x86_32: suitable memory should go to ZONE_MOVABLE
>> memory-hotplug: ia64: suitable memory should go to ZONE_MOVABLE
>> memory-hotplug: sh: suitable memory should go to ZONE_MOVABLE
>> memory-hotplug: powerpc: suitable memory should go to ZONE_MOVABLE
>>
>> arch/ia64/mm/init.c | 7 +++++++
>> arch/powerpc/mm/mem.c | 6 ++++++
>> arch/sh/mm/init.c | 13 ++++++++-----
>> arch/x86/mm/init_32.c | 6 ++++++
>> arch/x86/mm/init_64.c | 10 ++++++++--
>> 5 files changed, 35 insertions(+), 7 deletions(-)
>>
>
>
After applying the above patches, here is the free result and zoneinfo (after offline memory9):
bash-4.2# free
total used free shared buffers cached
Mem: 780588 80860 699728 0 0 51180
-/+ buffers/cache: 29680 750908
Swap: 0 0 0
bash-4.2# cat /proc/zoneinfo
Node 0, zone DMA
pages free 3272
min 14
low 17
high 21
scanned 0
spanned 4095
present 3998
managed 3977
nr_free_pages 3272
...
start_pfn: 1
inactive_ratio: 1
Node 0, zone DMA32
pages free 73548
min 341
low 426
high 511
scanned 0
spanned 98304
present 98304
managed 92866
nr_free_pages 73548
...
start_pfn: 4096
inactive_ratio: 1
Node 0, zone Normal
pages free 32630
min 120
low 150
high 180
scanned 0
spanned 32768
present 32768
managed 32768
nr_free_pages 32630
...
start_pfn: 262144
inactive_ratio: 1
Node 0, zone Movable
pages free 65491
min 241
low 301
high 361
scanned 0
spanned 98304
present 65536
managed 65536
nr_free_pages 65491
...
start_pfn: 294912
inactive_ratio: 1
On 07/18/2014 12:55 AM, Wang Nan wrote:
> + if (!zone_is_empty(movable_zone))
> + if (zone_spans_pfn(movable_zone, start_pfn) ||
> + (zone_end_pfn(movable_zone) <= start_pfn))
> + zone = movable_zone;
> +
It's nice that you hit so many architectures, but is there a way to do
this that doesn't involve copying and pasting the same bit of code in to
each architecture?