2012-05-22 06:59:24

by Wen Congyang

[permalink] [raw]
Subject: [PATCH 1/2] x86: add max_addr boot option

Currently, the boot option max_addr is only supported on the ia64 platform.
We also need it on the x86 platform.
For example:
There are two nodes:
NODE#0 address range 0x00000000 00000000 - 0x00010000 00000000
NODE#1 address range 0x00010000 00000000 - 0x00020000 00000000
If we only want to use node0, we can specify max_addr. The boot
option "mem=" happens to do the same thing now, but "mem=" is documented
as the total amount of memory used by the system. If we tell users
that "mem=" can also be used to limit the maximum address, it will
confuse them. So we need a new boot option "max_addr" on the x86 platform.

Signed-off-by: Wen Congyang <[email protected]>
---
Documentation/kernel-parameters.txt | 2 +-
arch/x86/kernel/e820.c | 36 +++++++++++++++++++++++++++++++++++
2 files changed, 37 insertions(+), 1 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index c1601e5..fe80e58 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1409,7 +1409,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
yeeloong laptop.
Example: machtype=lemote-yeeloong-2f-7inch

- max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater
+ max_addr=nn[KMG] [KNL,BOOT,ia64,X86] All physical memory greater
than or equal to this physical address is ignored.

maxcpus= [SMP] Maximum number of processors that an SMP kernel
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 62d61e9..2a6bec7 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -47,6 +47,7 @@ unsigned long pci_mem_start = 0xaeedbabe;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
+static u64 max_addr = ~0ULL;

/*
* This function checks if any part of the range <start,end> is mapped
@@ -117,6 +118,20 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
return;
}

+ if (start >= max_addr) {
+ printk(KERN_INFO "Ingoring memory: %016Lx - %016Lx\n",
+ (unsigned long long)start,
+ (unsigned long long)(start + size));
+ return;
+ }
+
+ if (max_addr - start < size) {
+ printk(KERN_INFO "Ingoring memory: %016Lx - %016Lx\n",
+ (unsigned long long)max_addr,
+ (unsigned long long)(start + size));
+ size = max_addr - start;
+ }
+
e820x->map[x].addr = start;
e820x->map[x].size = size;
e820x->map[x].type = type;
@@ -835,6 +850,22 @@ static int __init parse_memopt(char *p)
}
early_param("mem", parse_memopt);

+static int __init parse_memmax_opt(char *p)
+{
+ char *oldp;
+
+ if (!p)
+ return -EINVAL;
+
+ oldp = p;
+ max_addr = memparse(p, &p);
+ if (p == oldp)
+ return -EINVAL;
+
+ return 0;
+}
+early_param("max_addr", parse_memmax_opt);
+
static int __init parse_memmap_opt(char *p)
{
char *oldp;
@@ -881,6 +912,11 @@ early_param("memmap", parse_memmap_opt);

void __init finish_e820_parsing(void)
{
+ if (max_addr != ~0ULL) {
+ userdef = 1;
+ e820_remove_range(max_addr, ULLONG_MAX - max_addr, E820_RAM, 1);
+ }
+
if (userdef) {
u32 nr = e820.nr_map;

--
1.7.1


2012-05-22 07:01:38

by Wen Congyang

[permalink] [raw]
Subject: [PATCH 2/2] x86: reimplement mem boot option

The boot option "mem=" specifies the total memory that the system can
use. But we implement it as max_addr.

An x86 system can be booted by EFI. If the user specifies the boot
option "add_efi_memmap", we add all memory maps from EFI, but we
do not limit the memory map according to the boot option "mem=".

This patch reimplements the boot option "mem=", and handles the memory
map after calling efi_init().

Signed-off-by: Wen Congyang <[email protected]>
---
arch/x86/include/asm/e820.h | 1 +
arch/x86/kernel/e820.c | 36 +++++++++++++++++++++++++++++++-----
arch/x86/kernel/setup.c | 1 +
3 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 3778256..d1bb772 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -127,6 +127,7 @@ extern void e820_reserve_resources(void);
extern void e820_reserve_resources_late(void);
extern void setup_memory_map(void);
extern char *default_machine_specific_memory_setup(void);
+extern void set_memlimit(void);

/*
* Returns true iff the specified range [s,e) is completely contained inside
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 2a6bec7..0148944 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -48,6 +48,7 @@ unsigned long pci_mem_start = 0xaeedbabe;
EXPORT_SYMBOL(pci_mem_start);
#endif
static u64 max_addr = ~0ULL;
+static u64 mem_limit = ~0ULL;

/*
* This function checks if any part of the range <start,end> is mapped
@@ -824,8 +825,6 @@ static int userdef __initdata;
/* "mem=nopentium" disables the 4MB page tables. */
static int __init parse_memopt(char *p)
{
- u64 mem_size;
-
if (!p)
return -EINVAL;

@@ -840,16 +839,43 @@ static int __init parse_memopt(char *p)
}

userdef = 1;
- mem_size = memparse(p, &p);
+ mem_limit = memparse(p, &p);
/* don't remove all of memory when handling "mem={invalid}" param */
- if (mem_size == 0)
+ if (mem_limit == 0)
return -EINVAL;
- e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);

return 0;
}
early_param("mem", parse_memopt);

+void __init set_memlimit(void)
+{
+ u64 total_size = 0;
+ int i;
+
+ if (mem_limit == ~0ULL)
+ return;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (ei->type != E820_RAM)
+ continue;
+
+ if (total_size >= mem_limit) {
+ memset(ei, 0, sizeof(struct e820entry));
+ continue;
+ }
+
+ if (mem_limit - total_size <= ei->size)
+ ei->size = mem_limit - total_size;
+
+ total_size += ei->size;
+ }
+
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
static int __init parse_memmax_opt(char *p)
{
char *oldp;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1a29015..7938fae 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -823,6 +823,7 @@ void __init setup_arch(char **cmdline_p)

if (efi_enabled)
efi_init();
+ set_memlimit();

dmi_scan_machine();

--
1.7.1

2012-05-22 19:51:44

by Rob Landley

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86: add max_addr boot option

On 05/22/2012 02:02 AM, Wen Congyang wrote:
> If we only want to use node0, we can specify the max_addr. The boot
> option "mem=" can do the same thing now. But the boot option "mem="
> means the total memory used by the system. If we tell the user
> that the boot option "mem=" can do this, it will confuse the user.
> So we need an new boot option "max_addr" on x86 platform.

I didn't follow that reasoning at all. Care to try again?

(mem= can do this, but telling users that would confuse them? What?)

Rob
--
GNU/Linux isn't: Linux=GPLv2, GNU=GPLv3+, they can't share code.
Either it's "mere aggregation", or a license violation. Pick one.

2012-05-23 01:19:46

by Wen Congyang

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86: add max_addr boot option

At 05/23/2012 03:51 AM, Rob Landley Wrote:
> On 05/22/2012 02:02 AM, Wen Congyang wrote:
>> If we only want to use node0, we can specify the max_addr. The boot
>> option "mem=" can do the same thing now. But the boot option "mem="
>> means the total memory used by the system. If we tell the user
>> that the boot option "mem=" can do this, it will confuse the user.
>> So we need an new boot option "max_addr" on x86 platform.
>
> I didn't follow that reasoning at all. Care to try again?
>
> (mem= can do this, but telling users that would confuse them? What?)

mem= means the total memory, but we implement it as max address.
I do not know why we implement it as max address. The users do not
know how we implement it; they only know that they can use
mem= to set the total memory. If you tell the users that mem=
can set the max address, it will confuse them.

Thanks
Wen Congyang

>
> Rob

2012-05-23 04:08:36

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86: add max_addr boot option

(2012/05/23 4:51), Rob Landley wrote:

> On 05/22/2012 02:02 AM, Wen Congyang wrote:
>> If we only want to use node0, we can specify the max_addr. The boot
>> option "mem=" can do the same thing now. But the boot option "mem="
>> means the total memory used by the system. If we tell the user
>> that the boot option "mem=" can do this, it will confuse the user.
>> So we need an new boot option "max_addr" on x86 platform.
>
> I didn't follow that reasoning at all. Care to try again?
>
> (mem= can do this, but telling users that would confuse them? What?)
>


Kernel doc says

mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
Amount of memory to be used when the kernel is not able
to see the whole system memory or for test.
[X86-32] Use together with memmap= to avoid physical
address space collisions. Without memmap= PCI devices
could be placed at addresses belonging to unused RAM.

max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater
than or equal to this physical address is ignored.

And, now, on x86+e820, mem= option works as max_addr= option.

This has sometimes caused trouble for our customers. On many servers, the
address range 3G-4G is reserved for PCI.

This is my host's dmesg.
==
BIOS-provided physical RAM map:
BIOS-e820: 0000000000000000 - 000000000009e800 (usable)
BIOS-e820: 000000000009e800 - 00000000000a0000 (reserved)
BIOS-e820: 00000000000ce000 - 00000000000d0000 (reserved)
BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)
BIOS-e820: 0000000000100000 - 00000000bfee0000 (usable)
BIOS-e820: 00000000bfee0000 - 00000000bfee7000 (ACPI data)
BIOS-e820: 00000000bfee7000 - 00000000bff00000 (ACPI NVS)
BIOS-e820: 00000000bff00000 - 00000000bff80000 (usable)
BIOS-e820: 00000000bff80000 - 00000000c0000000 (reserved)
BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved)
BIOS-e820: 00000000fe000000 - 00000000fed00000 (reserved)
BIOS-e820: 00000000fee00000 - 00000000fef00000 (reserved)
BIOS-e820: 00000000ffb00000 - 0000000100000000 (reserved)
BIOS-e820: 0000000100000000 - 0000000640000000 (usable)
==

So, if a customer sets mem=10G, the system will boot with 9G of memory.
I think this is a bug and mem= should mean 'amount of memory'.

For users who want to hide memory in higher address, I think
max_addr= option is suitable.

Thanks,
-Kame










2012-05-23 14:25:52

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86: add max_addr boot option

On Tue, May 22, 2012 at 1:02 AM, Wen Congyang <[email protected]> wrote:

> +       if (start >= max_addr) {
> +               printk(KERN_INFO "Ingoring memory: %016Lx - %016Lx\n",
> +                      (unsigned long long)start,
> +                      (unsigned long long)(start + size));
> +               return;
> +       }
> +
> +       if (max_addr - start < size) {
> +               printk(KERN_INFO "Ingoring memory: %016Lx - %016Lx\n",
> +                      (unsigned long long)max_addr,
> +                      (unsigned long long)(start + size));
> +               size = max_addr - start;

s/Ingoring/Ignoring/

Please use a format like "e820: ignoring [mem %#010Lx-%#010Lx]" so it
matches other places where we print physical address ranges. See
https://lkml.org/lkml/2012/2/13/436

Bjorn

2012-05-24 05:16:12

by Wen Congyang

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86: add max_addr boot option

At 05/23/2012 10:25 PM, Bjorn Helgaas Wrote:
> On Tue, May 22, 2012 at 1:02 AM, Wen Congyang <[email protected]> wrote:
>
>> + if (start >= max_addr) {
>> + printk(KERN_INFO "Ingoring memory: %016Lx - %016Lx\n",
>> + (unsigned long long)start,
>> + (unsigned long long)(start + size));
>> + return;
>> + }
>> +
>> + if (max_addr - start < size) {
>> + printk(KERN_INFO "Ingoring memory: %016Lx - %016Lx\n",
>> + (unsigned long long)max_addr,
>> + (unsigned long long)(start + size));
>> + size = max_addr - start;
>
> s/Ingoring/Ignoring/
>
> Please use a format like "e820: ignoring [mem %#010Lx-%#010Lx]" so it
> matches other places where we print physical address ranges. See
> https://lkml.org/lkml/2012/2/13/436

Hmm, no one has reviewed/acked the patch yet, so I am not sure whether we need
to do this. I think there is no need to use such a format for now.

Thanks
Wen Congyang
>
> Bjorn
>