2024-06-11 21:58:06

by Steven Rostedt

[permalink] [raw]
Subject: [PATCH v4 1/2] mm/memblock: Add "reserve_mem" to reserved named memory at boot up

From: "Steven Rostedt (Google)" <[email protected]>

In order to allow for requesting a memory region that can be used for
things like pstore on multiple machines where the memory layout is not the
same, add a new option to the kernel command line called "reserve_mem".

The format is: reserve_mem=nn:align:name

Where it will find nn amount of memory at the given alignment of align.
The name field is to allow another subsystem to retrieve where the memory
was found. For example:

reserve_mem=12M:4096:oops ramoops.mem_name=oops

Where ramoops.mem_name will tell ramoops that memory was reserved for it
via the reserve_mem option and it can find it by calling:

if (reserve_mem_find_by_name("oops", &start, &size)) {
// start holds the start address and size holds the size given

This is typically used for systems that do not wipe the RAM, and this
command line will try to reserve the same physical memory on soft reboots.
Note, it is not guaranteed to be the same location. For example, if KASLR
places the kernel at the location of where the RAM reservation was from a
previous boot, the new reservation will be at a different location. Any
subsystem using this feature must add a way to verify that the contents of
the physical memory are from a previous boot, as there may be cases where
the memory will not be located at the same location.

Not all systems may work either. There could be bit flips if the reboot
goes through the BIOS. Using kexec to reboot the machine is likely to
have better results in such cases.

Link: https://lore.kernel.org/all/[email protected]/

Suggested-by: Mike Rapoport <[email protected]>
Signed-off-by: Steven Rostedt (Google) <[email protected]>
---
Changes since v3: https://lore.kernel.org/linux-trace-kernel/[email protected]

- Changed table type of start and size from unsigned long to phys_addr_t
(as well as the parameters to the functions that use them)

- Changed old reference to "early_reserve_mem" to "reserve_mem"

- Check before reserving memory:
o Size is non-zero
o name has text in it

- If align is less than SMP_CACHE_BYTES, make it SMP_CACHE_BYTES

- Remove the silly check of testing *p == '\0' after a p += strlen(p)

.../admin-guide/kernel-parameters.txt | 20 +++
include/linux/mm.h | 2 +
mm/memblock.c | 115 ++++++++++++++++++
3 files changed, 137 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b600df82669d..ce7de8136f2f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5710,6 +5710,26 @@
them. If <base> is less than 0x10000, the region
is assumed to be I/O ports; otherwise it is memory.

+ reserve_mem= [RAM]
+ Format: nn[KMG]:<align>:<label>
+ Reserve physical memory and label it with a name that
+ other subsystems can use to access it. This is typically
+ used for systems that do not wipe the RAM, and this command
+ line will try to reserve the same physical memory on
+ soft reboots. Note, it is not guaranteed to be the same
+ location. For example, if KASLR places the kernel at the
+ location of where the RAM reservation was from a previous
+ boot, the new reservation will be at a different location.
+ Any subsystem using this feature must add a way to verify
+ that the contents of the physical memory are from a previous
+ boot, as there may be cases where the memory will not be
+ located at the same location.
+
+ The format is size:align:label. For example, to request
+ 12 megabytes with 4096-byte alignment for ramoops:
+
+ reserve_mem=12M:4096:oops ramoops.mem_name=oops
+
reservetop= [X86-32,EARLY]
Format: nn[KMG]
Reserves a hole at the top of the kernel virtual
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9849dfda44d4..077fb589b88a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4263,4 +4263,6 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
void vma_pgtable_walk_begin(struct vm_area_struct *vma);
void vma_pgtable_walk_end(struct vm_area_struct *vma);

+int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);
+
#endif /* _LINUX_MM_H */
diff --git a/mm/memblock.c b/mm/memblock.c
index d09136e040d3..044ddce8f085 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2244,6 +2244,121 @@ void __init memblock_free_all(void)
totalram_pages_add(pages);
}

+/* Keep a table to reserve named memory */
+#define RESERVE_MEM_MAX_ENTRIES 8
+#define RESERVE_MEM_NAME_SIZE 16
+struct reserve_mem_table {
+ char name[RESERVE_MEM_NAME_SIZE];
+ phys_addr_t start;
+ phys_addr_t size;
+};
+static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
+static int reserved_mem_count;
+
+/* Add wildcard region with a lookup name */
+static int __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
+ const char *name)
+{
+ struct reserve_mem_table *map;
+
+ if (!name || !name[0] || strlen(name) >= RESERVE_MEM_NAME_SIZE)
+ return -EINVAL;
+
+ if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
+ return -1;
+
+ map = &reserved_mem_table[reserved_mem_count++];
+ map->start = start;
+ map->size = size;
+ strscpy(map->name, name);
+ return 0;
+}
+
+/**
+ * reserve_mem_find_by_name - Find reserved memory region with a given name
+ * @name: The name that is attached to a reserved memory region
+ * @start: If found, holds the start address
+ * @size: If found, holds the size of the reserved region.
+ *
+ * Returns: 1 if found or 0 if not found.
+ */
+int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
+{
+ struct reserve_mem_table *map;
+ int i;
+
+ for (i = 0; i < reserved_mem_count; i++) {
+ map = &reserved_mem_table[i];
+ if (!map->size)
+ continue;
+ if (strcmp(name, map->name) == 0) {
+ *start = map->start;
+ *size = map->size;
+ return 1;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
+
+/*
+ * Parse reserve_mem=nn:align:name
+ */
+static int __init reserve_mem(char *p)
+{
+ phys_addr_t start, size, align;
+ char *name;
+ char *oldp;
+ int err;
+
+ if (!p)
+ return -EINVAL;
+
+ oldp = p;
+ size = memparse(p, &p);
+ if (!size || p == oldp)
+ return -EINVAL;
+
+ if (*p != ':')
+ return -EINVAL;
+
+ align = memparse(p+1, &p);
+ if (*p != ':')
+ return -EINVAL;
+
+ /*
+ * memblock_phys_alloc() doesn't like a zero size align,
+ * but it is OK for this command to have it.
+ */
+ if (align <= SMP_CACHE_BYTES)
+ align = SMP_CACHE_BYTES;
+
+ name = p + 1;
+ if (!strlen(name))
+ return -EINVAL;
+
+ /* Make sure that name has text */
+ for (p = name; *p; p++) {
+ if (!isspace(*p))
+ break;
+ }
+ if (!*p)
+ return -EINVAL;
+
+ start = memblock_phys_alloc(size, align);
+ if (!start)
+ return -ENOMEM;
+
+ err = reserved_mem_add(start, size, name);
+ if (err) {
+ memblock_phys_free(start, size);
+ return err;
+ }
+
+ return 0;
+}
+__setup("reserve_mem=", reserve_mem);
+
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
static const char * const flagname[] = {
[ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
--
2.43.0




2024-06-11 22:26:20

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] mm/memblock: Add "reserve_mem" to reserved named memory at boot up

On 6/11/24 14:56, Steven Rostedt wrote:
> From: "Steven Rostedt (Google)" <[email protected]>
>
> In order to allow for requesting a memory region that can be used for
> things like pstore on multiple machines where the memory layout is not the
> same, add a new option to the kernel command line called "reserve_mem".
>
> The format is: reserve_mem=nn:align:name
>
> Where it will find nn amount of memory at the given alignment of align.
> The name field is to allow another subsystem to retrieve where the memory
> was found. For example:
>
> reserve_mem=12M:4096:oops ramoops.mem_name=oops
>
> Where ramoops.mem_name will tell ramoops that memory was reserved for it
> via the reserve_mem option and it can find it by calling:
>
> if (reserve_mem_find_by_name("oops", &start, &size)) {
> // start holds the start address and size holds the size given
>
> This is typically used for systems that do not wipe the RAM, and this
> command line will try to reserve the same physical memory on soft reboots.
> Note, it is not guaranteed to be the same location. For example, if KASLR
> places the kernel at the location of where the RAM reservation was from a
> previous boot, the new reservation will be at a different location. Any
> subsystem using this feature must add a way to verify that the contents of
> the physical memory is from a previous boot, as there may be cases where
> the memory will not be located at the same location.
>
> Not all systems may work either. There could be bit flips if the reboot
> goes through the BIOS. Using kexec to reboot the machine is likely to
> have better results in such cases.
>
> Link: https://lore.kernel.org/all/[email protected]/
>
> Suggested-by: Mike Rapoport <[email protected]>
> Signed-off-by: Steven Rostedt (Google) <[email protected]>
> ---
> Changes since v3: https://lore.kernel.org/linux-trace-kernel/[email protected]
>
> - Changed table type of start and size from unsigned long to phys_addr_t
> (as well as the parameters to the functions that use them)
>
> - Changed old reference to "early_reserve_mem" to "reserve_mem"
>
> - Check before reservering memory:
> o Size is non-zero
> o name has text in it
>
> - If align is less than SMP_CACHE_BYTES, make it SMP_CACHE_BYTES
>
> - Remove the silly check of testing *p == '\0' after a p += strlen(p)
>
> .../admin-guide/kernel-parameters.txt | 20 +++
> include/linux/mm.h | 2 +
> mm/memblock.c | 115 ++++++++++++++++++
> 3 files changed, 137 insertions(+)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index b600df82669d..ce7de8136f2f 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -5710,6 +5710,26 @@
> them. If <base> is less than 0x10000, the region
> is assumed to be I/O ports; otherwise it is memory.
>
> + reserve_mem= [RAM]
> + Format: nn[KNG]:<align>:<label>
> + Reserve physical memory and label it with a name that
> + other subsystems can use to access it. This is typically
> + used for systems that do not wipe the RAM, and this command
> + line will try to reserve the same physical memory on
> + soft reboots. Note, it is not guaranteed to be the same
> + location. For example, if KASLR places the kernel at the
> + location of where the RAM reservation was from a previous
> + boot, the new reservation will be at a different location.
> + Any subsystem using this feature must add a way to verify
> + that the contents of the physical memory is from a previous
> + boot, as there may be cases where the memory will not be
> + located at the same location.
> +
> + The format is size:align:label for example, to request
> + 12 megabytes of 4096 alignment for ramoops:
> +
> + reserve_mem=12M:4096:oops ramoops.mem_name=oops
> +
> reservetop= [X86-32,EARLY]
> Format: nn[KMG]
> Reserves a hole at the top of the kernel virtual
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 9849dfda44d4..077fb589b88a 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -4263,4 +4263,6 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
> void vma_pgtable_walk_begin(struct vm_area_struct *vma);
> void vma_pgtable_walk_end(struct vm_area_struct *vma);
>
> +int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);
> +
> #endif /* _LINUX_MM_H */
> diff --git a/mm/memblock.c b/mm/memblock.c
> index d09136e040d3..044ddce8f085 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -2244,6 +2244,121 @@ void __init memblock_free_all(void)
> totalram_pages_add(pages);
> }
>
> +/* Keep a table to reserve named memory */
> +#define RESERVE_MEM_MAX_ENTRIES 8
> +#define RESERVE_MEM_NAME_SIZE 16
> +struct reserve_mem_table {
> + char name[RESERVE_MEM_NAME_SIZE];
> + phys_addr_t start;
> + phys_addr_t size;
> +};
> +static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
> +static int reserved_mem_count;
> +
> +/* Add wildcard region with a lookup name */
> +static int __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
> + const char *name)
> +{
> + struct reserve_mem_table *map;
> +
> + if (!name || !name[0] || strlen(name) >= RESERVE_MEM_NAME_SIZE)
> + return -EINVAL;
> +

I know I am picky, but name is never NULL, and strlen(name) is guaranteed to be > 0.
Personally I'd suggest to check for strlen(name) >= RESERVE_MEM_NAME_SIZE together
with !strlen(name) and drop the duplicate checks here (and, as side effect, avoid
the pointless memory allocation if the name is invalid).

> + if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
> + return -1;
> +
> + map = &reserved_mem_table[reserved_mem_count++];
> + map->start = start;
> + map->size = size;
> + strscpy(map->name, name);
> + return 0;
> +}
> +
> +/**
> + * reserve_mem_find_by_name - Find reserved memory region with a given name
> + * @name: The name that is attached to a reserved memory region
> + * @start: If found, holds the start address
> + * @size: If found, holds the size of the address.
> + *
> + * Returns: 1 if found or 0 if not found.
> + */
> +int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
> +{
> + struct reserve_mem_table *map;
> + int i;
> +
> + for (i = 0; i < reserved_mem_count; i++) {
> + map = &reserved_mem_table[i];
> + if (!map->size)
> + continue;
> + if (strcmp(name, map->name) == 0) {
> + *start = map->start;
> + *size = map->size;
> + return 1;
> + }
> + }
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
> +
> +/*
> + * Parse reserve_mem=nn:align:name
> + */
> +static int __init reserve_mem(char *p)
> +{
> + phys_addr_t start, size, align;
> + char *name;
> + char *oldp;
> + int err;
> +
> + if (!p)
> + return -EINVAL;
> +
> + oldp = p;
> + size = memparse(p, &p);
> + if (!size || p == oldp)
> + return -EINVAL;
> +
> + if (*p != ':')
> + return -EINVAL;
> +
> + align = memparse(p+1, &p);
> + if (*p != ':')
> + return -EINVAL;
> +
> + /*
> + * memblock_phys_alloc() doesn't like a zero size align,
> + * but it is OK for this command to have it.
> + */
> + if (align <= SMP_CACHE_BYTES)

Any reason for using <= instead of < ?

Guenter

> + align = SMP_CACHE_BYTES;
> +
> + name = p + 1;
> + if (!strlen(name))
> + return -EINVAL;
> +
> + /* Make sure that name has text */
> + for (p = name; *p; p++) {
> + if (!isspace(*p))
> + break;
> + }
> + if (!*p)
> + return -EINVAL;
> +
> + start = memblock_phys_alloc(size, align);
> + if (!start)
> + return -ENOMEM;
> +
> + err = reserved_mem_add(start, size, name);
> + if (err) {
> + memblock_phys_free(start, size);
> + return err;
> + }
> +
> + return 0;
> +}
> +__setup("reserve_mem=", reserve_mem);
> +
> #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
> static const char * const flagname[] = {
> [ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",


2024-06-11 22:47:11

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] mm/memblock: Add "reserve_mem" to reserved named memory at boot up

On Tue, 11 Jun 2024 15:26:05 -0700
Guenter Roeck <[email protected]> wrote:

> > diff --git a/mm/memblock.c b/mm/memblock.c
> > index d09136e040d3..044ddce8f085 100644
> > --- a/mm/memblock.c
> > +++ b/mm/memblock.c
> > @@ -2244,6 +2244,121 @@ void __init memblock_free_all(void)
> > totalram_pages_add(pages);
> > }
> >
> > +/* Keep a table to reserve named memory */
> > +#define RESERVE_MEM_MAX_ENTRIES 8
> > +#define RESERVE_MEM_NAME_SIZE 16
> > +struct reserve_mem_table {
> > + char name[RESERVE_MEM_NAME_SIZE];
> > + phys_addr_t start;
> > + phys_addr_t size;
> > +};
> > +static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
> > +static int reserved_mem_count;
> > +
> > +/* Add wildcard region with a lookup name */
> > +static int __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
> > + const char *name)
> > +{
> > + struct reserve_mem_table *map;
> > +
> > + if (!name || !name[0] || strlen(name) >= RESERVE_MEM_NAME_SIZE)
> > + return -EINVAL;
> > +
>
> I know I am picky, but name is never NULL, and strlen(name) is guaranteed to be > 0.
> Personally I'd suggest to check for strlen(name) >= RESERVE_MEM_NAME_SIZE together
> with !strlen(name) and drop the duplicate checks here (and, as side effect, avoid
> the pointless memory allocation if the name is invalid).

Yeah, it's now checked before hand. I'll remove it for v5.

>
> > + if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
> > + return -1;
> > +
> > + map = &reserved_mem_table[reserved_mem_count++];
> > + map->start = start;
> > + map->size = size;
> > + strscpy(map->name, name);
> > + return 0;
> > +}
> > +


> > +/*
> > + * Parse reserve_mem=nn:align:name
> > + */
> > +static int __init reserve_mem(char *p)
> > +{
> > + phys_addr_t start, size, align;
> > + char *name;
> > + char *oldp;
> > + int err;
> > +
> > + if (!p)
> > + return -EINVAL;
> > +
> > + oldp = p;
> > + size = memparse(p, &p);
> > + if (!size || p == oldp)
> > + return -EINVAL;
> > +
> > + if (*p != ':')
> > + return -EINVAL;
> > +
> > + align = memparse(p+1, &p);
> > + if (*p != ':')
> > + return -EINVAL;
> > +
> > + /*
> > + * memblock_phys_alloc() doesn't like a zero size align,
> > + * but it is OK for this command to have it.
> > + */
> > + if (align <= SMP_CACHE_BYTES)
>
> Any reason for using <= instead of < ?

Nope. Not sure why I did that. :-/

I'll fix that too.

Thanks,

-- Steve

>
> Guenter
>
> > + align = SMP_CACHE_BYTES;
> > +
> > + name = p + 1;
> > + if (!strlen(name))
> > + return -EINVAL;
> > +
> > + /* Make sure that name has text */
> > + for (p = name; *p; p++) {
> > + if (!isspace(*p))
> > + break;
> > + }
> > + if (!*p)
> > + return -EINVAL;
> > +
> > + start = memblock_phys_alloc(size, align);
> > + if (!start)
> > + return -ENOMEM;
> > +
> > + err = reserved_mem_add(start, size, name);
> > + if (err) {
> > + memblock_phys_free(start, size);
> > + return err;
> > + }
> > +
> > + return 0;
> > +}
> > +__setup("reserve_mem=", reserve_mem);
> > +
> > #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
> > static const char * const flagname[] = {
> > [ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",