2023-06-19 06:07:32

by Baoquan He

[permalink] [raw]
Subject: [RFC PATCH 0/4] kdump: add generic functions to simplify crashkernel crashkernel in architecture

In the current arm64, crashkernel=,high support has been finished after
several rounds of posting and careful reviewing. The code in arm64 which
parses crashkernel kernel parameters firstly, then reserve memory can be
a good example for other ARCH to refer to.

Whereas in x86_64, the code mixing crashkernel parameter parsing and
memory reserving is twisted, and looks messy. Refactoring the code to
make it more readable maintainable is necessary.

Here, try to abstract the crashkernel parameter parsing code into a
generic function parse_crashkernel_generic(), and the crashkernel memory
reserving code into a generic function reserve_crashkernel_generic().
Then, in ARCH which crashkernel=,high support is needed, a simple
arch_reserve_crashkernel() can be added to call above two generic
functions. This can remove the duplicated implmentation code in each
ARCH, like arm64, x86_64.

I only change the arm64 and x86_64 implementation to make use of the
generic functions to simplify code. Risc-v can be done very easily refer
to the steps in arm64 and x86_64. I leave this to Jiahao or other risc-v
developer since Jiahao have posted a patchset to add crashkernel=,high
support to risc-v.

This patchset is based on the latest linus's tree, and on top of below
patch:

arm64: kdump: simplify the reservation behaviour of crashkernel=,high
https://git.kernel.org/arm64/c/6c4dcaddbd36


Baoquan He (4):
kdump: rename parse_crashkernel() to parse_crashkernel_common()
kdump: add generic functions to parse crashkernel and do reservation
arm64: kdump: use generic interfaces to simplify crashkernel
reservation code
x86: kdump: use generic interfaces to simplify crashkernel reservation
code

arch/arm/kernel/setup.c | 4 +-
arch/arm64/Kconfig | 3 +
arch/arm64/include/asm/kexec.h | 8 ++
arch/arm64/mm/init.c | 141 ++----------------------
arch/ia64/kernel/setup.c | 4 +-
arch/loongarch/kernel/setup.c | 3 +-
arch/mips/cavium-octeon/setup.c | 2 +-
arch/mips/kernel/setup.c | 4 +-
arch/powerpc/kernel/fadump.c | 5 +-
arch/powerpc/kexec/core.c | 4 +-
arch/powerpc/mm/nohash/kaslr_booke.c | 4 +-
arch/riscv/mm/init.c | 5 +-
arch/s390/kernel/setup.c | 4 +-
arch/sh/kernel/machine_kexec.c | 5 +-
arch/x86/Kconfig | 3 +
arch/x86/include/asm/kexec.h | 32 ++++++
arch/x86/kernel/setup.c | 141 +++---------------------
include/linux/crash_core.h | 33 +++++-
kernel/crash_core.c | 158 +++++++++++++++++++++++++--
19 files changed, 274 insertions(+), 289 deletions(-)

--
2.34.1



2023-06-19 06:10:54

by Baoquan He

[permalink] [raw]
Subject: [RFC PATCH 3/4] arm64: kdump: use generic interfaces to simplify crashkernel reservation code

With the help of generic functions parse_crashkernel_generic() and
reserve_crashkernel_generic(), crashkernel reservation can be
simplified by steps:

1) Provide CRASH_ALIGN, CRASH_ADDR_LOW_MAX, CRASH_ADDR_HIGH_MAX and
DEFAULT_CRASH_KERNEL_LOW_SIZE in <asm/kexec.h>;

2) Add arch_reserve_crashkernel() to call parse_crashkernel_generic()
and reserve_crashkernel_generic();

3) Add ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION Kconfig in
arch/arm64/Kconfig.

The old reserve_crashkernel_low() and reserve_crashkernel() can be
removed.

Signed-off-by: Baoquan He <[email protected]>
---
arch/arm64/Kconfig | 3 +
arch/arm64/include/asm/kexec.h | 8 ++
arch/arm64/mm/init.c | 141 ++-------------------------------
3 files changed, 19 insertions(+), 133 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 343e1e1cae10..0a846b30b91f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1453,6 +1453,9 @@ config KEXEC_FILE
for kernel and initramfs as opposed to list of segments as
accepted by previous system call.

+config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+ def_bool CRASH_CORE
+
config KEXEC_SIG
bool "Verify kernel signature during kexec_file_load() syscall"
depends on KEXEC_FILE
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index 9ac9572a3bbe..6f044ab6917d 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -132,4 +132,12 @@ extern int load_other_segments(struct kimage *image,

#endif /* __ASSEMBLY__ */

+/* Current arm64 boot protocol requires 2MB alignment */
+#define CRASH_ALIGN SZ_2M
+
+#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit
+#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1)
+
+#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
+
#endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 2fc4ea0b5b7a..3480a0476052 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -64,149 +64,24 @@ EXPORT_SYMBOL(memstart_addr);
*/
phys_addr_t __ro_after_init arm64_dma_phys_limit;

-/* Current arm64 boot protocol requires 2MB alignment */
-#define CRASH_ALIGN SZ_2M
-
-#define CRASH_ADDR_LOW_MAX arm64_dma_phys_limit
-#define CRASH_ADDR_HIGH_MAX (PHYS_MASK + 1)
-#define CRASH_HIGH_SEARCH_BASE SZ_4G
-
-#define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20)
-
-static int __init reserve_crashkernel_low(unsigned long long low_size)
-{
- unsigned long long low_base;
-
- low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
- if (!low_base) {
- pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
- return -ENOMEM;
- }
-
- pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
- low_base, low_base + low_size, low_size >> 20);
-
- crashk_low_res.start = low_base;
- crashk_low_res.end = low_base + low_size - 1;
- insert_resource(&iomem_resource, &crashk_low_res);
-
- return 0;
-}
-
-/*
- * reserve_crashkernel() - reserves memory for crash kernel
- *
- * This function reserves memory area given in "crashkernel=" kernel command
- * line parameter. The memory reserved is used by dump capture kernel when
- * primary kernel is crashing.
- */
-static void __init reserve_crashkernel(void)
+static void __init arch_reserve_crashkernel(void)
{
- unsigned long long crash_low_size = 0, search_base = 0;
- unsigned long long crash_max = CRASH_ADDR_LOW_MAX;
+ unsigned long long low_size = 0;
unsigned long long crash_base, crash_size;
char *cmdline = boot_command_line;
- bool fixed_base = false;
bool high = false;
int ret;

if (!IS_ENABLED(CONFIG_KEXEC_CORE))
return;

- /* crashkernel=X[@offset] */
- ret = parse_crashkernel_common(cmdline, memblock_phys_mem_size(),
- &crash_size, &crash_base);
- if (ret == -ENOENT) {
- ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
- if (ret || !crash_size)
- return;
-
- /*
- * crashkernel=Y,low can be specified or not, but invalid value
- * is not allowed.
- */
- ret = parse_crashkernel_low(cmdline, 0, &crash_low_size, &crash_base);
- if (ret == -ENOENT)
- crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
- else if (ret)
- return;
-
- search_base = CRASH_HIGH_SEARCH_BASE;
- crash_max = CRASH_ADDR_HIGH_MAX;
- high = true;
- } else if (ret || !crash_size) {
- /* The specified value is invalid */
+ ret = parse_crashkernel_generic(cmdline, &crash_size, &crash_base,
+ &low_size, &high);
+ if (ret)
return;
- }
-
- crash_size = PAGE_ALIGN(crash_size);
-
- /* User specifies base address explicitly. */
- if (crash_base) {
- fixed_base = true;
- search_base = crash_base;
- crash_max = crash_base + crash_size;
- }
-
-retry:
- crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
- search_base, crash_max);
- if (!crash_base) {
- /*
- * For crashkernel=size[KMG]@offset[KMG], print out failure
- * message if can't reserve the specified region.
- */
- if (fixed_base) {
- pr_warn("crashkernel reservation failed - memory is in use.\n");
- return;
- }
-
- /*
- * For crashkernel=size[KMG], if the first attempt was for
- * low memory, fall back to high memory, the minimum required
- * low memory will be reserved later.
- */
- if (!high && crash_max == CRASH_ADDR_LOW_MAX) {
- crash_max = CRASH_ADDR_HIGH_MAX;
- search_base = CRASH_ADDR_LOW_MAX;
- crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
- goto retry;
- }
-
- /*
- * For crashkernel=size[KMG],high, if the first attempt was
- * for high memory, fall back to low memory.
- */
- if (high && crash_max == CRASH_ADDR_HIGH_MAX) {
- crash_max = CRASH_ADDR_LOW_MAX;
- search_base = 0;
- goto retry;
- }
- pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
- crash_size);
- return;
- }
-
- if ((crash_base >= CRASH_ADDR_LOW_MAX) && crash_low_size &&
- reserve_crashkernel_low(crash_low_size)) {
- memblock_phys_free(crash_base, crash_size);
- return;
- }
-
- pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
- crash_base, crash_base + crash_size, crash_size >> 20);
-
- /*
- * The crashkernel memory will be removed from the kernel linear
- * map. Inform kmemleak so that it won't try to access it.
- */
- kmemleak_ignore_phys(crash_base);
- if (crashk_low_res.end)
- kmemleak_ignore_phys(crashk_low_res.start);

- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
- insert_resource(&iomem_resource, &crashk_res);
+ reserve_crashkernel_generic(cmdline, crash_size, crash_base,
+ low_size, high);
}

/*
@@ -454,7 +329,7 @@ void __init bootmem_init(void)
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
*/
- reserve_crashkernel();
+ arch_reserve_crashkernel();

memblock_dump_all();
}
--
2.34.1


2023-06-19 06:18:34

by Baoquan He

[permalink] [raw]
Subject: [RFC PATCH 1/4] kdump: rename parse_crashkernel() to parse_crashkernel_common()

Now there are parse_crashkernel_high() and parse_crashkernel_low()
parsing crashkernel=,high|low. Function parse_crashkernel() could be
misunderstood as a generic interface parsing all possible crashkernel
kernel parameters.

Hence, rename parse_crashkernel() to parse_crashkernel_common() to
differentiate it from parse_crashkernel_high() and
parse_crashkernel_low(), to remove possible ambiguity.

Signed-off-by: Baoquan He <[email protected]>
---
arch/arm/kernel/setup.c | 4 ++--
arch/arm64/mm/init.c | 4 ++--
arch/ia64/kernel/setup.c | 4 ++--
arch/loongarch/kernel/setup.c | 3 ++-
arch/mips/cavium-octeon/setup.c | 2 +-
arch/mips/kernel/setup.c | 4 ++--
arch/powerpc/kernel/fadump.c | 5 +++--
arch/powerpc/kexec/core.c | 4 ++--
arch/powerpc/mm/nohash/kaslr_booke.c | 4 ++--
arch/riscv/mm/init.c | 5 +++--
arch/s390/kernel/setup.c | 4 ++--
arch/sh/kernel/machine_kexec.c | 5 +++--
arch/x86/kernel/setup.c | 3 ++-
include/linux/crash_core.h | 2 +-
kernel/crash_core.c | 14 +++++---------
15 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 75cd4699e7b3..7f3dc9ff32f7 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -1016,8 +1016,8 @@ static void __init reserve_crashkernel(void)
int ret;

total_mem = get_total_mem();
- ret = parse_crashkernel(boot_command_line, total_mem,
- &crash_size, &crash_base);
+ ret = parse_crashkernel_common(boot_command_line, total_mem,
+ &crash_size, &crash_base);
/* invalid value specified or crashkernel=0 */
if (ret || !crash_size)
return;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index c28c2c8483cc..2fc4ea0b5b7a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -114,8 +114,8 @@ static void __init reserve_crashkernel(void)
return;

/* crashkernel=X[@offset] */
- ret = parse_crashkernel(cmdline, memblock_phys_mem_size(),
- &crash_size, &crash_base);
+ ret = parse_crashkernel_common(cmdline, memblock_phys_mem_size(),
+ &crash_size, &crash_base);
if (ret == -ENOENT) {
ret = parse_crashkernel_high(cmdline, 0, &crash_size, &crash_base);
if (ret || !crash_size)
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c05728044272..91531fce0ba3 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -276,8 +276,8 @@ static void __init setup_crashkernel(unsigned long total, int *n)
unsigned long long base = 0, size = 0;
int ret;

- ret = parse_crashkernel(boot_command_line, total,
- &size, &base);
+ ret = parse_crashkernel_common(boot_command_line, total,
+ &size, &base);
if (ret == 0 && size > 0) {
if (!base) {
sort_regions(rsvd_region, *n);
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 4444b13418f0..17f9d3d58906 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -267,7 +267,8 @@ static void __init arch_parse_crashkernel(void)
unsigned long long crash_base, crash_size;

total_mem = memblock_phys_mem_size();
- ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base);
+ ret = parse_crashkernel_common(boot_command_line, total_mem,
+ &crash_size, &crash_base);
if (ret < 0 || crash_size <= 0)
return;

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index c5561016f577..bdcac8a4d85e 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -885,7 +885,7 @@ void __init prom_init(void)
strcat(arcs_cmdline, arg);
/*
* To do: switch parsing to new style, something like:
- * parse_crashkernel(arg, sysinfo->system_dram_size,
+ * parse_crashkernel_common(arg, sysinfo->system_dram_size,
* &crashk_size, &crashk_base);
*/
#endif
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index c0e65135481b..99f3ed8aeb35 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -457,8 +457,8 @@ static void __init mips_parse_crashkernel(void)
int ret;

total_mem = memblock_phys_mem_size();
- ret = parse_crashkernel(boot_command_line, total_mem,
- &crash_size, &crash_base);
+ ret = parse_crashkernel_common(boot_command_line, total_mem,
+ &crash_size, &crash_base);
if (ret != 0 || crash_size <= 0)
return;

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index ea0a073abd96..b4a038cbd1cf 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -312,8 +312,9 @@ static __init u64 fadump_calculate_reserve_size(void)
* option. If yes, then use that but ignore base as fadump reserves
* memory at a predefined offset.
*/
- ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
- &size, &base);
+ ret = parse_crashkernel_common(boot_command_line,
+ memblock_phys_mem_size(),
+ &size, &base);
if (ret == 0 && size > 0) {
unsigned long max_size;

diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index de64c7962991..853de54cc2db 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -108,8 +108,8 @@ void __init reserve_crashkernel(void)

total_mem_sz = memory_limit ? memory_limit : memblock_phys_mem_size();
/* use common parsing */
- ret = parse_crashkernel(boot_command_line, total_mem_sz,
- &crash_size, &crash_base);
+ ret = parse_crashkernel_common(boot_command_line, total_mem_sz,
+ &crash_size, &crash_base);
if (ret == 0 && crash_size > 0) {
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 2fb3edafe9ab..8c049659056f 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -177,8 +177,8 @@ static void __init get_crash_kernel(void *fdt, unsigned long size)
unsigned long long crash_size, crash_base;
int ret;

- ret = parse_crashkernel(boot_command_line, size, &crash_size,
- &crash_base);
+ ret = parse_crashkernel_common(boot_command_line, size,
+ &crash_size, &crash_base);
if (ret != 0 || crash_size == 0)
return;
if (crash_base == 0)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 4fa420faa780..6d4b27fc254b 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1326,8 +1326,9 @@ static void __init reserve_crashkernel(void)
return;
}

- ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
- &crash_size, &crash_base);
+ ret = parse_crashkernel_common(boot_command_line,
+ memblock_phys_mem_size(),
+ &crash_size, &crash_base);
if (ret || !crash_size)
return;

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index fe10da1a271e..9693f155936e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -625,8 +625,8 @@ static void __init reserve_crashkernel(void)
phys_addr_t low, high;
int rc;

- rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
- &crash_base);
+ rc = parse_crashkernel_common(boot_command_line, ident_map_size,
+ &crash_size, &crash_base);

crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index 223c14f44af7..9775f6fe245c 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -153,8 +153,9 @@ void __init reserve_crashkernel(void)
unsigned long long crash_size, crash_base;
int ret;

- ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
- &crash_size, &crash_base);
+ ret = parse_crashkernel_common(boot_command_line,
+ memblock_phys_mem_size(),
+ &crash_size, &crash_base);
if (ret == 0 && crash_size > 0) {
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 16babff771bd..b2b67b8c1c1e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -558,7 +558,8 @@ static void __init reserve_crashkernel(void)
total_mem = memblock_phys_mem_size();

/* crashkernel=XM */
- ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base);
+ ret = parse_crashkernel_common(boot_command_line, total_mem,
+ &crash_size, &crash_base);
if (ret != 0 || crash_size <= 0) {
/* crashkernel=X,high */
ret = parse_crashkernel_high(boot_command_line, total_mem,
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index de62a722431e..6ec10ed5c6a6 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -77,7 +77,7 @@ Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len);
void final_note(Elf_Word *buf);

-int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
+int __init parse_crashkernel_common(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 90ce1dfd591c..57738ca0ee7f 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -273,17 +273,13 @@ static int __init __parse_crashkernel(char *cmdline,
return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
}

-/*
- * That function is the entry point for command line parsing and should be
- * called from the arch-specific code.
- */
-int __init parse_crashkernel(char *cmdline,
- unsigned long long system_ram,
- unsigned long long *crash_size,
- unsigned long long *crash_base)
+int __init parse_crashkernel_common(char *cmdline,
+ unsigned long long system_ram,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base)
{
return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
- "crashkernel=", NULL);
+ "crashkernel=", NULL);
}

int __init parse_crashkernel_high(char *cmdline,
--
2.34.1


2023-06-19 06:28:59

by Baoquan He

[permalink] [raw]
Subject: Re: [RFC PATCH 0/4] kdump: add generic functions to simplify crashkernel crashkernel in architecture

On 06/19/23 at 01:59pm, Baoquan He wrote:
> In the current arm64, crashkernel=,high support has been finished after
> several rounds of posting and careful reviewing. The code in arm64 which
> parses crashkernel kernel parameters firstly, then reserve memory can be
> a good example for other ARCH to refer to.
>
> Whereas in x86_64, the code mixing crashkernel parameter parsing and
> memory reserving is twisted, and looks messy. Refactoring the code to
> make it more readable maintainable is necessary.
^ 'and' missed
>
> Here, try to abstract the crashkernel parameter parsing code into a
> generic function parse_crashkernel_generic(), and the crashkernel memory
> reserving code into a generic function reserve_crashkernel_generic().
> Then, in ARCH which crashkernel=,high support is needed, a simple
> arch_reserve_crashkernel() can be added to call above two generic
> functions. This can remove the duplicated implmentation code in each
> ARCH, like arm64, x86_64.
>
> I only change the arm64 and x86_64 implementation to make use of the
> generic functions to simplify code. Risc-v can be done very easily refer
> to the steps in arm64 and x86_64. I leave this to Jiahao or other risc-v
> developer since Jiahao have posted a patchset to add crashkernel=,high
> support to risc-v.
>
> This patchset is based on the latest linus's tree, and on top of below
> patch:
>
> arm64: kdump: simplify the reservation behaviour of crashkernel=,high
> https://git.kernel.org/arm64/c/6c4dcaddbd36
>
>
> Baoquan He (4):
> kdump: rename parse_crashkernel() to parse_crashkernel_common()
> kdump: add generic functions to parse crashkernel and do reservation
> arm64: kdump: use generic interfaces to simplify crashkernel
> reservation code
> x86: kdump: use generic interfaces to simplify crashkernel reservation
> code
>
> arch/arm/kernel/setup.c | 4 +-
> arch/arm64/Kconfig | 3 +
> arch/arm64/include/asm/kexec.h | 8 ++
> arch/arm64/mm/init.c | 141 ++----------------------
> arch/ia64/kernel/setup.c | 4 +-
> arch/loongarch/kernel/setup.c | 3 +-
> arch/mips/cavium-octeon/setup.c | 2 +-
> arch/mips/kernel/setup.c | 4 +-
> arch/powerpc/kernel/fadump.c | 5 +-
> arch/powerpc/kexec/core.c | 4 +-
> arch/powerpc/mm/nohash/kaslr_booke.c | 4 +-
> arch/riscv/mm/init.c | 5 +-
> arch/s390/kernel/setup.c | 4 +-
> arch/sh/kernel/machine_kexec.c | 5 +-
> arch/x86/Kconfig | 3 +
> arch/x86/include/asm/kexec.h | 32 ++++++
> arch/x86/kernel/setup.c | 141 +++---------------------
> include/linux/crash_core.h | 33 +++++-
> kernel/crash_core.c | 158 +++++++++++++++++++++++++--
> 19 files changed, 274 insertions(+), 289 deletions(-)
>
> --
> 2.34.1
>


2023-06-19 06:33:28

by Baoquan He

[permalink] [raw]
Subject: [RFC PATCH 2/4] kdump: add generic functions to parse crashkernel and do reservation

In architecture like x86_64, arm64 and riscv, they have vast virtual
address space and usually have huge physical memory RAM. Their
crashkernel reservation doesn't have to be limited under 4G RAM,
but can be extended to the whole physical memory via crashkernel=,high
support.

Now add function parse_crashkernel_generic() to parse all possible
crashkernel parameters, including crashkernel=xM[@offset],
crashkernel=,high|low. And add function reserve_crashkernel_generic()
to reserve crashkernel memory if users specify any case of above
kernel pamameters.

This is preparation to simplify code of crashkernel=,high support
in architecutures.

Signed-off-by: Baoquan He <[email protected]>
---
include/linux/crash_core.h | 31 ++++++++
kernel/crash_core.c | 144 +++++++++++++++++++++++++++++++++++++
2 files changed, 175 insertions(+)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 6ec10ed5c6a6..1b12868cad1b 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -84,4 +84,35 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);

+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+int __init parse_crashkernel_generic(char *cmdline,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base,
+ unsigned long long *low_size,
+ bool *high);
+
+void __init reserve_crashkernel_generic(char *cmdline,
+ unsigned long long crash_size,
+ unsigned long long crash_base,
+ unsigned long long crash_low_size,
+ bool high);
+#else
+
+static inline int __init parse_crashkernel_generic(char *cmdline,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base,
+ unsigned long long *low_size,
+ bool *high)
+{
+ return 0;
+}
+
+static inline void __init reserve_crashkernel_generic(char *cmdline,
+ unsigned long long crash_size,
+ unsigned long long crash_base,
+ unsigned long long crash_low_size,
+ bool high)
+{}
+#endif
+
#endif /* LINUX_CRASH_CORE_H */
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 57738ca0ee7f..b82dc8c970de 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -10,6 +10,9 @@
#include <linux/utsname.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
+#include <linux/memblock.h>
+#include <linux/kexec.h>
+#include <linux/kmemleak.h>

#include <asm/page.h>
#include <asm/sections.h>
@@ -310,6 +313,147 @@ static int __init parse_crashkernel_dummy(char *arg)
}
early_param("crashkernel", parse_crashkernel_dummy);

+
+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+int __init parse_crashkernel_generic(char *cmdline,
+ unsigned long long *crash_size,
+ unsigned long long *crash_base,
+ unsigned long long *low_size,
+ bool *high)
+{
+ int ret;
+
+ /* crashkernel=X[@offset] */
+ ret = parse_crashkernel_common(cmdline, memblock_phys_mem_size(),
+ crash_size, crash_base);
+ if (ret == -ENOENT) {
+ ret = parse_crashkernel_high(cmdline, 0, crash_size, crash_base);
+ if (ret || !*crash_size)
+ return -1;
+
+ /*
+ * crashkernel=Y,low can be specified or not, but invalid value
+ * is not allowed.
+ */
+ ret = parse_crashkernel_low(cmdline, 0, low_size, crash_base);
+ if (ret == -ENOENT)
+ *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+ else if (ret)
+ return -1;
+
+ *high = true;
+ } else if (ret || !*crash_size) {
+ /* The specified value is invalid */
+ return -1;
+ }
+
+ return 0;
+}
+
+static int __init reserve_crashkernel_low(unsigned long long low_size)
+{
+#ifdef CONFIG_64BIT
+ unsigned long long low_base;
+
+ low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
+ if (!low_base) {
+ pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
+ return -ENOMEM;
+ }
+
+ pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
+ low_base, low_base + low_size, low_size >> 20);
+
+ crashk_low_res.start = low_base;
+ crashk_low_res.end = low_base + low_size - 1;
+ insert_resource(&iomem_resource, &crashk_low_res);
+#endif
+ return 0;
+}
+
+void __init reserve_crashkernel_generic(char *cmdline,
+ unsigned long long crash_size,
+ unsigned long long crash_base,
+ unsigned long long crash_low_size,
+ bool high)
+{
+ unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0;
+ bool fixed_base = false;
+
+ /* User specifies base address explicitly. */
+ if (crash_base) {
+ fixed_base = true;
+ search_base = crash_base;
+ search_end = crash_base + crash_size;
+ }
+
+ if (high) {
+ search_base = CRASH_ADDR_LOW_MAX;
+ search_end = CRASH_ADDR_HIGH_MAX;
+ }
+
+retry:
+ crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
+ search_base, search_end);
+ if (!crash_base) {
+ /*
+ * For crashkernel=size[KMG]@offset[KMG], print out failure
+ * message if can't reserve the specified region.
+ */
+ if (fixed_base) {
+ pr_warn("crashkernel reservation failed - memory is in use.\n");
+ return;
+ }
+
+ /*
+ * For crashkernel=size[KMG], if the first attempt was for
+ * low memory, fall back to high memory, the minimum required
+ * low memory will be reserved later.
+ */
+ if (!high && search_end == CRASH_ADDR_LOW_MAX) {
+ search_end = CRASH_ADDR_HIGH_MAX;
+ search_base = CRASH_ADDR_LOW_MAX;
+ crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
+ goto retry;
+ }
+
+ /*
+ * For crashkernel=size[KMG],high, if the first attempt was
+ * for high memory, fall back to low memory.
+ */
+ if (high && search_end == CRASH_ADDR_HIGH_MAX) {
+ search_end = CRASH_ADDR_LOW_MAX;
+ search_base = 0;
+ goto retry;
+ }
+ pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
+ crash_size);
+ return;
+ }
+
+ if ((crash_base > CRASH_ADDR_LOW_MAX) &&
+ crash_low_size && reserve_crashkernel_low(crash_low_size)) {
+ memblock_phys_free(crash_base, crash_size);
+ return;
+ }
+
+ pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
+ crash_base, crash_base + crash_size, crash_size >> 20);
+
+ /*
+ * The crashkernel memory will be removed from the kernel linear
+ * map. Inform kmemleak so that it won't try to access it.
+ */
+ kmemleak_ignore_phys(crash_base);
+ if (crashk_low_res.end)
+ kmemleak_ignore_phys(crashk_low_res.start);
+
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+ insert_resource(&iomem_resource, &crashk_res);
+}
+#endif
+
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
void *data, size_t data_len)
{
--
2.34.1


2023-06-19 06:39:25

by Baoquan He

[permalink] [raw]
Subject: [RFC PATCH 4/4] x86: kdump: use generic interfaces to simplify crashkernel reservation code

With the help of generic functions parse_crashkernel_generic() and
reserve_crashkernel_generic(), crashkernel reservation can be
simplified by steps:

1) Provide CRASH_ALIGN, CRASH_ADDR_LOW_MAX, CRASH_ADDR_HIGH_MAX and
DEFAULT_CRASH_KERNEL_LOW_SIZE in <asm/kexec.h>;

2) Add arch_reserve_crashkernel() to call parse_crashkernel_generic()
and reserve_crashkernel_generic(), and do the ARCH specific work if
needed.

3) Add ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION Kconfig in
arch/x86/Kconfig.

When adding DEFAULT_CRASH_KERNEL_LOW_SIZE, add crash_low_size_default()
to calculate crashkernel low memory because x86_64 has special
requirement.

The old reserve_crashkernel_low() and reserve_crashkernel() can be
removed.

Signed-off-by: Baoquan He <[email protected]>
---
arch/x86/Kconfig | 3 +
arch/x86/include/asm/kexec.h | 32 ++++++++
arch/x86/kernel/setup.c | 142 ++++-------------------------------
3 files changed, 50 insertions(+), 127 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 53bab123a8ee..fb91a5064c8d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2076,6 +2076,9 @@ config KEXEC_FILE
config ARCH_HAS_KEXEC_PURGATORY
def_bool KEXEC_FILE

+config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+ def_bool CRASH_CORE
+
config KEXEC_SIG
bool "Verify kernel signature during kexec_file_load() syscall"
depends on KEXEC_FILE
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5b77bbc28f96..84a7d1f6f153 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -66,6 +66,37 @@ struct kimage;
# define KEXEC_ARCH KEXEC_ARCH_X86_64
#endif

+/*
+ * --------- Crashkernel reservation ------------------------------
+ */
+
+/* 16M alignment for crash kernel regions */
+#define CRASH_ALIGN SZ_16M
+
+/*
+ * Keep the crash kernel below this limit.
+ *
+ * Earlier 32-bits kernels would limit the kernel to the low 512 MB range
+ * due to mapping restrictions.
+ *
+ * 64-bit kdump kernels need to be restricted to be under 64 TB, which is
+ * the upper limit of system RAM in 4-level paging mode. Since the kdump
+ * jump could be from 5-level paging to 4-level paging, the jump will fail if
+ * the kernel is put above 64 TB, and during the 1st kernel bootup there's
+ * no good way to detect the paging mode of the target kernel which will be
+ * loaded for dumping.
+ */
+
+#ifdef CONFIG_X86_32
+# define CRASH_ADDR_LOW_MAX SZ_512M
+# define CRASH_ADDR_HIGH_MAX SZ_512M
+#else
+# define CRASH_ADDR_LOW_MAX SZ_4G
+# define CRASH_ADDR_HIGH_MAX SZ_64T
+#endif
+
+# define DEFAULT_CRASH_KERNEL_LOW_SIZE crash_low_size_default()
+
/*
* This function is responsible for capturing register states if coming
* via panic otherwise just fix up the ss and sp if coming via kernel
@@ -209,6 +240,7 @@ typedef void crash_vmclear_fn(void);
extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
extern void kdump_nmi_shootdown_cpus(void);

+extern unsigned long crash_low_size_default(void);
#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b2b67b8c1c1e..11ae1f0c9487 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -475,151 +475,39 @@ static void __init memblock_x86_reserve_range_setup_data(void)
* --------- Crashkernel reservation ------------------------------
*/

-/* 16M alignment for crash kernel regions */
-#define CRASH_ALIGN SZ_16M
-
-/*
- * Keep the crash kernel below this limit.
- *
- * Earlier 32-bits kernels would limit the kernel to the low 512 MB range
- * due to mapping restrictions.
- *
- * 64-bit kdump kernels need to be restricted to be under 64 TB, which is
- * the upper limit of system RAM in 4-level paging mode. Since the kdump
- * jump could be from 5-level paging to 4-level paging, the jump will fail if
- * the kernel is put above 64 TB, and during the 1st kernel bootup there's
- * no good way to detect the paging mode of the target kernel which will be
- * loaded for dumping.
- */
-#ifdef CONFIG_X86_32
-# define CRASH_ADDR_LOW_MAX SZ_512M
-# define CRASH_ADDR_HIGH_MAX SZ_512M
-#else
-# define CRASH_ADDR_LOW_MAX SZ_4G
-# define CRASH_ADDR_HIGH_MAX SZ_64T
-#endif
-
-static int __init reserve_crashkernel_low(void)
+unsigned long crash_low_size_default(void)
{
#ifdef CONFIG_X86_64
- unsigned long long base, low_base = 0, low_size = 0;
- unsigned long low_mem_limit;
- int ret;
-
- low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX);
-
- /* crashkernel=Y,low */
- ret = parse_crashkernel_low(boot_command_line, low_mem_limit, &low_size, &base);
- if (ret) {
- /*
- * two parts from kernel/dma/swiotlb.c:
- * -swiotlb size: user-specified with swiotlb= or default.
- *
- * -swiotlb overflow buffer: now hardcoded to 32k. We round it
- * to 8M for other buffers that may need to stay low too. Also
- * make sure we allocate enough extra low memory so that we
- * don't run out of DMA buffers for 32-bit devices.
- */
- low_size = max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);
- } else {
- /* passed with crashkernel=0,low ? */
- if (!low_size)
- return 0;
- }
-
- low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
- if (!low_base) {
- pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
- (unsigned long)(low_size >> 20));
- return -ENOMEM;
- }
-
- pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n",
- (unsigned long)(low_size >> 20),
- (unsigned long)(low_base >> 20),
- (unsigned long)(low_mem_limit >> 20));
-
- crashk_low_res.start = low_base;
- crashk_low_res.end = low_base + low_size - 1;
- insert_resource(&iomem_resource, &crashk_low_res);
-#endif
+ return max(swiotlb_size_or_default() + (8UL << 20), 256UL << 20);
+#else
return 0;
+#endif
}

-static void __init reserve_crashkernel(void)
+static void __init arch_reserve_crashkernel(void)
{
- unsigned long long crash_size, crash_base, total_mem;
+ unsigned long long crash_base, crash_size, low_size = 0;
+ char *cmdline = boot_command_line;
bool high = false;
int ret;

if (!IS_ENABLED(CONFIG_KEXEC_CORE))
return;

- total_mem = memblock_phys_mem_size();
-
- /* crashkernel=XM */
- ret = parse_crashkernel_common(boot_command_line, total_mem,
- &crash_size, &crash_base);
- if (ret != 0 || crash_size <= 0) {
- /* crashkernel=X,high */
- ret = parse_crashkernel_high(boot_command_line, total_mem,
- &crash_size, &crash_base);
- if (ret != 0 || crash_size <= 0)
- return;
- high = true;
- }
+ ret = parse_crashkernel_generic(cmdline, &crash_size, &crash_base,
+ &low_size, &high);
+ if (ret)
+ return;

if (xen_pv_domain()) {
pr_info("Ignoring crashkernel for a Xen PV domain\n");
return;
}

- /* 0 means: find the address automatically */
- if (!crash_base) {
- /*
- * Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
- * crashkernel=x,high reserves memory over 4G, also allocates
- * 256M extra low memory for DMA buffers and swiotlb.
- * But the extra memory is not required for all machines.
- * So try low memory first and fall back to high memory
- * unless "crashkernel=size[KMG],high" is specified.
- */
- if (!high)
- crash_base = memblock_phys_alloc_range(crash_size,
- CRASH_ALIGN, CRASH_ALIGN,
- CRASH_ADDR_LOW_MAX);
- if (!crash_base)
- crash_base = memblock_phys_alloc_range(crash_size,
- CRASH_ALIGN, CRASH_ALIGN,
- CRASH_ADDR_HIGH_MAX);
- if (!crash_base) {
- pr_info("crashkernel reservation failed - No suitable area found.\n");
- return;
- }
- } else {
- unsigned long long start;
-
- start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base,
- crash_base + crash_size);
- if (start != crash_base) {
- pr_info("crashkernel reservation failed - memory is in use.\n");
- return;
- }
- }
-
- if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
- memblock_phys_free(crash_base, crash_size);
- return;
- }
-
- pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n",
- (unsigned long)(crash_size >> 20),
- (unsigned long)(crash_base >> 20),
- (unsigned long)(total_mem >> 20));
+ reserve_crashkernel_generic(cmdline, crash_size, crash_base,
+ low_size, high);

- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
- insert_resource(&iomem_resource, &crashk_res);
+ return;
}

static struct resource standard_io_resources[] = {
@@ -1229,7 +1117,7 @@ void __init setup_arch(char **cmdline_p)
* Reserve memory for crash kernel after SRAT is parsed so that it
* won't consume hotpluggable memory.
*/
- reserve_crashkernel();
+ arch_reserve_crashkernel();

memblock_find_dma_reserve();

--
2.34.1


2023-07-08 02:18:09

by Dave Young

[permalink] [raw]
Subject: Re: [RFC PATCH 0/4] kdump: add generic functions to simplify crashkernel crashkernel in architecture

On 06/19/23 at 01:59pm, Baoquan He wrote:
> In the current arm64, crashkernel=,high support has been finished after
> several rounds of posting and careful reviewing. The code in arm64 which
> parses crashkernel kernel parameters firstly, then reserve memory can be
> a good example for other ARCH to refer to.
>
> Whereas in x86_64, the code mixing crashkernel parameter parsing and
> memory reserving is twisted, and looks messy. Refactoring the code to
> make it more readable maintainable is necessary.
>
> Here, try to abstract the crashkernel parameter parsing code into a
> generic function parse_crashkernel_generic(), and the crashkernel memory
> reserving code into a generic function reserve_crashkernel_generic().
> Then, in ARCH which crashkernel=,high support is needed, a simple
> arch_reserve_crashkernel() can be added to call above two generic
> functions. This can remove the duplicated implmentation code in each
> ARCH, like arm64, x86_64.

Hi Baoquan, the parse_crashkernel_common and parse_crashkernel_generic
are confusion to me. Thanks for the effort though.

I'm not sure if it will be easy or not, but ideally I think the parse
function can be arch independent, something like a general funtion
parse_crashkernel() which can return the whole necessary infomation of
crashkenrel for arch code to use, for example return like
below pseudo stucture(just a concept, may need to think more):

structure crashkernel_range {
size,
range,
struct list_head list;
}

structure crashkernel{
structure crashkernel_range *range_list;
union {
offset,
low_high
}
}

So the arch code can just get the data of crashkernel and then check
about the details, if it does not support low and high reservation then
it can just ignore the option.

Thoughts?

>
> I only change the arm64 and x86_64 implementation to make use of the
> generic functions to simplify code. Risc-v can be done very easily refer
> to the steps in arm64 and x86_64. I leave this to Jiahao or other risc-v
> developer since Jiahao have posted a patchset to add crashkernel=,high
> support to risc-v.
>
> This patchset is based on the latest linus's tree, and on top of below
> patch:
>
> arm64: kdump: simplify the reservation behaviour of crashkernel=,high
> https://git.kernel.org/arm64/c/6c4dcaddbd36
>
>
> Baoquan He (4):
> kdump: rename parse_crashkernel() to parse_crashkernel_common()
> kdump: add generic functions to parse crashkernel and do reservation
> arm64: kdump: use generic interfaces to simplify crashkernel
> reservation code
> x86: kdump: use generic interfaces to simplify crashkernel reservation
> code
>
> arch/arm/kernel/setup.c | 4 +-
> arch/arm64/Kconfig | 3 +
> arch/arm64/include/asm/kexec.h | 8 ++
> arch/arm64/mm/init.c | 141 ++----------------------
> arch/ia64/kernel/setup.c | 4 +-
> arch/loongarch/kernel/setup.c | 3 +-
> arch/mips/cavium-octeon/setup.c | 2 +-
> arch/mips/kernel/setup.c | 4 +-
> arch/powerpc/kernel/fadump.c | 5 +-
> arch/powerpc/kexec/core.c | 4 +-
> arch/powerpc/mm/nohash/kaslr_booke.c | 4 +-
> arch/riscv/mm/init.c | 5 +-
> arch/s390/kernel/setup.c | 4 +-
> arch/sh/kernel/machine_kexec.c | 5 +-
> arch/x86/Kconfig | 3 +
> arch/x86/include/asm/kexec.h | 32 ++++++
> arch/x86/kernel/setup.c | 141 +++---------------------
> include/linux/crash_core.h | 33 +++++-
> kernel/crash_core.c | 158 +++++++++++++++++++++++++--
> 19 files changed, 274 insertions(+), 289 deletions(-)
>
> --
> 2.34.1
>
>

Thanks
Dave


2023-07-10 17:40:57

by Philipp Rudo

[permalink] [raw]
Subject: Re: [RFC PATCH 0/4] kdump: add generic functions to simplify crashkernel crashkernel in architecture

Hi Baoquan,
Hi Dave,

On Sat, 8 Jul 2023 10:15:53 +0800
Dave Young <[email protected]> wrote:

> On 06/19/23 at 01:59pm, Baoquan He wrote:
> > In the current arm64, crashkernel=,high support has been finished after
> > several rounds of posting and careful reviewing. The code in arm64 which
> > parses crashkernel kernel parameters firstly, then reserve memory can be
> > a good example for other ARCH to refer to.
> >
> > Whereas in x86_64, the code mixing crashkernel parameter parsing and
> > memory reserving is twisted, and looks messy. Refactoring the code to
> > make it more readable maintainable is necessary.
> >
> > Here, try to abstract the crashkernel parameter parsing code into a
> > generic function parse_crashkernel_generic(), and the crashkernel memory
> > reserving code into a generic function reserve_crashkernel_generic().
> > Then, in ARCH which crashkernel=,high support is needed, a simple
> > arch_reserve_crashkernel() can be added to call above two generic
> > functions. This can remove the duplicated implmentation code in each
> > ARCH, like arm64, x86_64.
>
> Hi Baoquan, the parse_crashkernel_common and parse_crashkernel_generic
> are confusion to me. Thanks for the effort though.

I agree, having both parse_crashkernel_common and
parse_crashkernel_generic is pretty confusing.

> I'm not sure if it will be easy or not, but ideally I think the parse
> function can be arch independent, something like a general funtion
> parse_crashkernel() which can return the whole necessary infomation of
> crashkenrel for arch code to use, for example return like
> below pseudo stucture(just a concept, may need to think more):
>
> structure crashkernel_range {
> size,
> range,
> struct list_head list;
> }
>
> structure crashkernel{
> structure crashkernel_range *range_list;
> union {
> offset,
> low_high
> }
> }
>
> So the arch code can just get the data of crashkernel and then check
> about the details, if it does not support low and high reservation then
> it can just ignore the option.
>
> Thoughts?

Sounds reasonable. The only thing I would make different is for the
parser to take the systems ram into account and simply return the size
that needs to be allocated in case multiple ranges are given.

I've played around a little on how this might look like (probably
wasting way too much time on it) and came up with the patch below. With
that patch parse_crashkernel_{common,high,low} and friends could be
removed once all architectures are ported to the new parse_crashkernel
function.

Please note that I never tested the patch. So it probably doesn't even
compile. Nevertheless I hope it helps to get an idea on what I think
should work :-)

Thanks
Philipp

----

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index fb904cc57ab1..fd5d01872c53 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -66,22 +66,12 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit;

static void __init arch_reserve_crashkernel(void)
{
- unsigned long long low_size = 0;
- unsigned long long crash_base, crash_size;
char *cmdline = boot_command_line;
- bool high = false;
- int ret;

if (!IS_ENABLED(CONFIG_KEXEC_CORE))
return;

- ret = parse_crashkernel_generic(cmdline, &crash_size, &crash_base,
- &low_size, &high);
- if (ret)
- return;
-
- reserve_crashkernel_generic(cmdline, crash_size, crash_base,
- low_size, high);
+ reserve_crashkernel_generic(cmdline);
}

/*
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b26221022283..4a78bf8ad494 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -486,28 +486,17 @@ unsigned long crash_low_size_default(void)

static void __init arch_reserve_crashkernel(void)
{
- unsigned long long crash_base, crash_size, low_size = 0;
char *cmdline = boot_command_line;
- bool high = false;
- int ret;

if (!IS_ENABLED(CONFIG_KEXEC_CORE))
return;

- ret = parse_crashkernel_generic(cmdline, &crash_size, &crash_base,
- &low_size, &high);
- if (ret)
- return;
-
if (xen_pv_domain()) {
pr_info("Ignoring crashkernel for a Xen PV domain\n");
return;
}

- reserve_crashkernel_generic(cmdline, crash_size, crash_base,
- low_size, high);
-
- return;
+ reserve_crashkernel_generic(cmdline);
}

static struct resource standard_io_resources[] = {
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 1b12868cad1b..a1ebd6c47467 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -84,35 +84,25 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);

-#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
-int __init parse_crashkernel_generic(char *cmdline,
- unsigned long long *crash_size,
- unsigned long long *crash_base,
- unsigned long long *low_size,
- bool *high);
-
-void __init reserve_crashkernel_generic(char *cmdline,
- unsigned long long crash_size,
- unsigned long long crash_base,
- unsigned long long crash_low_size,
- bool high);
-#else
-
-static inline int __init parse_crashkernel_generic(char *cmdline,
- unsigned long long *crash_size,
- unsigned long long *crash_base,
- unsigned long long *low_size,
- bool *high)
-{
- return 0;
+enum crashkernel_type {
+ CRASHKERNEL_NORMAL,
+ CRASHKERNEL_FIXED_OFFSET,
+ CRASHKERNEL_HIGH,
+ CRASHKERNEL_LOW
}

-static inline void __init reserve_crashkernel_generic(char *cmdline,
- unsigned long long crash_size,
- unsigned long long crash_base,
- unsigned long long crash_low_size,
- bool high)
-{}
+struct crashkernl {
+ enum crashkernel_type type;
+ unsigned long long size;
+ unsigned long long offet;
+};
+
+int __init parse_crashkernel(char *cmdline, struct crashkernel *ck);
+
+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+void __init reserve_crashkernel_generic(char *cmdline);
+#else
+void __init reserve_crashkernel_generic(char *cmdline) {}
#endif

#endif /* LINUX_CRASH_CORE_H */
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b82dc8c970de..c04a8675446b 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -203,8 +203,7 @@ static int __init parse_crashkernel_suffix(char *cmdline,
}

static __init char *get_last_crashkernel(char *cmdline,
- const char *name,
- const char *suffix)
+ const char *name)
{
char *p = cmdline, *ck_cmdline = NULL;

@@ -217,23 +216,6 @@ static __init char *get_last_crashkernel(char *cmdline,
if (!end_p)
end_p = p + strlen(p);

- if (!suffix) {
- int i;
-
- /* skip the one with any known suffix */
- for (i = 0; suffix_tbl[i]; i++) {
- q = end_p - strlen(suffix_tbl[i]);
- if (!strncmp(q, suffix_tbl[i],
- strlen(suffix_tbl[i])))
- goto next;
- }
- ck_cmdline = p;
- } else {
- q = end_p - strlen(suffix);
- if (!strncmp(q, suffix, strlen(suffix)))
- ck_cmdline = p;
- }
-next:
p = strstr(p+1, name);
}

@@ -314,42 +296,144 @@ static int __init parse_crashkernel_dummy(char *arg)
early_param("crashkernel", parse_crashkernel_dummy);


-#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
-int __init parse_crashkernel_generic(char *cmdline,
- unsigned long long *crash_size,
- unsigned long long *crash_base,
- unsigned long long *low_size,
- bool *high)
+/*
+ * This function parses command lines in the format
+ *
+ * crashkernel=[start1-end1:]size1[,...][@offset|,high|,low]
+ *
+ * The function returns 0 on success and -EINVAL on failure.
+ */
+static int __init _parse_crashkernel(char *cmdline, struct crashkernel *ck)
{
- int ret;
+ unsigned long long start, end, size, offset;
+ unsigned long long system_ram;
+ char *next, *cur = cmdline;

- /* crashkernel=X[@offset] */
- ret = parse_crashkernel_common(cmdline, memblock_phys_mem_size(),
- crash_size, crash_base);
- if (ret == -ENOENT) {
- ret = parse_crashkernel_high(cmdline, 0, crash_size, crash_base);
- if (ret || !*crash_size)
- return -1;
-
- /*
- * crashkernel=Y,low can be specified or not, but invalid value
- * is not allowed.
- */
- ret = parse_crashkernel_low(cmdline, 0, low_size, crash_base);
- if (ret == -ENOENT)
- *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
- else if (ret)
- return -1;
-
- *high = true;
- } else if (ret || !*crash_size) {
- /* The specified value is invalid */
- return -1;
+ /*
+ * Firmware sometimes reserves some memory regions for its own use,
+ * so the system memory size is less than the actual physical memory
+ * size. Work around this by rounding up the total size to 128M,
+ * which is enough for most test cases.
+ */
+ system_ram = memblock_phys_mem_size()
+ system_ram = roundup(system_mem, SZ_128M);
+
+ start = 0;
+ end = ULLONG_MAX;
+ size = memparse(cur, &next);
+ if (cur == next) {
+ pr_warn("crashkernel: Memory value expected\n");
+ return -EINVAL;
+ }
+ cur = next;
+ ck->type=CRASHKERNEL_NORMAL;
+
+ /* case crashkerne=size[@offset|,high|,low] */
+ if (!strchr(cmdline, '-')) {
+ ck->size = size;
+ }
+
+ while (*cur != ' ' && *cur != '\0') {
+ switch (*cur) {
+ case '@':
+ offset = memparse(++cur, &next);
+ if (*next != ' ' && *next != '\0') {
+ pr_warn("crashkernel: Offset must be at the end\n");
+ return -EINVAL;
+ }
+ /* allow corner cases with @0 */
+ ck->type=CRASHKERNEL_FIXED_OFFSET;
+ ck->offset = offset;
+ break;
+
+ case '-':
+ start = size;
+ end = memparse(++cur, &next);
+ /* allow <start>-:<size> */
+ if (cur == next) {
+ end = system_ram;
+ next++;
+ }
+
+ if (*next != ':') {
+ pr_warn("crashkernel: ':' expected\n");
+ return -EINVAL;
+ }
+
+ cur = next + 1;
+ size = memparse(cur, &next);
+ if (cur == next) {
+ pr_warn("crashkernel: No size provided\n");
+ return -EINVAL;
+ }
+
+ if (end <= start) {
+ pr_warn("crashkernel: end <= start\n");
+ return -EINVAL;
+ }
+
+ if (start <= system_ram && end > system_ram)
+ ck->size = size;
+
+
+ cur = next + 1;
+ break;
+
+ case ',':
+ cur++;
+
+ /* check for ,high, ,low */
+ if (strncmp(cur, "high", strlen("high"))) {
+ ck->type=CRASHKERNEL_HIGH;
+ cur+=strlen("high");
+ if (*cur != ' ' || *cur != '\0') {
+ pr_warn("crashkernel: ,high must be at the end\n");
+ return -EINVAL;
+ }
+ break;
+ } else if (strncmp(cur, "low". strlen("low"))) {
+ ck->type=CRASHKERNEL_LOW;
+ cur+=strlen("low");
+ if (*cur != ' ' || *cur != '\0') {
+ pr_warn("crashkernel: ,high must be at the end\n");
+ return -EINVAL;
+ }
+ break;
+ }
+
+ /* start with next entry */
+ size = memparse(cur, &next);
+ if (cur == next) {
+ pr_warn("crashkernel: Memory value expected\n");
+ return -EINVAL;
+ }
+ cur = next;
+ break;
+
+ default:
+ pr_warn("crashkernel: Invalid character '%c' provided\n", *cur);
+ return -EINVAL;
+ }
}

return 0;
}

+#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+int __init parse_crashkernel(char *cmdline, struct crashkernel *ck)
+{
+ const char *name="crashkernel=";
+ char *ck_cmdline;
+
+ BUG_ON(!ck);
+
+ ck_cmdline = get_last_crashkernel(cmdline, name);
+ if (!ck_cmdline)
+ return -ENOENT;
+ ck_cmdline += strlen(name);
+ return _parse_crashkernel(ck_cmdline, ck);
+}
+
static int __init reserve_crashkernel_low(unsigned long long low_size)
{
#ifdef CONFIG_64BIT
@@ -371,70 +455,57 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
return 0;
}

-void __init reserve_crashkernel_generic(char *cmdline,
- unsigned long long crash_size,
- unsigned long long crash_base,
- unsigned long long crash_low_size,
- bool high)
+void __init reserve_crashkernel_generic(char *cmdline)
{
- unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0;
- bool fixed_base = false;
-
- /* User specifies base address explicitly. */
- if (crash_base) {
- fixed_base = true;
- search_base = crash_base;
- search_end = crash_base + crash_size;
- }
+ struct ck = { 0 };

- if (high) {
- search_base = CRASH_ADDR_LOW_MAX;
- search_end = CRASH_ADDR_HIGH_MAX;
- }
+ parse_crashkernel(cmdline, &ck);

-retry:
- crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
- search_base, search_end);
- if (!crash_base) {
- /*
- * For crashkernel=size[KMG]@offset[KMG], print out failure
- * message if can't reserve the specified region.
- */
- if (fixed_base) {
- pr_warn("crashkernel reservation failed - memory is in use.\n");
- return;
- }
+ if (!ck.size)
+ return;

- /*
- * For crashkernel=size[KMG], if the first attempt was for
- * low memory, fall back to high memory, the minimum required
- * low memory will be reserved later.
- */
- if (!high && search_end == CRASH_ADDR_LOW_MAX) {
- search_end = CRASH_ADDR_HIGH_MAX;
- search_base = CRASH_ADDR_LOW_MAX;
- crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
- goto retry;
+ switch (ck.type) {
+ CRASHKERNEL_FIXED_OFFSET:
+ crash_base = memblock_phys_alloc_range(ck.size, CRASH_ALIGN,
+ ck.offset,
+ ck.offset + ck.size);
+ break;
+
+ CRASHKERNEL_NORMAL:
+ if (DEFAULT_CRASH_KERNEL_LOW_SIZE) {
+ /* Simply continue in case we fail to allocate the low
+ * memory */
+ if (!reserve_crashkernel_low(DEFAULT_CRASH_KERNEL_LOW_SIZE))
+ ck.size -= DEFAULT_CRASH_KERNEL_LOW_SIZE;
}

- /*
- * For crashkernel=size[KMG],high, if the first attempt was
- * for high memory, fall back to low memory.
- */
- if (high && search_end == CRASH_ADDR_HIGH_MAX) {
- search_end = CRASH_ADDR_LOW_MAX;
- search_base = 0;
- goto retry;
- }
- pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
- crash_size);
+ fallthrough;
+ CRASHKERNEL_HIGH:
+ crash_base = memblock_phys_alloc_range(ck.size, CRASH_ALIGN,
+ CRASH_ADDR_LOW_MAX,
+ CRASH_ADDR_HIGH_MAX);
+ if (crash_base)
+ break;
+
+ fallthrough;
+ CRASHKERNEL_LOW:
+ crash_base = memblock_phys_alloc_range(ck.size, CRASH_ALIGN, 0,
+ CRASH_ADDR_LOW_MAX);
+ break;
+
+ default:
+ pr_warn("Invalid crashkernel type %i\n", ck.type);
return;
}

- if ((crash_base > CRASH_ADDR_LOW_MAX) &&
- crash_low_size && reserve_crashkernel_low(crash_low_size)) {
- memblock_phys_free(crash_base, crash_size);
- return;
+ if (!crash_base) {
+ pr_warn("could not allocate crashkernel (size:0x%llx)\n",
+ ck.size);
+ if (crashk_low_res.end) {
+ memblock_phys_free(crashk_low_res.start,
+ crashk_low_res.end - crashk_low_res.start + 1);
+ }
+ return
}

pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
@@ -449,7 +520,7 @@ void __init reserve_crashkernel_generic(char *cmdline,
kmemleak_ignore_phys(crashk_low_res.start);

crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
+ crashk_res.end = crash_base + ck.size - 1;
insert_resource(&iomem_resource, &crashk_res);
}
#endif