2022-09-09 21:59:27

by Eric DeVolder

[permalink] [raw]
Subject: [PATCH v12 3/7] crash: add generic infrastructure for crash hotplug support

CPU and memory change notifications are received in order to
regenerate the elfcorehdr.

To support cpu hotplug, a callback is registered to capture the
CPUHP_AP_ONLINE_DYN online and offline events via
cpuhp_setup_state_nocalls().

To support memory hotplug, a notifier is registered to capture the
MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().

The cpu callback and memory notifiers call handle_hotplug_event()
which performs needed tasks and then dispatches the event to the
architecture specific arch_crash_handle_hotplug_event(). During the
process, the kexec_mutex is held.

Signed-off-by: Eric DeVolder <[email protected]>
Acked-by: Baoquan He <[email protected]>
---
include/linux/crash_core.h | 8 +++
include/linux/kexec.h | 26 +++++++
kernel/crash_core.c | 134 +++++++++++++++++++++++++++++++++++++
3 files changed, 168 insertions(+)

diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index de62a722431e..a270f8660538 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);

+#define KEXEC_CRASH_HP_REMOVE_CPU 0
+#define KEXEC_CRASH_HP_ADD_CPU 1
+#define KEXEC_CRASH_HP_REMOVE_MEMORY 2
+#define KEXEC_CRASH_HP_ADD_MEMORY 3
+#define KEXEC_CRASH_HP_INVALID_CPU -1U
+
+struct kimage;
+
#endif /* LINUX_CRASH_CORE_H */
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 4eefa631e0ae..9597b41136ec 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -374,6 +374,13 @@ struct kimage {
struct purgatory_info purgatory_info;
#endif

+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
+ bool hotplug_event;
+ unsigned int offlinecpu;
+ bool elfcorehdr_index_valid;
+ int elfcorehdr_index;
+#endif
+
#ifdef CONFIG_IMA_KEXEC
/* Virtual address of IMA measurement buffer for kexec syscall */
void *ima_buffer;
@@ -503,6 +510,25 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
#endif

+#ifndef arch_map_crash_pages
+static inline void *arch_map_crash_pages(unsigned long paddr,
+ unsigned long size)
+{
+ return NULL;
+}
+#endif
+
+#ifndef arch_unmap_crash_pages
+static inline void arch_unmap_crash_pages(void **ptr) { }
+#endif
+
+#ifndef arch_crash_handle_hotplug_event
+static inline void arch_crash_handle_hotplug_event(struct kimage *image,
+ unsigned int hp_action)
+{
+}
+#endif
+
#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
struct task_struct;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 8c648fd5897a..4b15d91f0b21 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -11,6 +11,8 @@
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/kexec.h>
+#include <linux/memory.h>
+#include <linux/cpuhotplug.h>

#include <asm/page.h>
#include <asm/sections.h>
@@ -18,6 +20,7 @@
#include <crypto/sha1.h>

#include "kallsyms_internal.h"
+#include "kexec_internal.h"

/* vmcoreinfo stuff */
unsigned char *vmcoreinfo_data;
@@ -612,3 +615,134 @@ static int __init crash_save_vmcoreinfo_init(void)
}

subsys_initcall(crash_save_vmcoreinfo_init);
+
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
+/*
+ * To accurately reflect hot un/plug changes, the elfcorehdr (which
+ * is passed to the crash kernel via the elfcorehdr= parameter)
+ * must be updated with the new list of CPUs and memories.
+ *
+ * In order to make changes to elfcorehdr, two conditions are needed:
+ * First, the segment containing the elfcorehdr must be large enough
+ * to permit a growing number of resources. The elfcorehdr memory is
+ * typically based on CONFIG_NR_CPUS and CONFIG_CRASH_MAX_MEMORY_RANGES.
+ * Second, purgatory must explicitly exclude the elfcorehdr from the
+ * list of segments it checks (since the elfcorehdr changes and thus
+ * would require an update to purgatory itself to update the digest).
+ */
+static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
+{
+ /* Obtain lock while changing crash information */
+ mutex_lock(&kexec_mutex);
+
+ /* Check kdump is loaded */
+ if (kexec_crash_image) {
+ struct kimage *image = kexec_crash_image;
+
+ if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
+ hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
+ pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action, cpu);
+ else
+ pr_debug("crash hp: hp_action %u\n", hp_action);
+
+ /*
+ * When the struct kimage is allocated, it is wiped to zero, so
+ * the elfcorehdr_index_valid defaults to false. Find the
+ * segment containing the elfcorehdr, if not already found.
+ * This works for both the kexec_load and kexec_file_load paths.
+ */
+ if (!image->elfcorehdr_index_valid) {
+ unsigned char *ptr;
+ unsigned long mem, memsz;
+ unsigned int n;
+
+ for (n = 0; n < image->nr_segments; n++) {
+ mem = image->segment[n].mem;
+ memsz = image->segment[n].memsz;
+ ptr = arch_map_crash_pages(mem, memsz);
+ if (ptr) {
+ /* The segment containing elfcorehdr */
+ if (memcmp(ptr, ELFMAG, SELFMAG) == 0) {
+ image->elfcorehdr_index = (int)n;
+ image->elfcorehdr_index_valid = true;
+ }
+ }
+ arch_unmap_crash_pages((void **)&ptr);
+ }
+ }
+
+ if (!image->elfcorehdr_index_valid) {
+ pr_err("crash hp: unable to locate elfcorehdr segment");
+ goto out;
+ }
+
+ /* Needed in order for the segments to be updated */
+ arch_kexec_unprotect_crashkres();
+
+ /* Flag to differentiate between normal load and hotplug */
+ image->hotplug_event = true;
+
+ /* Now invoke arch-specific update handler */
+ arch_crash_handle_hotplug_event(image, hp_action);
+
+ /* No longer handling a hotplug event */
+ image->hotplug_event = false;
+
+ /* Change back to read-only */
+ arch_kexec_protect_crashkres();
+ }
+
+out:
+ /* Release lock now that update complete */
+ mutex_unlock(&kexec_mutex);
+}
+
+static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
+{
+ switch (val) {
+ case MEM_ONLINE:
+ handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, 0);
+ break;
+
+ case MEM_OFFLINE:
+ handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, 0);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block crash_memhp_nb = {
+ .notifier_call = crash_memhp_notifier,
+ .priority = 0
+};
+
+static int crash_cpuhp_online(unsigned int cpu)
+{
+ handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
+ return 0;
+}
+
+static int crash_cpuhp_offline(unsigned int cpu)
+{
+ handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
+ return 0;
+}
+
+static int __init crash_hotplug_init(void)
+{
+ int result = 0;
+
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+ register_memory_notifier(&crash_memhp_nb);
+
+ if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
+ result = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "crash/cpuhp",
+ crash_cpuhp_online,
+ crash_cpuhp_offline);
+
+ return result;
+}
+
+subsys_initcall(crash_hotplug_init);
+#endif
--
2.31.1


2022-10-03 18:14:05

by Sourabh Jain

[permalink] [raw]
Subject: Re: [PATCH v12 3/7] crash: add generic infrastructure for crash hotplug support

Hello Eric,

On 10/09/22 02:35, Eric DeVolder wrote:
> CPU and memory change notifications are received in order to
> regenerate the elfcorehdr.
>
> To support cpu hotplug, a callback is registered to capture the
> CPUHP_AP_ONLINE_DYN online and offline events via
> cpuhp_setup_state_nocalls().
>
> To support memory hotplug, a notifier is registered to capture the
> MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().
>
> The cpu callback and memory notifiers call handle_hotplug_event()
> which performs needed tasks and then dispatches the event to the
> architecture specific arch_crash_handle_hotplug_event(). During the
> process, the kexec_mutex is held.
>
> Signed-off-by: Eric DeVolder <[email protected]>
> Acked-by: Baoquan He <[email protected]>
> ---
> include/linux/crash_core.h | 8 +++
> include/linux/kexec.h | 26 +++++++
> kernel/crash_core.c | 134 +++++++++++++++++++++++++++++++++++++
> 3 files changed, 168 insertions(+)
>
> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
> index de62a722431e..a270f8660538 100644
> --- a/include/linux/crash_core.h
> +++ b/include/linux/crash_core.h
> @@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
> int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
> unsigned long long *crash_size, unsigned long long *crash_base);
>
> +#define KEXEC_CRASH_HP_REMOVE_CPU 0
> +#define KEXEC_CRASH_HP_ADD_CPU 1
> +#define KEXEC_CRASH_HP_REMOVE_MEMORY 2
> +#define KEXEC_CRASH_HP_ADD_MEMORY 3
> +#define KEXEC_CRASH_HP_INVALID_CPU -1U
> +
> +struct kimage;
> +
> #endif /* LINUX_CRASH_CORE_H */
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 4eefa631e0ae..9597b41136ec 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -374,6 +374,13 @@ struct kimage {
> struct purgatory_info purgatory_info;
> #endif
>
> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
> + bool hotplug_event;
> + unsigned int offlinecpu;
> + bool elfcorehdr_index_valid;
> + int elfcorehdr_index;
> +#endif
> +
> #ifdef CONFIG_IMA_KEXEC
> /* Virtual address of IMA measurement buffer for kexec syscall */
> void *ima_buffer;
> @@ -503,6 +510,25 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
> static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
> #endif
>
> +#ifndef arch_map_crash_pages
> +static inline void *arch_map_crash_pages(unsigned long paddr,
> + unsigned long size)
> +{
> + return NULL;
> +}
> +#endif
> +
> +#ifndef arch_unmap_crash_pages
> +static inline void arch_unmap_crash_pages(void **ptr) { }
> +#endif
> +
> +#ifndef arch_crash_handle_hotplug_event
> +static inline void arch_crash_handle_hotplug_event(struct kimage *image,
> + unsigned int hp_action)
> +{
> +}
> +#endif
> +
> #else /* !CONFIG_KEXEC_CORE */
> struct pt_regs;
> struct task_struct;
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 8c648fd5897a..4b15d91f0b21 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -11,6 +11,8 @@
> #include <linux/vmalloc.h>
> #include <linux/sizes.h>
> #include <linux/kexec.h>
> +#include <linux/memory.h>
> +#include <linux/cpuhotplug.h>
>
> #include <asm/page.h>
> #include <asm/sections.h>
> @@ -18,6 +20,7 @@
> #include <crypto/sha1.h>
>
> #include "kallsyms_internal.h"
> +#include "kexec_internal.h"
>
> /* vmcoreinfo stuff */
> unsigned char *vmcoreinfo_data;
> @@ -612,3 +615,134 @@ static int __init crash_save_vmcoreinfo_init(void)
> }
>
> subsys_initcall(crash_save_vmcoreinfo_init);
> +
> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
> +/*
> + * To accurately reflect hot un/plug changes, the elfcorehdr (which
> + * is passed to the crash kernel via the elfcorehdr= parameter)
> + * must be updated with the new list of CPUs and memories.
> + *
> + * In order to make changes to elfcorehdr, two conditions are needed:
> + * First, the segment containing the elfcorehdr must be large enough
> + * to permit a growing number of resources. The elfcorehdr memory is
> + * typically based on CONFIG_NR_CPUS and CONFIG_CRASH_MAX_MEMORY_RANGES.
> + * Second, purgatory must explicitly exclude the elfcorehdr from the
> + * list of segments it checks (since the elfcorehdr changes and thus
> + * would require an update to purgatory itself to update the digest).
> + */
> +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> +{
> + /* Obtain lock while changing crash information */
> + mutex_lock(&kexec_mutex);
> +
> + /* Check kdump is loaded */
> + if (kexec_crash_image) {
> + struct kimage *image = kexec_crash_image;
> +
> + if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
> + hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
> + pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action, cpu);
> + else
> + pr_debug("crash hp: hp_action %u\n", hp_action);
> +
> + /*
> + * When the struct kimage is allocated, it is wiped to zero, so
> + * the elfcorehdr_index_valid defaults to false. Find the
> + * segment containing the elfcorehdr, if not already found.
> + * This works for both the kexec_load and kexec_file_load paths.
> + */
> + if (!image->elfcorehdr_index_valid) {
> + unsigned char *ptr;
> + unsigned long mem, memsz;
> + unsigned int n;
> +
> + for (n = 0; n < image->nr_segments; n++) {
> + mem = image->segment[n].mem;
> + memsz = image->segment[n].memsz;
> + ptr = arch_map_crash_pages(mem, memsz);
> + if (ptr) {
> + /* The segment containing elfcorehdr */
> + if (memcmp(ptr, ELFMAG, SELFMAG) == 0) {
> + image->elfcorehdr_index = (int)n;
> + image->elfcorehdr_index_valid = true;
> + }
> + }
> + arch_unmap_crash_pages((void **)&ptr);
> + }
> + }
> +
> + if (!image->elfcorehdr_index_valid) {
> + pr_err("crash hp: unable to locate elfcorehdr segment");
> + goto out;
> + }
> +
> + /* Needed in order for the segments to be updated */
> + arch_kexec_unprotect_crashkres();
> +
> + /* Flag to differentiate between normal load and hotplug */
> + image->hotplug_event = true;
> +
> + /* Now invoke arch-specific update handler */
> + arch_crash_handle_hotplug_event(image, hp_action);
> +
> + /* No longer handling a hotplug event */
> + image->hotplug_event = false;
> +
> + /* Change back to read-only */
> + arch_kexec_protect_crashkres();
> + }
> +
> +out:
> + /* Release lock now that update complete */
> + mutex_unlock(&kexec_mutex);
> +}
> +
> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
> +{
> + switch (val) {
> + case MEM_ONLINE:
> + handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, 0);
> + break;
> +
> + case MEM_OFFLINE:
> + handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, 0);
> + break;
> + }
> + return NOTIFY_OK;

Can we pass the v (memory_notify) argument to the
arch_crash_handle_hotplug_event() function
via handle_hotplug_event()?

Because of the way memory hotplug is handled on PowerPC, it is hard to
update the elfcorehdr
without the memory_notify arguments.

On PowerPC, the memblock data structure is used to prepare the elfcorehdr
for kdump. Since the notifier
used for the memory hotplug crash handler is invoked before the memblock
data structure update
happens (as depicted below), the newly prepared elfcorehdr still holds
the old memory regions.
So if the system crashes with an obsolete elfcorehdr, makedumpfile fails to
collect the vmcore.

Sequence of actions performed on PowerPC to service the memory hotplug:

 Initiate memory hot remove
          |
          v
 offline pages
          |
          v
 initiate memory notify call chain
 for MEM_OFFLINE event.
 (same is used for crash update)
          |
          v
 prepare new elfcorehdr for kdump using
 memblock data structure
          |
          v
 update memblock data structure

How will passing memory_notify to the arch crash hotplug handler help?

memory_notify holds the start PFN and page count. With that we can get
the base address and size of the hot-unplugged memory, and can use them
to keep the hot-unplugged memory region from being added to the elfcorehdr.

Thanks,
Sourabh Jain

2022-10-04 07:05:57

by Sourabh Jain

[permalink] [raw]
Subject: Re: [PATCH v12 3/7] crash: add generic infrastructure for crash hotplug support


On 10/09/22 02:35, Eric DeVolder wrote:
> CPU and memory change notifications are received in order to
> regenerate the elfcorehdr.
>
> To support cpu hotplug, a callback is registered to capture the
> CPUHP_AP_ONLINE_DYN online and offline events via
> cpuhp_setup_state_nocalls().
>
> To support memory hotplug, a notifier is registered to capture the
> MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().
>
> The cpu callback and memory notifiers call handle_hotplug_event()
> which performs needed tasks and then dispatches the event to the
> architecture specific arch_crash_handle_hotplug_event(). During the
> process, the kexec_mutex is held.
>
> Signed-off-by: Eric DeVolder <[email protected]>
> Acked-by: Baoquan He <[email protected]>
> ---
> include/linux/crash_core.h | 8 +++
> include/linux/kexec.h | 26 +++++++
> kernel/crash_core.c | 134 +++++++++++++++++++++++++++++++++++++
> 3 files changed, 168 insertions(+)
>
> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
> index de62a722431e..a270f8660538 100644
> --- a/include/linux/crash_core.h
> +++ b/include/linux/crash_core.h
> @@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
> int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
> unsigned long long *crash_size, unsigned long long *crash_base);
>
> +#define KEXEC_CRASH_HP_REMOVE_CPU 0
> +#define KEXEC_CRASH_HP_ADD_CPU 1
> +#define KEXEC_CRASH_HP_REMOVE_MEMORY 2
> +#define KEXEC_CRASH_HP_ADD_MEMORY 3
> +#define KEXEC_CRASH_HP_INVALID_CPU -1U
> +
> +struct kimage;
> +
> #endif /* LINUX_CRASH_CORE_H */
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 4eefa631e0ae..9597b41136ec 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -374,6 +374,13 @@ struct kimage {
> struct purgatory_info purgatory_info;
> #endif
>
> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
> + bool hotplug_event;
> + unsigned int offlinecpu;
> + bool elfcorehdr_index_valid;
> + int elfcorehdr_index;

Do we really need elfcorehdr_index_valid to decide whether elfcorehdr_index
holds a valid index?

How about initializing elfcorehdr_index to a negative number while
loading the kdump kernel (or the kexec kernel, if needed)
in both the kexec_load and kexec_file_load cases, and treating a negative
value as an invalid index that triggers the search for the correct one?

Something like this:

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 5bc5159d9cb1..0cccdb2f7f26 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -656,7 +656,7 @@ static void handle_hotplug_event(unsigned int
hp_action, unsigned int cpu)
                 * segment containing the elfcorehdr, if not already found.
                 * This works for both the kexec_load and
kexec_file_load paths.
                 */
-               if (!image->elfcorehdr_index_valid) {
+               if (image->elfcorehdr_index < 0) {
                        unsigned char *ptr;
                        unsigned long mem, memsz;
                        unsigned int n;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index b5e40f069768..ed1c6a88879b 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -156,6 +156,10 @@ static int do_kexec_load(unsigned long entry,
unsigned long nr_segments,
        if (ret)
                goto out;

+       /* Below check is not necessary */
+       if (flags & KEXEC_FILE_ON_CRASH)
+               image->elfcorehdr_index = -1;
+
        /* Install the new kernel and uninstall the old */
        image = xchg(dest_image, image);

diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index d0c2661b3509..535dbc26930a 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -400,6 +400,10 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd,
int, initrd_fd,
        if (ret)
                goto out;

+       /* Below check is not necessary */
+       if (flags & KEXEC_FILE_ON_CRASH)
+               image->elfcorehdr_index = -1;
+
        /*
         * Free up any temporary buffers allocated which are not needed
         * after image has been loaded

Thanks,
Sourabh Jain

2022-10-07 19:18:19

by Eric DeVolder

[permalink] [raw]
Subject: Re: [PATCH v12 3/7] crash: add generic infrastructure for crash hotplug support



On 10/3/22 12:51, Sourabh Jain wrote:
> Hello Eric,
>
> On 10/09/22 02:35, Eric DeVolder wrote:
>> CPU and memory change notifications are received in order to
>> regenerate the elfcorehdr.
>>
>> To support cpu hotplug, a callback is registered to capture the
>> CPUHP_AP_ONLINE_DYN online and offline events via
>> cpuhp_setup_state_nocalls().
>>
>> To support memory hotplug, a notifier is registered to capture the
>> MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().
>>
>> The cpu callback and memory notifiers call handle_hotplug_event()
>> which performs needed tasks and then dispatches the event to the
>> architecture specific arch_crash_handle_hotplug_event(). During the
>> process, the kexec_mutex is held.
>>
>> Signed-off-by: Eric DeVolder <[email protected]>
>> Acked-by: Baoquan He <[email protected]>
>> ---
>>   include/linux/crash_core.h |   8 +++
>>   include/linux/kexec.h      |  26 +++++++
>>   kernel/crash_core.c        | 134 +++++++++++++++++++++++++++++++++++++
>>   3 files changed, 168 insertions(+)
>>
>> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
>> index de62a722431e..a270f8660538 100644
>> --- a/include/linux/crash_core.h
>> +++ b/include/linux/crash_core.h
>> @@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
>>   int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
>>           unsigned long long *crash_size, unsigned long long *crash_base);
>> +#define KEXEC_CRASH_HP_REMOVE_CPU        0
>> +#define KEXEC_CRASH_HP_ADD_CPU            1
>> +#define KEXEC_CRASH_HP_REMOVE_MEMORY        2
>> +#define KEXEC_CRASH_HP_ADD_MEMORY        3
>> +#define KEXEC_CRASH_HP_INVALID_CPU        -1U
>> +
>> +struct kimage;
>> +
>>   #endif /* LINUX_CRASH_CORE_H */
>> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
>> index 4eefa631e0ae..9597b41136ec 100644
>> --- a/include/linux/kexec.h
>> +++ b/include/linux/kexec.h
>> @@ -374,6 +374,13 @@ struct kimage {
>>       struct purgatory_info purgatory_info;
>>   #endif
>> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
>> +    bool hotplug_event;
>> +    unsigned int offlinecpu;
>> +    bool elfcorehdr_index_valid;
>> +    int elfcorehdr_index;
>> +#endif
>> +
>>   #ifdef CONFIG_IMA_KEXEC
>>       /* Virtual address of IMA measurement buffer for kexec syscall */
>>       void *ima_buffer;
>> @@ -503,6 +510,25 @@ static inline int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
>>   static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { }
>>   #endif
>> +#ifndef arch_map_crash_pages
>> +static inline void *arch_map_crash_pages(unsigned long paddr,
>> +        unsigned long size)
>> +{
>> +    return NULL;
>> +}
>> +#endif
>> +
>> +#ifndef arch_unmap_crash_pages
>> +static inline void arch_unmap_crash_pages(void **ptr) { }
>> +#endif
>> +
>> +#ifndef arch_crash_handle_hotplug_event
>> +static inline void arch_crash_handle_hotplug_event(struct kimage *image,
>> +        unsigned int hp_action)
>> +{
>> +}
>> +#endif
>> +
>>   #else /* !CONFIG_KEXEC_CORE */
>>   struct pt_regs;
>>   struct task_struct;
>> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>> index 8c648fd5897a..4b15d91f0b21 100644
>> --- a/kernel/crash_core.c
>> +++ b/kernel/crash_core.c
>> @@ -11,6 +11,8 @@
>>   #include <linux/vmalloc.h>
>>   #include <linux/sizes.h>
>>   #include <linux/kexec.h>
>> +#include <linux/memory.h>
>> +#include <linux/cpuhotplug.h>
>>   #include <asm/page.h>
>>   #include <asm/sections.h>
>> @@ -18,6 +20,7 @@
>>   #include <crypto/sha1.h>
>>   #include "kallsyms_internal.h"
>> +#include "kexec_internal.h"
>>   /* vmcoreinfo stuff */
>>   unsigned char *vmcoreinfo_data;
>> @@ -612,3 +615,134 @@ static int __init crash_save_vmcoreinfo_init(void)
>>   }
>>   subsys_initcall(crash_save_vmcoreinfo_init);
>> +
>> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
>> +/*
>> + * To accurately reflect hot un/plug changes, the elfcorehdr (which
>> + * is passed to the crash kernel via the elfcorehdr= parameter)
>> + * must be updated with the new list of CPUs and memories.
>> + *
>> + * In order to make changes to elfcorehdr, two conditions are needed:
>> + * First, the segment containing the elfcorehdr must be large enough
>> + * to permit a growing number of resources. The elfcorehdr memory is
>> + * typically based on CONFIG_NR_CPUS and CONFIG_CRASH_MAX_MEMORY_RANGES.
>> + * Second, purgatory must explicitly exclude the elfcorehdr from the
>> + * list of segments it checks (since the elfcorehdr changes and thus
>> + * would require an update to purgatory itself to update the digest).
>> + */
>> +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>> +{
>> +    /* Obtain lock while changing crash information */
>> +    mutex_lock(&kexec_mutex);
>> +
>> +    /* Check kdump is loaded */
>> +    if (kexec_crash_image) {
>> +        struct kimage *image = kexec_crash_image;
>> +
>> +        if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
>> +            hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
>> +            pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action, cpu);
>> +        else
>> +            pr_debug("crash hp: hp_action %u\n", hp_action);
>> +
>> +        /*
>> +         * When the struct kimage is allocated, it is wiped to zero, so
>> +         * the elfcorehdr_index_valid defaults to false. Find the
>> +         * segment containing the elfcorehdr, if not already found.
>> +         * This works for both the kexec_load and kexec_file_load paths.
>> +         */
>> +        if (!image->elfcorehdr_index_valid) {
>> +            unsigned char *ptr;
>> +            unsigned long mem, memsz;
>> +            unsigned int n;
>> +
>> +            for (n = 0; n < image->nr_segments; n++) {
>> +                mem = image->segment[n].mem;
>> +                memsz = image->segment[n].memsz;
>> +                ptr = arch_map_crash_pages(mem, memsz);
>> +                if (ptr) {
>> +                    /* The segment containing elfcorehdr */
>> +                    if (memcmp(ptr, ELFMAG, SELFMAG) == 0) {
>> +                        image->elfcorehdr_index = (int)n;
>> +                        image->elfcorehdr_index_valid = true;
>> +                    }
>> +                }
>> +                arch_unmap_crash_pages((void **)&ptr);
>> +            }
>> +        }
>> +
>> +        if (!image->elfcorehdr_index_valid) {
>> +            pr_err("crash hp: unable to locate elfcorehdr segment");
>> +            goto out;
>> +        }
>> +
>> +        /* Needed in order for the segments to be updated */
>> +        arch_kexec_unprotect_crashkres();
>> +
>> +        /* Flag to differentiate between normal load and hotplug */
>> +        image->hotplug_event = true;
>> +
>> +        /* Now invoke arch-specific update handler */
>> +        arch_crash_handle_hotplug_event(image, hp_action);
>> +
>> +        /* No longer handling a hotplug event */
>> +        image->hotplug_event = false;
>> +
>> +        /* Change back to read-only */
>> +        arch_kexec_protect_crashkres();
>> +    }
>> +
>> +out:
>> +    /* Release lock now that update complete */
>> +    mutex_unlock(&kexec_mutex);
>> +}
>> +
>> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
>> +{
>> +    switch (val) {
>> +    case MEM_ONLINE:
>> +        handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, 0);
>> +        break;
>> +
>> +    case MEM_OFFLINE:
>> +        handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, 0);
>> +        break;
>> +    }
>> +    return NOTIFY_OK;
>
> Can we pass v (memory_notify) argument to arch_crash_handle_hotplug_event function
> via handle_hotplug_event?
>
> Because the way memory hotplug is handled on PowerPC, it is hard to update the elfcorehdr
> without memory_notify args.
>
> On PowePC memblock data structure is used to prepare elfcorehdr for kdump. Since the notifier
> used for memory hotplug crash handler get initiated before the memblock data structure update
> happens (as depicted below), the newly prepared elfcorehdr still holds the old memory regions.
> So if the system crash with obsolete elfcorehdr, makedumpfile failed to collect vmcore.
>
> Sequence of actions done on PowerPC to server the memory hotplug:
>
>  Initiate memory hot remove
>           |
>           v
>  offline pages
>           |
>           v
>  initiate memory notify call chain
>  for MEM_OFFLINE event.
>  (same is used for crash update)
>           |
>           v
>  prepare new elfcorehdr for kdump using
>  memblock data structure
>           |
>           v
>  update memblock data structure
>
> How passing memory_notify to arch crash hotplug handler will help?
>
> memory_notify holds the start PFN and page count, with that we can get
> the base address and size of hot unplugged memory and can use the same
> to avoid hot unplugged memeory region to get added in the elfcorehdr..
>
> Thanks,
> Sourabh Jain
>

Sourabh, let's see what Baoquan thinks.

Baoquan, are you OK with this request? I once had these parameters to the
crash hotplug handler and since they were unused at the time, you asked
that I remove them, which I did.

To accommodate this, how about this:

static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu,
unsigned long mem_start, unsigned long mem_size)

For CPU events, I would just pass zeros for mem_start/size. For memory events,
I would pass KEXEC_CRASH_HP_INVALID_CPU.

Thanks,
eric

2022-10-07 19:35:04

by Eric DeVolder

[permalink] [raw]
Subject: Re: [PATCH v12 3/7] crash: add generic infrastructure for crash hotplug support



On 10/4/22 01:38, Sourabh Jain wrote:
>
> On 10/09/22 02:35, Eric DeVolder wrote:
>> CPU and memory change notifications are received in order to
>> regenerate the elfcorehdr.
>>
>> To support cpu hotplug, a callback is registered to capture the
>> CPUHP_AP_ONLINE_DYN online and offline events via
>> cpuhp_setup_state_nocalls().
>>
>> To support memory hotplug, a notifier is registered to capture the
>> MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().
>>
>> The cpu callback and memory notifiers call handle_hotplug_event()
>> which performs needed tasks and then dispatches the event to the
>> architecture specific arch_crash_handle_hotplug_event(). During the
>> process, the kexec_mutex is held.
>>
>> Signed-off-by: Eric DeVolder <[email protected]>
>> Acked-by: Baoquan He <[email protected]>
>> ---
>>   include/linux/crash_core.h |   8 +++
>>   include/linux/kexec.h      |  26 +++++++
>>   kernel/crash_core.c        | 134 +++++++++++++++++++++++++++++++++++++
>>   3 files changed, 168 insertions(+)
>>
>> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
>> index de62a722431e..a270f8660538 100644
>> --- a/include/linux/crash_core.h
>> +++ b/include/linux/crash_core.h
>> @@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
>>   int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
>>           unsigned long long *crash_size, unsigned long long *crash_base);
>> +#define KEXEC_CRASH_HP_REMOVE_CPU        0
>> +#define KEXEC_CRASH_HP_ADD_CPU            1
>> +#define KEXEC_CRASH_HP_REMOVE_MEMORY        2
>> +#define KEXEC_CRASH_HP_ADD_MEMORY        3
>> +#define KEXEC_CRASH_HP_INVALID_CPU        -1U
>> +
>> +struct kimage;
>> +
>>   #endif /* LINUX_CRASH_CORE_H */
>> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
>> index 4eefa631e0ae..9597b41136ec 100644
>> --- a/include/linux/kexec.h
>> +++ b/include/linux/kexec.h
>> @@ -374,6 +374,13 @@ struct kimage {
>>       struct purgatory_info purgatory_info;
>>   #endif
>> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
>> +    bool hotplug_event;
>> +    unsigned int offlinecpu;
>> +    bool elfcorehdr_index_valid;
>> +    int elfcorehdr_index;
>
> Do we really need elfcorehdr_index_valid to decide elfcorehdr_index holds a valid index?
No, as you point out you can overload the index value itself.
(In fact I originally went this route but encountered trouble
with locating the proper locations to place the initialization code).

However, the current approach has the advantage that it is
automatically zeroed and thus set to its correct (false) setting
immediately upon kexec load without any additional code. As the
diff you have below indicates, there are several sites that need
to set the index to its invalid (-1) value to accomplish the same.

I prefer the index_valid approach, but if there is strong support
for overloading the index, then it can be changed.

eric


>
> How about initializing elfcorehdr_index to a negative number while loading kdump kernel (or kexec
> kernel if needed)
> for both kexec_load and kexec_file_load case and consider that as invalid index to find the correct
> one.
>
> Something like this:
>
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 5bc5159d9cb1..0cccdb2f7f26 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -656,7 +656,7 @@ static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>                  * segment containing the elfcorehdr, if not already found.
>                  * This works for both the kexec_load and kexec_file_load paths.
>                  */
> -               if (!image->elfcorehdr_index_valid) {
> +               if (image->elfcorehdr_index < 0) {
>                         unsigned char *ptr;
>                         unsigned long mem, memsz;
>                         unsigned int n;
> diff --git a/kernel/kexec.c b/kernel/kexec.c
> index b5e40f069768..ed1c6a88879b 100644
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -156,6 +156,10 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
>         if (ret)
>                 goto out;
>
> +       /* Below check is not necessary */
> +       if (flags & KEXEC_FILE_ON_CRASH)
> +               image->elfcorehdr_index = -1;
> +
>         /* Install the new kernel and uninstall the old */
>         image = xchg(dest_image, image);
>
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index d0c2661b3509..535dbc26930a 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -400,6 +400,10 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
>         if (ret)
>                 goto out;
>
> +       /* Below check is not necessary */
> +       if (flags & KEXEC_FILE_ON_CRASH)
> +               image->elfcorehdr_index = -1;
> +
>         /*
>          * Free up any temporary buffers allocated which are not needed
>          * after image has been loaded
>
> Thanks,
> Sourabh Jain
>

2022-10-17 06:52:05

by Sourabh Jain

[permalink] [raw]
Subject: Re: [PATCH v12 3/7] crash: add generic infrastructure for crash hotplug support


On 08/10/22 00:44, Eric DeVolder wrote:
>
>
> On 10/3/22 12:51, Sourabh Jain wrote:
>> Hello Eric,
>>
>> On 10/09/22 02:35, Eric DeVolder wrote:
>>> CPU and memory change notifications are received in order to
>>> regenerate the elfcorehdr.
>>>
>>> To support cpu hotplug, a callback is registered to capture the
>>> CPUHP_AP_ONLINE_DYN online and offline events via
>>> cpuhp_setup_state_nocalls().
>>>
>>> To support memory hotplug, a notifier is registered to capture the
>>> MEM_ONLINE and MEM_OFFLINE events via register_memory_notifier().
>>>
>>> The cpu callback and memory notifiers call handle_hotplug_event()
>>> which performs needed tasks and then dispatches the event to the
>>> architecture specific arch_crash_handle_hotplug_event(). During the
>>> process, the kexec_mutex is held.
>>>
>>> Signed-off-by: Eric DeVolder <[email protected]>
>>> Acked-by: Baoquan He <[email protected]>
>>> ---
>>>   include/linux/crash_core.h |   8 +++
>>>   include/linux/kexec.h      |  26 +++++++
>>>   kernel/crash_core.c        | 134
>>> +++++++++++++++++++++++++++++++++++++
>>>   3 files changed, 168 insertions(+)
>>>
>>> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
>>> index de62a722431e..a270f8660538 100644
>>> --- a/include/linux/crash_core.h
>>> +++ b/include/linux/crash_core.h
>>> @@ -84,4 +84,12 @@ int parse_crashkernel_high(char *cmdline,
>>> unsigned long long system_ram,
>>>   int parse_crashkernel_low(char *cmdline, unsigned long long
>>> system_ram,
>>>           unsigned long long *crash_size, unsigned long long
>>> *crash_base);
>>> +#define KEXEC_CRASH_HP_REMOVE_CPU        0
>>> +#define KEXEC_CRASH_HP_ADD_CPU            1
>>> +#define KEXEC_CRASH_HP_REMOVE_MEMORY        2
>>> +#define KEXEC_CRASH_HP_ADD_MEMORY        3
>>> +#define KEXEC_CRASH_HP_INVALID_CPU        -1U
>>> +
>>> +struct kimage;
>>> +
>>>   #endif /* LINUX_CRASH_CORE_H */
>>> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
>>> index 4eefa631e0ae..9597b41136ec 100644
>>> --- a/include/linux/kexec.h
>>> +++ b/include/linux/kexec.h
>>> @@ -374,6 +374,13 @@ struct kimage {
>>>       struct purgatory_info purgatory_info;
>>>   #endif
>>> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
>>> +    bool hotplug_event;
>>> +    unsigned int offlinecpu;
>>> +    bool elfcorehdr_index_valid;
>>> +    int elfcorehdr_index;
>>> +#endif
>>> +
>>>   #ifdef CONFIG_IMA_KEXEC
>>>       /* Virtual address of IMA measurement buffer for kexec syscall */
>>>       void *ima_buffer;
>>> @@ -503,6 +510,25 @@ static inline int
>>> arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, g
>>>   static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned
>>> int pages) { }
>>>   #endif
>>> +#ifndef arch_map_crash_pages
>>> +static inline void *arch_map_crash_pages(unsigned long paddr,
>>> +        unsigned long size)
>>> +{
>>> +    return NULL;
>>> +}
>>> +#endif
>>> +
>>> +#ifndef arch_unmap_crash_pages
>>> +static inline void arch_unmap_crash_pages(void **ptr) { }
>>> +#endif
>>> +
>>> +#ifndef arch_crash_handle_hotplug_event
>>> +static inline void arch_crash_handle_hotplug_event(struct kimage
>>> *image,
>>> +        unsigned int hp_action)
>>> +{
>>> +}
>>> +#endif
>>> +
>>>   #else /* !CONFIG_KEXEC_CORE */
>>>   struct pt_regs;
>>>   struct task_struct;
>>> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
>>> index 8c648fd5897a..4b15d91f0b21 100644
>>> --- a/kernel/crash_core.c
>>> +++ b/kernel/crash_core.c
>>> @@ -11,6 +11,8 @@
>>>   #include <linux/vmalloc.h>
>>>   #include <linux/sizes.h>
>>>   #include <linux/kexec.h>
>>> +#include <linux/memory.h>
>>> +#include <linux/cpuhotplug.h>
>>>   #include <asm/page.h>
>>>   #include <asm/sections.h>
>>> @@ -18,6 +20,7 @@
>>>   #include <crypto/sha1.h>
>>>   #include "kallsyms_internal.h"
>>> +#include "kexec_internal.h"
>>>   /* vmcoreinfo stuff */
>>>   unsigned char *vmcoreinfo_data;
>>> @@ -612,3 +615,134 @@ static int __init
>>> crash_save_vmcoreinfo_init(void)
>>>   }
>>>   subsys_initcall(crash_save_vmcoreinfo_init);
>>> +
>>> +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG)
>>> +/*
>>> + * To accurately reflect hot un/plug changes, the elfcorehdr (which
>>> + * is passed to the crash kernel via the elfcorehdr= parameter)
>>> + * must be updated with the new list of CPUs and memories.
>>> + *
>>> + * In order to make changes to elfcorehdr, two conditions are needed:
>>> + * First, the segment containing the elfcorehdr must be large enough
>>> + * to permit a growing number of resources. The elfcorehdr memory is
>>> + * typically based on CONFIG_NR_CPUS and
>>> CONFIG_CRASH_MAX_MEMORY_RANGES.
>>> + * Second, purgatory must explicitly exclude the elfcorehdr from the
>>> + * list of segments it checks (since the elfcorehdr changes and thus
>>> + * would require an update to purgatory itself to update the digest).
>>> + */
>>> +static void handle_hotplug_event(unsigned int hp_action, unsigned
>>> int cpu)
>>> +{
>>> +    /* Obtain lock while changing crash information */
>>> +    mutex_lock(&kexec_mutex);
>>> +
>>> +    /* Check kdump is loaded */
>>> +    if (kexec_crash_image) {
>>> +        struct kimage *image = kexec_crash_image;
>>> +
>>> +        if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
>>> +            hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
>>> +            pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action,
>>> cpu);
>>> +        else
>>> +            pr_debug("crash hp: hp_action %u\n", hp_action);
>>> +
>>> +        /*
>>> +         * When the struct kimage is allocated, it is wiped to
>>> zero, so
>>> +         * the elfcorehdr_index_valid defaults to false. Find the
>>> +         * segment containing the elfcorehdr, if not already found.
>>> +         * This works for both the kexec_load and kexec_file_load
>>> paths.
>>> +         */
>>> +        if (!image->elfcorehdr_index_valid) {
>>> +            unsigned char *ptr;
>>> +            unsigned long mem, memsz;
>>> +            unsigned int n;
>>> +
>>> +            for (n = 0; n < image->nr_segments; n++) {
>>> +                mem = image->segment[n].mem;
>>> +                memsz = image->segment[n].memsz;
>>> +                ptr = arch_map_crash_pages(mem, memsz);
>>> +                if (ptr) {
>>> +                    /* The segment containing elfcorehdr */
>>> +                    if (memcmp(ptr, ELFMAG, SELFMAG) == 0) {
>>> +                        image->elfcorehdr_index = (int)n;
>>> +                        image->elfcorehdr_index_valid = true;
>>> +                    }
>>> +                }
>>> +                arch_unmap_crash_pages((void **)&ptr);
>>> +            }
>>> +        }
>>> +
>>> +        if (!image->elfcorehdr_index_valid) {
>>> +            pr_err("crash hp: unable to locate elfcorehdr segment");
>>> +            goto out;
>>> +        }
>>> +
>>> +        /* Needed in order for the segments to be updated */
>>> +        arch_kexec_unprotect_crashkres();
>>> +
>>> +        /* Flag to differentiate between normal load and hotplug */
>>> +        image->hotplug_event = true;
>>> +
>>> +        /* Now invoke arch-specific update handler */
>>> +        arch_crash_handle_hotplug_event(image, hp_action);
>>> +
>>> +        /* No longer handling a hotplug event */
>>> +        image->hotplug_event = false;
>>> +
>>> +        /* Change back to read-only */
>>> +        arch_kexec_protect_crashkres();
>>> +    }
>>> +
>>> +out:
>>> +    /* Release lock now that update complete */
>>> +    mutex_unlock(&kexec_mutex);
>>> +}
>>> +
>>> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned
>>> long val, void *v)
>>> +{
>>> +    switch (val) {
>>> +    case MEM_ONLINE:
>>> +        handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, 0);
>>> +        break;
>>> +
>>> +    case MEM_OFFLINE:
>>> +        handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, 0);
>>> +        break;
>>> +    }
>>> +    return NOTIFY_OK;
>>
>> Can we pass v (memory_notify) argument to
>> arch_crash_handle_hotplug_event function
>> via handle_hotplug_event?
>>
>> Because the way memory hotplug is handled on PowerPC, it is hard to
>> update the elfcorehdr
>> without memory_notify args.
>>
>> On PowerPC, the memblock data structure is used to prepare the elfcorehdr for
>> kdump. Since the notifier
>> used for the memory hotplug crash handler gets initiated before the
>> memblock data structure update
>> happens (as depicted below), the newly prepared elfcorehdr still
>> holds the old memory regions.
>> So if the system crashes with an obsolete elfcorehdr, makedumpfile fails
>> to collect the vmcore.
>>
>> Sequence of actions done on PowerPC to serve the memory hotplug:
>>
>>   Initiate memory hot remove
>>            |
>>            v
>>   offline pages
>>            |
>>            v
>>   initiate memory notify call chain
>>   for MEM_OFFLINE event.
>>   (same is used for crash update)
>>            |
>>            v
>>   prepare new elfcorehdr for kdump using
>>   memblock data structure
>>            |
>>            v
>>   update memblock data structure
>>
>> How passing memory_notify to arch crash hotplug handler will help?
>>
>> memory_notify holds the start PFN and page count; with that we can get
>> the base address and size of the hot-unplugged memory and can use them
>> to keep the hot-unplugged memory region from being added to the elfcorehdr.
>>
>> Thanks,
>> Sourabh Jain
>>
>
> Sourabh, let's see what Baoquan thinks.
>
> Baoquan, are you OK with this request? I once had these parameters to the
> crash hotplug handler and since they were unused at the time, you asked
> that I remove them, which I did.
>
> To accommodate this, how about this:
>
> static void handle_hotplug_event(unsigned int hp_action, unsigned int
> cpu,
>      unsigned long mem_start, unsigned long mem_size)
>
> For CPU events, I would just pass zeros for mem_start/size. For memory
> events,
> I would pass KEXEC_CRASH_HP_INVALID_CPU.

How about passing memory_notify struct as is and let architecture handle
the rest?

Thanks,
Sourabh Jain

2022-10-24 09:13:39

by Baoquan He

[permalink] [raw]
Subject: Re: [PATCH v12 3/7] crash: add generic infrastructure for crash hotplug support

Hi Eric, Sourabh,

On 10/07/22 at 02:14pm, Eric DeVolder wrote:
>
>
> On 10/3/22 12:51, Sourabh Jain wrote:
> > Hello Eric,
> >
> > On 10/09/22 02:35, Eric DeVolder wrote:
......
> > > +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
> > > +{
> > > +??? /* Obtain lock while changing crash information */
> > > +??? mutex_lock(&kexec_mutex);
> > > +
> > > +??? /* Check kdump is loaded */
> > > +??? if (kexec_crash_image) {
> > > +??????? struct kimage *image = kexec_crash_image;
> > > +
> > > +??????? if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
> > > +??????????? hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
> > > +??????????? pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action, cpu);
> > > +??????? else
> > > +??????????? pr_debug("crash hp: hp_action %u\n", hp_action);
> > > +
> > > +??????? /*
> > > +???????? * When the struct kimage is allocated, it is wiped to zero, so
> > > +???????? * the elfcorehdr_index_valid defaults to false. Find the
> > > +???????? * segment containing the elfcorehdr, if not already found.
> > > +???????? * This works for both the kexec_load and kexec_file_load paths.
> > > +???????? */
> > > +??????? if (!image->elfcorehdr_index_valid) {
> > > +??????????? unsigned char *ptr;
> > > +??????????? unsigned long mem, memsz;
> > > +??????????? unsigned int n;
> > > +
> > > +??????????? for (n = 0; n < image->nr_segments; n++) {
> > > +??????????????? mem = image->segment[n].mem;
> > > +??????????????? memsz = image->segment[n].memsz;
> > > +??????????????? ptr = arch_map_crash_pages(mem, memsz);
> > > +??????????????? if (ptr) {
> > > +??????????????????? /* The segment containing elfcorehdr */
> > > +??????????????????? if (memcmp(ptr, ELFMAG, SELFMAG) == 0) {
> > > +??????????????????????? image->elfcorehdr_index = (int)n;
> > > +??????????????????????? image->elfcorehdr_index_valid = true;
> > > +??????????????????? }
> > > +??????????????? }
> > > +??????????????? arch_unmap_crash_pages((void **)&ptr);
> > > +??????????? }
> > > +??????? }
> > > +
> > > +??????? if (!image->elfcorehdr_index_valid) {
> > > +??????????? pr_err("crash hp: unable to locate elfcorehdr segment");
> > > +??????????? goto out;
> > > +??????? }
> > > +
> > > +??????? /* Needed in order for the segments to be updated */
> > > +??????? arch_kexec_unprotect_crashkres();
> > > +
> > > +??????? /* Flag to differentiate between normal load and hotplug */
> > > +??????? image->hotplug_event = true;
> > > +
> > > +??????? /* Now invoke arch-specific update handler */
> > > +??????? arch_crash_handle_hotplug_event(image, hp_action);
> > > +
> > > +??????? /* No longer handling a hotplug event */
> > > +??????? image->hotplug_event = false;
> > > +
> > > +??????? /* Change back to read-only */
> > > +??????? arch_kexec_protect_crashkres();
> > > +??? }
> > > +
> > > +out:
> > > +??? /* Release lock now that update complete */
> > > +??? mutex_unlock(&kexec_mutex);
> > > +}
> > > +
> > > +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
> > > +{
> > > +??? switch (val) {
> > > +??? case MEM_ONLINE:
> > > +??????? handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, 0);
> > > +??????? break;
> > > +
> > > +??? case MEM_OFFLINE:
> > > +??????? handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, 0);
> > > +??????? break;
> > > +??? }
> > > +??? return NOTIFY_OK;
> >
> > Can we pass v (memory_notify) argument to arch_crash_handle_hotplug_event function
> > via handle_hotplug_event?
> >
> > Because the way memory hotplug is handled on PowerPC, it is hard to update the elfcorehdr
> > without memory_notify args.
> >
> > > On PowerPC, the memblock data structure is used to prepare the elfcorehdr for kdump. Since the notifier
> > > used for the memory hotplug crash handler gets initiated before the memblock data structure update
> > > happens (as depicted below), the newly prepared elfcorehdr still holds the old memory regions.
> > > So if the system crashes with an obsolete elfcorehdr, makedumpfile fails to collect the vmcore.
> >
> > > Sequence of actions done on PowerPC to serve the memory hotplug:
> >
> > >  Initiate memory hot remove
> > >           |
> > >           v
> > >  offline pages
> > >           |
> > >           v
> > >  initiate memory notify call chain
> > >  for MEM_OFFLINE event.
> > >  (same is used for crash update)
> > >           |
> > >           v
> > >  prepare new elfcorehdr for kdump using
> > >  memblock data structure
> > >           |
> > >           v
> > >  update memblock data structure
> >
> > How passing memory_notify to arch crash hotplug handler will help?
> >
> > > memory_notify holds the start PFN and page count; with that we can get
> > > the base address and size of the hot-unplugged memory and can use them
> > > to keep the hot-unplugged memory region from being added to the elfcorehdr.
> >
> > Thanks,
> > Sourabh Jain
> >
>
> Sourabh, let's see what Baoquan thinks.
>
> Baoquan, are you OK with this request? I once had these parameters to the
> crash hotplug handler and since they were unused at the time, you asked
> that I remove them, which I did.

Sorry for missing this mail. I thought the two of you were discussing
something between yourselves, and didn't notice this question to me.

I think there are two ways to solve the issue Sourabh raised:
1) make handle_hotplug_event() get and pass down the memory_notify as
Sourabh said, or the hp_action, mem_start|size as Eric suggested. I
have to admit I haven't carefully checked which one is better.

2) leave the current code as is, since it's aimed at x86 only. Later
Sourabh can modify the code according to his needs on ppc. That way each
code change comes with a satisfying justification of why it is needed.

I personally like the 2nd way, while also like seeing 1st one if the
code change and log is convincing to any reviewer.

>
> To accommodate this, how about this:
>
> static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu,
> unsigned long mem_start, unsigned long mem_size)
>
> For CPU events, I would just pass zeros for mem_start/size. For memory events,
> I would pass KEXEC_CRASH_HP_INVALID_CPU.
>
> Thanks,
> eric

2022-10-26 07:06:12

by Sourabh Jain

[permalink] [raw]
Subject: Re: [PATCH v12 3/7] crash: add generic infrastructure for crash hotplug support

Hello Baoquan,

On 24/10/22 14:40, Baoquan He wrote:
> Hi Eric, Sourabh,
>
> On 10/07/22 at 02:14pm, Eric DeVolder wrote:
>>
>> On 10/3/22 12:51, Sourabh Jain wrote:
>>> Hello Eric,
>>>
>>> On 10/09/22 02:35, Eric DeVolder wrote:
> ......
>>>> +static void handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
>>>> +{
>>>> +    /* Obtain lock while changing crash information */
>>>> +    mutex_lock(&kexec_mutex);
>>>> +
>>>> +    /* Check kdump is loaded */
>>>> +    if (kexec_crash_image) {
>>>> +        struct kimage *image = kexec_crash_image;
>>>> +
>>>> +        if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
>>>> +            hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
>>>> +            pr_debug("crash hp: hp_action %u, cpu %u\n", hp_action, cpu);
>>>> +        else
>>>> +            pr_debug("crash hp: hp_action %u\n", hp_action);
>>>> +
>>>> +        /*
>>>> +         * When the struct kimage is allocated, it is wiped to zero, so
>>>> +         * the elfcorehdr_index_valid defaults to false. Find the
>>>> +         * segment containing the elfcorehdr, if not already found.
>>>> +         * This works for both the kexec_load and kexec_file_load paths.
>>>> +         */
>>>> +        if (!image->elfcorehdr_index_valid) {
>>>> +            unsigned char *ptr;
>>>> +            unsigned long mem, memsz;
>>>> +            unsigned int n;
>>>> +
>>>> +            for (n = 0; n < image->nr_segments; n++) {
>>>> +                mem = image->segment[n].mem;
>>>> +                memsz = image->segment[n].memsz;
>>>> +                ptr = arch_map_crash_pages(mem, memsz);
>>>> +                if (ptr) {
>>>> +                    /* The segment containing elfcorehdr */
>>>> +                    if (memcmp(ptr, ELFMAG, SELFMAG) == 0) {
>>>> +                        image->elfcorehdr_index = (int)n;
>>>> +                        image->elfcorehdr_index_valid = true;
>>>> +                    }
>>>> +                }
>>>> +                arch_unmap_crash_pages((void **)&ptr);
>>>> +            }
>>>> +        }
>>>> +
>>>> +        if (!image->elfcorehdr_index_valid) {
>>>> +            pr_err("crash hp: unable to locate elfcorehdr segment");
>>>> +            goto out;
>>>> +        }
>>>> +
>>>> +        /* Needed in order for the segments to be updated */
>>>> +        arch_kexec_unprotect_crashkres();
>>>> +
>>>> +        /* Flag to differentiate between normal load and hotplug */
>>>> +        image->hotplug_event = true;
>>>> +
>>>> +        /* Now invoke arch-specific update handler */
>>>> +        arch_crash_handle_hotplug_event(image, hp_action);
>>>> +
>>>> +        /* No longer handling a hotplug event */
>>>> +        image->hotplug_event = false;
>>>> +
>>>> +        /* Change back to read-only */
>>>> +        arch_kexec_protect_crashkres();
>>>> +    }
>>>> +
>>>> +out:
>>>> +    /* Release lock now that update complete */
>>>> +    mutex_unlock(&kexec_mutex);
>>>> +}
>>>> +
>>>> +static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
>>>> +{
>>>> +    switch (val) {
>>>> +    case MEM_ONLINE:
>>>> +        handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, 0);
>>>> +        break;
>>>> +
>>>> +    case MEM_OFFLINE:
>>>> +        handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, 0);
>>>> +        break;
>>>> +    }
>>>> +    return NOTIFY_OK;
>>> Can we pass v (memory_notify) argument to arch_crash_handle_hotplug_event function
>>> via handle_hotplug_event?
>>>
>>> Because the way memory hotplug is handled on PowerPC, it is hard to update the elfcorehdr
>>> without memory_notify args.
>>>
>>> On PowerPC, the memblock data structure is used to prepare the elfcorehdr for kdump. Since the notifier
>>> used for the memory hotplug crash handler gets initiated before the memblock data structure update
>>> happens (as depicted below), the newly prepared elfcorehdr still holds the old memory regions.
>>> So if the system crashes with an obsolete elfcorehdr, makedumpfile fails to collect the vmcore.
>>>
>>> Sequence of actions done on PowerPC to serve the memory hotplug:
>>>
>>>  Initiate memory hot remove
>>>           |
>>>           v
>>>  offline pages
>>>           |
>>>           v
>>>  initiate memory notify call chain
>>>  for MEM_OFFLINE event.
>>>  (same is used for crash update)
>>>           |
>>>           v
>>>  prepare new elfcorehdr for kdump using
>>>  memblock data structure
>>>           |
>>>           v
>>>  update memblock data structure
>>>
>>> How passing memory_notify to arch crash hotplug handler will help?
>>>
>>> memory_notify holds the start PFN and page count; with that we can get
>>> the base address and size of the hot-unplugged memory and can use them
>>> to keep the hot-unplugged memory region from being added to the elfcorehdr.
>>>
>>> Thanks,
>>> Sourabh Jain
>>>
>> Sourabh, let's see what Baoquan thinks.
>>
>> Baoquan, are you OK with this request? I once had these parameters to the
>> crash hotplug handler and since they were unused at the time, you asked
>> that I remove them, which I did.
> Sorry for missing this mail. I thought the two of you were discussing
> something between yourselves, and didn't notice this question to me.
>
> I think there are two ways to solve the issue Sourabh raised:
> 1) make handle_hotplug_event() get and pass down the memory_notify as
> Sourabh said, or the hp_action, mem_start|size as Eric suggested. I
> have to admit I haven't carefully checked which one is better.
>
> 2) leave the current code as is, since it's aimed at x86 only. Later
> Sourabh can modify the code according to his needs on ppc. That way each
> code change comes with a satisfying justification of why it is needed.
>
> I personally like the 2nd way, while also like seeing 1st one if the
> code change and log is convincing to any reviewer.

Ok let's go with second approach. I will introduce a patch in PowerPC
series to update the
handle_hotplug_event function signature and justify the change.

Thanks,
Sourabh Jain