When specifying 'kexec -c -d', kexec_load interface will print loading
information, e.g the regions where kernel/initrd/purgatory/cmdline
are put, the memmap passed to 2nd kernel taken as system RAM ranges,
and printing all contents of struct kexec_segment, etc. These are
very helpful for analyzing or positioning what's happening when
kexec/kdump itself failed. The debugging printing for kexec_load
interface is made in user space utility kexec-tools.
Whereas, with kexec_file_load interface, 'kexec -s -d' print nothing.
Because kexec_file code is mostly implemented in kernel space, and the
debugging printing functionality is missed. It's not convenient when
debugging kexec/kdump loading and jumping with kexec_file_load
interface.
Now add KEXEC_FILE_DEBUG to kexec_file flag to control the debugging
message printing. And add global variable kexec_file_dbg_print and
macro kexec_dprintk() to facilitate the printing.
This is a preparation, later kexec_dprintk() will be used to replace the
existing pr_debug(). Once 'kexec -s -d' is specified, it will print out
kexec/kdump loading information. If '-d' is not specified, it regresses
to pr_debug().
Signed-off-by: Baoquan He <[email protected]>
---
include/linux/kexec.h | 9 ++++++++-
include/uapi/linux/kexec.h | 1 +
kernel/kexec_core.c | 2 ++
kernel/kexec_file.c | 3 +++
4 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 8227455192b7..400cb6c02176 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -403,7 +403,7 @@ bool kexec_load_permitted(int kexec_image_type);
/* List of defined/legal kexec file flags */
#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
- KEXEC_FILE_NO_INITRAMFS)
+ KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG)
/* flag to track if kexec reboot is in progress */
extern bool kexec_in_progress;
@@ -500,6 +500,13 @@ static inline int crash_hotplug_memory_support(void) { return 0; }
static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
#endif
+extern bool kexec_file_dbg_print;
+
+#define kexec_dprintk(fmt, ...) \
+ printk("%s" fmt, \
+ kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \
+ ##__VA_ARGS__)
+
#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
struct task_struct;
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 01766dd839b0..c17bb096ea68 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -25,6 +25,7 @@
#define KEXEC_FILE_UNLOAD 0x00000001
#define KEXEC_FILE_ON_CRASH 0x00000002
#define KEXEC_FILE_NO_INITRAMFS 0x00000004
+#define KEXEC_FILE_DEBUG 0x00000008
/* These values match the ELF architecture values.
* Unless there is a good reason that should continue to be the case.
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index be5642a4ec49..f70bf3a7885c 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -52,6 +52,8 @@ atomic_t __kexec_lock = ATOMIC_INIT(0);
/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;
+bool kexec_file_dbg_print;
+
int kexec_should_crash(struct task_struct *p)
{
/*
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index f9a419cd22d4..aca5dac74044 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -123,6 +123,8 @@ void kimage_file_post_load_cleanup(struct kimage *image)
*/
kfree(image->image_loader_data);
image->image_loader_data = NULL;
+
+ kexec_file_dbg_print = false;
}
#ifdef CONFIG_KEXEC_SIG
@@ -278,6 +280,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
if (!image)
return -ENOMEM;
+ kexec_file_dbg_print = !!(flags & KEXEC_FILE_DEBUG);
image->file_mode = 1;
if (kexec_on_panic) {
--
2.41.0
On 13. 12. 23, 6:57, Baoquan He wrote:
> When specifying 'kexec -c -d', kexec_load interface will print loading
> information, e.g the regions where kernel/initrd/purgatory/cmdline
> are put, the memmap passed to 2nd kernel taken as system RAM ranges,
> and printing all contents of struct kexec_segment, etc. These are
> very helpful for analyzing or positioning what's happening when
> kexec/kdump itself failed. The debugging printing for kexec_load
> interface is made in user space utility kexec-tools.
>
> Whereas, with kexec_file_load interface, 'kexec -s -d' print nothing.
> Because kexec_file code is mostly implemented in kernel space, and the
> debugging printing functionality is missed. It's not convenient when
> debugging kexec/kdump loading and jumping with kexec_file_load
> interface.
>
> Now add KEXEC_FILE_DEBUG to kexec_file flag to control the debugging
> message printing. And add global variable kexec_file_dbg_print and
> macro kexec_dprintk() to facilitate the printing.
>
> This is a preparation, later kexec_dprintk() will be used to replace the
> existing pr_debug(). Once 'kexec -s -d' is specified, it will print out
> kexec/kdump loading information. If '-d' is not specified, it regresses
> to pr_debug().
>
> Signed-off-by: Baoquan He <[email protected]>
..
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
..
> @@ -500,6 +500,13 @@ static inline int crash_hotplug_memory_support(void) { return 0; }
> static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
> #endif
>
> +extern bool kexec_file_dbg_print;
> +
> +#define kexec_dprintk(fmt, ...) \
> + printk("%s" fmt, \
> + kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \
> + ##__VA_ARGS__)
This means you dump it _always_. Only with different levels.
And without any prefix whatsoever, so people see bloat like this in
their log now:
[ +0.000001] 0000000000001000-000000000009ffff (1)
[ +0.000002] 000000007f96d000-000000007f97efff (3)
[ +0.000002] 0000000000800000-0000000000807fff (4)
[ +0.000001] 000000000080b000-000000000080bfff (4)
[ +0.000002] 0000000000810000-00000000008fffff (4)
[ +0.000001] 000000007f97f000-000000007f9fefff (4)
[ +0.000001] 000000007ff00000-000000007fffffff (4)
[ +0.000002] 0000000000000000-0000000000000fff (2)
without actually knowing what that is.
There should be nothing logged if that is not asked for and especially
if kexec load went fine, right?
Can this be redesigned, please?
Actually what was wrong on the pr_debug()s? Can you simply turn them on
from the kernel when -d is passed to kexec instead of all this?
..
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -52,6 +52,8 @@ atomic_t __kexec_lock = ATOMIC_INIT(0);
> /* Flag to indicate we are going to kexec a new kernel */
> bool kexec_in_progress = false;
>
> +bool kexec_file_dbg_print;
Ugh, and a global flag for this?
thanks,
--
js
suse labs
Hi Jiri,
On 03/12/24 at 10:58am, Jiri Slaby wrote:
> On 13. 12. 23, 6:57, Baoquan He wrote:
... snip...
> > --- a/include/linux/kexec.h
> > +++ b/include/linux/kexec.h
> ...
> > @@ -500,6 +500,13 @@ static inline int crash_hotplug_memory_support(void) { return 0; }
> > static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
> > #endif
> > +extern bool kexec_file_dbg_print;
> > +
> > +#define kexec_dprintk(fmt, ...) \
> > + printk("%s" fmt, \
> > + kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \
> > + ##__VA_ARGS__)
>
> This means you dump it _always_. Only with different levels.
It dumped always too with pr_debug() before, I just add a switch to
control it's pr_info() or pr_debug().
>
> And without any prefix whatsoever, so people see bloat like this in their
> log now:
> [ +0.000001] 0000000000001000-000000000009ffff (1)
> [ +0.000002] 000000007f96d000-000000007f97efff (3)
> [ +0.000002] 0000000000800000-0000000000807fff (4)
> [ +0.000001] 000000000080b000-000000000080bfff (4)
> [ +0.000002] 0000000000810000-00000000008fffff (4)
> [ +0.000001] 000000007f97f000-000000007f9fefff (4)
> [ +0.000001] 000000007ff00000-000000007fffffff (4)
> [ +0.000002] 0000000000000000-0000000000000fff (2)
On which arch are you seeing this? There should be one line above these
range printing to tell what they are, like:
E820 memmap:
0000000000000000-000000000009a3ff (1)
000000000009a400-000000000009ffff (2)
00000000000e0000-00000000000fffff (2)
0000000000100000-000000006ff83fff (1)
000000006ff84000-000000007ac50fff (2)
>
> without actually knowing what that is.
>
> There should be nothing logged if that is not asked for and especially if
> kexec load went fine, right?
Right. Before this patch, those pr_debug() were already there. You need
enable them to print out like add '#define DEBUG' in *.c file, or enable
the dynamic debugging of the file or function. With this patch applied,
you only need specify '-d' when you execute kexec command with
kexec_file load interface, like:
kexec -s -l -d /boot/vmlinuz-xxxx.img --initrd xxx.img --reuse-cmdline
For kexec_file load, it is not logging if not specifying '-d', unless
you take way to make pr_debug() work in that file.
>
> Can this be redesigned, please?
Sure, after making clear what's going on with this, I will try.
>
> Actually what was wrong on the pr_debug()s? Can you simply turn them on from
> the kernel when -d is passed to kexec instead of all this?
Joe suggested this during v1 reviewing:
https://lore.kernel.org/all/[email protected]/T/#u
>
> ...
> > --- a/kernel/kexec_core.c
> > +++ b/kernel/kexec_core.c
> > @@ -52,6 +52,8 @@ atomic_t __kexec_lock = ATOMIC_INIT(0);
> > /* Flag to indicate we are going to kexec a new kernel */
> > bool kexec_in_progress = false;
> > +bool kexec_file_dbg_print;
>
> Ugh, and a global flag for this?
Yeah, kexec_file_dbg_print records if '-d' is specified when 'kexec'
command executed. Anything wrong with the global flag?
Thanks
Baoquan
Hi,
On 13. 03. 24, 1:48, Baoquan He wrote:
> Hi Jiri,
>
> On 03/12/24 at 10:58am, Jiri Slaby wrote:
>> On 13. 12. 23, 6:57, Baoquan He wrote:
> ... snip...
>>> --- a/include/linux/kexec.h
>>> +++ b/include/linux/kexec.h
>> ...
>>> @@ -500,6 +500,13 @@ static inline int crash_hotplug_memory_support(void) { return 0; }
>>> static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
>>> #endif
>>> +extern bool kexec_file_dbg_print;
>>> +
>>> +#define kexec_dprintk(fmt, ...) \
>>> + printk("%s" fmt, \
>>> + kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \
>>> + ##__VA_ARGS__)
>>
>> This means you dump it _always_. Only with different levels.
>
> It dumped always too with pr_debug() before, I just add a switch to
> control it's pr_info() or pr_debug().
Not really, see below.
>>
>> And without any prefix whatsoever, so people see bloat like this in their
>> log now:
>> [ +0.000001] 0000000000001000-000000000009ffff (1)
>> [ +0.000002] 000000007f96d000-000000007f97efff (3)
>> [ +0.000002] 0000000000800000-0000000000807fff (4)
>> [ +0.000001] 000000000080b000-000000000080bfff (4)
>> [ +0.000002] 0000000000810000-00000000008fffff (4)
>> [ +0.000001] 000000007f97f000-000000007f9fefff (4)
>> [ +0.000001] 000000007ff00000-000000007fffffff (4)
>> [ +0.000002] 0000000000000000-0000000000000fff (2)
>
> On which arch are you seeing this? There should be one line above these
> range printing to tell what they are, like:
>
> E820 memmap:
Ah this is there too. It's a lot of output, so I took it out of context,
apparently.
> 0000000000000000-000000000009a3ff (1)
> 000000000009a400-000000000009ffff (2)
> 00000000000e0000-00000000000fffff (2)
> 0000000000100000-000000006ff83fff (1)
> 000000006ff84000-000000007ac50fff (2)
It should all be prefixed like kdump: or kexec: in any way.
>> without actually knowing what that is.
>>
>> There should be nothing logged if that is not asked for and especially if
>> kexec load went fine, right?
>
> Right. Before this patch, those pr_debug() were already there. You need
> enable them to print out like add '#define DEBUG' in *.c file, or enable
> the dynamic debugging of the file or function.
I think it's perfectly fine for DEBUG builds to print this out. And many
(all major?) distros use dyndbg, so it used to print nothing by default.
> With this patch applied,
> you only need specify '-d' when you execute kexec command with
> kexec_file load interface, like:
>
> kexec -s -l -d /boot/vmlinuz-xxxx.img --initrd xxx.img --reuse-cmdline
Perhaps our (SUSE) tooling passes -d? But I am seeing this every time I
boot.
No, it does not seem so:
load.sh[915]: Starting kdump kernel load; kexec cmdline: /sbin/kexec -p
/var/lib/kdump/kernel --append=" loglevel=7 console=tty0 console=ttyS0
video=1920x1080,1024x768,800x600 oops=panic
lsm=lockdown,capability,integrity,selinux sysrq=yes reset_devices
acpi_no_memhotplug cgroup_disable=memory nokaslr numa=off irqpoll
nr_cpus=1 root=kdump rootflags=bind rd.udev.children-max=8
disable_cpu_apicid=0 panic=1" --initrd=/var/lib/kdump/initrd -a
> For kexec_file load, it is not logging if not specifying '-d', unless
> you take way to make pr_debug() work in that file.
So is -d detection malfunctioning under some circumstances?
>> Can this be redesigned, please?
>
> Sure, after making clear what's going on with this, I will try.
>
>>
>> Actually what was wrong on the pr_debug()s? Can you simply turn them on from
>> the kernel when -d is passed to kexec instead of all this?
>
> Joe suggested this during v1 reviewing:
> https://lore.kernel.org/all/[email protected]/T/#u
>
>>
>> ...
>>> --- a/kernel/kexec_core.c
>>> +++ b/kernel/kexec_core.c
>>> @@ -52,6 +52,8 @@ atomic_t __kexec_lock = ATOMIC_INIT(0);
>>> /* Flag to indicate we are going to kexec a new kernel */
>>> bool kexec_in_progress = false;
>>> +bool kexec_file_dbg_print;
>>
>> Ugh, and a global flag for this?
>
> Yeah, kexec_file_dbg_print records if '-d' is specified when 'kexec'
> command executed. Anything wrong with the global flag?
Global variables are frowned upon. To cite coding style: unless you
**really** need them. Here, it looks like you do not.
thanks,
--
js
suse labs
On 03/13/24 at 06:58am, Jiri Slaby wrote:
> Hi,
>
>
> On 13. 03. 24, 1:48, Baoquan He wrote:
> > Hi Jiri,
> >
> > On 03/12/24 at 10:58am, Jiri Slaby wrote:
> > > On 13. 12. 23, 6:57, Baoquan He wrote:
> > ... snip...
> > > > --- a/include/linux/kexec.h
> > > > +++ b/include/linux/kexec.h
> > > ...
> > > > @@ -500,6 +500,13 @@ static inline int crash_hotplug_memory_support(void) { return 0; }
> > > > static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
> > > > #endif
> > > > +extern bool kexec_file_dbg_print;
> > > > +
> > > > +#define kexec_dprintk(fmt, ...) \
> > > > + printk("%s" fmt, \
> > > > + kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \
> > > > + ##__VA_ARGS__)
> > >
> > > This means you dump it _always_. Only with different levels.
> >
> > It dumped always too with pr_debug() before, I just add a switch to
> > control it's pr_info() or pr_debug().
>
> Not really, see below.
>
> > >
> > > And without any prefix whatsoever, so people see bloat like this in their
> > > log now:
> > > [ +0.000001] 0000000000001000-000000000009ffff (1)
> > > [ +0.000002] 000000007f96d000-000000007f97efff (3)
> > > [ +0.000002] 0000000000800000-0000000000807fff (4)
> > > [ +0.000001] 000000000080b000-000000000080bfff (4)
> > > [ +0.000002] 0000000000810000-00000000008fffff (4)
> > > [ +0.000001] 000000007f97f000-000000007f9fefff (4)
> > > [ +0.000001] 000000007ff00000-000000007fffffff (4)
> > > [ +0.000002] 0000000000000000-0000000000000fff (2)
> >
> > On which arch are you seeing this? There should be one line above these
> > range printing to tell what they are, like:
> >
> > E820 memmap:
>
> Ah this is there too. It's a lot of output, so I took it out of context,
> apparently.
>
> > 0000000000000000-000000000009a3ff (1)
> > 000000000009a400-000000000009ffff (2)
> > 00000000000e0000-00000000000fffff (2)
> > 0000000000100000-000000006ff83fff (1)
> > 000000006ff84000-000000007ac50fff (2)
>
> It should all be prefixed like kdump: or kexec: in any way.
I can reproduce it now on fedora. OK, I will add kexec or something
similar to prefix. Thanks.
>
> > > without actually knowing what that is.
> > >
> > > There should be nothing logged if that is not asked for and especially if
> > > kexec load went fine, right?
> >
> > Right. Before this patch, those pr_debug() were already there. You need
> > enable them to print out like add '#define DEBUG' in *.c file, or enable
> > the dynamic debugging of the file or function.
>
> I think it's perfectly fine for DEBUG builds to print this out. And many
> (all major?) distros use dyndbg, so it used to print nothing by default.
>
> > With this patch applied,
> > you only need specify '-d' when you execute kexec command with
> > kexec_file load interface, like:
> >
> > kexec -s -l -d /boot/vmlinuz-xxxx.img --initrd xxx.img --reuse-cmdline
>
> Perhaps our (SUSE) tooling passes -d? But I am seeing this every time I
> boot.
>
> No, it does not seem so:
> load.sh[915]: Starting kdump kernel load; kexec cmdline: /sbin/kexec -p
> /var/lib/kdump/kernel --append=" loglevel=7 console=tty0 console=ttyS0
> video=1920x1080,1024x768,800x600 oops=panic
> lsm=lockdown,capability,integrity,selinux sysrq=yes reset_devices
> acpi_no_memhotplug cgroup_disable=memory nokaslr numa=off irqpoll nr_cpus=1
> root=kdump rootflags=bind rd.udev.children-max=8 disable_cpu_apicid=0
> panic=1" --initrd=/var/lib/kdump/initrd -a
>
> > For kexec_file load, it is not logging if not specifying '-d', unless
> > you take way to make pr_debug() work in that file.
>
> So is -d detection malfunctioning under some circumstances?
>
> > > Can this be redesigned, please?
> >
> > Sure, after making clear what's going on with this, I will try.
> >
> > >
> > > Actually what was wrong on the pr_debug()s? Can you simply turn them on from
> > > the kernel when -d is passed to kexec instead of all this?
> >
> > Joe suggested this during v1 reviewing:
> > https://lore.kernel.org/all/[email protected]/T/#u
> >
> > >
> > > ...
> > > > --- a/kernel/kexec_core.c
> > > > +++ b/kernel/kexec_core.c
> > > > @@ -52,6 +52,8 @@ atomic_t __kexec_lock = ATOMIC_INIT(0);
> > > > /* Flag to indicate we are going to kexec a new kernel */
> > > > bool kexec_in_progress = false;
> > > > +bool kexec_file_dbg_print;
> > >
> > > Ugh, and a global flag for this?
> >
> > Yeah, kexec_file_dbg_print records if '-d' is specified when 'kexec'
> > command executed. Anything wrong with the global flag?
>
> Global variables are frowned upon. To cite coding style: unless you
> **really** need them. Here, it looks like you do not.
I see your point, will consider and change. Thanks again.