Currently vmcoreinfo data is updated at boot time subsys_initcall(),
it has the risk of being modified by some wrong code during system
is running.
As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on,
when using "crash" or "makedumpfile"(etc) utility to parse this vmcore,
we probably will get "Segmentation fault" or other unexpected/confusing
errors.
As vmcoreinfo is the most fundamental information for vmcore, we better
double check its correctness. Here we generate a signature(using crc32)
after it is saved, then verify it in crash_save_vmcoreinfo() to see if
the signature was broken, if so we have to re-save the vmcoreinfo data
to get the correct vmcoreinfo for kdump as possible as we can.
Signed-off-by: Xunlei Pang <[email protected]>
---
v1->v2:
- Keep crash_save_vmcoreinfo_init() because "makedumpfile --mem-usage"
uses the information.
- Add crc32 verification for vmcoreinfo, re-save when failure.
arch/Kconfig | 1 +
kernel/kexec_core.c | 43 +++++++++++++++++++++++++++++++++++--------
2 files changed, 36 insertions(+), 8 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index c4d6833..66eb296 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -4,6 +4,7 @@
config KEXEC_CORE
bool
+ select CRC32
config HAVE_IMA_KEXEC
bool
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index bfe62d5..012acbe 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -38,6 +38,7 @@
#include <linux/syscore_ops.h>
#include <linux/compiler.h>
#include <linux/hugetlb.h>
+#include <linux/crc32.h>
#include <asm/page.h>
#include <asm/sections.h>
@@ -53,9 +54,10 @@
/* vmcoreinfo stuff */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+static u32 vmcoreinfo_sig;
size_t vmcoreinfo_size;
size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;
@@ -1367,12 +1369,6 @@ static void update_vmcoreinfo_note(void)
final_note(buf);
}
-void crash_save_vmcoreinfo(void)
-{
- vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
- update_vmcoreinfo_note();
-}
-
void vmcoreinfo_append_str(const char *fmt, ...)
{
va_list args;
@@ -1402,7 +1398,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
}
-static int __init crash_save_vmcoreinfo_init(void)
+static void do_crash_save_vmcoreinfo_init(void)
{
VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
VMCOREINFO_PAGESIZE(PAGE_SIZE);
@@ -1474,6 +1470,37 @@ static int __init crash_save_vmcoreinfo_init(void)
#endif
arch_crash_save_vmcoreinfo();
+}
+
+static u32 crash_calc_vmcoreinfo_sig(void)
+{
+ return crc32(~0, vmcoreinfo_data, vmcoreinfo_size);
+}
+
+static bool crash_verify_vmcoreinfo(void)
+{
+ if (crash_calc_vmcoreinfo_sig() == vmcoreinfo_sig)
+ return true;
+
+ return false;
+}
+
+void crash_save_vmcoreinfo(void)
+{
+ /* Re-save if verification fails */
+ if (!crash_verify_vmcoreinfo()) {
+ vmcoreinfo_size = 0;
+ do_crash_save_vmcoreinfo_init();
+ }
+
+ vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
+ update_vmcoreinfo_note();
+}
+
+static int __init crash_save_vmcoreinfo_init(void)
+{
+ do_crash_save_vmcoreinfo_init();
+ vmcoreinfo_sig = crash_calc_vmcoreinfo_sig();
update_vmcoreinfo_note();
return 0;
--
1.8.3.1
Xunlei Pang <[email protected]> writes:
> Currently vmcoreinfo data is updated at boot time subsys_initcall(),
> it has the risk of being modified by some wrong code during system
> is running.
>
> As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on,
> when using "crash" or "makedumpfile"(etc) utility to parse this vmcore,
> we probably will get "Segmentation fault" or other unexpected/confusing
> errors.
If this is a real concern and the previous discussion sounds like it is
part of what we need to do is move the variable vmcoreinfo_note out
of the kernel's .bss section. And modify the code to regenerate
and keep this information in something like the control page.
Definitely something like this needs a page all to itself, and ideally
far away from any other kernel data structures. I clearly was not
watching closely the data someone decided to keep this silly thing
in the kernel's .bss section.
> As vmcoreinfo is the most fundamental information for vmcore, we better
> double check its correctness. Here we generate a signature(using crc32)
> after it is saved, then verify it in crash_save_vmcoreinfo() to see if
> the signature was broken, if so we have to re-save the vmcoreinfo data
> to get the correct vmcoreinfo for kdump as possible as we can.
Sigh. We already have a sha256 that is supposed to cover this sort of
thing. The bug rather is that apparently it isn't covering this data.
That sounds like what we should be fixing.
Please let's not invent new mechanisms we have to maintain. Let's
reorganize this so this static data is protected like all other static
data in the kexec-on-panic path. We have good mechanims and good
strategies for avoiding and detecting corruption we just need to use
them.
Eric
> Signed-off-by: Xunlei Pang <[email protected]>
> ---
> v1->v2:
> - Keep crash_save_vmcoreinfo_init() because "makedumpfile --mem-usage"
> uses the information.
> - Add crc32 verification for vmcoreinfo, re-save when failure.
>
> arch/Kconfig | 1 +
> kernel/kexec_core.c | 43 +++++++++++++++++++++++++++++++++++--------
> 2 files changed, 36 insertions(+), 8 deletions(-)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index c4d6833..66eb296 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -4,6 +4,7 @@
>
> config KEXEC_CORE
> bool
> + select CRC32
>
> config HAVE_IMA_KEXEC
> bool
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index bfe62d5..012acbe 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -38,6 +38,7 @@
> #include <linux/syscore_ops.h>
> #include <linux/compiler.h>
> #include <linux/hugetlb.h>
> +#include <linux/crc32.h>
>
> #include <asm/page.h>
> #include <asm/sections.h>
> @@ -53,9 +54,10 @@
>
> /* vmcoreinfo stuff */
> static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> +static u32 vmcoreinfo_sig;
> size_t vmcoreinfo_size;
> size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
> +u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>
> /* Flag to indicate we are going to kexec a new kernel */
> bool kexec_in_progress = false;
> @@ -1367,12 +1369,6 @@ static void update_vmcoreinfo_note(void)
> final_note(buf);
> }
>
> -void crash_save_vmcoreinfo(void)
> -{
> - vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
> - update_vmcoreinfo_note();
> -}
> -
> void vmcoreinfo_append_str(const char *fmt, ...)
> {
> va_list args;
> @@ -1402,7 +1398,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
> return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
> }
>
> -static int __init crash_save_vmcoreinfo_init(void)
> +static void do_crash_save_vmcoreinfo_init(void)
> {
> VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
> VMCOREINFO_PAGESIZE(PAGE_SIZE);
> @@ -1474,6 +1470,37 @@ static int __init crash_save_vmcoreinfo_init(void)
> #endif
>
> arch_crash_save_vmcoreinfo();
> +}
> +
> +static u32 crash_calc_vmcoreinfo_sig(void)
> +{
> + return crc32(~0, vmcoreinfo_data, vmcoreinfo_size);
> +}
> +
> +static bool crash_verify_vmcoreinfo(void)
> +{
> + if (crash_calc_vmcoreinfo_sig() == vmcoreinfo_sig)
> + return true;
> +
> + return false;
> +}
> +
> +void crash_save_vmcoreinfo(void)
> +{
> + /* Re-save if verification fails */
> + if (!crash_verify_vmcoreinfo()) {
> + vmcoreinfo_size = 0;
> + do_crash_save_vmcoreinfo_init();
> + }
> +
> + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
> + update_vmcoreinfo_note();
> +}
> +
> +static int __init crash_save_vmcoreinfo_init(void)
> +{
> + do_crash_save_vmcoreinfo_init();
> + vmcoreinfo_sig = crash_calc_vmcoreinfo_sig();
> update_vmcoreinfo_note();
>
> return 0;
On 03/18/2017 at 01:22 AM, Eric W. Biederman wrote:
> Xunlei Pang <[email protected]> writes:
>
>> Currently vmcoreinfo data is updated at boot time subsys_initcall(),
>> it has the risk of being modified by some wrong code during system
>> is running.
>>
>> As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on,
>> when using "crash" or "makedumpfile"(etc) utility to parse this vmcore,
>> we probably will get "Segmentation fault" or other unexpected/confusing
>> errors.
> If this is a real concern and the previous discussion sounds like it is
> part of what we need to do is move the variable vmcoreinfo_note out
> of the kernel's .bss section. And modify the code to regenerate
> and keep this information in something like the control page.
>
> Definitely something like this needs a page all to itself, and ideally
> far away from any other kernel data structures. I clearly was not
> watching closely the data someone decided to keep this silly thing
> in the kernel's .bss section.
>
>> As vmcoreinfo is the most fundamental information for vmcore, we better
>> double check its correctness. Here we generate a signature(using crc32)
>> after it is saved, then verify it in crash_save_vmcoreinfo() to see if
>> the signature was broken, if so we have to re-save the vmcoreinfo data
>> to get the correct vmcoreinfo for kdump as possible as we can.
> Sigh. We already have a sha256 that is supposed to cover this sort of
> thing. The bug rather is that apparently it isn't covering this data.
> That sounds like what we should be fixing.
>
> Please let's not invent new mechanisms we have to maintain. Let's
> reorganize this so this static data is protected like all other static
> data in the kexec-on-panic path. We have good mechanims and good
> strategies for avoiding and detecting corruption we just need to use
> them.
>
> Eric
>
Yes, this idea looks way better, I will follow your suggestions, thanks!
Regards,
Xunlei
>
>> Signed-off-by: Xunlei Pang <[email protected]>
>> ---
>> v1->v2:
>> - Keep crash_save_vmcoreinfo_init() because "makedumpfile --mem-usage"
>> uses the information.
>> - Add crc32 verification for vmcoreinfo, re-save when failure.
>>
>> arch/Kconfig | 1 +
>> kernel/kexec_core.c | 43 +++++++++++++++++++++++++++++++++++--------
>> 2 files changed, 36 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/Kconfig b/arch/Kconfig
>> index c4d6833..66eb296 100644
>> --- a/arch/Kconfig
>> +++ b/arch/Kconfig
>> @@ -4,6 +4,7 @@
>>
>> config KEXEC_CORE
>> bool
>> + select CRC32
>>
>> config HAVE_IMA_KEXEC
>> bool
>> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
>> index bfe62d5..012acbe 100644
>> --- a/kernel/kexec_core.c
>> +++ b/kernel/kexec_core.c
>> @@ -38,6 +38,7 @@
>> #include <linux/syscore_ops.h>
>> #include <linux/compiler.h>
>> #include <linux/hugetlb.h>
>> +#include <linux/crc32.h>
>>
>> #include <asm/page.h>
>> #include <asm/sections.h>
>> @@ -53,9 +54,10 @@
>>
>> /* vmcoreinfo stuff */
>> static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
>> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>> +static u32 vmcoreinfo_sig;
>> size_t vmcoreinfo_size;
>> size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
>> +u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>>
>> /* Flag to indicate we are going to kexec a new kernel */
>> bool kexec_in_progress = false;
>> @@ -1367,12 +1369,6 @@ static void update_vmcoreinfo_note(void)
>> final_note(buf);
>> }
>>
>> -void crash_save_vmcoreinfo(void)
>> -{
>> - vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>> - update_vmcoreinfo_note();
>> -}
>> -
>> void vmcoreinfo_append_str(const char *fmt, ...)
>> {
>> va_list args;
>> @@ -1402,7 +1398,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
>> return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
>> }
>>
>> -static int __init crash_save_vmcoreinfo_init(void)
>> +static void do_crash_save_vmcoreinfo_init(void)
>> {
>> VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
>> VMCOREINFO_PAGESIZE(PAGE_SIZE);
>> @@ -1474,6 +1470,37 @@ static int __init crash_save_vmcoreinfo_init(void)
>> #endif
>>
>> arch_crash_save_vmcoreinfo();
>> +}
>> +
>> +static u32 crash_calc_vmcoreinfo_sig(void)
>> +{
>> + return crc32(~0, vmcoreinfo_data, vmcoreinfo_size);
>> +}
>> +
>> +static bool crash_verify_vmcoreinfo(void)
>> +{
>> + if (crash_calc_vmcoreinfo_sig() == vmcoreinfo_sig)
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> +void crash_save_vmcoreinfo(void)
>> +{
>> + /* Re-save if verification fails */
>> + if (!crash_verify_vmcoreinfo()) {
>> + vmcoreinfo_size = 0;
>> + do_crash_save_vmcoreinfo_init();
>> + }
>> +
>> + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>> + update_vmcoreinfo_note();
>> +}
>> +
>> +static int __init crash_save_vmcoreinfo_init(void)
>> +{
>> + do_crash_save_vmcoreinfo_init();
>> + vmcoreinfo_sig = crash_calc_vmcoreinfo_sig();
>> update_vmcoreinfo_note();
>>
>> return 0;
> _______________________________________________
> kexec mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/kexec
On 03/17/17 at 12:22pm, Eric W. Biederman wrote:
> Xunlei Pang <[email protected]> writes:
>
> > Currently vmcoreinfo data is updated at boot time subsys_initcall(),
> > it has the risk of being modified by some wrong code during system
> > is running.
> >
> > As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on,
> > when using "crash" or "makedumpfile"(etc) utility to parse this vmcore,
> > we probably will get "Segmentation fault" or other unexpected/confusing
> > errors.
>
> If this is a real concern and the previous discussion sounds like it is
> part of what we need to do is move the variable vmcoreinfo_note out
> of the kernel's .bss section. And modify the code to regenerate
> and keep this information in something like the control page.
I guess this is not from a real issue, just from Xunlei's worry. But
Xunlei didn't give a direct answer to this, and Petr's question. Not
very sure if this will impact other implementation. fadump will be
impacted by this or other dump? Maybe yet or maybe not.
I don't object this strongly, but please at least add code comment to
explain why vmcoreinfo need be saved twice because it does look weird.
>
> Definitely something like this needs a page all to itself, and ideally
> far away from any other kernel data structures. I clearly was not
> watching closely the data someone decided to keep this silly thing
> in the kernel's .bss section.
>
> > As vmcoreinfo is the most fundamental information for vmcore, we better
> > double check its correctness. Here we generate a signature(using crc32)
> > after it is saved, then verify it in crash_save_vmcoreinfo() to see if
> > the signature was broken, if so we have to re-save the vmcoreinfo data
> > to get the correct vmcoreinfo for kdump as possible as we can.
>
> Sigh. We already have a sha256 that is supposed to cover this sort of
> thing. The bug rather is that apparently it isn't covering this data.
> That sounds like what we should be fixing.
>
> Please let's not invent new mechanisms we have to maintain. Let's
> reorganize this so this static data is protected like all other static
> data in the kexec-on-panic path. We have good mechanims and good
> strategies for avoiding and detecting corruption we just need to use
> them.
>
> Eric
>
>
>
> > Signed-off-by: Xunlei Pang <[email protected]>
> > ---
> > v1->v2:
> > - Keep crash_save_vmcoreinfo_init() because "makedumpfile --mem-usage"
> > uses the information.
> > - Add crc32 verification for vmcoreinfo, re-save when failure.
> >
> > arch/Kconfig | 1 +
> > kernel/kexec_core.c | 43 +++++++++++++++++++++++++++++++++++--------
> > 2 files changed, 36 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index c4d6833..66eb296 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -4,6 +4,7 @@
> >
> > config KEXEC_CORE
> > bool
> > + select CRC32
> >
> > config HAVE_IMA_KEXEC
> > bool
> > diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> > index bfe62d5..012acbe 100644
> > --- a/kernel/kexec_core.c
> > +++ b/kernel/kexec_core.c
> > @@ -38,6 +38,7 @@
> > #include <linux/syscore_ops.h>
> > #include <linux/compiler.h>
> > #include <linux/hugetlb.h>
> > +#include <linux/crc32.h>
> >
> > #include <asm/page.h>
> > #include <asm/sections.h>
> > @@ -53,9 +54,10 @@
> >
> > /* vmcoreinfo stuff */
> > static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
> > -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> > +static u32 vmcoreinfo_sig;
> > size_t vmcoreinfo_size;
> > size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
> > +u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> >
> > /* Flag to indicate we are going to kexec a new kernel */
> > bool kexec_in_progress = false;
> > @@ -1367,12 +1369,6 @@ static void update_vmcoreinfo_note(void)
> > final_note(buf);
> > }
> >
> > -void crash_save_vmcoreinfo(void)
> > -{
> > - vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
> > - update_vmcoreinfo_note();
> > -}
> > -
> > void vmcoreinfo_append_str(const char *fmt, ...)
> > {
> > va_list args;
> > @@ -1402,7 +1398,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
> > return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
> > }
> >
> > -static int __init crash_save_vmcoreinfo_init(void)
> > +static void do_crash_save_vmcoreinfo_init(void)
> > {
> > VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
> > VMCOREINFO_PAGESIZE(PAGE_SIZE);
> > @@ -1474,6 +1470,37 @@ static int __init crash_save_vmcoreinfo_init(void)
> > #endif
> >
> > arch_crash_save_vmcoreinfo();
> > +}
> > +
> > +static u32 crash_calc_vmcoreinfo_sig(void)
> > +{
> > + return crc32(~0, vmcoreinfo_data, vmcoreinfo_size);
> > +}
> > +
> > +static bool crash_verify_vmcoreinfo(void)
> > +{
> > + if (crash_calc_vmcoreinfo_sig() == vmcoreinfo_sig)
> > + return true;
> > +
> > + return false;
> > +}
> > +
> > +void crash_save_vmcoreinfo(void)
> > +{
> > + /* Re-save if verification fails */
> > + if (!crash_verify_vmcoreinfo()) {
> > + vmcoreinfo_size = 0;
> > + do_crash_save_vmcoreinfo_init();
> > + }
> > +
> > + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
> > + update_vmcoreinfo_note();
> > +}
> > +
> > +static int __init crash_save_vmcoreinfo_init(void)
> > +{
> > + do_crash_save_vmcoreinfo_init();
> > + vmcoreinfo_sig = crash_calc_vmcoreinfo_sig();
> > update_vmcoreinfo_note();
> >
> > return 0;
On 03/20/2017 at 10:13 AM, Baoquan He wrote:
> On 03/17/17 at 12:22pm, Eric W. Biederman wrote:
>> Xunlei Pang <[email protected]> writes:
>>
>>> Currently vmcoreinfo data is updated at boot time subsys_initcall(),
>>> it has the risk of being modified by some wrong code during system
>>> is running.
>>>
>>> As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on,
>>> when using "crash" or "makedumpfile"(etc) utility to parse this vmcore,
>>> we probably will get "Segmentation fault" or other unexpected/confusing
>>> errors.
>> If this is a real concern and the previous discussion sounds like it is
>> part of what we need to do is move the variable vmcoreinfo_note out
>> of the kernel's .bss section. And modify the code to regenerate
>> and keep this information in something like the control page.
> I guess this is not from a real issue, just from Xunlei's worry. But
> Xunlei didn't give a direct answer to this, and Petr's question. Not
It's easy to reproduce: write a kernel module to modify part content of
vmcoreinfo_data (we surely have many ways to acquire its VA). If it does
exist in theory, we will met it sooner or later in real world due to billions
of applications.
Also there are bugs like this one
https://bugzilla.redhat.com/show_bug.cgi?id=1287097
Not sure if it is makedumpfile issue or this one, maybe we can't know forever.
Regards,
Xunlei
> very sure if this will impact other implementation. fadump will be
> impacted by this or other dump? Maybe yet or maybe not.
>
> I don't object this strongly, but please at least add code comment to
> explain why vmcoreinfo need be saved twice because it does look weird.
>
>> Definitely something like this needs a page all to itself, and ideally
>> far away from any other kernel data structures. I clearly was not
>> watching closely the data someone decided to keep this silly thing
>> in the kernel's .bss section.
>>
>>> As vmcoreinfo is the most fundamental information for vmcore, we better
>>> double check its correctness. Here we generate a signature(using crc32)
>>> after it is saved, then verify it in crash_save_vmcoreinfo() to see if
>>> the signature was broken, if so we have to re-save the vmcoreinfo data
>>> to get the correct vmcoreinfo for kdump as possible as we can.
>> Sigh. We already have a sha256 that is supposed to cover this sort of
>> thing. The bug rather is that apparently it isn't covering this data.
>> That sounds like what we should be fixing.
>>
>> Please let's not invent new mechanisms we have to maintain. Let's
>> reorganize this so this static data is protected like all other static
>> data in the kexec-on-panic path. We have good mechanims and good
>> strategies for avoiding and detecting corruption we just need to use
>> them.
>>
>> Eric
>>
>>
>>
>>> Signed-off-by: Xunlei Pang <[email protected]>
>>> ---
>>> v1->v2:
>>> - Keep crash_save_vmcoreinfo_init() because "makedumpfile --mem-usage"
>>> uses the information.
>>> - Add crc32 verification for vmcoreinfo, re-save when failure.
>>>
>>> arch/Kconfig | 1 +
>>> kernel/kexec_core.c | 43 +++++++++++++++++++++++++++++++++++--------
>>> 2 files changed, 36 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/Kconfig b/arch/Kconfig
>>> index c4d6833..66eb296 100644
>>> --- a/arch/Kconfig
>>> +++ b/arch/Kconfig
>>> @@ -4,6 +4,7 @@
>>>
>>> config KEXEC_CORE
>>> bool
>>> + select CRC32
>>>
>>> config HAVE_IMA_KEXEC
>>> bool
>>> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
>>> index bfe62d5..012acbe 100644
>>> --- a/kernel/kexec_core.c
>>> +++ b/kernel/kexec_core.c
>>> @@ -38,6 +38,7 @@
>>> #include <linux/syscore_ops.h>
>>> #include <linux/compiler.h>
>>> #include <linux/hugetlb.h>
>>> +#include <linux/crc32.h>
>>>
>>> #include <asm/page.h>
>>> #include <asm/sections.h>
>>> @@ -53,9 +54,10 @@
>>>
>>> /* vmcoreinfo stuff */
>>> static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
>>> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>>> +static u32 vmcoreinfo_sig;
>>> size_t vmcoreinfo_size;
>>> size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
>>> +u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>>>
>>> /* Flag to indicate we are going to kexec a new kernel */
>>> bool kexec_in_progress = false;
>>> @@ -1367,12 +1369,6 @@ static void update_vmcoreinfo_note(void)
>>> final_note(buf);
>>> }
>>>
>>> -void crash_save_vmcoreinfo(void)
>>> -{
>>> - vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>>> - update_vmcoreinfo_note();
>>> -}
>>> -
>>> void vmcoreinfo_append_str(const char *fmt, ...)
>>> {
>>> va_list args;
>>> @@ -1402,7 +1398,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
>>> return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
>>> }
>>>
>>> -static int __init crash_save_vmcoreinfo_init(void)
>>> +static void do_crash_save_vmcoreinfo_init(void)
>>> {
>>> VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
>>> VMCOREINFO_PAGESIZE(PAGE_SIZE);
>>> @@ -1474,6 +1470,37 @@ static int __init crash_save_vmcoreinfo_init(void)
>>> #endif
>>>
>>> arch_crash_save_vmcoreinfo();
>>> +}
>>> +
>>> +static u32 crash_calc_vmcoreinfo_sig(void)
>>> +{
>>> + return crc32(~0, vmcoreinfo_data, vmcoreinfo_size);
>>> +}
>>> +
>>> +static bool crash_verify_vmcoreinfo(void)
>>> +{
>>> + if (crash_calc_vmcoreinfo_sig() == vmcoreinfo_sig)
>>> + return true;
>>> +
>>> + return false;
>>> +}
>>> +
>>> +void crash_save_vmcoreinfo(void)
>>> +{
>>> + /* Re-save if verification fails */
>>> + if (!crash_verify_vmcoreinfo()) {
>>> + vmcoreinfo_size = 0;
>>> + do_crash_save_vmcoreinfo_init();
>>> + }
>>> +
>>> + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>>> + update_vmcoreinfo_note();
>>> +}
>>> +
>>> +static int __init crash_save_vmcoreinfo_init(void)
>>> +{
>>> + do_crash_save_vmcoreinfo_init();
>>> + vmcoreinfo_sig = crash_calc_vmcoreinfo_sig();
>>> update_vmcoreinfo_note();
>>>
>>> return 0;
On 03/20/17 at 10:39am, Xunlei Pang wrote:
> On 03/20/2017 at 10:13 AM, Baoquan He wrote:
> > On 03/17/17 at 12:22pm, Eric W. Biederman wrote:
> >> Xunlei Pang <[email protected]> writes:
> >>
> >>> Currently vmcoreinfo data is updated at boot time subsys_initcall(),
> >>> it has the risk of being modified by some wrong code during system
> >>> is running.
> >>>
> >>> As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on,
> >>> when using "crash" or "makedumpfile"(etc) utility to parse this vmcore,
> >>> we probably will get "Segmentation fault" or other unexpected/confusing
> >>> errors.
> >> If this is a real concern and the previous discussion sounds like it is
> >> part of what we need to do is move the variable vmcoreinfo_note out
> >> of the kernel's .bss section. And modify the code to regenerate
> >> and keep this information in something like the control page.
> > I guess this is not from a real issue, just from Xunlei's worry. But
> > Xunlei didn't give a direct answer to this, and Petr's question. Not
>
> It's easy to reproduce: write a kernel module to modify part content of
> vmcoreinfo_data (we surely have many ways to acquire its VA). If it does
> exist in theory, we will met it sooner or later in real world due to billions
> of applications.
>
> Also there are bugs like this one
> https://bugzilla.redhat.com/show_bug.cgi?id=1287097
> Not sure if it is makedumpfile issue or this one, maybe we can't know forever.
Well, kdump is not all-purpose. If you write code in module to stomp
page init_level4_pgt is pointing at, you won't get a vmcore.
And you are saying vmcoreinfo_data, it's a intermediate page, should be
vmcoreinfo_note. If the wrong code you mentioned didn't change
vmcoreinfo_note, but other kernel data which need be saved into
vmcoreinfo_note, crash_save_vmcoreinfo_init is doing better than you
re-saved one.
>
> Regards,
> Xunlei
>
> > very sure if this will impact other implementation. fadump will be
> > impacted by this or other dump? Maybe yet or maybe not.
> >
> > I don't object this strongly, but please at least add code comment to
> > explain why vmcoreinfo need be saved twice because it does look weird.
> >
> >> Definitely something like this needs a page all to itself, and ideally
> >> far away from any other kernel data structures. I clearly was not
> >> watching closely the data someone decided to keep this silly thing
> >> in the kernel's .bss section.
> >>
> >>> As vmcoreinfo is the most fundamental information for vmcore, we better
> >>> double check its correctness. Here we generate a signature(using crc32)
> >>> after it is saved, then verify it in crash_save_vmcoreinfo() to see if
> >>> the signature was broken, if so we have to re-save the vmcoreinfo data
> >>> to get the correct vmcoreinfo for kdump as possible as we can.
> >> Sigh. We already have a sha256 that is supposed to cover this sort of
> >> thing. The bug rather is that apparently it isn't covering this data.
> >> That sounds like what we should be fixing.
> >>
> >> Please let's not invent new mechanisms we have to maintain. Let's
> >> reorganize this so this static data is protected like all other static
> >> data in the kexec-on-panic path. We have good mechanims and good
> >> strategies for avoiding and detecting corruption we just need to use
> >> them.
> >>
> >> Eric
> >>
> >>
> >>
> >>> Signed-off-by: Xunlei Pang <[email protected]>
> >>> ---
> >>> v1->v2:
> >>> - Keep crash_save_vmcoreinfo_init() because "makedumpfile --mem-usage"
> >>> uses the information.
> >>> - Add crc32 verification for vmcoreinfo, re-save when failure.
> >>>
> >>> arch/Kconfig | 1 +
> >>> kernel/kexec_core.c | 43 +++++++++++++++++++++++++++++++++++--------
> >>> 2 files changed, 36 insertions(+), 8 deletions(-)
> >>>
> >>> diff --git a/arch/Kconfig b/arch/Kconfig
> >>> index c4d6833..66eb296 100644
> >>> --- a/arch/Kconfig
> >>> +++ b/arch/Kconfig
> >>> @@ -4,6 +4,7 @@
> >>>
> >>> config KEXEC_CORE
> >>> bool
> >>> + select CRC32
> >>>
> >>> config HAVE_IMA_KEXEC
> >>> bool
> >>> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> >>> index bfe62d5..012acbe 100644
> >>> --- a/kernel/kexec_core.c
> >>> +++ b/kernel/kexec_core.c
> >>> @@ -38,6 +38,7 @@
> >>> #include <linux/syscore_ops.h>
> >>> #include <linux/compiler.h>
> >>> #include <linux/hugetlb.h>
> >>> +#include <linux/crc32.h>
> >>>
> >>> #include <asm/page.h>
> >>> #include <asm/sections.h>
> >>> @@ -53,9 +54,10 @@
> >>>
> >>> /* vmcoreinfo stuff */
> >>> static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
> >>> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> >>> +static u32 vmcoreinfo_sig;
> >>> size_t vmcoreinfo_size;
> >>> size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
> >>> +u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
> >>>
> >>> /* Flag to indicate we are going to kexec a new kernel */
> >>> bool kexec_in_progress = false;
> >>> @@ -1367,12 +1369,6 @@ static void update_vmcoreinfo_note(void)
> >>> final_note(buf);
> >>> }
> >>>
> >>> -void crash_save_vmcoreinfo(void)
> >>> -{
> >>> - vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
> >>> - update_vmcoreinfo_note();
> >>> -}
> >>> -
> >>> void vmcoreinfo_append_str(const char *fmt, ...)
> >>> {
> >>> va_list args;
> >>> @@ -1402,7 +1398,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
> >>> return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
> >>> }
> >>>
> >>> -static int __init crash_save_vmcoreinfo_init(void)
> >>> +static void do_crash_save_vmcoreinfo_init(void)
> >>> {
> >>> VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
> >>> VMCOREINFO_PAGESIZE(PAGE_SIZE);
> >>> @@ -1474,6 +1470,37 @@ static int __init crash_save_vmcoreinfo_init(void)
> >>> #endif
> >>>
> >>> arch_crash_save_vmcoreinfo();
> >>> +}
> >>> +
> >>> +static u32 crash_calc_vmcoreinfo_sig(void)
> >>> +{
> >>> + return crc32(~0, vmcoreinfo_data, vmcoreinfo_size);
> >>> +}
> >>> +
> >>> +static bool crash_verify_vmcoreinfo(void)
> >>> +{
> >>> + if (crash_calc_vmcoreinfo_sig() == vmcoreinfo_sig)
> >>> + return true;
> >>> +
> >>> + return false;
> >>> +}
> >>> +
> >>> +void crash_save_vmcoreinfo(void)
> >>> +{
> >>> + /* Re-save if verification fails */
> >>> + if (!crash_verify_vmcoreinfo()) {
> >>> + vmcoreinfo_size = 0;
> >>> + do_crash_save_vmcoreinfo_init();
> >>> + }
> >>> +
> >>> + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
> >>> + update_vmcoreinfo_note();
> >>> +}
> >>> +
> >>> +static int __init crash_save_vmcoreinfo_init(void)
> >>> +{
> >>> + do_crash_save_vmcoreinfo_init();
> >>> + vmcoreinfo_sig = crash_calc_vmcoreinfo_sig();
> >>> update_vmcoreinfo_note();
> >>>
> >>> return 0;
>
>
> _______________________________________________
> kexec mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/kexec
On 03/20/2017 at 11:55 AM, Baoquan He wrote:
> On 03/20/17 at 10:39am, Xunlei Pang wrote:
>> On 03/20/2017 at 10:13 AM, Baoquan He wrote:
>>> On 03/17/17 at 12:22pm, Eric W. Biederman wrote:
>>>> Xunlei Pang <[email protected]> writes:
>>>>
>>>>> Currently vmcoreinfo data is updated at boot time subsys_initcall(),
>>>>> it has the risk of being modified by some wrong code during system
>>>>> is running.
>>>>>
>>>>> As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on,
>>>>> when using "crash" or "makedumpfile"(etc) utility to parse this vmcore,
>>>>> we probably will get "Segmentation fault" or other unexpected/confusing
>>>>> errors.
>>>> If this is a real concern and the previous discussion sounds like it is
>>>> part of what we need to do is move the variable vmcoreinfo_note out
>>>> of the kernel's .bss section. And modify the code to regenerate
>>>> and keep this information in something like the control page.
>>> I guess this is not from a real issue, just from Xunlei's worry. But
>>> Xunlei didn't give a direct answer to this, and Petr's question. Not
>> It's easy to reproduce: write a kernel module to modify part content of
>> vmcoreinfo_data (we surely have many ways to acquire its VA). If it does
>> exist in theory, we will met it sooner or later in real world due to billions
>> of applications.
>>
>> Also there are bugs like this one
>> https://bugzilla.redhat.com/show_bug.cgi?id=1287097
>> Not sure if it is makedumpfile issue or this one, maybe we can't know forever.
> Well, kdump is not all-purpose. If you write code in module to stomp
> page init_level4_pgt is pointing at, you won't get a vmcore.
vmcoreinfo is a large data chunk prepared for kdump not a normal-sized variable, we better protect it.
> And you are saying vmcoreinfo_data, it's a intermediate page, should be
> vmcoreinfo_note. If the wrong code you mentioned didn't change
> vmcoreinfo_note, but other kernel data which need be saved into
> vmcoreinfo_note, crash_save_vmcoreinfo_init is doing better than you
> re-saved one.
I am not going to touch vmcoreinfo_note, just trying to relocate vmcoreinfo_data into the crash memory,
then use it to update vmcoreinfo_note which is fully overwritten when crash happens just like the cpu
crash note.
Anyway I will send v3 soon, let further discuss it there, thanks!
Regards,
Xunlei
>
>> Regards,
>> Xunlei
>>
>>> very sure if this will impact other implementation. fadump will be
>>> impacted by this or other dump? Maybe yet or maybe not.
>>>
>>> I don't object this strongly, but please at least add code comment to
>>> explain why vmcoreinfo need be saved twice because it does look weird.
>>>
>>>> Definitely something like this needs a page all to itself, and ideally
>>>> far away from any other kernel data structures. I clearly was not
>>>> watching closely the data someone decided to keep this silly thing
>>>> in the kernel's .bss section.
>>>>
>>>>> As vmcoreinfo is the most fundamental information for vmcore, we better
>>>>> double check its correctness. Here we generate a signature(using crc32)
>>>>> after it is saved, then verify it in crash_save_vmcoreinfo() to see if
>>>>> the signature was broken, if so we have to re-save the vmcoreinfo data
>>>>> to get the correct vmcoreinfo for kdump as possible as we can.
>>>> Sigh. We already have a sha256 that is supposed to cover this sort of
>>>> thing. The bug rather is that apparently it isn't covering this data.
>>>> That sounds like what we should be fixing.
>>>>
>>>> Please let's not invent new mechanisms we have to maintain. Let's
>>>> reorganize this so this static data is protected like all other static
>>>> data in the kexec-on-panic path. We have good mechanims and good
>>>> strategies for avoiding and detecting corruption we just need to use
>>>> them.
>>>>
>>>> Eric
>>>>
>>>>
>>>>
>>>>> Signed-off-by: Xunlei Pang <[email protected]>
>>>>> ---
>>>>> v1->v2:
>>>>> - Keep crash_save_vmcoreinfo_init() because "makedumpfile --mem-usage"
>>>>> uses the information.
>>>>> - Add crc32 verification for vmcoreinfo, re-save when failure.
>>>>>
>>>>> arch/Kconfig | 1 +
>>>>> kernel/kexec_core.c | 43 +++++++++++++++++++++++++++++++++++--------
>>>>> 2 files changed, 36 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/arch/Kconfig b/arch/Kconfig
>>>>> index c4d6833..66eb296 100644
>>>>> --- a/arch/Kconfig
>>>>> +++ b/arch/Kconfig
>>>>> @@ -4,6 +4,7 @@
>>>>>
>>>>> config KEXEC_CORE
>>>>> bool
>>>>> + select CRC32
>>>>>
>>>>> config HAVE_IMA_KEXEC
>>>>> bool
>>>>> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
>>>>> index bfe62d5..012acbe 100644
>>>>> --- a/kernel/kexec_core.c
>>>>> +++ b/kernel/kexec_core.c
>>>>> @@ -38,6 +38,7 @@
>>>>> #include <linux/syscore_ops.h>
>>>>> #include <linux/compiler.h>
>>>>> #include <linux/hugetlb.h>
>>>>> +#include <linux/crc32.h>
>>>>>
>>>>> #include <asm/page.h>
>>>>> #include <asm/sections.h>
>>>>> @@ -53,9 +54,10 @@
>>>>>
>>>>> /* vmcoreinfo stuff */
>>>>> static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
>>>>> -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>>>>> +static u32 vmcoreinfo_sig;
>>>>> size_t vmcoreinfo_size;
>>>>> size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
>>>>> +u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
>>>>>
>>>>> /* Flag to indicate we are going to kexec a new kernel */
>>>>> bool kexec_in_progress = false;
>>>>> @@ -1367,12 +1369,6 @@ static void update_vmcoreinfo_note(void)
>>>>> final_note(buf);
>>>>> }
>>>>>
>>>>> -void crash_save_vmcoreinfo(void)
>>>>> -{
>>>>> - vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>>>>> - update_vmcoreinfo_note();
>>>>> -}
>>>>> -
>>>>> void vmcoreinfo_append_str(const char *fmt, ...)
>>>>> {
>>>>> va_list args;
>>>>> @@ -1402,7 +1398,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
>>>>> return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
>>>>> }
>>>>>
>>>>> -static int __init crash_save_vmcoreinfo_init(void)
>>>>> +static void do_crash_save_vmcoreinfo_init(void)
>>>>> {
>>>>> VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
>>>>> VMCOREINFO_PAGESIZE(PAGE_SIZE);
>>>>> @@ -1474,6 +1470,37 @@ static int __init crash_save_vmcoreinfo_init(void)
>>>>> #endif
>>>>>
>>>>> arch_crash_save_vmcoreinfo();
>>>>> +}
>>>>> +
>>>>> +static u32 crash_calc_vmcoreinfo_sig(void)
>>>>> +{
>>>>> + return crc32(~0, vmcoreinfo_data, vmcoreinfo_size);
>>>>> +}
>>>>> +
>>>>> +static bool crash_verify_vmcoreinfo(void)
>>>>> +{
>>>>> + if (crash_calc_vmcoreinfo_sig() == vmcoreinfo_sig)
>>>>> + return true;
>>>>> +
>>>>> + return false;
>>>>> +}
>>>>> +
>>>>> +void crash_save_vmcoreinfo(void)
>>>>> +{
>>>>> + /* Re-save if verification fails */
>>>>> + if (!crash_verify_vmcoreinfo()) {
>>>>> + vmcoreinfo_size = 0;
>>>>> + do_crash_save_vmcoreinfo_init();
>>>>> + }
>>>>> +
>>>>> + vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
>>>>> + update_vmcoreinfo_note();
>>>>> +}
>>>>> +
>>>>> +static int __init crash_save_vmcoreinfo_init(void)
>>>>> +{
>>>>> + do_crash_save_vmcoreinfo_init();
>>>>> + vmcoreinfo_sig = crash_calc_vmcoreinfo_sig();
>>>>> update_vmcoreinfo_note();
>>>>>
>>>>> return 0;
>>
>> _______________________________________________
>> kexec mailing list
>> [email protected]
>> http://lists.infradead.org/mailman/listinfo/kexec