Introduce the capability to dynamically configure the maximum file
note size for ELF core dumps via sysctl. This enhancement removes
the previous static limit of 4MB, allowing system administrators to
adjust the size based on system-specific requirements or constraints.
- Remove hardcoded `MAX_FILE_NOTE_SIZE` from `fs/binfmt_elf.c`.
- Define `max_file_note_size` in `fs/coredump.c` with an initial value
set to 4MB.
- Declare `max_file_note_size` as an external variable in
`include/linux/coredump.h`.
- Add a new sysctl entry in `kernel/sysctl.c` to manage this setting
at runtime.
$ sysctl -a | grep core_file_note_size_max
kernel.core_file_note_size_max = 4194304
$ sysctl -n kernel.core_file_note_size_max
4194304
$echo 519304 > /proc/sys/kernel/core_file_note_size_max
$sysctl -n kernel.core_file_note_size_max
519304
Attempting to write beyond the ceiling value of 16MB
$echo 17194304 > /proc/sys/kernel/core_file_note_size_max
bash: echo: write error: Invalid argument
Why is this being done?
We have observed that during a crash when there are more than 65k mmaps
in memory, the existing fixed limit on the size of the ELF notes section
becomes a bottleneck. The notes section quickly reaches its capacity,
leading to incomplete memory segment information in the resulting coredump.
This truncation compromises the utility of the coredumps, as crucial
information about the memory state at the time of the crash might be
omitted.
Signed-off-by: Vijay Nag <[email protected]>
Signed-off-by: Allen Pais <[email protected]>
---
Chagnes in v3:
- Fix commit message to reflect the correct sysctl knob [Kees]
- Add a ceiling for maximum pssible note size(16M) [Allen]
- Add a pr_warn_once() [Kees]
Changes in v2:
- Move new sysctl to fs/coredump.c [Luis & Kees]
- rename max_file_note_size to core_file_note_size_max [kees]
- Capture "why this is being done?" int he commit message [Luis & Kees]
---
fs/binfmt_elf.c | 8 ++++++--
fs/coredump.c | 15 +++++++++++++++
include/linux/coredump.h | 1 +
3 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5397b552fbeb..5294f8f3a9a8 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1564,7 +1564,6 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}
-#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
* Format of NT_FILE note:
*
@@ -1592,8 +1591,13 @@ static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm
names_ofs = (2 + 3 * count) * sizeof(data[0]);
alloc:
- if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
+ /* paranoia check */
+ if (size >= core_file_note_size_max) {
+ pr_warn_once("coredump Note size too large: %u "
+ "(does kernel.core_file_note_size_max sysctl need adjustment?)\n",
+ size);
return -EINVAL;
+ }
size = round_up(size, PAGE_SIZE);
/*
* "size" can be 0 here legitimately.
diff --git a/fs/coredump.c b/fs/coredump.c
index be6403b4b14b..ffaed8c1b3b0 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -56,10 +56,16 @@
static bool dump_vma_snapshot(struct coredump_params *cprm);
static void free_vma_snapshot(struct coredump_params *cprm);
+#define MAX_FILE_NOTE_SIZE (4*1024*1024)
+/* Define a reasonable max cap */
+#define MAX_ALLOWED_NOTE_SIZE (16*1024*1024)
+
static int core_uses_pid;
static unsigned int core_pipe_limit;
static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;
+unsigned int core_file_note_size_max = MAX_FILE_NOTE_SIZE;
+unsigned int core_file_note_size_allowed = MAX_ALLOWED_NOTE_SIZE;
struct core_name {
char *corename;
@@ -1020,6 +1026,15 @@ static struct ctl_table coredump_sysctls[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "core_file_note_size_max",
+ .data = &core_file_note_size_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = &core_file_note_size_max,
+ .extra2 = &core_file_note_size_allowed,
+ },
};
static int __init init_fs_coredump_sysctls(void)
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index d3eba4360150..14c057643e7f 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -46,6 +46,7 @@ static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
#endif
#if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL)
+extern unsigned int core_file_note_size_max;
extern void validate_coredump_safety(void);
#else
static inline void validate_coredump_safety(void) {}
--
2.17.1
On Thu, May 02, 2024 at 11:56:03PM +0000, Allen Pais wrote:
> Introduce the capability to dynamically configure the maximum file
> note size for ELF core dumps via sysctl. This enhancement removes
> the previous static limit of 4MB, allowing system administrators to
> adjust the size based on system-specific requirements or constraints.
>
> - Remove hardcoded `MAX_FILE_NOTE_SIZE` from `fs/binfmt_elf.c`.
> - Define `max_file_note_size` in `fs/coredump.c` with an initial value
> set to 4MB.
> - Declare `max_file_note_size` as an external variable in
> `include/linux/coredump.h`.
> - Add a new sysctl entry in `kernel/sysctl.c` to manage this setting
> at runtime.
The above bullet points should be clear from the patch itself. The
commit is really more about rationale and examples (which you have
below). I'd remove the bullets.
>
> $ sysctl -a | grep core_file_note_size_max
> kernel.core_file_note_size_max = 4194304
>
> $ sysctl -n kernel.core_file_note_size_max
> 4194304
>
> $echo 519304 > /proc/sys/kernel/core_file_note_size_max
>
> $sysctl -n kernel.core_file_note_size_max
> 519304
>
> Attempting to write beyond the ceiling value of 16MB
> $echo 17194304 > /proc/sys/kernel/core_file_note_size_max
> bash: echo: write error: Invalid argument
>
> Why is this being done?
> We have observed that during a crash when there are more than 65k mmaps
> in memory, the existing fixed limit on the size of the ELF notes section
> becomes a bottleneck. The notes section quickly reaches its capacity,
> leading to incomplete memory segment information in the resulting coredump.
> This truncation compromises the utility of the coredumps, as crucial
> information about the memory state at the time of the crash might be
> omitted.
I'd make this the first paragraph of the commit log. "We have this
problem" goes first, then "Here's what we did to deal with it", then you
examples. :)
>
> Signed-off-by: Vijay Nag <[email protected]>
> Signed-off-by: Allen Pais <[email protected]>
>
> ---
> Chagnes in v3:
> - Fix commit message to reflect the correct sysctl knob [Kees]
> - Add a ceiling for maximum pssible note size(16M) [Allen]
> - Add a pr_warn_once() [Kees]
> Changes in v2:
> - Move new sysctl to fs/coredump.c [Luis & Kees]
> - rename max_file_note_size to core_file_note_size_max [kees]
> - Capture "why this is being done?" int he commit message [Luis & Kees]
> ---
> fs/binfmt_elf.c | 8 ++++++--
> fs/coredump.c | 15 +++++++++++++++
> include/linux/coredump.h | 1 +
> 3 files changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> index 5397b552fbeb..5294f8f3a9a8 100644
> --- a/fs/binfmt_elf.c
> +++ b/fs/binfmt_elf.c
> @@ -1564,7 +1564,6 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
> fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
> }
>
> -#define MAX_FILE_NOTE_SIZE (4*1024*1024)
> /*
> * Format of NT_FILE note:
> *
> @@ -1592,8 +1591,13 @@ static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm
>
> names_ofs = (2 + 3 * count) * sizeof(data[0]);
> alloc:
> - if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
> + /* paranoia check */
> + if (size >= core_file_note_size_max) {
> + pr_warn_once("coredump Note size too large: %u "
> + "(does kernel.core_file_note_size_max sysctl need adjustment?)\n",
The string can be on a single line (I think scripts/check_patch.pl will
warn about this, as well as the indentation of "size" below...
> + size);
> return -EINVAL;
> + }
> size = round_up(size, PAGE_SIZE);
> /*
> * "size" can be 0 here legitimately.
> diff --git a/fs/coredump.c b/fs/coredump.c
> index be6403b4b14b..ffaed8c1b3b0 100644
> --- a/fs/coredump.c
> +++ b/fs/coredump.c
> @@ -56,10 +56,16 @@
> static bool dump_vma_snapshot(struct coredump_params *cprm);
> static void free_vma_snapshot(struct coredump_params *cprm);
>
> +#define MAX_FILE_NOTE_SIZE (4*1024*1024)
> +/* Define a reasonable max cap */
> +#define MAX_ALLOWED_NOTE_SIZE (16*1024*1024)
Let's call this CORE_FILE_NOTE_SIZE_DEFAULT and
CORE_FILE_NOTE_SIZE_MAX to match the sysctl.
> +
> static int core_uses_pid;
> static unsigned int core_pipe_limit;
> static char core_pattern[CORENAME_MAX_SIZE] = "core";
> static int core_name_size = CORENAME_MAX_SIZE;
> +unsigned int core_file_note_size_max = MAX_FILE_NOTE_SIZE;
> +unsigned int core_file_note_size_allowed = MAX_ALLOWED_NOTE_SIZE;
The latter can be static and const.
For the note below, perhaps add:
static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
>
> struct core_name {
> char *corename;
> @@ -1020,6 +1026,15 @@ static struct ctl_table coredump_sysctls[] = {
> .mode = 0644,
> .proc_handler = proc_dointvec,
> },
> + {
> + .procname = "core_file_note_size_max",
> + .data = &core_file_note_size_max,
> + .maxlen = sizeof(unsigned int),
> + .mode = 0644,
> + .proc_handler = proc_douintvec_minmax,
> + .extra1 = &core_file_note_size_max,
This means you can never shrink it if you raise it from the default.
Let's use the core_file_note_size_min above.
> + .extra2 = &core_file_note_size_allowed,
> + },
> };
>
> static int __init init_fs_coredump_sysctls(void)
> diff --git a/include/linux/coredump.h b/include/linux/coredump.h
> index d3eba4360150..14c057643e7f 100644
> --- a/include/linux/coredump.h
> +++ b/include/linux/coredump.h
> @@ -46,6 +46,7 @@ static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
> #endif
>
> #if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL)
> +extern unsigned int core_file_note_size_max;
> extern void validate_coredump_safety(void);
> #else
> static inline void validate_coredump_safety(void) {}
> --
> 2.17.1
>
I think v4 will be all good to go, assuming no one else pops up. :)
Thanks for the changes!
-Kees
--
Kees Cook
> > Introduce the capability to dynamically configure the maximum file
> > note size for ELF core dumps via sysctl. This enhancement removes
> > the previous static limit of 4MB, allowing system administrators to
> > adjust the size based on system-specific requirements or constraints.
> >
> > - Remove hardcoded `MAX_FILE_NOTE_SIZE` from `fs/binfmt_elf.c`.
> > - Define `max_file_note_size` in `fs/coredump.c` with an initial value
> > set to 4MB.
> > - Declare `max_file_note_size` as an external variable in
> > `include/linux/coredump.h`.
> > - Add a new sysctl entry in `kernel/sysctl.c` to manage this setting
> > at runtime.
>
> The above bullet points should be clear from the patch itself. The
> commit is really more about rationale and examples (which you have
> below). I'd remove the bullets.
Sure, I have it modified to:
fs/coredump: Enable dynamic configuration of max file note size
Introduce the capability to dynamically configure the maximum file
note size for ELF core dumps via sysctl.
Why is this being done?
We have observed that during a crash when there are more than 65k mmaps
in memory, the existing fixed limit on the size of the ELF notes section
becomes a bottleneck. The notes section quickly reaches its capacity,
leading to incomplete memory segment information in the resulting coredump.
This truncation compromises the utility of the coredumps, as crucial
information about the memory state at the time of the crash might be
omitted.
This enhancement removes the previous static limit of 4MB, allowing
system administrators to adjust the size based on system-specific
requirements or constraints.
Eg:
$ sysctl -a | grep core_file_note_size_max
kernel.core_file_note_size_max = 4194304
......
>
> >
> > $ sysctl -a | grep core_file_note_size_max
> > kernel.core_file_note_size_max = 4194304
> >
> > $ sysctl -n kernel.core_file_note_size_max
> > 4194304
> >
> > $echo 519304 > /proc/sys/kernel/core_file_note_size_max
> >
> > $sysctl -n kernel.core_file_note_size_max
> > 519304
> >
> > Attempting to write beyond the ceiling value of 16MB
> > $echo 17194304 > /proc/sys/kernel/core_file_note_size_max
> > bash: echo: write error: Invalid argument
> >
> > Why is this being done?
> > We have observed that during a crash when there are more than 65k mmaps
> > in memory, the existing fixed limit on the size of the ELF notes section
> > becomes a bottleneck. The notes section quickly reaches its capacity,
> > leading to incomplete memory segment information in the resulting coredump.
> > This truncation compromises the utility of the coredumps, as crucial
> > information about the memory state at the time of the crash might be
> > omitted.
>
> I'd make this the first paragraph of the commit log. "We have this
> problem" goes first, then "Here's what we did to deal with it", then you
> examples. :)
>
Done.
> >
> > Signed-off-by: Vijay Nag <[email protected]>
> > Signed-off-by: Allen Pais <[email protected]>
> >
> > ---
> > Chagnes in v3:
> > - Fix commit message to reflect the correct sysctl knob [Kees]
> > - Add a ceiling for maximum pssible note size(16M) [Allen]
> > - Add a pr_warn_once() [Kees]
> > Changes in v2:
> > - Move new sysctl to fs/coredump.c [Luis & Kees]
> > - rename max_file_note_size to core_file_note_size_max [kees]
> > - Capture "why this is being done?" int he commit message [Luis & Kees]
> > ---
> > fs/binfmt_elf.c | 8 ++++++--
> > fs/coredump.c | 15 +++++++++++++++
> > include/linux/coredump.h | 1 +
> > 3 files changed, 22 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> > index 5397b552fbeb..5294f8f3a9a8 100644
> > --- a/fs/binfmt_elf.c
> > +++ b/fs/binfmt_elf.c
> > @@ -1564,7 +1564,6 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
> > fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
> > }
> >
> > -#define MAX_FILE_NOTE_SIZE (4*1024*1024)
> > /*
> > * Format of NT_FILE note:
> > *
> > @@ -1592,8 +1591,13 @@ static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm
> >
> > names_ofs = (2 + 3 * count) * sizeof(data[0]);
> > alloc:
> > - if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
> > + /* paranoia check */
> > + if (size >= core_file_note_size_max) {
> > + pr_warn_once("coredump Note size too large: %u "
> > + "(does kernel.core_file_note_size_max sysctl need adjustment?)\n",
>
> The string can be on a single line (I think scripts/check_patch.pl will
> warn about this, as well as the indentation of "size" below...
It does warn, but if I leave it as a single line, there's still a warning:
WARNING: line length of 135 exceeds 100 columns, which is why I
split it into multiple lines.
>
> > + size);
> > return -EINVAL;
> > + }
> > size = round_up(size, PAGE_SIZE);
> > /*
> > * "size" can be 0 here legitimately.
> > diff --git a/fs/coredump.c b/fs/coredump.c
> > index be6403b4b14b..ffaed8c1b3b0 100644
> > --- a/fs/coredump.c
> > +++ b/fs/coredump.c
> > @@ -56,10 +56,16 @@
> > static bool dump_vma_snapshot(struct coredump_params *cprm);
> > static void free_vma_snapshot(struct coredump_params *cprm);
> >
> > +#define MAX_FILE_NOTE_SIZE (4*1024*1024)
> > +/* Define a reasonable max cap */
> > +#define MAX_ALLOWED_NOTE_SIZE (16*1024*1024)
>
> Let's call this CORE_FILE_NOTE_SIZE_DEFAULT and
> CORE_FILE_NOTE_SIZE_MAX to match the sysctl.
>
Sure, will update it in v4.
> > +
> > static int core_uses_pid;
> > static unsigned int core_pipe_limit;
> > static char core_pattern[CORENAME_MAX_SIZE] = "core";
> > static int core_name_size = CORENAME_MAX_SIZE;
> > +unsigned int core_file_note_size_max = MAX_FILE_NOTE_SIZE;
> > +unsigned int core_file_note_size_allowed = MAX_ALLOWED_NOTE_SIZE;
>
> The latter can be static and const.
>
> For the note below, perhaps add:
>
> static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
>
core_file_note_size_min will be used in fs/binfmt_elf.c at:
if (size >= core_file_note_size_min) ,
did you mean
static const unsigned int core_file_note_size_allowed =
CORE_FILE_NOTE_SIZE_MAX;??
> >
> > struct core_name {
> > char *corename;
> > @@ -1020,6 +1026,15 @@ static struct ctl_table coredump_sysctls[] = {
> > .mode = 0644,
> > .proc_handler = proc_dointvec,
> > },
> > + {
> > + .procname = "core_file_note_size_max",
> > + .data = &core_file_note_size_max,
> > + .maxlen = sizeof(unsigned int),
> > + .mode = 0644,
> > + .proc_handler = proc_douintvec_minmax,
> > + .extra1 = &core_file_note_size_max,
>
> This means you can never shrink it if you raise it from the default.
> Let's use the core_file_note_size_min above.
Sure, will fix it in v4.
>
> > + .extra2 = &core_file_note_size_allowed,
> > + },
> > };
> >
> > static int __init init_fs_coredump_sysctls(void)
> > diff --git a/include/linux/coredump.h b/include/linux/coredump.h
> > index d3eba4360150..14c057643e7f 100644
> > --- a/include/linux/coredump.h
> > +++ b/include/linux/coredump.h
> > @@ -46,6 +46,7 @@ static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
> > #endif
> >
> > #if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL)
> > +extern unsigned int core_file_note_size_max;
> > extern void validate_coredump_safety(void);
> > #else
> > static inline void validate_coredump_safety(void) {}
> > --
> > 2.17.1
> >
>
> I think v4 will be all good to go, assuming no one else pops up. :)
> Thanks for the changes!
Thank you for the reviews. Will send out v4 soon.
--
- Allen
> > > Introduce the capability to dynamically configure the maximum file
> > > note size for ELF core dumps via sysctl. This enhancement removes
> > > the previous static limit of 4MB, allowing system administrators to
> > > adjust the size based on system-specific requirements or constraints.
> > >
> > > - Remove hardcoded `MAX_FILE_NOTE_SIZE` from `fs/binfmt_elf.c`.
> > > - Define `max_file_note_size` in `fs/coredump.c` with an initial value
> > > set to 4MB.
> > > - Declare `max_file_note_size` as an external variable in
> > > `include/linux/coredump.h`.
> > > - Add a new sysctl entry in `kernel/sysctl.c` to manage this setting
> > > at runtime.
> >
> > The above bullet points should be clear from the patch itself. The
> > commit is really more about rationale and examples (which you have
> > below). I'd remove the bullets.
>
> Sure, I have it modified to:
>
> fs/coredump: Enable dynamic configuration of max file note size
>
> Introduce the capability to dynamically configure the maximum file
> note size for ELF core dumps via sysctl.
>
> Why is this being done?
> We have observed that during a crash when there are more than 65k mmaps
> in memory, the existing fixed limit on the size of the ELF notes section
> becomes a bottleneck. The notes section quickly reaches its capacity,
> leading to incomplete memory segment information in the resulting coredump.
> This truncation compromises the utility of the coredumps, as crucial
> information about the memory state at the time of the crash might be
> omitted.
>
> This enhancement removes the previous static limit of 4MB, allowing
> system administrators to adjust the size based on system-specific
> requirements or constraints.
>
> Eg:
> $ sysctl -a | grep core_file_note_size_max
> kernel.core_file_note_size_max = 4194304
> .......
> >
> > >
> > > $ sysctl -a | grep core_file_note_size_max
> > > kernel.core_file_note_size_max = 4194304
> > >
> > > $ sysctl -n kernel.core_file_note_size_max
> > > 4194304
> > >
> > > $echo 519304 > /proc/sys/kernel/core_file_note_size_max
> > >
> > > $sysctl -n kernel.core_file_note_size_max
> > > 519304
> > >
> > > Attempting to write beyond the ceiling value of 16MB
> > > $echo 17194304 > /proc/sys/kernel/core_file_note_size_max
> > > bash: echo: write error: Invalid argument
> > >
> > > Why is this being done?
> > > We have observed that during a crash when there are more than 65k mmaps
> > > in memory, the existing fixed limit on the size of the ELF notes section
> > > becomes a bottleneck. The notes section quickly reaches its capacity,
> > > leading to incomplete memory segment information in the resulting coredump.
> > > This truncation compromises the utility of the coredumps, as crucial
> > > information about the memory state at the time of the crash might be
> > > omitted.
> >
> > I'd make this the first paragraph of the commit log. "We have this
> > problem" goes first, then "Here's what we did to deal with it", then you
> > examples. :)
> >
> Done.
>
> > >
> > > Signed-off-by: Vijay Nag <[email protected]>
> > > Signed-off-by: Allen Pais <[email protected]>
> > >
> > > ---
> > > Chagnes in v3:
> > > - Fix commit message to reflect the correct sysctl knob [Kees]
> > > - Add a ceiling for maximum pssible note size(16M) [Allen]
> > > - Add a pr_warn_once() [Kees]
> > > Changes in v2:
> > > - Move new sysctl to fs/coredump.c [Luis & Kees]
> > > - rename max_file_note_size to core_file_note_size_max [kees]
> > > - Capture "why this is being done?" int he commit message [Luis & Kees]
> > > ---
> > > fs/binfmt_elf.c | 8 ++++++--
> > > fs/coredump.c | 15 +++++++++++++++
> > > include/linux/coredump.h | 1 +
> > > 3 files changed, 22 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> > > index 5397b552fbeb..5294f8f3a9a8 100644
> > > --- a/fs/binfmt_elf.c
> > > +++ b/fs/binfmt_elf.c
> > > @@ -1564,7 +1564,6 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
> > > fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
> > > }
> > >
> > > -#define MAX_FILE_NOTE_SIZE (4*1024*1024)
> > > /*
> > > * Format of NT_FILE note:
> > > *
> > > @@ -1592,8 +1591,13 @@ static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm
> > >
> > > names_ofs = (2 + 3 * count) * sizeof(data[0]);
> > > alloc:
> > > - if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
> > > + /* paranoia check */
> > > + if (size >= core_file_note_size_max) {
> > > + pr_warn_once("coredump Note size too large: %u "
> > > + "(does kernel.core_file_note_size_max sysctl need adjustment?)\n",
> >
> > The string can be on a single line (I think scripts/check_patch.pl will
> > warn about this, as well as the indentation of "size" below...
>
> It does warn, but if I leave it as a single line, there's still a warning:
> WARNING: line length of 135 exceeds 100 columns, which is why I
> split it into multiple lines.
>
> >
> > > + size);
> > > return -EINVAL;
> > > + }
> > > size = round_up(size, PAGE_SIZE);
> > > /*
> > > * "size" can be 0 here legitimately.
> > > diff --git a/fs/coredump.c b/fs/coredump.c
> > > index be6403b4b14b..ffaed8c1b3b0 100644
> > > --- a/fs/coredump.c
> > > +++ b/fs/coredump.c
> > > @@ -56,10 +56,16 @@
> > > static bool dump_vma_snapshot(struct coredump_params *cprm);
> > > static void free_vma_snapshot(struct coredump_params *cprm);
> > >
> > > +#define MAX_FILE_NOTE_SIZE (4*1024*1024)
> > > +/* Define a reasonable max cap */
> > > +#define MAX_ALLOWED_NOTE_SIZE (16*1024*1024)
> >
> > Let's call this CORE_FILE_NOTE_SIZE_DEFAULT and
> > CORE_FILE_NOTE_SIZE_MAX to match the sysctl.
> >
>
> Sure, will update it in v4.
>
> > > +
> > > static int core_uses_pid;
> > > static unsigned int core_pipe_limit;
> > > static char core_pattern[CORENAME_MAX_SIZE] = "core";
> > > static int core_name_size = CORENAME_MAX_SIZE;
> > > +unsigned int core_file_note_size_max = MAX_FILE_NOTE_SIZE;
> > > +unsigned int core_file_note_size_allowed = MAX_ALLOWED_NOTE_SIZE;
> >
> > The latter can be static and const.
> >
> > For the note below, perhaps add:
> >
> > static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
> >
>
> core_file_note_size_min will be used in fs/binfmt_elf.c at:
>
> if (size >= core_file_note_size_min) ,
> did you mean
> static const unsigned int core_file_note_size_allowed =
> CORE_FILE_NOTE_SIZE_MAX;??
> > >
Kees,
My bad, I misunderstood what you asked for. Here is the final diff,
if it looks fine,
i can send out a v4.
Note, there is a warning issued by checkpatch.pl (WARNING: line length
of 134 exceeds 100 columns)
for the pr_warn_once() and adding const trigger a build
warning(warning: initialization discards
'const' qualifier from pointer target type), which is why i dropped it.
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 5397b552fbeb..19bd85d1e42e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1564,7 +1564,6 @@ static void fill_siginfo_note(struct memelfnote
*note, user_siginfo_t *csigdata,
fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}
-#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
* Format of NT_FILE note:
*
@@ -1592,8 +1591,11 @@ static int fill_files_note(struct memelfnote
*note, struct coredump_params *cprm
names_ofs = (2 + 3 * count) * sizeof(data[0]);
alloc:
- if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
+ /* paranoia check */
+ if (size >= core_file_note_size_allowed) {
+ pr_warn_once("coredump Note size too large: %u (does
kernel.core_file_note_size_min sysctl need adjustment?\n", size);
return -EINVAL;
+ }
size = round_up(size, PAGE_SIZE);
/*
* "size" can be 0 here legitimately.
diff --git a/fs/coredump.c b/fs/coredump.c
index be6403b4b14b..69085bb494dc 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -56,10 +56,16 @@
static bool dump_vma_snapshot(struct coredump_params *cprm);
static void free_vma_snapshot(struct coredump_params *cprm);
+#define CORE_FILE_NOTE_SIZE_DEFAULT (4*1024*1024)
+/* Define a reasonable max cap */
+#define CORE_FILE_NOTE_SIZE_MAX (16*1024*1024)
+
static int core_uses_pid;
static unsigned int core_pipe_limit;
static char core_pattern[CORENAME_MAX_SIZE] = "core";
static int core_name_size = CORENAME_MAX_SIZE;
+static unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
+unsigned int core_file_note_size_allowed = CORE_FILE_NOTE_SIZE_MAX;
struct core_name {
char *corename;
@@ -1020,6 +1026,15 @@ static struct ctl_table coredump_sysctls[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "core_file_note_size_min",
+ .data = &core_file_note_size_min,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = &core_file_note_size_min,
+ .extra2 = &core_file_note_size_allowed,
+ },
};
static int __init init_fs_coredump_sysctls(void)
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index d3eba4360150..776bde5f9752 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -46,6 +46,7 @@ static inline void do_coredump(const
kernel_siginfo_t *siginfo) {}
#endif
#if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL)
+extern unsigned int core_file_note_size_allowed;
extern void validate_coredump_safety(void);
#else
static inline void validate_coredump_safety(void) {}
Thanks,
Allen
> > > struct core_name {
> > > char *corename;
> > > @@ -1020,6 +1026,15 @@ static struct ctl_table coredump_sysctls[] = {
> > > .mode = 0644,
> > > .proc_handler = proc_dointvec,
> > > },
> > > + {
> > > + .procname = "core_file_note_size_max",
> > > + .data = &core_file_note_size_max,
> > > + .maxlen = sizeof(unsigned int),
> > > + .mode = 0644,
> > > + .proc_handler = proc_douintvec_minmax,
> > > + .extra1 = &core_file_note_size_max,
> >
> > This means you can never shrink it if you raise it from the default.
> > Let's use the core_file_note_size_min above.
>
> Sure, will fix it in v4.
> >
> > > + .extra2 = &core_file_note_size_allowed,
> > > + },
> > > };
> > >
> > > static int __init init_fs_coredump_sysctls(void)
> > > diff --git a/include/linux/coredump.h b/include/linux/coredump.h
> > > index d3eba4360150..14c057643e7f 100644
> > > --- a/include/linux/coredump.h
> > > +++ b/include/linux/coredump.h
> > > @@ -46,6 +46,7 @@ static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
> > > #endif
> > >
> > > #if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL)
> > > +extern unsigned int core_file_note_size_max;
> > > extern void validate_coredump_safety(void);
> > > #else
> > > static inline void validate_coredump_safety(void) {}
> > > --
> > > 2.17.1
> > >
> >
> > I think v4 will be all good to go, assuming no one else pops up. :)
> > Thanks for the changes!
>
> Thank you for the reviews. Will send out v4 soon.
>
> --
> - Allen
--
- Allen
Thanks for the cleanups, this is certainly now in the right direction.
Generic long term growth questions below.
On Thu, May 02, 2024 at 11:56:03PM +0000, Allen Pais wrote:
> Why is this being done?
> We have observed that during a crash when there are more than 65k mmaps
> in memory, the existing fixed limit on the size of the ELF notes section
> becomes a bottleneck. The notes section quickly reaches its capacity,
I'm not well versed here on how core dumps associate mmaps to ELF notes
section, can you elaborate? Does each new mmap potentially peg
information on ELF notes section? Where do we standardize on this? Does
it also change depending on any criteria of the mmap?
Depending on the above, we might want to be proactive to get a sense of
when we want to go beyond the new 16 MiB max cap on new mmaps for instance.
How many mmaps can we have anyway too?
> leading to incomplete memory segment information in the resulting coredump.
> This truncation compromises the utility of the coredumps, as crucial
> information about the memory state at the time of the crash might be
> omitted.
Luis
On Thu, May 02, 2024 at 06:40:58PM -0700, Allen wrote:
> > > > Introduce the capability to dynamically configure the maximum file
> > > > note size for ELF core dumps via sysctl. This enhancement removes
> > > > the previous static limit of 4MB, allowing system administrators to
> > > > adjust the size based on system-specific requirements or constraints.
> > > >
> > > > - Remove hardcoded `MAX_FILE_NOTE_SIZE` from `fs/binfmt_elf.c`.
> > > > - Define `max_file_note_size` in `fs/coredump.c` with an initial value
> > > > set to 4MB.
> > > > - Declare `max_file_note_size` as an external variable in
> > > > `include/linux/coredump.h`.
> > > > - Add a new sysctl entry in `kernel/sysctl.c` to manage this setting
> > > > at runtime.
> > >
> > > The above bullet points should be clear from the patch itself. The
> > > commit is really more about rationale and examples (which you have
> > > below). I'd remove the bullets.
> >
> > Sure, I have it modified to:
> >
> > fs/coredump: Enable dynamic configuration of max file note size
> >
> > Introduce the capability to dynamically configure the maximum file
> > note size for ELF core dumps via sysctl.
> >
> > Why is this being done?
> > We have observed that during a crash when there are more than 65k mmaps
> > in memory, the existing fixed limit on the size of the ELF notes section
> > becomes a bottleneck. The notes section quickly reaches its capacity,
> > leading to incomplete memory segment information in the resulting coredump.
> > This truncation compromises the utility of the coredumps, as crucial
> > information about the memory state at the time of the crash might be
> > omitted.
> >
> > This enhancement removes the previous static limit of 4MB, allowing
> > system administrators to adjust the size based on system-specific
> > requirements or constraints.
> >
> > Eg:
> > $ sysctl -a | grep core_file_note_size_max
> > kernel.core_file_note_size_max = 4194304
> > .......
> > >
> > > >
> > > > $ sysctl -a | grep core_file_note_size_max
> > > > kernel.core_file_note_size_max = 4194304
> > > >
> > > > $ sysctl -n kernel.core_file_note_size_max
> > > > 4194304
> > > >
> > > > $echo 519304 > /proc/sys/kernel/core_file_note_size_max
> > > >
> > > > $sysctl -n kernel.core_file_note_size_max
> > > > 519304
> > > >
> > > > Attempting to write beyond the ceiling value of 16MB
> > > > $echo 17194304 > /proc/sys/kernel/core_file_note_size_max
> > > > bash: echo: write error: Invalid argument
> > > >
> > > > Why is this being done?
> > > > We have observed that during a crash when there are more than 65k mmaps
> > > > in memory, the existing fixed limit on the size of the ELF notes section
> > > > becomes a bottleneck. The notes section quickly reaches its capacity,
> > > > leading to incomplete memory segment information in the resulting coredump.
> > > > This truncation compromises the utility of the coredumps, as crucial
> > > > information about the memory state at the time of the crash might be
> > > > omitted.
> > >
> > > I'd make this the first paragraph of the commit log. "We have this
> > > problem" goes first, then "Here's what we did to deal with it", then you
> > > examples. :)
> > >
> > Done.
> >
> > > >
> > > > Signed-off-by: Vijay Nag <[email protected]>
> > > > Signed-off-by: Allen Pais <[email protected]>
> > > >
> > > > ---
> > > > Chagnes in v3:
> > > > - Fix commit message to reflect the correct sysctl knob [Kees]
> > > > - Add a ceiling for maximum pssible note size(16M) [Allen]
> > > > - Add a pr_warn_once() [Kees]
> > > > Changes in v2:
> > > > - Move new sysctl to fs/coredump.c [Luis & Kees]
> > > > - rename max_file_note_size to core_file_note_size_max [kees]
> > > > - Capture "why this is being done?" int he commit message [Luis & Kees]
> > > > ---
> > > > fs/binfmt_elf.c | 8 ++++++--
> > > > fs/coredump.c | 15 +++++++++++++++
> > > > include/linux/coredump.h | 1 +
> > > > 3 files changed, 22 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
> > > > index 5397b552fbeb..5294f8f3a9a8 100644
> > > > --- a/fs/binfmt_elf.c
> > > > +++ b/fs/binfmt_elf.c
> > > > @@ -1564,7 +1564,6 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
> > > > fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
> > > > }
> > > >
> > > > -#define MAX_FILE_NOTE_SIZE (4*1024*1024)
> > > > /*
> > > > * Format of NT_FILE note:
> > > > *
> > > > @@ -1592,8 +1591,13 @@ static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm
> > > >
> > > > names_ofs = (2 + 3 * count) * sizeof(data[0]);
> > > > alloc:
> > > > - if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
> > > > + /* paranoia check */
> > > > + if (size >= core_file_note_size_max) {
> > > > + pr_warn_once("coredump Note size too large: %u "
> > > > + "(does kernel.core_file_note_size_max sysctl need adjustment?)\n",
> > >
> > > The string can be on a single line (I think scripts/check_patch.pl will
> > > warn about this, as well as the indentation of "size" below...
> >
> > It does warn, but if I leave it as a single line, there's still a warning:
> > WARNING: line length of 135 exceeds 100 columns, which is why I
> > split it into multiple lines.
> >
> > >
> > > > + size);
> > > > return -EINVAL;
> > > > + }
> > > > size = round_up(size, PAGE_SIZE);
> > > > /*
> > > > * "size" can be 0 here legitimately.
> > > > diff --git a/fs/coredump.c b/fs/coredump.c
> > > > index be6403b4b14b..ffaed8c1b3b0 100644
> > > > --- a/fs/coredump.c
> > > > +++ b/fs/coredump.c
> > > > @@ -56,10 +56,16 @@
> > > > static bool dump_vma_snapshot(struct coredump_params *cprm);
> > > > static void free_vma_snapshot(struct coredump_params *cprm);
> > > >
> > > > +#define MAX_FILE_NOTE_SIZE (4*1024*1024)
> > > > +/* Define a reasonable max cap */
> > > > +#define MAX_ALLOWED_NOTE_SIZE (16*1024*1024)
> > >
> > > Let's call this CORE_FILE_NOTE_SIZE_DEFAULT and
> > > CORE_FILE_NOTE_SIZE_MAX to match the sysctl.
> > >
> >
> > Sure, will update it in v4.
> >
> > > > +
> > > > static int core_uses_pid;
> > > > static unsigned int core_pipe_limit;
> > > > static char core_pattern[CORENAME_MAX_SIZE] = "core";
> > > > static int core_name_size = CORENAME_MAX_SIZE;
> > > > +unsigned int core_file_note_size_max = MAX_FILE_NOTE_SIZE;
> > > > +unsigned int core_file_note_size_allowed = MAX_ALLOWED_NOTE_SIZE;
> > >
> > > The latter can be static and const.
> > >
> > > For the note below, perhaps add:
> > >
> > > static const unsigned int core_file_note_size_min = CORE_FILE_NOTE_SIZE_DEFAULT;
> > >
> >
> > core_file_note_size_min will be used in fs/binfmt_elf.c at:
> >
> > if (size >= core_file_note_size_min) ,
> > did you mean
> > static const unsigned int core_file_note_size_allowed =
> > CORE_FILE_NOTE_SIZE_MAX;??
> > > >
>
> Kees,
>
> My bad, I misunderstood what you asked for. Here is the final diff,
> if it looks fine,
> i can send out a v4.
>
> Note, there is a warning issued by checkpatch.pl (WARNING: line length
> of 134 exceeds 100 columns)
For strings that should be fine. You'll want the ", size);" part on the
next line though.
> for the pr_warn_once() and adding const trigger a build
> warning(warning: initialization discards
> 'const' qualifier from pointer target type), which is why i dropped it.
Yeah, that's a common pattern for sysctl. You can fix it with a cast.
For example:
static const unsigned long nlm_grace_period_min = 0;
static const unsigned long nlm_grace_period_max = 240;
..
.proc_handler = proc_doulongvec_minmax,
.extra1 = (unsigned long *) &nlm_grace_period_min,
.extra2 = (unsigned long *) &nlm_grace_period_max,
But yeah, looks good.
--
Kees Cook
On Fri, May 03, 2024 at 12:51:00PM -0700, Luis Chamberlain wrote:
> Thanks for the cleanups, this is certainly now in the right direction.
> Generic long term growth questions below.
>
> On Thu, May 02, 2024 at 11:56:03PM +0000, Allen Pais wrote:
> > Why is this being done?
> > We have observed that during a crash when there are more than 65k mmaps
> > in memory, the existing fixed limit on the size of the ELF notes section
> > becomes a bottleneck. The notes section quickly reaches its capacity,
>
> I'm not well versed here on how core dumps associate mmaps to ELF notes
> section, can you elaborate? Does each new mmap potentially peg
> information on ELF notes section? Where do we standardize on this? Does
> it also change depending on any criteria of the mmap?
This is all in fs/binfmt_elf.c, fill_note_info(). There's a dump for
each thread's info, and then fill_files_note() (which is what this code
is adjusting) which writes out every filename for any file-map VMAs. The
format of NT_FILE record is documented above fill_files_note(). So, it
all depends on the count of VMAs and length of filenames.
> Depending on the above, we might want to be proactive to get a sense of
> when we want to go beyond the new 16 MiB max cap on new mmaps for instance.
> How many mmaps can we have anyway too?
INT_MAX :)
I'm fine with the new 16MiB max for the coredump. If we really need to
go beyond this, we might need to avoid building the entire thing in
memory, and instead move it all into write_note_info() directly, but I'm
not interested in that refactor unless we have an overwhelmingly good
reason to do so.
--
Kees Cook
Hi Allen,
kernel test robot noticed the following build errors:
[auto build test ERROR on kees/for-next/execve]
[also build test ERROR on brauner-vfs/vfs.all linus/master v6.9-rc6 next-20240503]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Allen-Pais/fs-coredump-Enable-dynamic-configuration-of-max-file-note-size/20240503-075758
base: https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve
patch link: https://lore.kernel.org/r/20240502235603.19290-1-apais%40linux.microsoft.com
patch subject: [PATCH v3] fs/coredump: Enable dynamic configuration of max file note size
config: powerpc64-randconfig-001-20240504 (https://download.01.org/0day-ci/archive/20240504/[email protected]/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project 37ae4ad0eef338776c7e2cffb3896153d43dcd90)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240504/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
In file included from fs/compat_binfmt_elf.c:17:
In file included from include/linux/elfcore-compat.h:6:
In file included from include/linux/elfcore.h:11:
In file included from include/linux/ptrace.h:10:
In file included from include/linux/pid_namespace.h:7:
In file included from include/linux/mm.h:2208:
include/linux/vmstat.h:522:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
522 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
In file included from fs/compat_binfmt_elf.c:144:
>> fs/binfmt_elf.c:1598:14: error: use of undeclared identifier 'core_file_note_size_max'
1598 | if (size >= core_file_note_size_max) {
| ^
1 warning and 1 error generated.
vim +/core_file_note_size_max +1598 fs/binfmt_elf.c
1569
1570 /*
1571 * Format of NT_FILE note:
1572 *
1573 * long count -- how many files are mapped
1574 * long page_size -- units for file_ofs
1575 * array of [COUNT] elements of
1576 * long start
1577 * long end
1578 * long file_ofs
1579 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1580 */
1581 static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
1582 {
1583 unsigned count, size, names_ofs, remaining, n;
1584 user_long_t *data;
1585 user_long_t *start_end_ofs;
1586 char *name_base, *name_curpos;
1587 int i;
1588
1589 /* *Estimated* file count and total data size needed */
1590 count = cprm->vma_count;
1591 if (count > UINT_MAX / 64)
1592 return -EINVAL;
1593 size = count * 64;
1594
1595 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1596 alloc:
1597 /* paranoia check */
> 1598 if (size >= core_file_note_size_max) {
1599 pr_warn_once("coredump Note size too large: %u "
1600 "(does kernel.core_file_note_size_max sysctl need adjustment?)\n",
1601 size);
1602 return -EINVAL;
1603 }
1604 size = round_up(size, PAGE_SIZE);
1605 /*
1606 * "size" can be 0 here legitimately.
1607 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
1608 */
1609 data = kvmalloc(size, GFP_KERNEL);
1610 if (ZERO_OR_NULL_PTR(data))
1611 return -ENOMEM;
1612
1613 start_end_ofs = data + 2;
1614 name_base = name_curpos = ((char *)data) + names_ofs;
1615 remaining = size - names_ofs;
1616 count = 0;
1617 for (i = 0; i < cprm->vma_count; i++) {
1618 struct core_vma_metadata *m = &cprm->vma_meta[i];
1619 struct file *file;
1620 const char *filename;
1621
1622 file = m->file;
1623 if (!file)
1624 continue;
1625 filename = file_path(file, name_curpos, remaining);
1626 if (IS_ERR(filename)) {
1627 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1628 kvfree(data);
1629 size = size * 5 / 4;
1630 goto alloc;
1631 }
1632 continue;
1633 }
1634
1635 /* file_path() fills at the end, move name down */
1636 /* n = strlen(filename) + 1: */
1637 n = (name_curpos + remaining) - filename;
1638 remaining = filename - name_curpos;
1639 memmove(name_curpos, filename, n);
1640 name_curpos += n;
1641
1642 *start_end_ofs++ = m->start;
1643 *start_end_ofs++ = m->end;
1644 *start_end_ofs++ = m->pgoff;
1645 count++;
1646 }
1647
1648 /* Now we know exact count of files, can store it */
1649 data[0] = count;
1650 data[1] = PAGE_SIZE;
1651 /*
1652 * Count usually is less than mm->map_count,
1653 * we need to move filenames down.
1654 */
1655 n = cprm->vma_count - count;
1656 if (n != 0) {
1657 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1658 memmove(name_base - shift_bytes, name_base,
1659 name_curpos - name_base);
1660 name_curpos -= shift_bytes;
1661 }
1662
1663 size = name_curpos - (char *)data;
1664 fill_note(note, "CORE", NT_FILE, size, data);
1665 return 0;
1666 }
1667
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki