Date: 2023-08-02 19:47:00
From: Doug Anderson

Subject: [PATCH v2] watchdog/hardlockup: Avoid large stack frames in watchdog_hardlockup_check()

After commit 77c12fc95980 ("watchdog/hardlockup: add a "cpu" param to
watchdog_hardlockup_check()") we started storing a `struct cpumask` on
the stack in watchdog_hardlockup_check(). On systems with
CONFIG_NR_CPUS set to 8192 this takes up 1K on the stack. That
triggers warnings with `CONFIG_FRAME_WARN` set to 1024.
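
To make the arithmetic concrete (a simplified sketch of the definition
in <linux/cpumask.h>, not part of this patch), `struct cpumask` is just
a fixed-size bitmap sized by CONFIG_NR_CPUS:

	typedef struct cpumask {
		unsigned long bits[BITS_TO_LONGS(NR_CPUS)];
	} cpumask_t;

With NR_CPUS=8192 on a 64-bit kernel that is 8192 / 64 = 128 unsigned
longs, i.e. 1024 bytes, so this one local variable by itself consumes
the entire `CONFIG_FRAME_WARN` budget for the stack frame.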

Instead of putting this `struct cpumask` on the stack, we'll allocate
it on the heap whenever userspace tells us that it wants all CPUs
backtraced upon a hardlockup.

NOTE: the reason this mask is needed at all is so that we can print the
hung CPU's backtrace first and then exclude that CPU from the mass
backtrace, which makes the logs much easier to understand.

Fixes: 77c12fc95980 ("watchdog/hardlockup: add a "cpu" param to watchdog_hardlockup_check()")
Reported-by: kernel test robot <[email protected]>
Closes: https://lore.kernel.org/r/[email protected]
Signed-off-by: Douglas Anderson <[email protected]>
---

Changes in v2:
- Only allocate space when userspace requests that all CPUs be backtraced.

kernel/watchdog.c | 44 ++++++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index be38276a365f..25d5627a6580 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -93,6 +93,8 @@ static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned);
static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched);
static unsigned long watchdog_hardlockup_all_cpu_dumped;

+static struct cpumask *hardlockup_backtrace_mask;
+
notrace void arch_touch_nmi_watchdog(void)
{
/*
@@ -106,6 +108,29 @@ notrace void arch_touch_nmi_watchdog(void)
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);

+static int hardlockup_all_cpu_backtrace_proc_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ /*
+ * Only allocate memory for the backtrace mask if userspace actually
+ * wants to trace all CPUs since this can take up 1K of space on a
+ * system with CONFIG_NR_CPUS=8192.
+ */
+ if (sysctl_hardlockup_all_cpu_backtrace && !hardlockup_backtrace_mask) {
+ hardlockup_backtrace_mask =
+ kzalloc(sizeof(*hardlockup_backtrace_mask), GFP_KERNEL);
+ } else if (!sysctl_hardlockup_all_cpu_backtrace && hardlockup_backtrace_mask) {
+ kfree(hardlockup_backtrace_mask);
+ hardlockup_backtrace_mask = NULL;
+ }
+
+ return ret;
+}
+
void watchdog_hardlockup_touch_cpu(unsigned int cpu)
{
per_cpu(watchdog_hardlockup_touched, cpu) = true;
@@ -151,9 +176,6 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
*/
if (is_hardlockup(cpu)) {
unsigned int this_cpu = smp_processor_id();
- struct cpumask backtrace_mask;
-
- cpumask_copy(&backtrace_mask, cpu_online_mask);

/* Only print hardlockups once. */
if (per_cpu(watchdog_hardlockup_warned, cpu))
@@ -167,19 +189,20 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
show_regs(regs);
else
dump_stack();
- cpumask_clear_cpu(cpu, &backtrace_mask);
} else {
- if (trigger_single_cpu_backtrace(cpu))
- cpumask_clear_cpu(cpu, &backtrace_mask);
+ trigger_single_cpu_backtrace(cpu);
}

/*
* Perform multi-CPU dump only once to avoid multiple
* hardlockups generating interleaving traces
*/
- if (sysctl_hardlockup_all_cpu_backtrace &&
- !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped))
- trigger_cpumask_backtrace(&backtrace_mask);
+ if (hardlockup_backtrace_mask &&
+ !test_and_set_bit(0, &watchdog_hardlockup_all_cpu_dumped)) {
+ cpumask_copy(hardlockup_backtrace_mask, cpu_online_mask);
+ cpumask_clear_cpu(cpu, hardlockup_backtrace_mask);
+ trigger_cpumask_backtrace(hardlockup_backtrace_mask);
+ }

if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
@@ -192,6 +215,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)

#else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */

+#define hardlockup_all_cpu_backtrace_proc_handler proc_dointvec_minmax
static inline void watchdog_hardlockup_kick(void) { }

#endif /* !CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */
@@ -916,7 +940,7 @@ static struct ctl_table watchdog_sysctls[] = {
.data = &sysctl_hardlockup_all_cpu_backtrace,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = hardlockup_all_cpu_backtrace_proc_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
--
2.41.0.585.gd2178a4bd4-goog



Date: 2023-08-03 08:29:09
From: Michal Hocko

Subject: Re: [PATCH v2] watchdog/hardlockup: Avoid large stack frames in watchdog_hardlockup_check()

On Wed 02-08-23 12:26:00, Douglas Anderson wrote:
> [...]
> +static int hardlockup_all_cpu_backtrace_proc_handler(struct ctl_table *table, int write,
> + void *buffer, size_t *lenp, loff_t *ppos)
> +{
> + int ret;
> +
> + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
> +
> + /*
> + * Only allocate memory for the backtrace mask if userspace actually
> + * wants to trace all CPUs since this can take up 1K of space on a
> + * system with CONFIG_NR_CPUS=8192.
> + */
> + if (sysctl_hardlockup_all_cpu_backtrace && !hardlockup_backtrace_mask) {
> + hardlockup_backtrace_mask =
> + kzalloc(sizeof(*hardlockup_backtrace_mask), GFP_KERNEL);
> + } else if (!sysctl_hardlockup_all_cpu_backtrace && hardlockup_backtrace_mask) {
> + kfree(hardlockup_backtrace_mask);
> + hardlockup_backtrace_mask = NULL;
> + }

While unlikely, this can race with the consumer, i.e.
watchdog_hardlockup_check(), and cause either a use-after-free or a
NULL pointer dereference.
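
To spell out one possible interleaving (an illustrative sketch against
the patch above, not a statement about how likely it is to hit):

  CPU A: watchdog_hardlockup_check()     CPU B: writes 0 to the sysctl
  ----------------------------------     ------------------------------
  if (hardlockup_backtrace_mask &&
      !test_and_set_bit(...)) {
                                         kfree(hardlockup_backtrace_mask);
                                         hardlockup_backtrace_mask = NULL;
      cpumask_copy(hardlockup_backtrace_mask,
                   cpu_online_mask);     <-- use-after-free

And since the pointer is read without READ_ONCE(), the compiler is also
free to reload it between the check and the use, so the same window can
surface as a NULL pointer dereference instead.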
--
Michal Hocko
SUSE Labs