2017-08-31 07:34:00

by Thomas Gleixner

[permalink] [raw]
Subject: [patch 24/29] lockup_detector/perf: Implement init time perf validation

The watchdog tries to create perf events even after it figured out that
perf is not functional or the requested event is not supported.

That's braindead as this can be done once at init time and, if not supported,
the NMI watchdog can be turned off unconditionally.

Implement the perf hardlockup detector functionality for that. This creates
a new event create function, which will replace the unholy mess of the
existing one in later patches.

Signed-off-by: Thomas Gleixner <[email protected]>
---
include/linux/nmi.h | 8 ++++++--
kernel/watchdog_hld.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 43 insertions(+), 2 deletions(-)

--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -93,14 +93,18 @@ extern void hardlockup_detector_perf_sto
extern void hardlockup_detector_perf_restart(void);
extern void hardlockup_detector_perf_disable(void);
extern void hardlockup_detector_perf_cleanup(void);
+extern int hardlockup_detector_perf_init(void);
#else
static inline void hardlockup_detector_perf_stop(void) { }
static inline void hardlockup_detector_perf_restart(void) { }
static inline void hardlockup_detector_perf_disable(void) { }
static inline void hardlockup_detector_perf_cleanup(void) { }
-#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
+# if !defined(CONFIG_HAVE_NMI_WATCHDOG)
+static inline int hardlockup_detector_perf_init(void) { return -ENODEV; }
static inline void arch_touch_nmi_watchdog(void) {}
-#endif
+# else /* CONFIG_HAVE_NMI_WATCHDOG */
+static inline int hardlockup_detector_perf_init(void) { return 0; }
+# endif /* !CONFIG_HAVE_NMI_WATCHDOG */
#endif

void watchdog_nmi_reconfigure(bool run);
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -238,6 +238,27 @@ int watchdog_nmi_enable(unsigned int cpu
return 0;
}

+static int hardlockup_detector_event_create(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct perf_event_attr *wd_attr;
+ struct perf_event *evt;
+
+ wd_attr = &wd_hw_attr;
+ wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+
+ /* Try to create the hardware perf event on the current CPU */
+ evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
+ watchdog_overflow_callback, NULL);
+ if (IS_ERR(evt)) {
+ pr_info("Perf event create on CPU %d failed with %ld\n", cpu,
+ PTR_ERR(evt));
+ return PTR_ERR(evt); /* hand the error code back to the caller */
+ }
+ this_cpu_write(watchdog_ev, evt); /* store in this CPU's watchdog_ev */
+ return 0;
+}
+
/**
* hardlockup_detector_perf_disable - Disable the local event
*/
@@ -315,3 +336,19 @@ void __init hardlockup_detector_perf_res
perf_event_enable(event);
}
}
+
+/**
+ * hardlockup_detector_perf_init - Probe whether NMI event is available at all
+ */
+int __init hardlockup_detector_perf_init(void)
+{
+ int ret = hardlockup_detector_event_create();
+
+ if (ret) {
+ pr_info("Perf NMI watchdog permanently disabled\n");
+ } else { /* probe succeeded: release the test event and clear the slot */
+ perf_event_release_kernel(this_cpu_read(watchdog_ev));
+ this_cpu_write(watchdog_ev, NULL);
+ }
+ return ret;
+}



2017-09-07 15:58:38

by Don Zickus

[permalink] [raw]
Subject: Re: [patch 24/29] lockup_detector/perf: Implement init time perf validation

On Thu, Aug 31, 2017 at 09:16:22AM +0200, Thomas Gleixner wrote:
> The watchdog tries to create perf events even after it figured out that
> perf is not functional or the requested event is not supported.
>
> That's braindead as this can be done once at init time and if not supported
> the NMI watchdog can be turned off unconditonally.
>
> Implement the perf hardlockup detector functionality for that. This creates
> a new event create function, which will replace the unholy mess of the
> existing one in later patches.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> include/linux/nmi.h | 8 ++++++--
> kernel/watchdog_hld.c | 37 +++++++++++++++++++++++++++++++++++++
> 2 files changed, 43 insertions(+), 2 deletions(-)
>
> --- a/include/linux/nmi.h
> +++ b/include/linux/nmi.h
> @@ -93,14 +93,18 @@ extern void hardlockup_detector_perf_sto
> extern void hardlockup_detector_perf_restart(void);
> extern void hardlockup_detector_perf_disable(void);
> extern void hardlockup_detector_perf_cleanup(void);
> +extern int hardlockup_detector_perf_init(void);
> #else
> static inline void hardlockup_detector_perf_stop(void) { }
> static inline void hardlockup_detector_perf_restart(void) { }
> static inline void hardlockup_detector_perf_disable(void) { }
> static inline void hardlockup_detector_perf_cleanup(void) { }
> -#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
> +# if !defined(CONFIG_HAVE_NMI_WATCHDOG)
> +static int hardlockup_detector_perf_init(void) { return -ENODEV; }
> static inline void arch_touch_nmi_watchdog(void) {}
> -#endif
> +# else
> +static int hardlockup_detector_perf_init(void) { return 0; }

hardlockup_detector_perf_init needs to be 'inline', otherwise it fails to
compile with CONFIG_HARDLOCKUP_DETECTOR_PERF turned off.


Cheers,
Don

> +# endif
> #endif
>
> void watchdog_nmi_reconfigure(bool run);
> --- a/kernel/watchdog_hld.c
> +++ b/kernel/watchdog_hld.c
> @@ -238,6 +238,27 @@ int watchdog_nmi_enable(unsigned int cpu
> return 0;
> }
>
> +static int hardlockup_detector_event_create(void)
> +{
> + unsigned int cpu = smp_processor_id();
> + struct perf_event_attr *wd_attr;
> + struct perf_event *evt;
> +
> + wd_attr = &wd_hw_attr;
> + wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
> +
> + /* Try to register using hardware perf events */
> + evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
> + watchdog_overflow_callback, NULL);
> + if (IS_ERR(evt)) {
> + pr_info("Perf event create on CPU %d failed with %ld\n", cpu,
> + PTR_ERR(evt));
> + return PTR_ERR(evt);
> + }
> + this_cpu_write(watchdog_ev, evt);
> + return 0;
> +}
> +
> /**
> * hardlockup_detector_perf_disable - Disable the local event
> */
> @@ -315,3 +336,19 @@ void __init hardlockup_detector_perf_res
> perf_event_enable(event);
> }
> }
> +
> +/**
> + * hardlockup_detector_perf_init - Probe whether NMI event is available at all
> + */
> +int __init hardlockup_detector_perf_init(void)
> +{
> + int ret = hardlockup_detector_event_create();
> +
> + if (ret) {
> + pr_info("Perf NMI watchdog permanetely disabled\n");
> + } else {
> + perf_event_release_kernel(this_cpu_read(watchdog_ev));
> + this_cpu_write(watchdog_ev, NULL);
> + }
> + return ret;
> +}
>
>