THe delay between an HFI interrupt and its corresponding thermal netlink
event has so far been hard-coded to CONFIG_HZ jiffies. This may not suit
the needs of all hardware configurations or listeners of events.
If the update delay is too long, much of the information of consecutive
hardware updates will be lost as the HFI delayed workqueue posts a new
thermal netlink event only when there are no previous pending events. If
the delay is too short, multiple, events may overwhelm listeners.
Listeners are better placed to determine the delay of events. They know how
quickly they can act on them effectively. They may also want to experiment
with different values.
Start a debugfs interface to tune the notification delay.
Keep things simple and do not add extra locking or memory barriers. This
may result in the HFI interrupt ocassionally queueing work using stale
delay values, if at all. This should not be a problem: the debugfs file
will update the delay value atomically, we do not expect users to update
the delay value frequently, and the delayed workqueue operates in jiffies
resolution.
Suggested-by: Srinivas Pandruvada <[email protected]>
Signed-off-by: Ricardo Neri <[email protected]>
---
Cc: Len Brown <[email protected]>
Cc: Srinivas Pandruvada <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Zhang Rui <[email protected]>
Cc: [email protected]
Cc: [email protected]
---
drivers/thermal/intel/intel_hfi.c | 77 ++++++++++++++++++++++++++++++-
1 file changed, 76 insertions(+), 1 deletion(-)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index e2b82d71ab6b..24d708268c68 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -43,6 +43,10 @@
#include <asm/msr.h>
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif
+
#include "intel_hfi.h"
#include "thermal_interrupt.h"
@@ -169,6 +173,70 @@ static struct workqueue_struct *hfi_updates_wq;
#define HFI_UPDATE_DELAY HZ
#define HFI_MAX_THERM_NOTIFY_COUNT 16
+/* Keep this variable 8-byte aligned to get atomic accesses. */
+static unsigned long hfi_update_delay = HFI_UPDATE_DELAY;
+
+#ifdef CONFIG_DEBUG_FS
+static int hfi_update_delay_get(void *data, u64 *val)
+{
+ mutex_lock(&hfi_instance_lock);
+ *val = jiffies_to_msecs(hfi_update_delay);
+ mutex_unlock(&hfi_instance_lock);
+ return 0;
+}
+
+static int hfi_update_delay_set(void *data, u64 val)
+{
+ /*
+ * The mutex only serializes access to the debugfs file.
+ *
+ * hfi_process_event() loads @hfi_update_delay from interrupt context.
+ * We could have serialized accesses with a spinlock or a memory barrier.
+ * But this is a debug feature, the store of @hfi_update_delay is
+ * atomic, and will seldom change. A few loads of @hfi_update_delay may
+ * see stale values but the updated value will be seen eventually.
+ */
+ mutex_lock(&hfi_instance_lock);
+ hfi_update_delay = msecs_to_jiffies(val);
+ mutex_unlock(&hfi_instance_lock);
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(hfi_update_delay_fops, hfi_update_delay_get,
+ hfi_update_delay_set, "%llu\n");
+
+static struct dentry *hfi_debugfs_dir;
+
+static void hfi_debugfs_unregister(void)
+{
+ debugfs_remove_recursive(hfi_debugfs_dir);
+ hfi_debugfs_dir = NULL;
+}
+
+static void hfi_debugfs_register(void)
+{
+ struct dentry *f;
+
+ hfi_debugfs_dir = debugfs_create_dir("intel_hfi", NULL);
+ if (!hfi_debugfs_dir)
+ return;
+
+ f = debugfs_create_file("update_delay_ms", 0644, hfi_debugfs_dir,
+ NULL, &hfi_update_delay_fops);
+ if (!f)
+ goto err;
+
+ return;
+err:
+ hfi_debugfs_unregister();
+}
+
+#else
+static void hfi_debugfs_register(void)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
static void get_hfi_caps(struct hfi_instance *hfi_instance,
struct thermal_genl_cpu_caps *cpu_caps)
{
@@ -321,8 +389,13 @@ void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
raw_spin_unlock(&hfi_instance->table_lock);
raw_spin_unlock(&hfi_instance->event_lock);
+ /*
+ * debugfs may atomically store @hfi_update_delay without locking. The
+ * updated value may not be immediately observed. See note in
+ * hfi_update_delay_set().
+ */
queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
- HFI_UPDATE_DELAY);
+ hfi_update_delay);
}
static void init_hfi_cpu_index(struct hfi_cpu_info *info)
@@ -708,6 +781,8 @@ void __init intel_hfi_init(void)
register_syscore_ops(&hfi_pm_ops);
+ hfi_debugfs_register();
+
return;
err_nl_notif:
--
2.34.1
On Mon, 2024-04-29 at 16:41 -0700, Ricardo Neri wrote:
> THe delay between an HFI interrupt and its corresponding thermal
s/THe/The
> netlink
> event has so far been hard-coded to CONFIG_HZ jiffies. This may not
> suit
> the needs of all hardware configurations or listeners of events.
>
> If the update delay is too long, much of the information of
> consecutive
> hardware updates will be lost as the HFI delayed workqueue posts a
> new
> thermal netlink event only when there are no previous pending events.
> If
> the delay is too short, multiple, events may overwhelm listeners.
>
> Listeners are better placed to determine the delay of events. They
> know how
> quickly they can act on them effectively. They may also want to
> experiment
> with different values.
>
> Start a debugfs interface to tune the notification delay.
Why this is implemented as debugfs rather than a module param?
thanks,
rui
>
> Keep things simple and do not add extra locking or memory barriers.
> This
> may result in the HFI interrupt ocassionally queueing work using
> stale
> delay values, if at all. This should not be a problem: the debugfs
> file
> will update the delay value atomically, we do not expect users to
> update
> the delay value frequently, and the delayed workqueue operates in
> jiffies
> resolution.
>
> Suggested-by: Srinivas Pandruvada
> <[email protected]>
> Signed-off-by: Ricardo Neri <[email protected]>
> ---
> Cc: Len Brown <[email protected]>
> Cc: Srinivas Pandruvada <[email protected]>
> Cc: Stanislaw Gruszka <[email protected]>
> Cc: Zhang Rui <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> ---
> drivers/thermal/intel/intel_hfi.c | 77
> ++++++++++++++++++++++++++++++-
> 1 file changed, 76 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c
> b/drivers/thermal/intel/intel_hfi.c
> index e2b82d71ab6b..24d708268c68 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -43,6 +43,10 @@
>
> #include <asm/msr.h>
>
> +#ifdef CONFIG_DEBUG_FS
> +#include <linux/debugfs.h>
> +#endif
> +
> #include "intel_hfi.h"
> #include "thermal_interrupt.h"
>
> @@ -169,6 +173,70 @@ static struct workqueue_struct *hfi_updates_wq;
> #define HFI_UPDATE_DELAY HZ
> #define HFI_MAX_THERM_NOTIFY_COUNT 16
>
> +/* Keep this variable 8-byte aligned to get atomic accesses. */
> +static unsigned long hfi_update_delay = HFI_UPDATE_DELAY;
> +
> +#ifdef CONFIG_DEBUG_FS
> +static int hfi_update_delay_get(void *data, u64 *val)
> +{
> + mutex_lock(&hfi_instance_lock);
> + *val = jiffies_to_msecs(hfi_update_delay);
> + mutex_unlock(&hfi_instance_lock);
> + return 0;
> +}
> +
> +static int hfi_update_delay_set(void *data, u64 val)
> +{
> + /*
> + * The mutex only serializes access to the debugfs file.
> + *
> + * hfi_process_event() loads @hfi_update_delay from interrupt
> context.
> + * We could have serialized accesses with a spinlock or a
> memory barrier.
> + * But this is a debug feature, the store of
> @hfi_update_delay is
> + * atomic, and will seldom change. A few loads of
> @hfi_update_delay may
> + * see stale values but the updated value will be seen
> eventually.
> + */
> + mutex_lock(&hfi_instance_lock);
> + hfi_update_delay = msecs_to_jiffies(val);
> + mutex_unlock(&hfi_instance_lock);
> + return 0;
> +}
> +
> +DEFINE_DEBUGFS_ATTRIBUTE(hfi_update_delay_fops,
> hfi_update_delay_get,
> + hfi_update_delay_set, "%llu\n");
> +
> +static struct dentry *hfi_debugfs_dir;
> +
> +static void hfi_debugfs_unregister(void)
> +{
> + debugfs_remove_recursive(hfi_debugfs_dir);
> + hfi_debugfs_dir = NULL;
> +}
> +
> +static void hfi_debugfs_register(void)
> +{
> + struct dentry *f;
> +
> + hfi_debugfs_dir = debugfs_create_dir("intel_hfi", NULL);
> + if (!hfi_debugfs_dir)
> + return;
> +
> + f = debugfs_create_file("update_delay_ms", 0644,
> hfi_debugfs_dir,
> + NULL, &hfi_update_delay_fops);
> + if (!f)
> + goto err;
> +
> + return;
> +err:
> + hfi_debugfs_unregister();
> +}
> +
> +#else
> +static void hfi_debugfs_register(void)
> +{
> +}
> +#endif /* CONFIG_DEBUG_FS */
> +
> static void get_hfi_caps(struct hfi_instance *hfi_instance,
> struct thermal_genl_cpu_caps *cpu_caps)
> {
> @@ -321,8 +389,13 @@ void intel_hfi_process_event(__u64
> pkg_therm_status_msr_val)
> raw_spin_unlock(&hfi_instance->table_lock);
> raw_spin_unlock(&hfi_instance->event_lock);
>
> + /*
> + * debugfs may atomically store @hfi_update_delay without
> locking. The
> + * updated value may not be immediately observed. See note in
> + * hfi_update_delay_set().
> + */
> queue_delayed_work(hfi_updates_wq, &hfi_instance-
> >update_work,
> - HFI_UPDATE_DELAY);
> + hfi_update_delay);
> }
>
> static void init_hfi_cpu_index(struct hfi_cpu_info *info)
> @@ -708,6 +781,8 @@ void __init intel_hfi_init(void)
>
> register_syscore_ops(&hfi_pm_ops);
>
> + hfi_debugfs_register();
> +
> return;
>
> err_nl_notif:
On Tue, Apr 30, 2024 at 04:52:52AM +0000, Zhang, Rui wrote:
> On Mon, 2024-04-29 at 16:41 -0700, Ricardo Neri wrote:
> > THe delay between an HFI interrupt and its corresponding thermal
>
> s/THe/The
Ah, I read the changelog 1000 times and still I misse this. Thanks!
>
> > netlink
> > event has so far been hard-coded to CONFIG_HZ jiffies. This may not
> > suit
> > the needs of all hardware configurations or listeners of events.
> >
> > If the update delay is too long, much of the information of
> > consecutive
> > hardware updates will be lost as the HFI delayed workqueue posts a
> > new
> > thermal netlink event only when there are no previous pending events.
> > If
> > the delay is too short, multiple, events may overwhelm listeners.
> >
> > Listeners are better placed to determine the delay of events. They
> > know how
> > quickly they can act on them effectively. They may also want to
> > experiment
> > with different values.
> >
> > Start a debugfs interface to tune the notification delay.
>
> Why this is implemented as debugfs rather than a module param?
Implementing as module param would require more work. In its current form,
intel_hfi is not a module. Its entry points are device_initcall()
(via therm_throt.c) and CPU hotplug.
I can look into implementing as a module param if folks think its a
better solution.