Hi,
Memory corruption may occur if the location of the HFI memory buffer is not
restored when resuming from hibernation or suspend-to-memory.
During a normal boot, the kernel allocates a memory buffer and gives it to
the hardware for reporting updates in the HFI table. The same allocation
process is done by a restore kernel when resuming from suspend or
hibernation.
The location of the memory that the restore kernel allocates may differ
from that allocated by the image kernel. To prevent memory corruption (the
hardware keeps using the memory buffer from the restore kernel), it is
necessary to disable HFI before transferring control to the image kernel.
Once running, the image kernel must restore the location of the HFI memory
and enable HFI.
The patchset addresses the described bug on systems with one or more HFI
instances (i.e., packages) using CPU hotplug callbacks and a suspend
notifier.
I tested this patchset on Meteor Lake and Sapphire Rapids. The systems
completed 3500 (in two separate tests of 1500 and 2000 repeats) and
1000 hibernate-resume cycles, respectively. I tested it using Rafael's
testing branch as on 20th December 2023.
Thanks and BR,
Ricardo
Ricardo Neri (4):
thermal: intel: hfi: Refactor enabling code into helper functions
thermal: intel: hfi: Enable an HFI instance from its first online CPU
thermal: intel: hfi: Disable an HFI instance when all its CPUs go
offline
thermal: intel: hfi: Add a suspend notifier
drivers/thermal/intel/intel_hfi.c | 142 ++++++++++++++++++++++++------
1 file changed, 116 insertions(+), 26 deletions(-)
--
2.25.1
In preparation to add a suspend notifier, wrap the logic to enable HFI and
program its memory buffer into helper functions. Both the CPU hotplug
callback and the suspend notifier will use it.
Cc: Chen Yu <[email protected]>
Cc: Len Brown <[email protected]>
Cc: Srinivas Pandruvada <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Zhang Rui <[email protected]>
Cc: Zhao Liu <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Ricardo Neri <[email protected]>
---
drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++--------------
1 file changed, 25 insertions(+), 21 deletions(-)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index c69db6c90869..87ac7b196981 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -347,6 +347,25 @@ static void init_hfi_instance(struct hfi_instance *hfi_instance)
hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
}
+static void hfi_enable(void)
+{
+ u64 msr_val;
+
+ rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+ wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+}
+
+static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
+{
+ phys_addr_t hw_table_pa;
+ u64 msr_val;
+
+ hw_table_pa = virt_to_phys(hfi_instance->hw_table);
+ msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
+ wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
+}
+
/**
* intel_hfi_online() - Enable HFI on @cpu
* @cpu: CPU in which the HFI will be enabled
@@ -364,8 +383,6 @@ void intel_hfi_online(unsigned int cpu)
{
struct hfi_instance *hfi_instance;
struct hfi_cpu_info *info;
- phys_addr_t hw_table_pa;
- u64 msr_val;
u16 die_id;
/* Nothing to do if hfi_instances are missing. */
@@ -403,14 +420,16 @@ void intel_hfi_online(unsigned int cpu)
/*
* Hardware is programmed with the physical address of the first page
* frame of the table. Hence, the allocated memory must be page-aligned.
+ *
+ * Some processors do not forget the initial address of the HFI table
+ * even after having been reprogrammed. Keep using the same pages. Do
+ * not free them.
*/
hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
GFP_KERNEL | __GFP_ZERO);
if (!hfi_instance->hw_table)
goto unlock;
- hw_table_pa = virt_to_phys(hfi_instance->hw_table);
-
/*
* Allocate memory to keep a local copy of the table that
* hardware generates.
@@ -420,16 +439,6 @@ void intel_hfi_online(unsigned int cpu)
if (!hfi_instance->local_table)
goto free_hw_table;
- /*
- * Program the address of the feedback table of this die/package. On
- * some processors, hardware remembers the old address of the HFI table
- * even after having been reprogrammed and re-enabled. Thus, do not free
- * the pages allocated for the table or reprogram the hardware with a
- * new base address. Namely, program the hardware only once.
- */
- msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
- wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
-
init_hfi_instance(hfi_instance);
INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
@@ -438,13 +447,8 @@ void intel_hfi_online(unsigned int cpu)
cpumask_set_cpu(cpu, hfi_instance->cpus);
- /*
- * Enable the hardware feedback interface and never disable it. See
- * comment on programming the address of the table.
- */
- rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
- msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
- wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ hfi_set_hw_table(hfi_instance);
+ hfi_enable();
unlock:
mutex_unlock(&hfi_instance_lock);
--
2.25.1
Previously, HFI instances were never disabled once enabled. A CPU in an
instance only had to check during boot whether another CPU had previously
initialized the instance and its corresponding data structure.
A subsequent changeset will add functionality to disable instances
to support hibernation. Such change will also make possible to disable an
HFI instance during runtime via CPU hotplug.
Enable an HFI instance from the first of its CPUs that comes online. This
covers the boot, CPU hotplug, and resume-from-suspend cases. It also covers
systems with one or more HFI instances (i.e., packages).
Cc: Chen Yu <[email protected]>
Cc: Len Brown <[email protected]>
Cc: Srinivas Pandruvada <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Zhang Rui <[email protected]>
Cc: Zhao Liu <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Ricardo Neri <[email protected]>
---
drivers/thermal/intel/intel_hfi.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 87ac7b196981..15c8c3b841d2 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -409,13 +409,12 @@ void intel_hfi_online(unsigned int cpu)
/*
* Now check if the HFI instance of the package/die of @cpu has been
* initialized (by checking its header). In such case, all we have to
- * do is to add @cpu to this instance's cpumask.
+ * do is to add @cpu to this instance's cpumask and enable the instance
+ * if needed.
*/
mutex_lock(&hfi_instance_lock);
- if (hfi_instance->hdr) {
- cpumask_set_cpu(cpu, hfi_instance->cpus);
- goto unlock;
- }
+ if (hfi_instance->hdr)
+ goto enable;
/*
* Hardware is programmed with the physical address of the first page
@@ -445,10 +444,14 @@ void intel_hfi_online(unsigned int cpu)
raw_spin_lock_init(&hfi_instance->table_lock);
raw_spin_lock_init(&hfi_instance->event_lock);
+enable:
cpumask_set_cpu(cpu, hfi_instance->cpus);
- hfi_set_hw_table(hfi_instance);
- hfi_enable();
+ /* Enable this HFI instance if this is its first online CPU. */
+ if (cpumask_weight(hfi_instance->cpus) == 1) {
+ hfi_set_hw_table(hfi_instance);
+ hfi_enable();
+ }
unlock:
mutex_unlock(&hfi_instance_lock);
--
2.25.1
In preparation to support hibernation, add functionality to disable an HFI
instance during CPU offline. The last CPU of an instance that goes offline
will disable such instance.
The Intel Software Development Manual states that the operating system must
wait for the hardware to set MSR_IA32_PACKAGE_THERM_STATUS[26] after
disabling an HFI instance to ensure that it will no longer write on the HFI
memory. Some processors, however, do not ever set such bit. Wait a minimum
of 2ms to give time hardware to complete any pending memory writes.
Cc: Chen Yu <[email protected]>
Cc: Len Brown <[email protected]>
Cc: Srinivas Pandruvada <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Zhang Rui <[email protected]>
Cc: Zhao Liu <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Ricardo Neri <[email protected]>
---
drivers/thermal/intel/intel_hfi.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 15c8c3b841d2..d2c874f43786 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -24,6 +24,7 @@
#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/cpumask.h>
+#include <linux/delay.h>
#include <linux/gfp.h>
#include <linux/io.h>
#include <linux/kernel.h>
@@ -366,6 +367,31 @@ static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
}
+static void hfi_disable(void)
+{
+ u64 msr_val;
+ int i;
+
+ rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+ wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+
+ /*
+ * Wait for hardware to acknowledge the disabling of HFI. Some
+ * processors may not do it. Wait for ~2ms. This is a reasonable
+ * time for hardware to complete any pending actions on the HFI
+ * memory.
+ */
+ for (i = 0; i < 2000; i++) {
+ rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
+ if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED)
+ break;
+
+ udelay(1);
+ cpu_relax();
+ }
+}
+
/**
* intel_hfi_online() - Enable HFI on @cpu
* @cpu: CPU in which the HFI will be enabled
@@ -491,6 +517,10 @@ void intel_hfi_offline(unsigned int cpu)
mutex_lock(&hfi_instance_lock);
cpumask_clear_cpu(cpu, hfi_instance->cpus);
+
+ if (!cpumask_weight(hfi_instance->cpus))
+ hfi_disable();
+
mutex_unlock(&hfi_instance_lock);
}
--
2.25.1
The kernel gives the HFI hardware a memory region that the latter uses to
provide updates to the HFI table. The kernel allocates this memory region
at boot. It remains constant throughout runtime time.
When resuming from suspend or hibernation, the restore kernel allocates a
second memory buffer and reprograms the HFI hardware with the new location
as part of a normal boot. The location of the second memory buffer may
differ from the one allocated by the image kernel. Subsequently, when the
restore kernel transfers control to the image kernel, the second buffer
becomes invalid, potentially leading to memory corruption if the hardware
writes to it (hardware continues using the buffer from the restore kernel).
Add a suspend notifier to disable all HFI instances before jumping to the
image kernel and enable them once the image kernel has been restored. Use
the memory buffer that the image kernel allocated.
For non-boot CPUs, rely on the CPU hotplug callbacks as CPUs are disabled
and enabled during suspend and resume, respectively.
The CPU hotplug callbacks do not cover the boot CPU. Handle the HFI
instance of the boot CPU from the suspend notifier callback.
Cc: Chen Yu <[email protected]>
Cc: Len Brown <[email protected]>
Cc: Srinivas Pandruvada <[email protected]>
Cc: Stanislaw Gruszka <[email protected]>
Cc: Zhang Rui <[email protected]>
Cc: Zhao Liu <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Ricardo Neri <[email protected]>
---
drivers/thermal/intel/intel_hfi.c | 53 +++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index d2c874f43786..965c245e5e78 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -30,11 +30,13 @@
#include <linux/kernel.h>
#include <linux/math.h>
#include <linux/mutex.h>
+#include <linux/notifier.h>
#include <linux/percpu-defs.h>
#include <linux/printk.h>
#include <linux/processor.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/suspend.h>
#include <linux/string.h>
#include <linux/topology.h>
#include <linux/workqueue.h>
@@ -569,11 +571,62 @@ static __init int hfi_parse_features(void)
return 0;
}
+static void hfi_do_pm_enable(void *info)
+{
+ struct hfi_instance *hfi_instance = info;
+
+ hfi_set_hw_table(hfi_instance);
+ hfi_enable();
+}
+
+static void hfi_do_pm_disable(void *info)
+{
+ hfi_disable();
+}
+
+static int hfi_pm_notify(struct notifier_block *nb,
+ unsigned long mode, void *unused)
+{
+ struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
+ struct hfi_instance *hfi_instance = info->hfi_instance;
+
+ /* HFI may not be in use. */
+ if (!hfi_instance)
+ return 0;
+
+ /*
+ * Only handle the HFI instance of the package of the boot CPU. The
+ * instances of other packages are handled in the CPU hotplug callbacks.
+ */
+ switch (mode) {
+ case PM_HIBERNATION_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ case PM_RESTORE_PREPARE:
+ return smp_call_function_single(0, hfi_do_pm_disable,
+ NULL, true);
+
+ case PM_POST_RESTORE:
+ case PM_POST_HIBERNATION:
+ case PM_POST_SUSPEND:
+ return smp_call_function_single(0, hfi_do_pm_enable,
+ hfi_instance, true);
+ default:
+ return -EINVAL;
+ }
+}
+
+static struct notifier_block hfi_pm_nb = {
+ .notifier_call = hfi_pm_notify,
+};
+
void __init intel_hfi_init(void)
{
struct hfi_instance *hfi_instance;
int i, j;
+ if (register_pm_notifier(&hfi_pm_nb))
+ return;
+
if (hfi_parse_features())
return;
--
2.25.1
On Wed, Dec 27, 2023 at 7:28 AM Ricardo Neri
<[email protected]> wrote:
>
> In preparation to add a suspend notifier, wrap the logic to enable HFI and
> program its memory buffer into helper functions. Both the CPU hotplug
> callback and the suspend notifier will use it.
No functional impact?
> Cc: Chen Yu <[email protected]>
> Cc: Len Brown <[email protected]>
> Cc: Srinivas Pandruvada <[email protected]>
> Cc: Stanislaw Gruszka <[email protected]>
> Cc: Zhang Rui <[email protected]>
> Cc: Zhao Liu <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
Please don't CC stable@vger on patch submissions, although you may add
a Cc: stable tag without actually CCing it for my information, but in
that case please add a full tag including the earliest stable series
the patch is intended to apply to.
> Signed-off-by: Ricardo Neri <[email protected]>
> ---
> drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++--------------
> 1 file changed, 25 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index c69db6c90869..87ac7b196981 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -347,6 +347,25 @@ static void init_hfi_instance(struct hfi_instance *hfi_instance)
> hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
> }
>
> +static void hfi_enable(void)
> +{
> + u64 msr_val;
> +
> + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
> + msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
> + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
> +}
> +
> +static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
> +{
> + phys_addr_t hw_table_pa;
> + u64 msr_val;
> +
> + hw_table_pa = virt_to_phys(hfi_instance->hw_table);
> + msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
> + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
> +}
> +
> /**
> * intel_hfi_online() - Enable HFI on @cpu
> * @cpu: CPU in which the HFI will be enabled
> @@ -364,8 +383,6 @@ void intel_hfi_online(unsigned int cpu)
> {
> struct hfi_instance *hfi_instance;
> struct hfi_cpu_info *info;
> - phys_addr_t hw_table_pa;
> - u64 msr_val;
> u16 die_id;
>
> /* Nothing to do if hfi_instances are missing. */
> @@ -403,14 +420,16 @@ void intel_hfi_online(unsigned int cpu)
> /*
> * Hardware is programmed with the physical address of the first page
> * frame of the table. Hence, the allocated memory must be page-aligned.
> + *
> + * Some processors do not forget the initial address of the HFI table
> + * even after having been reprogrammed. Keep using the same pages. Do
> + * not free them.
This comment change does not seem to belong to this patch. I guess it
needs to go to one of the subsequent patches?
> */
> hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
> GFP_KERNEL | __GFP_ZERO);
> if (!hfi_instance->hw_table)
> goto unlock;
>
> - hw_table_pa = virt_to_phys(hfi_instance->hw_table);
> -
> /*
> * Allocate memory to keep a local copy of the table that
> * hardware generates.
> @@ -420,16 +439,6 @@ void intel_hfi_online(unsigned int cpu)
> if (!hfi_instance->local_table)
> goto free_hw_table;
>
> - /*
> - * Program the address of the feedback table of this die/package. On
> - * some processors, hardware remembers the old address of the HFI table
> - * even after having been reprogrammed and re-enabled. Thus, do not free
> - * the pages allocated for the table or reprogram the hardware with a
> - * new base address. Namely, program the hardware only once.
> - */
> - msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
> - wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
> -
> init_hfi_instance(hfi_instance);
>
> INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
> @@ -438,13 +447,8 @@ void intel_hfi_online(unsigned int cpu)
>
> cpumask_set_cpu(cpu, hfi_instance->cpus);
>
> - /*
> - * Enable the hardware feedback interface and never disable it. See
> - * comment on programming the address of the table.
> - */
> - rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
> - msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
> - wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
> + hfi_set_hw_table(hfi_instance);
> + hfi_enable();
>
> unlock:
> mutex_unlock(&hfi_instance_lock);
> --
On Wed, Dec 27, 2023 at 7:28 AM Ricardo Neri
<[email protected]> wrote:
>
> The kernel gives the HFI hardware a memory region that the latter uses to
> provide updates to the HFI table. The kernel allocates this memory region
> at boot. It remains constant throughout runtime time.
>
> When resuming from suspend or hibernation, the restore kernel allocates a
> second memory buffer and reprograms the HFI hardware with the new location
> as part of a normal boot. The location of the second memory buffer may
> differ from the one allocated by the image kernel. Subsequently, when the
> restore kernel transfers control to the image kernel, the second buffer
> becomes invalid, potentially leading to memory corruption if the hardware
> writes to it (hardware continues using the buffer from the restore kernel).
>
> Add a suspend notifier to disable all HFI instances before jumping to the
> image kernel and enable them once the image kernel has been restored. Use
> the memory buffer that the image kernel allocated.
>
> For non-boot CPUs, rely on the CPU hotplug callbacks as CPUs are disabled
> and enabled during suspend and resume, respectively.
>
> The CPU hotplug callbacks do not cover the boot CPU. Handle the HFI
> instance of the boot CPU from the suspend notifier callback.
>
> Cc: Chen Yu <[email protected]>
> Cc: Len Brown <[email protected]>
> Cc: Srinivas Pandruvada <[email protected]>
> Cc: Stanislaw Gruszka <[email protected]>
> Cc: Zhang Rui <[email protected]>
> Cc: Zhao Liu <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Ricardo Neri <[email protected]>
> ---
> drivers/thermal/intel/intel_hfi.c | 53 +++++++++++++++++++++++++++++++
> 1 file changed, 53 insertions(+)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index d2c874f43786..965c245e5e78 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -30,11 +30,13 @@
> #include <linux/kernel.h>
> #include <linux/math.h>
> #include <linux/mutex.h>
> +#include <linux/notifier.h>
> #include <linux/percpu-defs.h>
> #include <linux/printk.h>
> #include <linux/processor.h>
> #include <linux/slab.h>
> #include <linux/spinlock.h>
> +#include <linux/suspend.h>
> #include <linux/string.h>
> #include <linux/topology.h>
> #include <linux/workqueue.h>
> @@ -569,11 +571,62 @@ static __init int hfi_parse_features(void)
> return 0;
> }
>
> +static void hfi_do_pm_enable(void *info)
> +{
> + struct hfi_instance *hfi_instance = info;
> +
> + hfi_set_hw_table(hfi_instance);
> + hfi_enable();
The above do RMW, so should locking be used here?
> +}
> +
> +static void hfi_do_pm_disable(void *info)
> +{
> + hfi_disable();
> +}
And here?
> +
> +static int hfi_pm_notify(struct notifier_block *nb,
> + unsigned long mode, void *unused)
> +{
> + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
> + struct hfi_instance *hfi_instance = info->hfi_instance;
> +
> + /* HFI may not be in use. */
> + if (!hfi_instance)
> + return 0;
> +
> + /*
> + * Only handle the HFI instance of the package of the boot CPU. The
> + * instances of other packages are handled in the CPU hotplug callbacks.
> + */
> + switch (mode) {
> + case PM_HIBERNATION_PREPARE:
> + case PM_SUSPEND_PREPARE:
> + case PM_RESTORE_PREPARE:
> + return smp_call_function_single(0, hfi_do_pm_disable,
> + NULL, true);
> +
> + case PM_POST_RESTORE:
> + case PM_POST_HIBERNATION:
> + case PM_POST_SUSPEND:
> + return smp_call_function_single(0, hfi_do_pm_enable,
> + hfi_instance, true);
> + default:
> + return -EINVAL;
> + }
> +}
> +
> +static struct notifier_block hfi_pm_nb = {
> + .notifier_call = hfi_pm_notify,
> +};
> +
> void __init intel_hfi_init(void)
> {
> struct hfi_instance *hfi_instance;
> int i, j;
>
> + if (register_pm_notifier(&hfi_pm_nb))
> + return;
> +
> if (hfi_parse_features())
> return;
>
> --
On Fri, Dec 29, 2023 at 06:22:25PM +0100, Rafael J. Wysocki wrote:
> On Wed, Dec 27, 2023 at 7:28 AM Ricardo Neri
> <[email protected]> wrote:
> >
> > In preparation to add a suspend notifier, wrap the logic to enable HFI and
> > program its memory buffer into helper functions. Both the CPU hotplug
> > callback and the suspend notifier will use it.
>
> No functional impact?
Thank you for your review!
Correct. There is no functional impact. I will update the commit message
in my next version.
>
> > Cc: Chen Yu <[email protected]>
> > Cc: Len Brown <[email protected]>
> > Cc: Srinivas Pandruvada <[email protected]>
> > Cc: Stanislaw Gruszka <[email protected]>
> > Cc: Zhang Rui <[email protected]>
> > Cc: Zhao Liu <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
>
> Please don't CC stable@vger on patch submissions, although you may add
> a Cc: stable tag without actually CCing it for my information, but in
> that case please add a full tag including the earliest stable series
> the patch is intended to apply to.
I see. Sure Rafael, I can do this.
>
> > Signed-off-by: Ricardo Neri <[email protected]>
> > ---
> > drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++--------------
> > 1 file changed, 25 insertions(+), 21 deletions(-)
> >
> > diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> > index c69db6c90869..87ac7b196981 100644
> > --- a/drivers/thermal/intel/intel_hfi.c
> > +++ b/drivers/thermal/intel/intel_hfi.c
> > @@ -347,6 +347,25 @@ static void init_hfi_instance(struct hfi_instance *hfi_instance)
> > hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
> > }
> >
> > +static void hfi_enable(void)
> > +{
> > + u64 msr_val;
> > +
> > + rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
> > + msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
> > + wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
> > +}
> > +
> > +static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
> > +{
> > + phys_addr_t hw_table_pa;
> > + u64 msr_val;
> > +
> > + hw_table_pa = virt_to_phys(hfi_instance->hw_table);
> > + msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
> > + wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
> > +}
> > +
> > /**
> > * intel_hfi_online() - Enable HFI on @cpu
> > * @cpu: CPU in which the HFI will be enabled
> > @@ -364,8 +383,6 @@ void intel_hfi_online(unsigned int cpu)
> > {
> > struct hfi_instance *hfi_instance;
> > struct hfi_cpu_info *info;
> > - phys_addr_t hw_table_pa;
> > - u64 msr_val;
> > u16 die_id;
> >
> > /* Nothing to do if hfi_instances are missing. */
> > @@ -403,14 +420,16 @@ void intel_hfi_online(unsigned int cpu)
> > /*
> > * Hardware is programmed with the physical address of the first page
> > * frame of the table. Hence, the allocated memory must be page-aligned.
> > + *
> > + * Some processors do not forget the initial address of the HFI table
> > + * even after having been reprogrammed. Keep using the same pages. Do
> > + * not free them.
>
> This comment change does not seem to belong to this patch. I guess it
> needs to go to one of the subsequent patches?
My intention was is to relocate here the comment I deleted in the
subsequent hunk (after some rewording). Sure, I can put this comment in
patch 3/4, which deals with disabling HFI.
>
> > */
> > hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
> > GFP_KERNEL | __GFP_ZERO);
> > if (!hfi_instance->hw_table)
> > goto unlock;
> >
> > - hw_table_pa = virt_to_phys(hfi_instance->hw_table);
> > -
> > /*
> > * Allocate memory to keep a local copy of the table that
> > * hardware generates.
> > @@ -420,16 +439,6 @@ void intel_hfi_online(unsigned int cpu)
> > if (!hfi_instance->local_table)
> > goto free_hw_table;
> >
> > - /*
> > - * Program the address of the feedback table of this die/package. On
> > - * some processors, hardware remembers the old address of the HFI table
> > - * even after having been reprogrammed and re-enabled. Thus, do not free
> > - * the pages allocated for the table or reprogram the hardware with a
> > - * new base address. Namely, program the hardware only once.
> > - */
> > - msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
> > - wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
> > -
> > init_hfi_instance(hfi_instance);
> >
> > INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
> > @@ -438,13 +447,8 @@ void intel_hfi_online(unsigned int cpu)
> >
> > cpumask_set_cpu(cpu, hfi_instance->cpus);
> >
> > - /*
> > - * Enable the hardware feedback interface and never disable it. See
> > - * comment on programming the address of the table.
> > - */
> > - rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
> > - msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
> > - wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
> > + hfi_set_hw_table(hfi_instance);
> > + hfi_enable();
> >
> > unlock:
> > mutex_unlock(&hfi_instance_lock);
> > --
On Fri, Dec 29, 2023 at 06:27:30PM +0100, Rafael J. Wysocki wrote:
> On Wed, Dec 27, 2023 at 7:28 AM Ricardo Neri
> <[email protected]> wrote:
> >
> > The kernel gives the HFI hardware a memory region that the latter uses to
> > provide updates to the HFI table. The kernel allocates this memory region
> > at boot. It remains constant throughout runtime time.
> >
> > When resuming from suspend or hibernation, the restore kernel allocates a
> > second memory buffer and reprograms the HFI hardware with the new location
> > as part of a normal boot. The location of the second memory buffer may
> > differ from the one allocated by the image kernel. Subsequently, when the
> > restore kernel transfers control to the image kernel, the second buffer
> > becomes invalid, potentially leading to memory corruption if the hardware
> > writes to it (hardware continues using the buffer from the restore kernel).
> >
> > Add a suspend notifier to disable all HFI instances before jumping to the
> > image kernel and enable them once the image kernel has been restored. Use
> > the memory buffer that the image kernel allocated.
> >
> > For non-boot CPUs, rely on the CPU hotplug callbacks as CPUs are disabled
> > and enabled during suspend and resume, respectively.
> >
> > The CPU hotplug callbacks do not cover the boot CPU. Handle the HFI
> > instance of the boot CPU from the suspend notifier callback.
> >
> > Cc: Chen Yu <[email protected]>
> > Cc: Len Brown <[email protected]>
> > Cc: Srinivas Pandruvada <[email protected]>
> > Cc: Stanislaw Gruszka <[email protected]>
> > Cc: Zhang Rui <[email protected]>
> > Cc: Zhao Liu <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
> > Signed-off-by: Ricardo Neri <[email protected]>
> > ---
> > drivers/thermal/intel/intel_hfi.c | 53 +++++++++++++++++++++++++++++++
> > 1 file changed, 53 insertions(+)
> >
> > diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> > index d2c874f43786..965c245e5e78 100644
> > --- a/drivers/thermal/intel/intel_hfi.c
> > +++ b/drivers/thermal/intel/intel_hfi.c
> > @@ -30,11 +30,13 @@
> > #include <linux/kernel.h>
> > #include <linux/math.h>
> > #include <linux/mutex.h>
> > +#include <linux/notifier.h>
> > #include <linux/percpu-defs.h>
> > #include <linux/printk.h>
> > #include <linux/processor.h>
> > #include <linux/slab.h>
> > #include <linux/spinlock.h>
> > +#include <linux/suspend.h>
> > #include <linux/string.h>
> > #include <linux/topology.h>
> > #include <linux/workqueue.h>
> > @@ -569,11 +571,62 @@ static __init int hfi_parse_features(void)
> > return 0;
> > }
> >
> > +static void hfi_do_pm_enable(void *info)
> > +{
> > + struct hfi_instance *hfi_instance = info;
> > +
> > + hfi_set_hw_table(hfi_instance);
> > + hfi_enable();
>
> The above do RMW, so should locking be used here?
>
> > +}
> > +
> > +static void hfi_do_pm_disable(void *info)
> > +{
> > + hfi_disable();
> > +}
>
> And here?
On single-package systems, HFI enable/disable only happens on the boot CPU,
via either the CPU hotplug callbacks or the suspend notifier. It is
unlikely they will run concurrently, IMO. It also looks as good hygiene to
me to use locking, just in case. I will use the hfi_instance_lock in my
next version.
On multi-package systems, HFI instance of non-boot packages are always
handled via the CPU hotplug callbacks.
>
> > +
> > +static int hfi_pm_notify(struct notifier_block *nb,
> > + unsigned long mode, void *unused)
> > +{
> > + struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
> > + struct hfi_instance *hfi_instance = info->hfi_instance;
> > +
> > + /* HFI may not be in use. */
> > + if (!hfi_instance)
> > + return 0;
> > +
> > + /*
> > + * Only handle the HFI instance of the package of the boot CPU. The
> > + * instances of other packages are handled in the CPU hotplug callbacks.
> > + */
> > + switch (mode) {
> > + case PM_HIBERNATION_PREPARE:
> > + case PM_SUSPEND_PREPARE:
> > + case PM_RESTORE_PREPARE:
> > + return smp_call_function_single(0, hfi_do_pm_disable,
> > + NULL, true);
> > +
> > + case PM_POST_RESTORE:
> > + case PM_POST_HIBERNATION:
> > + case PM_POST_SUSPEND:
> > + return smp_call_function_single(0, hfi_do_pm_enable,
> > + hfi_instance, true);
> > + default:
> > + return -EINVAL;
> > + }
> > +}
> > +
> > +static struct notifier_block hfi_pm_nb = {
> > + .notifier_call = hfi_pm_notify,
> > +};
> > +
> > void __init intel_hfi_init(void)
> > {
> > struct hfi_instance *hfi_instance;
> > int i, j;
> >
> > + if (register_pm_notifier(&hfi_pm_nb))
> > + return;
> > +
> > if (hfi_parse_features())
> > return;
> >
> > --