When a CPU is taken offline the resctrl filesystem code needs to check
if it was the CPU nominated to perform the periodic overflow and limbo
work. If so, another CPU needs to be chosen to do this work.
This is currently done in core.c, mixed in with the code that removes
the CPU from the domain's mask, and potentially free()s the domain.
Move the migration of the overflow and limbo helpers into the filesystem
code, into resctrl_offline_cpu(). As resctrl_offline_cpu() runs before
the architecture code has removed the CPU from the domain mask, the
helpers it calls need to be told which CPU is being removed, to avoid
picking it as the new CPU. This uses the exclude_cpu feature previously
added.
Signed-off-by: James Morse <[email protected]>
Tested-by: Shaopeng Tan <[email protected]>
Tested-by: Peter Newman <[email protected]>
Tested-by: Babu Moger <[email protected]>
Tested-by: Carl Worth <[email protected]> # arm64
Reviewed-by: Shaopeng Tan <[email protected]>
Reviewed-by: Reinette Chatre <[email protected]>
Reviewed-by: Babu Moger <[email protected]>
---
Changes since v5:
 * Reordered the variable declarations into reverse fir tree order.
* Added mon-capable check for cpu offline.
---
arch/x86/kernel/cpu/resctrl/core.c | 16 ----------------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 18 ++++++++++++++++++
2 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 4aedefa22f61..b03a6c658ae5 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -580,22 +580,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
return;
}
-
- if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
- if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
- cancel_delayed_work(&d->mbm_over);
- /*
- * temporary: exclude_cpu=-1 as this CPU has already
- * been removed by cpumask_clear_cpu()d
- */
- mbm_setup_overflow_handler(d, 0, RESCTRL_PICK_ANY_CPU);
- }
- if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
- has_busy_rmid(d)) {
- cancel_delayed_work(&d->cqm_limbo);
- cqm_setup_limbo_handler(d, 0, RESCTRL_PICK_ANY_CPU);
- }
- }
}
static void clear_closid_rmid(int cpu)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 5bd3d8fb3f67..777e9f680332 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -4029,7 +4029,9 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
void resctrl_offline_cpu(unsigned int cpu)
{
+ struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
struct rdtgroup *rdtgrp;
+ struct rdt_domain *d;
lockdep_assert_held(&rdtgroup_mutex);
@@ -4039,6 +4041,22 @@ void resctrl_offline_cpu(unsigned int cpu)
break;
}
}
+
+ if (!l3->mon_capable)
+ return;
+
+ d = get_domain_from_cpu(cpu, l3);
+ if (d) {
+ if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+ cancel_delayed_work(&d->mbm_over);
+ mbm_setup_overflow_handler(d, 0, cpu);
+ }
+ if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+ has_busy_rmid(d)) {
+ cancel_delayed_work(&d->cqm_limbo);
+ cqm_setup_limbo_handler(d, 0, cpu);
+ }
+ }
}
/*
--
2.39.2
The following commit has been merged into the x86/cache branch of tip:
Commit-ID: eeff1d4f118bdf0870227fee5a770f03056e3adc
Gitweb: https://git.kernel.org/tip/eeff1d4f118bdf0870227fee5a770f03056e3adc
Author: James Morse <[email protected]>
AuthorDate: Tue, 13 Feb 2024 18:44:37
Committer: Borislav Petkov (AMD) <[email protected]>
CommitterDate: Fri, 16 Feb 2024 19:18:33 +01:00
x86/resctrl: Move domain helper migration into resctrl_offline_cpu()
When a CPU is taken offline the resctrl filesystem code needs to check if it
was the CPU nominated to perform the periodic overflow and limbo work. If so,
another CPU needs to be chosen to do this work.
This is currently done in core.c, mixed in with the code that removes the CPU
from the domain's mask, and potentially free()s the domain.
Move the migration of the overflow and limbo helpers into the filesystem code,
into resctrl_offline_cpu(). As resctrl_offline_cpu() runs before the
architecture code has removed the CPU from the domain mask, the helpers it
calls need to be told which CPU is being removed, to avoid picking it as the
new CPU. This uses the exclude_cpu feature previously added.
Signed-off-by: James Morse <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Reviewed-by: Shaopeng Tan <[email protected]>
Reviewed-by: Reinette Chatre <[email protected]>
Reviewed-by: Babu Moger <[email protected]>
Tested-by: Shaopeng Tan <[email protected]>
Tested-by: Peter Newman <[email protected]>
Tested-by: Babu Moger <[email protected]>
Tested-by: Carl Worth <[email protected]> # arm64
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
---
arch/x86/kernel/cpu/resctrl/core.c | 16 ----------------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 18 ++++++++++++++++++
2 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 4aedefa..b03a6c6 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -580,22 +580,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
return;
}
-
- if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
- if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
- cancel_delayed_work(&d->mbm_over);
- /*
- * temporary: exclude_cpu=-1 as this CPU has already
- * been removed by cpumask_clear_cpu()d
- */
- mbm_setup_overflow_handler(d, 0, RESCTRL_PICK_ANY_CPU);
- }
- if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
- has_busy_rmid(d)) {
- cancel_delayed_work(&d->cqm_limbo);
- cqm_setup_limbo_handler(d, 0, RESCTRL_PICK_ANY_CPU);
- }
- }
}
static void clear_closid_rmid(int cpu)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 5bd3d8f..777e9f6 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -4029,7 +4029,9 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
void resctrl_offline_cpu(unsigned int cpu)
{
+ struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
struct rdtgroup *rdtgrp;
+ struct rdt_domain *d;
lockdep_assert_held(&rdtgroup_mutex);
@@ -4039,6 +4041,22 @@ void resctrl_offline_cpu(unsigned int cpu)
break;
}
}
+
+ if (!l3->mon_capable)
+ return;
+
+ d = get_domain_from_cpu(cpu, l3);
+ if (d) {
+ if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+ cancel_delayed_work(&d->mbm_over);
+ mbm_setup_overflow_handler(d, 0, cpu);
+ }
+ if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+ has_busy_rmid(d)) {
+ cancel_delayed_work(&d->cqm_limbo);
+ cqm_setup_limbo_handler(d, 0, cpu);
+ }
+ }
}
/*