2023-07-28 17:16:48

by James Morse

[permalink] [raw]
Subject: [PATCH v5 23/24] x86/resctrl: Move domain helper migration into resctrl_offline_cpu()

When a CPU is taken offline the resctrl filesystem code needs to check
if it was the CPU nominated to perform the periodic overflow and limbo
work. If so, another CPU needs to be chosen to do this work.

This is currently done in core.c, mixed in with the code that removes
the CPU from the domain's mask, and potentially free()s the domain.

Move the migration of the overflow and limbo helpers into the filesystem
code, into resctrl_offline_cpu(). As resctrl_offline_cpu() runs before
the architecture code has removed the CPU from the domain mask, the
callers need to be told which CPU is being removed, to avoid picking
it as the new CPU. This uses the exclude_cpu feature previously
added.

Signed-off-by: James Morse <[email protected]>
---
arch/x86/kernel/cpu/resctrl/core.c | 16 ----------------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 15 +++++++++++++++
2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 6eb9408a942a..edc0dd123317 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -578,22 +578,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)

return;
}
-
- if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
- if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
- cancel_delayed_work(&d->mbm_over);
- /*
- * temporary: exclude_cpu=-1 as this CPU has already
- * been removed by cpumask_clear_cpu()d
- */
- mbm_setup_overflow_handler(d, 0, RESCTRL_PICK_ANY_CPU);
- }
- if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
- has_busy_rmid(d)) {
- cancel_delayed_work(&d->cqm_limbo);
- cqm_setup_limbo_handler(d, 0, RESCTRL_PICK_ANY_CPU);
- }
- }
}

static void clear_closid_rmid(int cpu)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 12a628b5d476..a256a96df487 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3892,7 +3892,9 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)

void resctrl_offline_cpu(unsigned int cpu)
{
+ struct rdt_domain *d;
struct rdtgroup *rdtgrp;
+ struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

lockdep_assert_held(&rdtgroup_mutex);

@@ -3902,6 +3904,19 @@ void resctrl_offline_cpu(unsigned int cpu)
break;
}
}
+
+ d = get_domain_from_cpu(cpu, l3);
+ if (d) {
+ if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+ cancel_delayed_work(&d->mbm_over);
+ mbm_setup_overflow_handler(d, 0, cpu);
+ }
+ if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+ has_busy_rmid(d)) {
+ cancel_delayed_work(&d->cqm_limbo);
+ cqm_setup_limbo_handler(d, 0, cpu);
+ }
+ }
}

/*
--
2.39.2



2023-08-09 23:25:24

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH v5 23/24] x86/resctrl: Move domain helper migration into resctrl_offline_cpu()

Hi James,

On 7/28/2023 9:42 AM, James Morse wrote:
> When a CPU is taken offline the resctrl filesystem code needs to check
> if it was the CPU nominated to perform the periodic overflow and limbo
> work. If so, another CPU needs to be chosen to do this work.
>
> This is currently done in core.c, mixed in with the code that removes
> the CPU from the domain's mask, and potentially free()s the domain.
>
> Move the migration of the overflow and limbo helpers into the filesystem
> code, into resctrl_offline_cpu(). As resctrl_offline_cpu() runs before
> the architecture code has removed the CPU from the domain mask, the
> callers need to be told which CPU is being removed, to avoid picking
> it as the new CPU. This uses the exclude_cpu feature previously
> added.
>
> Signed-off-by: James Morse <[email protected]>
> ---
> arch/x86/kernel/cpu/resctrl/core.c | 16 ----------------
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 15 +++++++++++++++
> 2 files changed, 15 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 6eb9408a942a..edc0dd123317 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -578,22 +578,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
>
> return;
> }
> -
> - if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
> - if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
> - cancel_delayed_work(&d->mbm_over);
> - /*
> - * temporary: exclude_cpu=-1 as this CPU has already
> - * been removed by cpumask_clear_cpu()d
> - */
> - mbm_setup_overflow_handler(d, 0, RESCTRL_PICK_ANY_CPU);
> - }
> - if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
> - has_busy_rmid(d)) {
> - cancel_delayed_work(&d->cqm_limbo);
> - cqm_setup_limbo_handler(d, 0, RESCTRL_PICK_ANY_CPU);
> - }
> - }
> }
>
> static void clear_closid_rmid(int cpu)
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 12a628b5d476..a256a96df487 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -3892,7 +3892,9 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
>
> void resctrl_offline_cpu(unsigned int cpu)
> {
> + struct rdt_domain *d;
> struct rdtgroup *rdtgrp;
> + struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;

Please always keep reverse fir order.

>
> lockdep_assert_held(&rdtgroup_mutex);
>
> @@ -3902,6 +3904,19 @@ void resctrl_offline_cpu(unsigned int cpu)
> break;
> }
> }
> +

Can there be a l3->mon_capable check here to make things clear?

> + d = get_domain_from_cpu(cpu, l3);
> + if (d) {
> + if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
> + cancel_delayed_work(&d->mbm_over);
> + mbm_setup_overflow_handler(d, 0, cpu);
> + }
> + if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
> + has_busy_rmid(d)) {
> + cancel_delayed_work(&d->cqm_limbo);
> + cqm_setup_limbo_handler(d, 0, cpu);
> + }
> + }
> }
>
> /*

Reinette