The teardown of CPUHP_AP_BASE_CACHEINFO_ONLINE now only invokes
free_cache_attributes() to clear the shared_cpu_map of the cacheinfo
list. Clearing cpu_topology[].llc_sibling, by contrast, happens much
later, in the teardown code of the hotplug STARTING section.

To avoid generating incorrect LLC sibling masks, move its clearing
right after free_cache_attributes().

Link: https://lore.kernel.org/linux-kernel/[email protected]/
Fixes: 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection in the CPU hotplug path")
Signed-off-by: Song Shuai <[email protected]>
---
changes from V1:
- fix implicit declaration of clear_llc_topology
---
drivers/base/arch_topology.c | 16 ++++++++++++++--
drivers/base/cacheinfo.c | 2 ++
include/linux/arch_topology.h | 3 +++
3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index b1c1dd38ab01..8681654d6c07 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -769,6 +769,20 @@ void update_siblings_masks(unsigned int cpuid)
 	}
 }
 
+void clear_llc_topology(unsigned int cpu)
+{
+	int sib;
+
+	for_each_cpu(sib, topology_llc_cpumask(cpu)) {
+		if (sib == cpu)
+			continue;
+		if (last_level_cache_is_shared(cpu, sib)) {
+			cpumask_clear_cpu(cpu, topology_llc_cpumask(sib));
+			cpumask_clear_cpu(sib, topology_llc_cpumask(cpu));
+		}
+	}
+}
+
 static void clear_cpu_topology(int cpu)
 {
 	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
@@ -811,8 +825,6 @@ void remove_cpu_topology(unsigned int cpu)
 		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
 	for_each_cpu(sibling, topology_cluster_cpumask(cpu))
 		cpumask_clear_cpu(cpu, topology_cluster_cpumask(sibling));
-	for_each_cpu(sibling, topology_llc_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
 
 	clear_cpu_topology(cpu);
 }
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index f6573c335f4c..1c276b30fd5a 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/sysfs.h>
+#include <linux/arch_topology.h>
 
 /* pointer to per cpu cacheinfo */
 static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cacheinfo);
@@ -814,6 +815,7 @@ static int cacheinfo_cpu_pre_down(unsigned int cpu)
 	cpu_cache_sysfs_exit(cpu);
 
 	free_cache_attributes(cpu);
+	clear_llc_topology(cpu);
 	return 0;
 }
 
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index a07b510e7dc5..569e05607934 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -89,9 +89,12 @@ void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 const struct cpumask *cpu_clustergroup_mask(int cpu);
 void update_siblings_masks(unsigned int cpu);
+void clear_llc_topology(unsigned int cpu);
 void remove_cpu_topology(unsigned int cpuid);
 void reset_cpu_topology(void);
 int parse_acpi_topology(void);
+#else
+static inline void clear_llc_topology(unsigned int cpu) { }
 #endif
 
 #endif /* _LINUX_ARCH_TOPOLOGY_H_ */
--
2.20.1
On Tue, Mar 14, 2023 at 03:53:45PM +0800, Song Shuai wrote:
> The teardown of CPUHP_AP_BASE_CACHEINFO_ONLINE now only invokes
> free_cache_attributes() to clear the shared_cpu_map of the cacheinfo
> list. Clearing cpu_topology[].llc_sibling, by contrast, happens much
> later, in the teardown code of the hotplug STARTING section.
>
> To avoid generating incorrect LLC sibling masks, move its clearing
> right after free_cache_attributes().
>
> Link: https://lore.kernel.org/linux-kernel/[email protected]/
btw, I think you've added the wrong link here - this seems to be a link
to your previous submission. Was it meant to link to something else?
Cheers,
Conor.
On Tue, Mar 14, 2023 at 03:53:45PM +0800, Song Shuai wrote:
> The teardown of CPUHP_AP_BASE_CACHEINFO_ONLINE now only invokes
> free_cache_attributes() to clear the shared_cpu_map of the cacheinfo
> list. Clearing cpu_topology[].llc_sibling, by contrast, happens much
> later, in the teardown code of the hotplug STARTING section.
>
> To avoid generating incorrect LLC sibling masks, move its clearing
> right after free_cache_attributes().
>
Technically, in terms of flow/timing, this is correct. However, I would
like to know: are you seeing any issues without this change?

Technically, if a cpu is hotplugged out, the cacheinfo is reset first
and then the topology. Until the cpu is removed, the LLC info in the
topology is still valid. I am also not sure that anything gets scheduled
and uses this LLC info once the teardown of CPUHP_AP_BASE_CACHEINFO_ONLINE
has started.
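For reference, teardown walks the hotplug states in descending order, so
the relative position of the two steps can be read straight out of sysfs.
A rough sketch (state ids vary by kernel config, and I am assuming the
state names as they appear on my builds):

```
# "base/cacheinfo:online" has a higher id than "cpu:teardown", so on the way
# down the cacheinfo teardown runs first; __cpu_disable(), and with it
# remove_cpu_topology() on arm64/riscv, only runs later, at "cpu:teardown".
grep -E 'cacheinfo|cpu:teardown' /sys/devices/system/cpu/hotplug/states
```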
So I am trying to understand if we really need this change. Please let me
know if I am missing anything here.
--
Regards,
Sudeep
Sudeep Holla <[email protected]> wrote on Thu, Mar 16, 2023 at 09:29:
>
> On Tue, Mar 14, 2023 at 03:53:45PM +0800, Song Shuai wrote:
> > [...]
>
> Technically, in terms of flow/timing, this is correct. However, I would
> like to know: are you seeing any issues without this change?
>
> Technically, if a cpu is hotplugged out, the cacheinfo is reset first
> and then the topology. Until the cpu is removed, the LLC info in the
> topology is still valid. I am also not sure that anything gets scheduled
> and uses this LLC info once the teardown of CPUHP_AP_BASE_CACHEINFO_ONLINE
> has started.
There is no visible issue in the entire offline process (e.g. echo 0 > online).

However, when I hotplugged the CPU out to the state just before
CACHEINFO_ONLINE on a kernel with CONFIG_CPU_HOTPLUG_STATE_CONTROL
enabled, the shared_cpu_map had been updated but llc_sibling had not,
which resulted in a trivial issue:

At the end of the stepped hotplug-out, cpuset_hotplug_work is flushed
and the sched domains are rebuilt, where cpu_coregroup_mask in
sched_domain_topology got an incorrect llc_sibling; the result of the
rebuild was still correct due to the protection of cpu_active_mask.

Stepped hotplugging may not be used in production environments, but the
issue exists.

Even in the entire offline process, it is possible for a future user to
get a wrong llc_sibling when accessing it concurrently with, or right
after, the teardown of CACHEINFO_ONLINE.
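For completeness, this is how I looked at the rebuilt domains. A sketch,
assuming CONFIG_SCHED_DEBUG and debugfs mounted at /sys/kernel/debug:

```
# After stopping cpu2 just below base/cacheinfo:online, dump the rebuilt
# sched domains; cpu2 has already been deactivated, so the rebuilt domains
# come out correct despite the not-yet-cleared llc_sibling.
grep . /sys/kernel/debug/sched/domains/cpu*/domain*/name
```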
--
Thanks,
Song
On Thu, Mar 16, 2023 at 10:30:52AM +0000, Song Shuai wrote:
> [...]
>
> There is no visible issue in the entire offline process (e.g. echo 0 > online).
>
> However, when I hotplugged the CPU out to the state just before
> CACHEINFO_ONLINE on a kernel with CONFIG_CPU_HOTPLUG_STATE_CONTROL
> enabled, the shared_cpu_map had been updated but llc_sibling had not,
> which resulted in a trivial issue:
>
> At the end of the stepped hotplug-out, cpuset_hotplug_work is flushed
> and the sched domains are rebuilt, where cpu_coregroup_mask in
> sched_domain_topology got an incorrect llc_sibling; the result of the
> rebuild was still correct due to the protection of cpu_active_mask.
>
Wait, I would like to disagree there. While I agree there is an
inconsistency between the cacheinfo shared_cpu_map and llc_sibling in the
teardown path, the latter is still correct, and terming it an "incorrect"
llc_sibling is wrong. The cpu is not yet completely offline, so
llc_sibling still represents all the cpus it shares the LLC with. When
the cpu is offlined, the cpu_topology is removed anyway, so I don't see
this as an issue at all. If you follow
__cpu_disable()->remove_cpu_topology(), it gets updated there. If the
sched_domain_topology is not rebuilt after that, then we may have other
issues. What am I missing?

I am not bothered by a cacheinfo shared_cpu_map and cpu_topology
llc_sibling mismatch for a short window during the teardown: technically,
until the cpu is torn down, it is sharing the LLC with its llc_sibling,
so it is definitely not wrong to have it in there.
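If you want to see the actual ordering on a live system, function tracing
shows it. A sketch, assuming ftrace is enabled and remove_cpu_topology is
visible in available_filter_functions on your build:

```
cd /sys/kernel/debug/tracing
echo remove_cpu_topology > set_ftrace_filter
echo function > current_tracer
echo 0 > /sys/devices/system/cpu/cpu2/online
# remove_cpu_topology() fires from __cpu_disable(), i.e. at the
# "cpu:teardown" step, well after the cacheinfo teardown has run.
cat trace
echo nop > current_tracer
```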
> Stepped hotplugging may not be used in production environments, but the
> issue exists.
What issue? If it is just the inconsistency, then I am fine to ignore it.
That is just artificial and momentary, and it impacts nothing.
> Even in the entire offline process, it is possible for a future user to
> get a wrong llc_sibling when accessing it concurrently with, or right
> after, the teardown of CACHEINFO_ONLINE.
As I said, even if someone accesses it, it is not wrong information.
--
Regards,
Sudeep
Sudeep Holla <[email protected]> wrote on Thu, Mar 16, 2023 at 14:51:
> [...]
>
> > Stepped hotplugging may not be used in production environments, but the
> > issue exists.
>
> What issue? If it is just the inconsistency, then I am fine to ignore it.
> That is just artificial and momentary, and it impacts nothing.
My original point was to clear llc_sibling right after the clearing of
shared_cpu_map, like what you did in 3fcbf1c77d08.

The ~~issue~~ I described above was found when I manually tested the
'base/cacheinfo:online' hpstate, which can be triggered with the
following commands:
```
hpid=$(sed -n '/cacheinfo/s/:.*//p' /sys/devices/system/cpu/hotplug/states)
echo $((hpid-1)) > /sys/devices/system/cpu/cpu2/hotplug/target
```
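To check where the cpu actually stopped, and to bring it back afterwards
(paths as on my setup):

```
# the cpu should now sit one state below base/cacheinfo:online
cat /sys/devices/system/cpu/cpu2/hotplug/state
# bring cpu2 fully back online when done
echo 1 > /sys/devices/system/cpu/cpu2/online
```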
Anyway, the short inconsistency window you explained seems acceptable to me.
--
Thanks,
Song
On Mon, Mar 20, 2023 at 06:20:34AM +0000, Song Shuai wrote:
>
> My original point was to clear llc_sibling right after the clearing of
> shared_cpu_map, like what you did in 3fcbf1c77d08.
>
Yes, I understood that. There were other issues that were fixed later,
hence the current state of the code.
> The ~~issue~~ I described above was found when I manually tested the
> 'base/cacheinfo:online' hpstate, which can be triggered with the
> following commands:
>
> ```
> hpid=$(sed -n '/cacheinfo/s/:.*//p' /sys/devices/system/cpu/hotplug/states)
> echo $((hpid-1)) > /sys/devices/system/cpu/cpu2/hotplug/target
> ```
>
Thanks for the detailed steps. I had guessed something very similar.
> Anyway, the short inconsistency window you explained seems acceptable to me.
>
Yes, just an inconsistency; technically the CPU topology, including the
LLC information, is still correct. I don't see the point in clearing it
at cacheinfo teardown only to reset all of the topology a couple of
hotplug states later as the CPU is removed. That seems redundant; we can
add it if we absolutely need it (i.e. if there are new users of that
information who need it aligned with cacheinfo, which I think is highly
unlikely).
--
Regards,
Sudeep