Resctrl resources operate on subsets of CPUs in the system with the
defining attribute of each subset being an instance of a particular
level of cache. E.g. all CPUs sharing an L3 cache would be part of the
same domain.
In preparation for features that are scoped at the NUMA node level,
change the code from explicit references to "cache_level" to a more
generic scope. At this point the only options for this scope are groups
of CPUs that share an L2 cache or L3 cache.
Provide a more detailed warning message if a domain id cannot be found
when adding a CPU. Just check and silently return if the domain id can't
be found when removing a CPU.
No functional change.
Signed-off-by: Tony Luck <[email protected]>
---
Changes since v5:
1) Set enum values of RESCTRL_L2_CACHE and RESCTRL_L3_CACHE to "2" and
"3 respectively so they can be passed to get_cpu_cacheinfo_id()
(in code paths that check that scope is one of the cache types).
2) Simplified the check on scope in pseudo_lock_region_init() and
rdtgroup_cbm_to_size().
3) Added detailed warning if the domain id for a CPU cannot be
determined when adding a CPU. Simpler check on removal.
---
include/linux/resctrl.h | 9 +++++--
arch/x86/kernel/cpu/resctrl/core.c | 33 ++++++++++++++++++-----
arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 6 ++++-
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 5 +++-
4 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 8334eeacfec5..618735e396cb 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -144,13 +144,18 @@ struct resctrl_membw {
struct rdt_parse_data;
struct resctrl_schema;
+enum resctrl_scope {
+ RESCTRL_L2_CACHE = 2,
+ RESCTRL_L3_CACHE = 3,
+};
+
/**
* struct rdt_resource - attributes of a resctrl resource
* @rid: The index of the resource
* @alloc_capable: Is allocation available on this machine
* @mon_capable: Is monitor feature available on this machine
* @num_rmid: Number of RMIDs available
- * @cache_level: Which cache level defines scope of this resource
+ * @scope: Scope of this resource
* @cache: Cache allocation related data
* @membw: If the component has bandwidth controls, their properties.
* @domains: All domains for this resource
@@ -168,7 +173,7 @@ struct rdt_resource {
bool alloc_capable;
bool mon_capable;
int num_rmid;
- int cache_level;
+ enum resctrl_scope scope;
struct resctrl_cache cache;
struct resctrl_membw membw;
struct list_head domains;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 030d3b409768..3b1837e1fb6b 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -65,7 +65,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_L3,
.name = "L3",
- .cache_level = 3,
+ .scope = RESCTRL_L3_CACHE,
.domains = domain_init(RDT_RESOURCE_L3),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
@@ -79,7 +79,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_L2,
.name = "L2",
- .cache_level = 2,
+ .scope = RESCTRL_L2_CACHE,
.domains = domain_init(RDT_RESOURCE_L2),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
@@ -93,7 +93,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_MBA,
.name = "MB",
- .cache_level = 3,
+ .scope = RESCTRL_L3_CACHE,
.domains = domain_init(RDT_RESOURCE_MBA),
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
@@ -105,7 +105,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_SMBA,
.name = "SMBA",
- .cache_level = 3,
+ .scope = RESCTRL_L3_CACHE,
.domains = domain_init(RDT_RESOURCE_SMBA),
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
@@ -487,6 +487,19 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
return 0;
}
+static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
+{
+ switch (scope) {
+ case RESCTRL_L2_CACHE:
+ case RESCTRL_L3_CACHE:
+ return get_cpu_cacheinfo_id(cpu, scope);
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
/*
* domain_add_cpu - Add a cpu to a resource's domain list.
*
@@ -502,12 +515,17 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
*/
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
- int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+ int id = get_domain_id_from_scope(cpu, r->scope);
struct list_head *add_pos = NULL;
struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;
int err;
+ if (id < 0) {
+ pr_warn_once("Can't find domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->scope, r->name);
+ return;
+ }
d = rdt_find_domain(r, id, &add_pos);
if (IS_ERR(d)) {
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
@@ -552,10 +570,13 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
- int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+ int id = get_domain_id_from_scope(cpu, r->scope);
struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;
+ if (id < 0)
+ return;
+
d = rdt_find_domain(r, id, NULL);
if (IS_ERR_OR_NULL(d)) {
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 8f559eeae08e..8c5f932bc00b 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -292,10 +292,14 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
*/
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
+ int scope = plr->s->res->scope;
struct cpu_cacheinfo *ci;
int ret;
int i;
+ if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE))
+ return -ENODEV;
+
/* Pick the first cpu we find that is associated with the cache. */
plr->cpu = cpumask_first(&plr->d->cpu_mask);
@@ -311,7 +315,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == plr->s->res->cache_level) {
+ if (ci->info_list[i].level == scope) {
plr->line_size = ci->info_list[i].coherency_line_size;
return 0;
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 725344048f85..1cf2b36f5bf8 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1345,10 +1345,13 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
unsigned int size = 0;
int num_b, i;
+ if (WARN_ON_ONCE(r->scope != RESCTRL_L2_CACHE && r->scope != RESCTRL_L3_CACHE))
+ return -EINVAL;
+
num_b = bitmap_weight(&cbm, r->cache.cbm_len);
ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == r->cache_level) {
+ if (ci->info_list[i].level == r->scope) {
size = ci->info_list[i].size / r->cache.cbm_len * num_b;
break;
}
--
2.41.0
Hi Tony,
On Thu, Sep 28, 2023 at 9:14 PM Tony Luck <[email protected]> wrote:
>
> Resctrl resources operate on subsets of CPUs in the system with the
> defining attribute of each subset being an instance of a particular
> level of cache. E.g. all CPUs sharing an L3 cache would be part of the
> same domain.
>
> In preparation for features that are scoped at the NUMA node level,
> change the code from explicit references to "cache_level" to a more
> generic scope. At this point the only options for this scope are groups
> of CPUs that share an L2 cache or L3 cache.
>
> Provide a more detailed warning message if a domain id cannot be found
> when adding a CPU. Just check and silently return if the domain id can't
> be found when removing a CPU.
>
> No functional change.
I see a number of diagnostic checks added below. Are you sure there's
no functional change?
> diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> index 8f559eeae08e..8c5f932bc00b 100644
> --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> @@ -292,10 +292,14 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
> */
> static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
> {
> + int scope = plr->s->res->scope;
> struct cpu_cacheinfo *ci;
> int ret;
> int i;
>
> + if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE))
> + return -ENODEV;
Functional change?
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 725344048f85..1cf2b36f5bf8 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -1345,10 +1345,13 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
> unsigned int size = 0;
> int num_b, i;
>
> + if (WARN_ON_ONCE(r->scope != RESCTRL_L2_CACHE && r->scope != RESCTRL_L3_CACHE))
> + return -EINVAL;
This function returns unsigned int. That's a huge region!
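
For illustration (a quick userspace sketch, not part of the patch, and
assuming EINVAL keeps its usual value of 22): a negative errno squeezed
through an unsigned int return comes back to the caller as a ~4 GiB "size":

	#include <stdio.h>

	#define EINVAL 22	/* assumed to match the kernel's value */

	int main(void)
	{
		unsigned int size = -EINVAL;	/* what a caller of rdtgroup_cbm_to_size() would see */

		printf("%u\n", size);		/* prints 4294967274 */
		return 0;
	}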
-Peter
On Fri, Sep 29, 2023 at 02:09:42PM +0200, Peter Newman wrote:
> Hi Tony,
>
> On Thu, Sep 28, 2023 at 9:14 PM Tony Luck <[email protected]> wrote:
> >
> > Resctrl resources operate on subsets of CPUs in the system with the
> > defining attribute of each subset being an instance of a particular
> > level of cache. E.g. all CPUs sharing an L3 cache would be part of the
> > same domain.
> >
> > In preparation for features that are scoped at the NUMA node level,
> > change the code from explicit references to "cache_level" to a more
> > generic scope. At this point the only options for this scope are groups
> > of CPUs that share an L2 cache or L3 cache.
> >
> > Provide a more detailed warning message if a domain id cannot be found
> > when adding a CPU. Just check and silently return if the domain id can't
> > be found when removing a CPU.
> >
> > No functional change.
>
> I see a number of diagnostic checks added below. Are you sure there's
> no functional change?
Those checks should be for "can't happen" cases where some "scope" that
isn't attached to a cache_level somehow made its way down to a routine
that is only expecting cache-scoped resources.
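
For illustration only, a minimal sketch of the kind of "can't happen"
case those guards cover, assuming a future node-scoped enum value
(RESCTRL_NODE here is hypothetical and not part of this patch):

	enum resctrl_scope {
		RESCTRL_L2_CACHE = 2,
		RESCTRL_L3_CACHE = 3,
		RESCTRL_NODE,		/* hypothetical NUMA-node scope */
	};

	/*
	 * A resource with RESCTRL_NODE scope that reached a cache-only
	 * path such as pseudo_lock_region_init() would trip the
	 * WARN_ON_ONCE() instead of passing a bogus "level" down to
	 * get_cpu_cacheinfo_id().
	 */
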
But I'll remove the "No functional change" line from the commit message.
>
>
> > diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> > index 8f559eeae08e..8c5f932bc00b 100644
> > --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> > +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> > @@ -292,10 +292,14 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
> > */
> > static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
> > {
> > + int scope = plr->s->res->scope;
> > struct cpu_cacheinfo *ci;
> > int ret;
> > int i;
> >
> > + if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE))
> > + return -ENODEV;
>
> Functional change?
>
>
> > diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> > index 725344048f85..1cf2b36f5bf8 100644
> > --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> > +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> > @@ -1345,10 +1345,13 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
> > unsigned int size = 0;
> > int num_b, i;
> >
> > + if (WARN_ON_ONCE(r->scope != RESCTRL_L2_CACHE && r->scope != RESCTRL_L3_CACHE))
> > + return -EINVAL;
>
> This function returns unsigned int. That's a huge region!
I wondered for a bit what value to return for this "can't happen" case
and initially went with an error code. But the function can already
fail (in ways that won't happen), and in that case the return is "0".
I'll update to return 0 here too.
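
I.e., just a sketch of what I have in mind for the next version
(relative to the rdtgroup_cbm_to_size() hunk above):

	if (WARN_ON_ONCE(r->scope != RESCTRL_L2_CACHE && r->scope != RESCTRL_L3_CACHE))
		return 0;
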
> -Peter
Thanks for the review.
-Tony