2024-02-28 19:37:24

by Luck, Tony

Subject: Cover-cover letter for two resctrl patch sets

Hook these two series together in threaded mail clients,
since the SNC series is based on top of the two-patch
cleanup.

-Tony


2024-02-28 19:37:31

by Luck, Tony

Subject: [PATCH v4 0/2] x86/resctrl: Pass domain to target CPU

When a function is called via IPI, assertions in the called code
cannot check that the right locks are held: the locks were acquired
by the CPU that sent the IPI, so they do not appear to be held on
the target CPU.

Restructure some code to avoid the need for the check.
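
As an illustration, a minimal sketch of the idea (simplified, not the
exact code from these patches): the sender fills in struct msr_param,
including the domain pointer, while holding the required locks, so the
IPI callback no longer needs to look up the domain (and trip lock
assertions) on the target CPU:

    /* Sender: runs with the required locks held. */
    struct msr_param m = {
        .res  = r,        /* resource whose MSRs are updated */
        .dom  = d,        /* domain, looked up under the locks */
        .low  = idx,      /* first config index to write */
        .high = idx + 1,  /* one past the last index */
    };
    smp_call_function_single(cpu, rdt_ctrl_update, &m, 1);

    /* Target CPU: everything needed arrives in *arg. */
    void rdt_ctrl_update(void *arg)
    {
        struct msr_param *m = arg;

        resctrl_to_arch_res(m->res)->msr_update(m);
    }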

Patch 1 has the actual fix

Patch 2 is just some code cleanups

Changes since V3: https://lore.kernel.org/all/[email protected]/

Reinette:
1) Improve commit messages in both patches by changing to
imperative mood.

2) Move initialization of "m.res" in domain_setup_ctrlval() from
patch 1 to patch 2.

Tony Luck (2):
x86/resctrl: Pass domain to target CPU
x86/resctrl: Simplify call convention for MSR update functions

arch/x86/kernel/cpu/resctrl/internal.h | 5 ++-
arch/x86/kernel/cpu/resctrl/core.c | 55 +++++++++--------------
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 42 +++++------------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 14 ++----
4 files changed, 38 insertions(+), 78 deletions(-)


base-commit: c0d848fcb09d80a5f48b99f85e448185125ef59f
--
2.43.0


2024-02-28 19:37:51

by Luck, Tony

Subject: [PATCH v4 2/2] x86/resctrl: Simplify call convention for MSR update functions

The per-resource MSR update functions cat_wrmsr(), mba_wrmsr_intel(),
and mba_wrmsr_amd() all take three arguments:

(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)

struct msr_param contains pointers to both struct rdt_resource and struct
rdt_domain, thus only struct msr_param is necessary.

Pass struct msr_param as a single parameter. Clean up formatting and
fix some fir tree declaration ordering.

No functional change.
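
For reference, a simplified view of struct msr_param (field names as
used throughout these patches; exact types and layout abbreviated)
shows why it is sufficient on its own:

    struct msr_param {
        struct rdt_resource *res;  /* resource whose MSRs are written */
        struct rdt_domain *dom;    /* domain instance to update */
        u32 low;                   /* first index to write */
        u32 high;                  /* one past the last index */
    };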

Suggested-by: Reinette Chatre <[email protected]>
Signed-off-by: Tony Luck <[email protected]>
---
arch/x86/kernel/cpu/resctrl/internal.h | 3 +-
arch/x86/kernel/cpu/resctrl/core.c | 40 +++++++++--------------
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 2 +-
3 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index bc999471f072..8f40fb35db78 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -444,8 +444,7 @@ struct rdt_hw_resource {
struct rdt_resource r_resctrl;
u32 num_closid;
unsigned int msr_base;
- void (*msr_update) (struct rdt_domain *d, struct msr_param *m,
- struct rdt_resource *r);
+ void (*msr_update)(struct msr_param *m);
unsigned int mon_scale;
unsigned int mbm_width;
unsigned int mbm_cfg_mask;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index acf52aa185e0..7751eea19fd2 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -56,14 +56,9 @@ int max_name_width, max_data_width;
*/
bool rdt_alloc_capable;

-static void
-mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
- struct rdt_resource *r);
-static void
-cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
-static void
-mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
- struct rdt_resource *r);
+static void mba_wrmsr_intel(struct msr_param *m);
+static void cat_wrmsr(struct msr_param *m);
+static void mba_wrmsr_amd(struct msr_param *m);

#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)

@@ -309,12 +304,11 @@ static void rdt_get_cdp_l2_config(void)
rdt_get_cdp_config(RDT_RESOURCE_L2);
}

-static void
-mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+static void mba_wrmsr_amd(struct msr_param *m)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
- struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);

for (i = m->low; i < m->high; i++)
wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
@@ -334,25 +328,22 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
return r->default_ctrl;
}

-static void
-mba_wrmsr_intel(struct rdt_domain *d, struct msr_param *m,
- struct rdt_resource *r)
+static void mba_wrmsr_intel(struct msr_param *m)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
- struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);

/* Write the delay values for mba. */
for (i = m->low; i < m->high; i++)
- wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], r));
+ wrmsrl(hw_res->msr_base + i, delay_bw_map(hw_dom->ctrl_val[i], m->res));
}

-static void
-cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+static void cat_wrmsr(struct msr_param *m)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
+ struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
- struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);

for (i = m->low; i < m->high; i++)
wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
@@ -384,7 +375,7 @@ void rdt_ctrl_update(void *arg)
struct msr_param *m = arg;

hw_res = resctrl_to_arch_res(m->res);
- hw_res->msr_update(m->dom, m, m->res);
+ hw_res->msr_update(m);
}

/*
@@ -457,10 +448,11 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
hw_dom->ctrl_val = dc;
setup_default_ctrlval(r, dc);

+ m.res = r;
m.dom = d;
m.low = 0;
m.high = hw_res->num_closid;
- hw_res->msr_update(d, &m, r);
+ hw_res->msr_update(&m);
return 0;
}

diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index a3a0fd80daa8..7471f6b747b6 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -289,7 +289,7 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
msr_param.dom = d;
msr_param.low = idx;
msr_param.high = idx + 1;
- hw_res->msr_update(d, &msr_param, r);
+ hw_res->msr_update(&msr_param);

return 0;
}
--
2.43.0


2024-02-28 19:37:56

by Luck, Tony

Subject: [PATCH v15 0/8] Add support for Sub-NUMA cluster (SNC) systems

The Sub-NUMA cluster feature on some Intel processors partitions the CPUs
that share an L3 cache into two or more sets. This plays havoc with the
Resource Director Technology (RDT) monitoring features. Prior to this
patch series, Intel advised that SNC and RDT are incompatible.

Some of these CPUs support an MSR that can partition the RMID counters in
the same way, which allows the monitoring features to be used, with the
caveat that users must be aware that Linux may migrate tasks more
frequently between SNC nodes than between "regular" NUMA nodes, so reading
counters from all SNC nodes may be needed to get a complete picture of
activity for tasks.

Cache and memory bandwidth allocation features continue to operate at
the scope of the L3 cache.

Signed-off-by: Tony Luck <[email protected]>

---
Changes since v14: https://lore.kernel.org/all/[email protected]/

1) Rebase to TIP x86/cache + my 2-patch cleanup

2) Dropped all Reviewed/Tested tags (enough changed in TIP that this
needs looking at, and testing, again).

3) Added ATOM_CRESTMONT_X (Sierra Forest) to list of SNC supporting CPUs.

4) Added a console "INFO" message when SNC detected:
pr_info("Sub-NUMA cluster detected with %d nodes per L3 cache\n", ret);

Note that my alternate approach posted as v15-RFC
https://lore.kernel.org/all/[email protected]/
has been abandoned. I'd still like to explore splitting the L3
rdt_resource into separate control/monitor elements, but that
can wait for another series of patches.

Tony Luck (8):
x86/resctrl: Prepare for new domain scope
x86/resctrl: Prepare to split rdt_domain structure
x86/resctrl: Prepare for different scope for control/monitor
operations
x86/resctrl: Split the rdt_domain and rdt_hw_domain structures
x86/resctrl: Add node-scope to the options for feature scope
x86/resctrl: Introduce snc_nodes_per_l3_cache
x86/resctrl: Sub NUMA Cluster detection and enable
x86/resctrl: Update documentation with Sub-NUMA cluster changes

Documentation/arch/x86/resctrl.rst | 25 +-
include/linux/resctrl.h | 85 +++--
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/kernel/cpu/resctrl/internal.h | 67 ++--
arch/x86/kernel/cpu/resctrl/core.c | 428 ++++++++++++++++++----
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 56 +--
arch/x86/kernel/cpu/resctrl/monitor.c | 70 ++--
arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 26 +-
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 156 ++++----
9 files changed, 642 insertions(+), 272 deletions(-)

--
2.43.0


2024-02-28 19:38:25

by Luck, Tony

Subject: [PATCH v15 1/8] x86/resctrl: Prepare for new domain scope

Resctrl resources operate on subsets of CPUs in the system with the
defining attribute of each subset being an instance of a particular
level of cache. E.g. all CPUs sharing an L3 cache would be part of the
same domain.

In preparation for features that are scoped at the NUMA node level,
change the code from explicit references to "cache_level" to a more
generic scope. At this point the only options for this scope are groups
of CPUs that share an L2 cache or L3 cache.

Clean up the error handling when looking up domains. Report invalid ids
before calling rdt_find_domain() in preparation for better messages when
the scope can be something other than a cache scope. This means that
rdt_find_domain() will never return an error, so remove the error checks
from its call sites.
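
Note that the enum values (visible in the diff below) are chosen to
match cacheinfo levels, which is what allows a cache scope to be passed
straight through to get_cpu_cacheinfo_id():

    enum resctrl_scope {
        RESCTRL_L2_CACHE = 2,  /* matches cache level 2 */
        RESCTRL_L3_CACHE = 3,  /* matches cache level 3 */
    };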

Signed-off-by: Tony Luck <[email protected]>
---
include/linux/resctrl.h | 9 +++--
arch/x86/kernel/cpu/resctrl/core.c | 44 +++++++++++++++++------
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 2 +-
arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 6 +++-
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 5 ++-
5 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index a365f67131ec..ed693bfe474d 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -150,13 +150,18 @@ struct resctrl_membw {
struct rdt_parse_data;
struct resctrl_schema;

+enum resctrl_scope {
+ RESCTRL_L2_CACHE = 2,
+ RESCTRL_L3_CACHE = 3,
+};
+
/**
* struct rdt_resource - attributes of a resctrl resource
* @rid: The index of the resource
* @alloc_capable: Is allocation available on this machine
* @mon_capable: Is monitor feature available on this machine
* @num_rmid: Number of RMIDs available
- * @cache_level: Which cache level defines scope of this resource
+ * @scope: Scope of this resource
* @cache: Cache allocation related data
* @membw: If the component has bandwidth controls, their properties.
* @domains: RCU list of all domains for this resource
@@ -174,7 +179,7 @@ struct rdt_resource {
bool alloc_capable;
bool mon_capable;
int num_rmid;
- int cache_level;
+ enum resctrl_scope scope;
struct resctrl_cache cache;
struct resctrl_membw membw;
struct list_head domains;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 7751eea19fd2..13b5461c3cb0 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -68,7 +68,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_L3,
.name = "L3",
- .cache_level = 3,
+ .scope = RESCTRL_L3_CACHE,
.domains = domain_init(RDT_RESOURCE_L3),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
@@ -82,7 +82,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_L2,
.name = "L2",
- .cache_level = 2,
+ .scope = RESCTRL_L2_CACHE,
.domains = domain_init(RDT_RESOURCE_L2),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
@@ -96,7 +96,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_MBA,
.name = "MB",
- .cache_level = 3,
+ .scope = RESCTRL_L3_CACHE,
.domains = domain_init(RDT_RESOURCE_MBA),
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
@@ -108,7 +108,7 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_SMBA,
.name = "SMBA",
- .cache_level = 3,
+ .scope = RESCTRL_L3_CACHE,
.domains = domain_init(RDT_RESOURCE_SMBA),
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
@@ -392,9 +392,6 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
struct rdt_domain *d;
struct list_head *l;

- if (id < 0)
- return ERR_PTR(-ENODEV);
-
list_for_each(l, &r->domains) {
d = list_entry(l, struct rdt_domain, list);
/* When id is found, return its domain. */
@@ -484,6 +481,19 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
return 0;
}

+static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
+{
+ switch (scope) {
+ case RESCTRL_L2_CACHE:
+ case RESCTRL_L3_CACHE:
+ return get_cpu_cacheinfo_id(cpu, scope);
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
/*
* domain_add_cpu - Add a cpu to a resource's domain list.
*
@@ -499,7 +509,7 @@ static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
*/
static void domain_add_cpu(int cpu, struct rdt_resource *r)
{
- int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+ int id = get_domain_id_from_scope(cpu, r->scope);
struct list_head *add_pos = NULL;
struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;
@@ -507,6 +517,12 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)

lockdep_assert_held(&domain_list_lock);

+ if (id < 0) {
+ pr_warn_once("Can't find domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->scope, r->name);
+ return;
+ }
+
d = rdt_find_domain(r, id, &add_pos);
if (IS_ERR(d)) {
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
@@ -552,15 +568,21 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)

static void domain_remove_cpu(int cpu, struct rdt_resource *r)
{
- int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
+ int id = get_domain_id_from_scope(cpu, r->scope);
struct rdt_hw_domain *hw_dom;
struct rdt_domain *d;

lockdep_assert_held(&domain_list_lock);

+ if (id < 0) {
+ pr_warn_once("Can't find domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->scope, r->name);
+ return;
+ }
+
d = rdt_find_domain(r, id, NULL);
- if (IS_ERR_OR_NULL(d)) {
- pr_warn("Couldn't find cache id for CPU %d\n", cpu);
+ if (!d) {
+ pr_warn("Couldn't find domain with id=%d for CPU %d\n", id, cpu);
return;
}
hw_dom = resctrl_to_arch_dom(d);
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 7471f6b747b6..4b60835f6170 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -579,7 +579,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)

r = &rdt_resources_all[resid].r_resctrl;
d = rdt_find_domain(r, domid, NULL);
- if (IS_ERR_OR_NULL(d)) {
+ if (!d) {
ret = -ENOENT;
goto out;
}
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 884b88e25141..0013b1b39c17 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -292,10 +292,14 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
*/
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
+ enum resctrl_scope scope = plr->s->res->scope;
struct cpu_cacheinfo *ci;
int ret;
int i;

+ if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE))
+ return -ENODEV;
+
/* Pick the first cpu we find that is associated with the cache. */
plr->cpu = cpumask_first(&plr->d->cpu_mask);

@@ -311,7 +315,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);

for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == plr->s->res->cache_level) {
+ if (ci->info_list[i].level == scope) {
plr->line_size = ci->info_list[i].coherency_line_size;
return 0;
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index da4f13db4161..5cdaf08a97ad 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1454,10 +1454,13 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
unsigned int size = 0;
int num_b, i;

+ if (WARN_ON_ONCE(r->scope != RESCTRL_L2_CACHE && r->scope != RESCTRL_L3_CACHE))
+ return size;
+
num_b = bitmap_weight(&cbm, r->cache.cbm_len);
ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == r->cache_level) {
+ if (ci->info_list[i].level == r->scope) {
size = ci->info_list[i].size / r->cache.cbm_len * num_b;
break;
}
--
2.43.0


2024-02-28 19:38:37

by Luck, Tony

Subject: [PATCH v15 2/8] x86/resctrl: Prepare to split rdt_domain structure

The rdt_domain structure is used for both control and monitor features.
It is about to be split into separate structures for these two usages
because future resources will have different scopes for control and
monitoring.

To allow for common code that scans a list of domains looking for a
specific domain id, move all the common fields ("list", "id", "cpu_mask")
into their own structure within the rdt_domain structure.
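
As a sketch of the common code this enables (a hypothetical helper,
not part of this patch), a domain lookup can operate on the header
alone and thus work for any domain type:

    /* Hypothetical: find a domain header by id in a domain list. */
    static struct rdt_domain_hdr *find_domain_hdr(struct list_head *domains,
                                                  int id)
    {
        struct rdt_domain_hdr *hdr;

        list_for_each_entry(hdr, domains, list)
            if (hdr->id == id)
                return hdr;

        return NULL;
    }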

Signed-off-by: Tony Luck <[email protected]>
---
include/linux/resctrl.h | 16 ++++--
arch/x86/kernel/cpu/resctrl/core.c | 26 +++++-----
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 24 ++++-----
arch/x86/kernel/cpu/resctrl/monitor.c | 12 ++---
arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 14 +++---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 60 +++++++++++------------
6 files changed, 80 insertions(+), 72 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index ed693bfe474d..f63fcf17a3bc 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -59,10 +59,20 @@ struct resctrl_staged_config {
};

/**
- * struct rdt_domain - group of CPUs sharing a resctrl resource
+ * struct rdt_domain_hdr - common header for different domain types
* @list: all instances of this resource
* @id: unique id for this instance
* @cpu_mask: which CPUs share this resource
+ */
+struct rdt_domain_hdr {
+ struct list_head list;
+ int id;
+ struct cpumask cpu_mask;
+};
+
+/**
+ * struct rdt_domain - group of CPUs sharing a resctrl resource
+ * @hdr: common header for different domain types
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
* @mbm_total: saved state for MBM total bandwidth
* @mbm_local: saved state for MBM local bandwidth
@@ -77,9 +87,7 @@ struct resctrl_staged_config {
* by closid
*/
struct rdt_domain {
- struct list_head list;
- int id;
- struct cpumask cpu_mask;
+ struct rdt_domain_hdr hdr;
unsigned long *rmid_busy_llc;
struct mbm_state *mbm_total;
struct mbm_state *mbm_local;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 13b5461c3cb0..e7f8ab271d86 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -355,9 +355,9 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)

lockdep_assert_cpus_held();

- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
/* Find the domain that contains this CPU */
- if (cpumask_test_cpu(cpu, &d->cpu_mask))
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
return d;
}

@@ -393,12 +393,12 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
struct list_head *l;

list_for_each(l, &r->domains) {
- d = list_entry(l, struct rdt_domain, list);
+ d = list_entry(l, struct rdt_domain, hdr.list);
/* When id is found, return its domain. */
- if (id == d->id)
+ if (id == d->hdr.id)
return d;
/* Stop searching when finding id's position in sorted list. */
- if (id < d->id)
+ if (id < d->hdr.id)
break;
}

@@ -530,7 +530,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
}

if (d) {
- cpumask_set_cpu(cpu, &d->cpu_mask);
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
if (r->cache.arch_has_per_cpu_cfg)
rdt_domain_reconfigure_cdp(r);
return;
@@ -541,8 +541,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;

d = &hw_dom->d_resctrl;
- d->id = id;
- cpumask_set_cpu(cpu, &d->cpu_mask);
+ d->hdr.id = id;
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

rdt_domain_reconfigure_cdp(r);

@@ -556,11 +556,11 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;
}

- list_add_tail_rcu(&d->list, add_pos);
+ list_add_tail_rcu(&d->hdr.list, add_pos);

err = resctrl_online_domain(r, d);
if (err) {
- list_del_rcu(&d->list);
+ list_del_rcu(&d->hdr.list);
synchronize_rcu();
domain_free(hw_dom);
}
@@ -587,10 +587,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
}
hw_dom = resctrl_to_arch_dom(d);

- cpumask_clear_cpu(cpu, &d->cpu_mask);
- if (cpumask_empty(&d->cpu_mask)) {
+ cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
+ if (cpumask_empty(&d->hdr.cpu_mask)) {
resctrl_offline_domain(r, d);
- list_del_rcu(&d->list);
+ list_del_rcu(&d->hdr.list);
synchronize_rcu();

/*
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 4b60835f6170..f49a96b81346 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -69,7 +69,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,

cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
return -EINVAL;
}

@@ -148,7 +148,7 @@ int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,

cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
return -EINVAL;
}

@@ -231,8 +231,8 @@ static int parse_line(char *line, struct resctrl_schema *s,
return -EINVAL;
}
dom = strim(dom);
- list_for_each_entry(d, &r->domains, list) {
- if (d->id == dom_id) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
data.buf = dom;
data.rdtgrp = rdtgrp;
if (r->parse_ctrlval(&data, s, d))
@@ -280,7 +280,7 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
u32 idx = get_config_index(closid, t);
struct msr_param msr_param;

- if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
+ if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask))
return -EINVAL;

hw_dom->ctrl_val[idx] = cfg_val;
@@ -307,7 +307,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
hw_dom = resctrl_to_arch_dom(d);
msr_param.res = NULL;
msr_param.dom = d;
@@ -320,7 +320,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
if (cfg->new_ctrl == hw_dom->ctrl_val[idx])
continue;
hw_dom->ctrl_val[idx] = cfg->new_ctrl;
- cpu = cpumask_any(&d->cpu_mask);
+ cpu = cpumask_any(&d->hdr.cpu_mask);

if (!msr_param.res) {
msr_param.low = idx;
@@ -452,7 +452,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
lockdep_assert_cpus_held();

seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->domains, hdr.list) {
if (sep)
seq_puts(s, ";");

@@ -462,7 +462,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
ctrl_val = resctrl_arch_get_config(r, dom, closid,
schema->conf_type);

- seq_printf(s, r->format_str, dom->id, max_data_width,
+ seq_printf(s, r->format_str, dom->hdr.id, max_data_width,
ctrl_val);
sep = true;
}
@@ -491,7 +491,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
} else {
seq_printf(s, "%s:%d=%x\n",
rdtgrp->plr->s->res->name,
- rdtgrp->plr->d->id,
+ rdtgrp->plr->d->hdr.id,
rdtgrp->plr->cbm);
}
} else {
@@ -539,7 +539,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
return;
}

- cpu = cpumask_any_housekeeping(&d->cpu_mask, RESCTRL_PICK_ANY_CPU);
+ cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask, RESCTRL_PICK_ANY_CPU);

/*
* cpumask_any_housekeeping() prefers housekeeping CPUs, but
@@ -548,7 +548,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
* counters on some platforms if its called in IRQ context.
*/
if (tick_nohz_full_cpu(cpu))
- smp_call_function_any(&d->cpu_mask, mon_event_count, rr, 1);
+ smp_call_function_any(&d->hdr.cpu_mask, mon_event_count, rr, 1);
else
smp_call_on_cpu(cpu, smp_mon_event_count, rr, false);

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index c34a35ec0f03..54f7688ee447 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -280,7 +280,7 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,

resctrl_arch_rmid_read_context_check();

- if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
+ if (!cpumask_test_cpu(smp_processor_id(), &d->hdr.cpu_mask))
return -EINVAL;

ret = __rmid_read(rmid, eventid, &msr_val);
@@ -479,7 +479,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);

entry->busy = 0;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
/*
* For the first limbo RMID in the domain,
* setup up the limbo worker.
@@ -790,7 +790,7 @@ void cqm_handle_limbo(struct work_struct *work)
__check_limbo(d, false);

if (has_busy_rmid(d)) {
- d->cqm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+ d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
RESCTRL_PICK_ANY_CPU);
schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
delay);
@@ -814,7 +814,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
unsigned long delay = msecs_to_jiffies(delay_ms);
int cpu;

- cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
+ cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
dom->cqm_work_cpu = cpu;

if (cpu < nr_cpu_ids)
@@ -857,7 +857,7 @@ void mbm_handle_overflow(struct work_struct *work)
* Re-check for housekeeping CPUs. This allows the overflow handler to
* move off a nohz_full CPU quickly.
*/
- d->mbm_work_cpu = cpumask_any_housekeeping(&d->cpu_mask,
+ d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
RESCTRL_PICK_ANY_CPU);
schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);

@@ -886,7 +886,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
*/
if (!resctrl_mounted || !resctrl_arch_mon_capable())
return;
- cpu = cpumask_any_housekeeping(&dom->cpu_mask, exclude_cpu);
+ cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
dom->mbm_work_cpu = cpu;

if (cpu < nr_cpu_ids)
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 0013b1b39c17..52e6935e4c55 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -221,7 +221,7 @@ static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
int cpu;
int ret;

- for_each_cpu(cpu, &plr->d->cpu_mask) {
+ for_each_cpu(cpu, &plr->d->hdr.cpu_mask) {
pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
if (!pm_req) {
rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
@@ -301,7 +301,7 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
return -ENODEV;

/* Pick the first cpu we find that is associated with the cache. */
- plr->cpu = cpumask_first(&plr->d->cpu_mask);
+ plr->cpu = cpumask_first(&plr->d->hdr.cpu_mask);

if (!cpu_online(plr->cpu)) {
rdt_last_cmd_printf("CPU %u associated with cache not online\n",
@@ -859,10 +859,10 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
* associated with them.
*/
for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(d_i, &r->domains, list) {
+ list_for_each_entry(d_i, &r->domains, hdr.list) {
if (d_i->plr)
cpumask_or(cpu_with_psl, cpu_with_psl,
- &d_i->cpu_mask);
+ &d_i->hdr.cpu_mask);
}
}

@@ -870,7 +870,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
* Next test if new pseudo-locked region would intersect with
* existing region.
*/
- if (cpumask_intersects(&d->cpu_mask, cpu_with_psl))
+ if (cpumask_intersects(&d->hdr.cpu_mask, cpu_with_psl))
ret = true;

free_cpumask_var(cpu_with_psl);
@@ -1202,7 +1202,7 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
}

plr->thread_done = 0;
- cpu = cpumask_first(&plr->d->cpu_mask);
+ cpu = cpumask_first(&plr->d->hdr.cpu_mask);
if (!cpu_online(cpu)) {
ret = -ENODEV;
goto out;
@@ -1532,7 +1532,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
* may be scheduled elsewhere and invalidate entries in the
* pseudo-locked region.
*/
- if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
+ if (!cpumask_subset(current->cpus_ptr, &plr->d->hdr.cpu_mask)) {
mutex_unlock(&rdtgroup_mutex);
return -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 5cdaf08a97ad..ee8a028e6f97 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -98,7 +98,7 @@ void rdt_staged_configs_clear(void)
lockdep_assert_held(&rdtgroup_mutex);

for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(dom, &r->domains, list)
+ list_for_each_entry(dom, &r->domains, hdr.list)
memset(dom->staged_config, 0, sizeof(dom->staged_config));
}
}
@@ -317,7 +317,7 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
} else {
- mask = &rdtgrp->plr->d->cpu_mask;
+ mask = &rdtgrp->plr->d->hdr.cpu_mask;
seq_printf(s, is_cpu_list(of) ?
"%*pbl\n" : "%*pb\n",
cpumask_pr_args(mask));
@@ -1021,12 +1021,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->domains, hdr.list) {
if (sep)
seq_putc(seq, ';');
sw_shareable = 0;
exclusive = 0;
- seq_printf(seq, "%d=", dom->id);
+ seq_printf(seq, "%d=", dom->hdr.id);
for (i = 0; i < closids_supported(); i++) {
if (!closid_allocated(i))
continue;
@@ -1343,7 +1343,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
continue;
has_cache = true;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
ctrl = resctrl_arch_get_config(r, d, closid,
s->conf_type);
if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
@@ -1458,7 +1458,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
return size;

num_b = bitmap_weight(&cbm, r->cache.cbm_len);
- ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
+ ci = get_cpu_cacheinfo(cpumask_any(&d->hdr.cpu_mask));
for (i = 0; i < ci->num_leaves; i++) {
if (ci->info_list[i].level == r->scope) {
size = ci->info_list[i].size / r->cache.cbm_len * num_b;
@@ -1506,7 +1506,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
- seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
}
goto out;
}
@@ -1518,7 +1518,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
type = schema->conf_type;
sep = false;
seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
if (sep)
seq_putc(s, ';');
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
@@ -1536,7 +1536,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
else
size = rdtgroup_cbm_to_size(r, d, ctrl);
}
- seq_printf(s, "%d=%u", d->id, size);
+ seq_printf(s, "%d=%u", d->hdr.id, size);
sep = true;
}
seq_putc(s, '\n');
@@ -1596,7 +1596,7 @@ static void mon_event_config_read(void *info)

static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
{
- smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
+ smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1);
}

static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
@@ -1608,7 +1608,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);

- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->domains, hdr.list) {
if (sep)
seq_puts(s, ";");

@@ -1616,7 +1616,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
mon_info.evtid = evtid;
mondata_config_read(dom, &mon_info);

- seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
+ seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
sep = true;
}
seq_puts(s, "\n");
@@ -1682,7 +1682,7 @@ static void mbm_config_write_domain(struct rdt_resource *r,
* are scoped at the domain level. Writing any of these MSRs
* on one CPU is observed by all the CPUs in the domain.
*/
- smp_call_function_any(&d->cpu_mask, mon_event_config_write,
+ smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write,
&mon_info, 1);

/*
@@ -1732,8 +1732,8 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
return -EINVAL;
}

- list_for_each_entry(d, &r->domains, list) {
- if (d->id == dom_id) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
mbm_config_write_domain(r, d, evtid, val);
goto next;
}
@@ -2280,14 +2280,14 @@ static int set_cache_qos_cfg(int level, bool enable)
return -ENOMEM;

r_l = &rdt_resources_all[level].r_resctrl;
- list_for_each_entry(d, &r_l->domains, list) {
+ list_for_each_entry(d, &r_l->domains, hdr.list) {
if (r_l->cache.arch_has_per_cpu_cfg)
/* Pick all the CPUs in the domain instance */
- for_each_cpu(cpu, &d->cpu_mask)
+ for_each_cpu(cpu, &d->hdr.cpu_mask)
cpumask_set_cpu(cpu, cpu_mask);
else
/* Pick one CPU from each domain instance to update MSR */
- cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+ cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask);
}

/* Update QOS_CFG MSR on all the CPUs in cpu_mask */
@@ -2316,7 +2316,7 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
{
u32 num_closid = resctrl_arch_get_num_closid(r);
- int cpu = cpumask_any(&d->cpu_mask);
+ int cpu = cpumask_any(&d->hdr.cpu_mask);
int i;

d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
@@ -2365,7 +2365,7 @@ static int set_mba_sc(bool mba_sc)

r->membw.mba_sc = mba_sc;

- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
for (i = 0; i < num_closid; i++)
d->mbps_val[i] = MBA_MAX_MBPS;
}
@@ -2704,7 +2704,7 @@ static int rdt_get_tree(struct fs_context *fc)

if (is_mbm_enabled()) {
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- list_for_each_entry(dom, &r->domains, list)
+ list_for_each_entry(dom, &r->domains, hdr.list)
mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
RESCTRL_PICK_ANY_CPU);
}
@@ -2832,9 +2832,9 @@ static int reset_all_ctrls(struct rdt_resource *r)
* CBMs in all domains to the maximum mask value. Pick one CPU
* from each domain to update the MSRs below.
*/
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
hw_dom = resctrl_to_arch_dom(d);
- cpu = cpumask_any(&d->cpu_mask);
+ cpu = cpumask_any(&d->hdr.cpu_mask);

for (i = 0; i < hw_res->num_closid; i++)
hw_dom->ctrl_val[i] = r->default_ctrl;
@@ -3037,7 +3037,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
char name[32];
int ret;

- sprintf(name, "mon_%s_%02d", r->name, d->id);
+ sprintf(name, "mon_%s_%02d", r->name, d->hdr.id);
/* create the directory */
kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
if (IS_ERR(kn))
@@ -3053,7 +3053,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
}

priv.u.rid = r->rid;
- priv.u.domid = d->id;
+ priv.u.domid = d->hdr.id;
list_for_each_entry(mevt, &r->evt_list, list) {
priv.u.evtid = mevt->evtid;
ret = mon_addfile(kn, mevt->name, priv.priv);
@@ -3104,7 +3104,7 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->domains, hdr.list) {
ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
if (ret)
return ret;
@@ -3263,7 +3263,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
*/
tmp_cbm = cfg->new_ctrl;
if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
+ rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
return -ENOSPC;
}
cfg->have_new_ctrl = true;
@@ -3286,7 +3286,7 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
struct rdt_domain *d;
int ret;

- list_for_each_entry(d, &s->res->domains, list) {
+ list_for_each_entry(d, &s->res->domains, hdr.list) {
ret = __init_one_rdt_domain(d, s, closid);
if (ret < 0)
return ret;
@@ -3301,7 +3301,7 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
struct resctrl_staged_config *cfg;
struct rdt_domain *d;

- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
if (is_mba_sc(r)) {
d->mbps_val[closid] = MBA_MAX_MBPS;
continue;
@@ -3947,7 +3947,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
* per domain monitor data directories.
*/
if (resctrl_mounted && resctrl_arch_mon_capable())
- rmdir_mondata_subdir_allrdtgrp(r, d->id);
+ rmdir_mondata_subdir_allrdtgrp(r, d->hdr.id);

if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
--
2.43.0


2024-02-28 19:38:46

by Luck, Tony

Subject: [PATCH v15 5/8] x86/resctrl: Add node-scope to the options for feature scope

Currently supported resctrl features are all domain scoped, with the
scope matching that of the L2 or L3 caches.

Add RESCTRL_NODE as a new option for features that are scoped at the
same granularity as NUMA nodes. This is needed for Intel's Sub-NUMA
Cluster (SNC) feature where monitoring features are node scoped.

Signed-off-by: Tony Luck <[email protected]>
---
include/linux/resctrl.h | 1 +
arch/x86/kernel/cpu/resctrl/core.c | 2 ++
2 files changed, 3 insertions(+)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 1a4ac60522bb..87b360e87215 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -176,6 +176,7 @@ struct resctrl_schema;
enum resctrl_scope {
RESCTRL_L2_CACHE = 2,
RESCTRL_L3_CACHE = 3,
+ RESCTRL_NODE,
};

/**
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index cd58c9d4710f..c34ce367c456 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -510,6 +510,8 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
case RESCTRL_L2_CACHE:
case RESCTRL_L3_CACHE:
return get_cpu_cacheinfo_id(cpu, scope);
+ case RESCTRL_NODE:
+ return cpu_to_node(cpu);
default:
break;
}
--
2.43.0


2024-02-28 19:38:55

by Luck, Tony

Subject: [PATCH v15 6/8] x86/resctrl: Introduce snc_nodes_per_l3_cache

Intel Sub-NUMA Cluster (SNC) is a feature that subdivides the CPU cores
and memory controllers on a socket into two or more groups. These are
presented to the operating system as NUMA nodes.

This may enable some workloads to have slightly lower latency to memory
as the memory controller(s) in an SNC node are electrically closer to the
CPU cores on that SNC node. This benefit comes at the cost of lower
bandwidth, since the memory accesses for each core can only be interleaved
between the memory controllers on the same SNC node.

Resctrl monitoring on an Intel system depends upon attaching RMIDs to tasks
to track L3 cache occupancy and memory bandwidth. There is an MSR that
controls how the RMIDs are shared between SNC nodes.

The default mode divides them numerically. E.g. when there are two SNC
nodes on a socket the lower-numbered half of the RMIDs are given to the
first node, the remainder to the second node. This would be difficult
to use with the Linux resctrl interface, as the specific RMID values
assigned to resctrl groups are not visible to users.

The other mode divides the RMIDs and renumbers the ones on the second
SNC node to start from zero.

Even with this renumbering, SNC mode requires several changes in resctrl
behavior for correct operation.

Add a global integer "snc_nodes_per_l3_cache" that records how many
SNC nodes share each L3 cache. When "snc_nodes_per_l3_cache" is "1",
SNC mode is either not implemented or not enabled.

Update all places to take appropriate action when SNC mode is enabled:
1) The number of logical RMIDs per L3 cache available for use is the
number of physical RMIDs divided by the number of SNC nodes.
2) Likewise the "mon_scale" value must be divided by the number of SNC
nodes.
3) The RMID renumbering operates when using the value from the
IA32_PQR_ASSOC MSR to count accesses by a task. When reading an RMID
counter, adjust the logical RMID to the physical RMID value for
the SNC node being read, and load the adjusted value into the
IA32_QM_EVTSEL MSR (a worked example follows this list).
4) Divide the L3 cache between the SNC nodes. Divide the value
reported in the resctrl "size" file by the number of SNC
nodes because the effective amount of cache that can be allocated
is reduced by that factor.
5) Disable the "-o mba_MBps" mount option in SNC mode
because the monitoring is being done per SNC node, while the
bandwidth allocation is still done at the L3 cache scope.
Trying to use this feedback loop might result in contradictory
changes to the throttling level coming from each of the SNC
nodes' bandwidth measurements.
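
A worked example of the adjustment in item 3, using hypothetical
numbers but the same logic as __rmid_read(): suppose a socket has 256
physical RMIDs and two SNC nodes, so each node gets 128 logical RMIDs:

    snc_nodes_per_l3_cache = 2;
    r->num_rmid = 256 / 2;                       /* 128 logical RMIDs */

    /* A CPU in the second SNC node reads logical RMID 5: */
    rmid_offset = (cpu_to_node(cpu) % 2) * 128;  /* = 128 */
    wrmsr(MSR_IA32_QM_EVTSEL, eventid, 5 + 128); /* physical RMID 133 */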

Signed-off-by: Tony Luck <[email protected]>
---
arch/x86/kernel/cpu/resctrl/internal.h | 2 ++
arch/x86/kernel/cpu/resctrl/core.c | 6 ++++++
arch/x86/kernel/cpu/resctrl/monitor.c | 16 +++++++++++++---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 5 +++--
4 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 41a093feb744..786035eff7fb 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -483,6 +483,8 @@ extern struct rdt_hw_resource rdt_resources_all[];
extern struct rdtgroup rdtgroup_default;
extern struct dentry *debugfs_resctrl;

+extern unsigned int snc_nodes_per_l3_cache;
+
enum resctrl_res_level {
RDT_RESOURCE_L3,
RDT_RESOURCE_L2,
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index c34ce367c456..cb181796f73b 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -331,6 +331,12 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
return r->default_ctrl;
}

+/*
+ * Number of SNC nodes that share each L3 cache. Default is 1 for
+ * systems that do not support SNC, or have SNC disabled.
+ */
+unsigned int snc_nodes_per_l3_cache = 1;
+
static void mba_wrmsr_intel(struct msr_param *m)
{
struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 08d53ce61d01..87badcec2834 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -186,8 +186,18 @@ static inline struct rmid_entry *__rmid_entry(u32 idx)

static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ int cpu = smp_processor_id();
+ int rmid_offset = 0;
u64 msr_val;

+ /*
+ * When SNC mode is on, need to compute the offset to read the
+ * physical RMID counter for the node to which this CPU belongs.
+ */
+ if (snc_nodes_per_l3_cache > 1)
+ rmid_offset = (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->num_rmid;
+
/*
* As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
* with a valid event code for supported resource type and the bits
@@ -196,7 +206,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
* IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
* are error bits.
*/
- wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+ wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid + rmid_offset);
rdmsrl(MSR_IA32_QM_CTR, msr_val);

if (msr_val & RMID_VAL_ERROR)
@@ -1011,8 +1021,8 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
int ret;

resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
- hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale;
- r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1;
+ hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache;
+ r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache;
hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;

if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 821c32876cc1..1afc64bf46fa 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1466,7 +1466,7 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
}
}

- return size;
+ return size / snc_nodes_per_l3_cache;
}

/*
@@ -2346,7 +2346,8 @@ static bool supports_mba_mbps(void)
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;

return (is_mbm_local_enabled() &&
- r->alloc_capable && is_mba_linear());
+ r->alloc_capable && is_mba_linear() &&
+ snc_nodes_per_l3_cache == 1);
}

/*
--
2.43.0


2024-02-28 19:39:25

by Luck, Tony

Subject: [PATCH v15 7/8] x86/resctrl: Sub NUMA Cluster detection and enable

There isn't a simple hardware bit that indicates whether a CPU is
running in Sub-NUMA Cluster (SNC) mode. Infer the state by computing
the ratio of NUMA nodes to L3 cache instances.

When SNC mode is detected, reconfigure the RMID counters by updating
the MSR_RMID_SNC_CONFIG MSR on each socket as CPUs are seen.

Clearing bit zero of the MSR divides the RMIDs and renumbers the ones
on the second SNC node to start from zero.
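
For example (a hypothetical configuration): a two-socket system that
reports four CPU NUMA nodes and two distinct L3 cache ids gives

    (4 CPU nodes - 0 memory-only nodes) / 2 L3 caches = 2

SNC nodes per L3 cache, so the L3 monitoring scope is switched to
RESCTRL_NODE.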

Signed-off-by: Tony Luck <[email protected]>
---
arch/x86/include/asm/msr-index.h | 1 +
arch/x86/kernel/cpu/resctrl/core.c | 119 +++++++++++++++++++++++++++++
2 files changed, 120 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index f1bd7b91b3c6..f6ba7d0397b8 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -1119,6 +1119,7 @@
#define MSR_IA32_QM_CTR 0xc8e
#define MSR_IA32_PQR_ASSOC 0xc8f
#define MSR_IA32_L3_CBM_BASE 0xc90
+#define MSR_RMID_SNC_CONFIG 0xca0
#define MSR_IA32_L2_CBM_BASE 0xd10
#define MSR_IA32_MBA_THRTL_BASE 0xd50

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index cb181796f73b..65cec8c45047 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -21,7 +21,9 @@
#include <linux/err.h>
#include <linux/cacheinfo.h>
#include <linux/cpuhotplug.h>
+#include <linux/mod_devicetable.h>

+#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/resctrl.h>
#include "internal.h"
@@ -744,11 +746,42 @@ static void clear_closid_rmid(int cpu)
RESCTRL_RESERVED_CLOSID);
}

+/*
+ * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1
+ * which indicates that RMIDs are configured in legacy mode.
+ * This mode is incompatible with Linux resctrl semantics
+ * as RMIDs are partitioned between SNC nodes, which requires
+ * a user to know which RMID is allocated to a task.
+ * Clearing bit 0 reconfigures the RMID counters for use
+ * in Sub NUMA Cluster mode. This mode is better for Linux.
+ * The RMID space is divided between all SNC nodes with the
+ * RMIDs renumbered to start from zero in each node when
+ * counting operations from tasks. Code to read the counters
+ * must adjust RMID counter numbers based on SNC node. See
+ * __rmid_read() for code that does this.
+ */
+static void snc_remap_rmids(int cpu)
+{
+ u64 val;
+
+ /* Only need to enable once per package. */
+ if (cpumask_first(topology_core_cpumask(cpu)) != cpu)
+ return;
+
+ rdmsrl(MSR_RMID_SNC_CONFIG, val);
+ val &= ~BIT_ULL(0);
+ wrmsrl(MSR_RMID_SNC_CONFIG, val);
+}
+
static int resctrl_arch_online_cpu(unsigned int cpu)
{
struct rdt_resource *r;

mutex_lock(&domain_list_lock);
+
+ if (snc_nodes_per_l3_cache > 1)
+ snc_remap_rmids(cpu);
+
for_each_capable_rdt_resource(r)
domain_add_cpu(cpu, r);
mutex_unlock(&domain_list_lock);
@@ -988,11 +1021,97 @@ static __init bool get_rdt_resources(void)
return (rdt_mon_capable || rdt_alloc_capable);
}

+/* CPU models that support MSR_RMID_SNC_CONFIG */
+static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 0),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, 0),
+ X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, 0),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, 0),
+ {}
+};
+
+/*
+ * There isn't a simple hardware bit that indicates whether a CPU is running
+ * in Sub NUMA Cluster (SNC) mode. Infer the state by comparing the
+ * ratio of NUMA nodes to L3 cache instances.
+ * It is not possible to accurately determine SNC state if the system is
+ * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes
+ * to L3 caches. It will be OK if system is booted with hyperthreading
+ * disabled (since this doesn't affect the ratio).
+ */
+static __init int snc_get_config(void)
+{
+ unsigned long *node_caches;
+ int mem_only_nodes = 0;
+ int cpu, node, ret;
+ int num_l3_caches;
+ int cache_id;
+
+ if (!x86_match_cpu(snc_cpu_ids))
+ return 1;
+
+ node_caches = bitmap_zalloc(num_possible_cpus(), GFP_KERNEL);
+ if (!node_caches)
+ return 1;
+
+ cpus_read_lock();
+
+ if (num_online_cpus() != num_present_cpus())
+ pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
+
+ for_each_node(node) {
+ cpu = cpumask_first(cpumask_of_node(node));
+ if (cpu < nr_cpu_ids) {
+ cache_id = get_cpu_cacheinfo_id(cpu, 3);
+ if (cache_id != -1)
+ set_bit(cache_id, node_caches);
+ } else {
+ mem_only_nodes++;
+ }
+ }
+ cpus_read_unlock();
+
+ num_l3_caches = bitmap_weight(node_caches, num_possible_cpus());
+ kfree(node_caches);
+
+ if (!num_l3_caches)
+ goto insane;
+
+ /* sanity check #1: Number of CPU nodes must be multiple of num_l3_caches */
+ if ((nr_node_ids - mem_only_nodes) % num_l3_caches)
+ goto insane;
+
+ ret = (nr_node_ids - mem_only_nodes) / num_l3_caches;
+
+ /* sanity check #2: Only valid results are 1, 2, 3, 4 */
+ switch (ret) {
+ case 1:
+ break;
+ case 2:
+ case 3:
+ case 4:
+ pr_info("Sub-NUMA cluster detected with %d nodes per L3 cache\n", ret);
+ rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_NODE;
+ break;
+ default:
+ goto insane;
+ }
+
+ return ret;
+insane:
+ pr_warn("SNC insanity: CPU nodes = %d num_l3_caches = %d\n",
+ (nr_node_ids - mem_only_nodes), num_l3_caches);
+ return 1;
+}
+
static __init void rdt_init_res_defs_intel(void)
{
struct rdt_hw_resource *hw_res;
struct rdt_resource *r;

+ snc_nodes_per_l3_cache = snc_get_config();
+
for_each_rdt_resource(r) {
hw_res = resctrl_to_arch_res(r);

--
2.43.0


2024-02-28 19:39:26

by Luck, Tony

Subject: [PATCH v15 8/8] x86/resctrl: Update documentation with Sub-NUMA cluster changes

With Sub-NUMA Cluster mode enabled, the scope of monitoring resources
is per-NODE instead of per-L3 cache. Suffixes of directories with "L3"
in their name refer to Sub-NUMA nodes instead of L3 cache ids.

Users should be aware that SNC mode also affects the amount of L3 cache
available for allocation within each SNC node.

Signed-off-by: Tony Luck <[email protected]>
---
Documentation/arch/x86/resctrl.rst | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/Documentation/arch/x86/resctrl.rst b/Documentation/arch/x86/resctrl.rst
index a6279df64a9d..15f1cff6ee76 100644
--- a/Documentation/arch/x86/resctrl.rst
+++ b/Documentation/arch/x86/resctrl.rst
@@ -366,10 +366,10 @@ When control is enabled all CTRL_MON groups will also contain:
When monitoring is enabled all MON groups will also contain:

"mon_data":
- This contains a set of files organized by L3 domain and by
- RDT event. E.g. on a system with two L3 domains there will
- be subdirectories "mon_L3_00" and "mon_L3_01". Each of these
- directories have one file per event (e.g. "llc_occupancy",
+ This contains a set of files organized by L3 domain or by NUMA
+ node (depending on whether Sub-NUMA Cluster (SNC) mode is disabled
+ or enabled respectively) and by RDT event. Each of these
+ directories has one file per event (e.g. "llc_occupancy",
"mbm_total_bytes", and "mbm_local_bytes"). In a MON group these
files provide a read out of the current value of the event for
all tasks in the group. In CTRL_MON groups these files provide
@@ -478,6 +478,23 @@ if non-contiguous 1s value is supported. On a system with a 20-bit mask
each bit represents 5% of the capacity of the cache. You could partition
the cache into four equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.

+Notes on Sub-NUMA Cluster mode
+==============================
+When SNC mode is enabled, Linux may load balance tasks between Sub-NUMA
+nodes much more readily than between regular NUMA nodes since the CPUs
+on Sub-NUMA nodes share the same L3 cache and the system may report
+the NUMA distance between Sub-NUMA nodes with a lower value than used
+for regular NUMA nodes. Users who do not bind tasks to the CPUs of a
+specific Sub-NUMA node must read the "llc_occupancy", "mbm_total_bytes",
+and "mbm_local_bytes" for all Sub-NUMA nodes where the tasks may execute
+to get the full view of traffic for which the tasks were the source.
+
+The cache allocation feature still provides the same number of
+bits in a mask to control allocation into the L3 cache, but each
+of those ways has its capacity reduced because the cache is divided
+between the SNC nodes. The values reported in the resctrl
+"size" files are adjusted accordingly.
+
Memory bandwidth Allocation and monitoring
==========================================

--
2.43.0


2024-02-28 19:39:45

by Luck, Tony

Subject: [PATCH v15 4/8] x86/resctrl: Split the rdt_domain and rdt_hw_domain structures

The same rdt_domain structure is used for both control and monitor
functions, but this results in wasted memory as some of the fields are
used only by control functions while most are used only by monitor
functions.

Split into separate rdt_ctrl_domain and rdt_mon_domain structures with
just the fields required for control and monitoring respectively.

Similarly, split the rdt_hw_domain structure into rdt_hw_ctrl_domain
and rdt_hw_mon_domain.

Signed-off-by: Tony Luck <[email protected]>
---
include/linux/resctrl.h | 48 ++++++++-------
arch/x86/kernel/cpu/resctrl/internal.h | 62 ++++++++++++--------
arch/x86/kernel/cpu/resctrl/core.c | 71 ++++++++++++-----------
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 28 ++++-----
arch/x86/kernel/cpu/resctrl/monitor.c | 40 ++++++-------
arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 6 +-
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 64 ++++++++++----------
7 files changed, 174 insertions(+), 145 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 8b4def50430a..1a4ac60522bb 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -78,7 +78,23 @@ struct rdt_domain_hdr {
};

/**
- * struct rdt_domain - group of CPUs sharing a resctrl resource
+ * struct rdt_ctrl_domain - group of CPUs sharing a resctrl control resource
+ * @hdr: common header for different domain types
+ * @plr: pseudo-locked region (if any) associated with domain
+ * @staged_config: parsed configuration to be applied
+ * @mbps_val: When mba_sc is enabled, this holds the array of user
+ * specified control values for mba_sc in MBps, indexed
+ * by closid
+ */
+struct rdt_ctrl_domain {
+ struct rdt_domain_hdr hdr;
+ struct pseudo_lock_region *plr;
+ struct resctrl_staged_config staged_config[CDP_NUM_TYPES];
+ u32 *mbps_val;
+};
+
+/**
+ * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
* @hdr: common header for different domain types
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
* @mbm_total: saved state for MBM total bandwidth
@@ -87,13 +103,8 @@ struct rdt_domain_hdr {
* @cqm_limbo: worker to periodically read CQM h/w counters
* @mbm_work_cpu: worker CPU for MBM h/w counters
* @cqm_work_cpu: worker CPU for CQM h/w counters
- * @plr: pseudo-locked region (if any) associated with domain
- * @staged_config: parsed configuration to be applied
- * @mbps_val: When mba_sc is enabled, this holds the array of user
- * specified control values for mba_sc in MBps, indexed
- * by closid
*/
-struct rdt_domain {
+struct rdt_mon_domain {
struct rdt_domain_hdr hdr;
unsigned long *rmid_busy_llc;
struct mbm_state *mbm_total;
@@ -102,9 +113,6 @@ struct rdt_domain {
struct delayed_work cqm_limbo;
int mbm_work_cpu;
int cqm_work_cpu;
- struct pseudo_lock_region *plr;
- struct resctrl_staged_config staged_config[CDP_NUM_TYPES];
- u32 *mbps_val;
};

/**
@@ -208,7 +216,7 @@ struct rdt_resource {
const char *format_str;
int (*parse_ctrlval)(struct rdt_parse_data *data,
struct resctrl_schema *s,
- struct rdt_domain *d);
+ struct rdt_ctrl_domain *d);
struct list_head evt_list;
unsigned long fflags;
bool cdp_capable;
@@ -242,15 +250,15 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
* Update the ctrl_val and apply this config right now.
* Must be called on one of the domain's CPUs.
*/
-int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
+int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type t, u32 cfg_val);

-u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
+u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type type);
-int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
-int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
-void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
-void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
+int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d);
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d);
+void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d);
+void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d);
void resctrl_online_cpu(unsigned int cpu);
void resctrl_offline_cpu(unsigned int cpu);

@@ -279,7 +287,7 @@ void resctrl_offline_cpu(unsigned int cpu);
* Return:
* 0 on success, or -EIO, -EINVAL etc on error.
*/
-int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 closid, u32 rmid, enum resctrl_event_id eventid,
u64 *val, void *arch_mon_ctx);

@@ -312,7 +320,7 @@ static inline void resctrl_arch_rmid_read_context_check(void)
*
* This can be called from any CPU.
*/
-void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
+void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 closid, u32 rmid,
enum resctrl_event_id eventid);

@@ -325,7 +333,7 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
*
* This can be called from any CPU.
*/
-void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d);

extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 1a251cb5f20f..41a093feb744 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -146,7 +146,7 @@ union mon_data_bits {
struct rmid_read {
struct rdtgroup *rgrp;
struct rdt_resource *r;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
enum resctrl_event_id evtid;
bool first;
int err;
@@ -231,7 +231,7 @@ struct mongroup {
*/
struct pseudo_lock_region {
struct resctrl_schema *s;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
u32 cbm;
wait_queue_head_t lock_thread_wq;
int thread_done;
@@ -354,25 +354,41 @@ struct arch_mbm_state {
};

/**
- * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share
- * a resource
+ * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
+ * a resource for a control function
* @d_resctrl: Properties exposed to the resctrl file system
* @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ *
+ * Members of this structure are accessed via helpers that provide abstraction.
+ */
+struct rdt_hw_ctrl_domain {
+ struct rdt_ctrl_domain d_resctrl;
+ u32 *ctrl_val;
+};
+
+/**
+ * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share
+ * a resource for a monitor function
+ * @d_resctrl: Properties exposed to the resctrl file system
* @arch_mbm_total: arch private state for MBM total bandwidth
* @arch_mbm_local: arch private state for MBM local bandwidth
*
* Members of this structure are accessed via helpers that provide abstraction.
*/
-struct rdt_hw_domain {
- struct rdt_domain d_resctrl;
- u32 *ctrl_val;
+struct rdt_hw_mon_domain {
+ struct rdt_mon_domain d_resctrl;
struct arch_mbm_state *arch_mbm_total;
struct arch_mbm_state *arch_mbm_local;
};

-static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
+static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r)
+{
+ return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl);
+}
+
+static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r)
{
- return container_of(r, struct rdt_hw_domain, d_resctrl);
+ return container_of(r, struct rdt_hw_mon_domain, d_resctrl);
}

/**
@@ -384,7 +400,7 @@ static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
*/
struct msr_param {
struct rdt_resource *res;
- struct rdt_domain *dom;
+ struct rdt_ctrl_domain *dom;
u32 low;
u32 high;
};
@@ -457,9 +473,9 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r
}

int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d);
+ struct rdt_ctrl_domain *d);
int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d);
+ struct rdt_ctrl_domain *d);

extern struct mutex rdtgroup_mutex;

@@ -563,22 +579,22 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
struct seq_file *s, void *v);
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid, bool exclusive);
-unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
+unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_ctrl_domain *d,
unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm);
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
-struct rdt_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r);
-struct rdt_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r);
+struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r);
+struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(u32 closid);
@@ -589,19 +605,19 @@ bool __init rdt_cpu_has(int flag);
void mon_event_count(void *info);
int rdtgroup_mondata_show(struct seq_file *m, void *arg);
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
- struct rdt_domain *d, struct rdtgroup *rdtgrp,
+ struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
int evtid, int first);
-void mbm_setup_overflow_handler(struct rdt_domain *dom,
+void mbm_setup_overflow_handler(struct rdt_mon_domain *dom,
unsigned long delay_ms,
int exclude_cpu);
void mbm_handle_overflow(struct work_struct *work);
void __init intel_rdt_mbm_apply_quirk(void);
bool is_mba_sc(struct rdt_resource *r);
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
int exclude_cpu);
void cqm_handle_limbo(struct work_struct *work);
-bool has_busy_rmid(struct rdt_domain *d);
-void __check_limbo(struct rdt_domain *d, bool force_free);
+bool has_busy_rmid(struct rdt_mon_domain *d);
+void __check_limbo(struct rdt_mon_domain *d, bool force_free);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
void __init thread_throttle_mode_init(void);
void __init mbm_config_rftype_init(const char *config);
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 66a5a270d66f..cd58c9d4710f 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -309,8 +309,8 @@ static void rdt_get_cdp_l2_config(void)

static void mba_wrmsr_amd(struct msr_param *m)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;

for (i = m->low; i < m->high; i++)
@@ -333,8 +333,8 @@ static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)

static void mba_wrmsr_intel(struct msr_param *m)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;

/* Write the delay values for mba. */
@@ -344,17 +344,17 @@ static void mba_wrmsr_intel(struct msr_param *m)

static void cat_wrmsr(struct msr_param *m)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(m->dom);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(m->dom);
unsigned int i;

for (i = m->low; i < m->high; i++)
wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

-struct rdt_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r)
+struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r)
{
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;

lockdep_assert_cpus_held();

@@ -367,9 +367,9 @@ struct rdt_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r)
return NULL;
}

-struct rdt_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r)
+struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r)
{
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;

lockdep_assert_cpus_held();

@@ -440,18 +440,23 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
*dc = r->default_ctrl;
}

-static void domain_free(struct rdt_hw_domain *hw_dom)
+static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom)
+{
+ kfree(hw_dom->ctrl_val);
+ kfree(hw_dom);
+}
+
+static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom)
{
kfree(hw_dom->arch_mbm_total);
kfree(hw_dom->arch_mbm_local);
- kfree(hw_dom->ctrl_val);
kfree(hw_dom);
}

-static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
+static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct msr_param m;
u32 *dc;

@@ -476,7 +481,7 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
* @num_rmid: The size of the MBM counter array
* @hw_dom: The domain that owns the allocated arrays
*/
-static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_domain *hw_dom)
+static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom)
{
size_t tsize;

@@ -515,10 +520,10 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
{
int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
+ struct rdt_hw_ctrl_domain *hw_dom;
struct list_head *add_pos = NULL;
- struct rdt_hw_domain *hw_dom;
struct rdt_domain_hdr *hdr;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int err;

lockdep_assert_held(&domain_list_lock);
@@ -533,7 +538,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
if (hdr) {
if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
return;
- d = container_of(hdr, struct rdt_domain, hdr);
+ d = container_of(hdr, struct rdt_ctrl_domain, hdr);

cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
if (r->cache.arch_has_per_cpu_cfg)
@@ -553,7 +558,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
rdt_domain_reconfigure_cdp(r);

if (domain_setup_ctrlval(r, d)) {
- domain_free(hw_dom);
+ ctrl_domain_free(hw_dom);
return;
}

@@ -563,7 +568,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
if (err) {
list_del_rcu(&d->hdr.list);
synchronize_rcu();
- domain_free(hw_dom);
+ ctrl_domain_free(hw_dom);
}
}

@@ -571,9 +576,9 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
{
int id = get_domain_id_from_scope(cpu, r->mon_scope);
struct list_head *add_pos = NULL;
- struct rdt_hw_domain *hw_dom;
+ struct rdt_hw_mon_domain *hw_dom;
struct rdt_domain_hdr *hdr;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
int err;

lockdep_assert_held(&domain_list_lock);
@@ -588,7 +593,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
if (hdr) {
if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
return;
- d = container_of(hdr, struct rdt_domain, hdr);
+ d = container_of(hdr, struct rdt_mon_domain, hdr);

cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
return;
@@ -604,7 +609,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
- domain_free(hw_dom);
+ mon_domain_free(hw_dom);
return;
}

@@ -614,7 +619,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
if (err) {
list_del_rcu(&d->hdr.list);
synchronize_rcu();
- domain_free(hw_dom);
+ mon_domain_free(hw_dom);
}
}

@@ -629,9 +634,9 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
{
int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
- struct rdt_hw_domain *hw_dom;
+ struct rdt_hw_ctrl_domain *hw_dom;
struct rdt_domain_hdr *hdr;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;

lockdep_assert_held(&domain_list_lock);

@@ -651,8 +656,8 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
return;

- d = container_of(hdr, struct rdt_domain, hdr);
- hw_dom = resctrl_to_arch_dom(d);
+ d = container_of(hdr, struct rdt_ctrl_domain, hdr);
+ hw_dom = resctrl_to_arch_ctrl_dom(d);

cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
if (cpumask_empty(&d->hdr.cpu_mask)) {
@@ -661,12 +666,12 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
synchronize_rcu();

/*
- * rdt_domain "d" is going to be freed below, so clear
+ * rdt_ctrl_domain "d" is going to be freed below, so clear
* its pointer from pseudo_lock_region struct.
*/
if (d->plr)
d->plr->d = NULL;
- domain_free(hw_dom);
+ ctrl_domain_free(hw_dom);

return;
}
@@ -675,9 +680,9 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
{
int id = get_domain_id_from_scope(cpu, r->mon_scope);
- struct rdt_hw_domain *hw_dom;
+ struct rdt_hw_mon_domain *hw_dom;
struct rdt_domain_hdr *hdr;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;

lockdep_assert_held(&domain_list_lock);

@@ -697,15 +702,15 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
return;

- d = container_of(hdr, struct rdt_domain, hdr);
- hw_dom = resctrl_to_arch_dom(d);
+ d = container_of(hdr, struct rdt_mon_domain, hdr);
+ hw_dom = resctrl_to_arch_mon_dom(d);

cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
if (cpumask_empty(&d->hdr.cpu_mask)) {
resctrl_offline_mon_domain(r, d);
list_del_rcu(&d->hdr.list);
synchronize_rcu();
- domain_free(hw_dom);
+ mon_domain_free(hw_dom);

return;
}
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 6a899e22b84e..d6bf88c1ebe2 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -60,7 +60,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
}

int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d)
+ struct rdt_ctrl_domain *d)
{
struct resctrl_staged_config *cfg;
u32 closid = data->rdtgrp->closid;
@@ -139,7 +139,7 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
* resource type.
*/
int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
- struct rdt_domain *d)
+ struct rdt_ctrl_domain *d)
{
struct rdtgroup *rdtgrp = data->rdtgrp;
struct resctrl_staged_config *cfg;
@@ -208,8 +208,8 @@ static int parse_line(char *line, struct resctrl_schema *s,
struct resctrl_staged_config *cfg;
struct rdt_resource *r = s->res;
struct rdt_parse_data data;
+ struct rdt_ctrl_domain *d;
char *dom = NULL, *id;
- struct rdt_domain *d;
unsigned long dom_id;

/* Walking r->domains, ensure it can't race with cpuhp */
@@ -272,11 +272,11 @@ static u32 get_config_index(u32 closid, enum resctrl_conf_type type)
}
}

-int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
+int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type t, u32 cfg_val)
{
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
u32 idx = get_config_index(closid, t);
struct msr_param msr_param;

@@ -297,10 +297,10 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
{
struct resctrl_staged_config *cfg;
- struct rdt_hw_domain *hw_dom;
+ struct rdt_hw_ctrl_domain *hw_dom;
struct msr_param msr_param;
+ struct rdt_ctrl_domain *d;
enum resctrl_conf_type t;
- struct rdt_domain *d;
int cpu;
u32 idx;

@@ -308,7 +308,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
lockdep_assert_cpus_held();

list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- hw_dom = resctrl_to_arch_dom(d);
+ hw_dom = resctrl_to_arch_ctrl_dom(d);
msr_param.res = NULL;
msr_param.dom = d;
for (t = 0; t < CDP_NUM_TYPES; t++) {
@@ -432,10 +432,10 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
return ret ?: nbytes;
}

-u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
+u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
u32 closid, enum resctrl_conf_type type)
{
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_ctrl_domain *hw_dom = resctrl_to_arch_ctrl_dom(d);
u32 idx = get_config_index(closid, type);

return hw_dom->ctrl_val[idx];
@@ -444,7 +444,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int closid)
{
struct rdt_resource *r = schema->res;
- struct rdt_domain *dom;
+ struct rdt_ctrl_domain *dom;
bool sep = false;
u32 ctrl_val;

@@ -516,7 +516,7 @@ static int smp_mon_event_count(void *arg)
}

void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
- struct rdt_domain *d, struct rdtgroup *rdtgrp,
+ struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
int evtid, int first)
{
int cpu;
@@ -559,11 +559,11 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
struct kernfs_open_file *of = m->private;
struct rdt_domain_hdr *hdr;
+ struct rdt_mon_domain *d;
u32 resid, evtid, domid;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
union mon_data_bits md;
- struct rdt_domain *d;
struct rmid_read rr;
int ret = 0;

@@ -584,7 +584,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
ret = -ENOENT;
goto out;
}
- d = container_of(hdr, struct rdt_domain, hdr);
+ d = container_of(hdr, struct rdt_mon_domain, hdr);

mon_event_read(&rr, r, d, rdtgrp, evtid, false);

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index b7d831712dc4..08d53ce61d01 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -208,7 +208,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
return 0;
}

-static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
+static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
u32 rmid,
enum resctrl_event_id eventid)
{
@@ -227,11 +227,11 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
return NULL;
}

-void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
+void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid,
enum resctrl_event_id eventid)
{
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
struct arch_mbm_state *am;

am = get_arch_mbm_state(hw_dom, rmid, eventid);
@@ -247,9 +247,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
* Assumes that hardware counters are also reset and thus that there is
* no need to record initial non-zero counts.
*/
-void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d)
+void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
{
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);

if (is_mbm_total_enabled())
memset(hw_dom->arch_mbm_total, 0,
@@ -268,12 +268,12 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
return chunks >> shift;
}

-int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 unused, u32 rmid, enum resctrl_event_id eventid,
u64 *val, void *ignored)
{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct arch_mbm_state *am;
u64 msr_val, chunks;
int ret;
@@ -319,7 +319,7 @@ static void limbo_release_entry(struct rmid_entry *entry)
* decrement the count. If the busy count gets to zero on an RMID, we
* free the RMID
*/
-void __check_limbo(struct rdt_domain *d, bool force_free)
+void __check_limbo(struct rdt_mon_domain *d, bool force_free)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
@@ -367,7 +367,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
}

-bool has_busy_rmid(struct rdt_domain *d)
+bool has_busy_rmid(struct rdt_mon_domain *d)
{
u32 idx_limit = resctrl_arch_system_num_rmid_idx();

@@ -468,7 +468,7 @@ int alloc_rmid(u32 closid)
static void add_rmid_to_limbo(struct rmid_entry *entry)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;
u32 idx;

lockdep_assert_held(&rdtgroup_mutex);
@@ -520,7 +520,7 @@ void free_rmid(u32 closid, u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}

-static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 closid,
+static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
u32 rmid, enum resctrl_event_id evtid)
{
u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
@@ -656,12 +656,12 @@ void mon_event_count(void *info)
* throttle MSRs already have low percentage values. To avoid
* unnecessarily restricting such rdtgroups, we also increase the bandwidth.
*/
-static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
+static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
{
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
+ struct rdt_ctrl_domain *dom_mba;
struct rdt_resource *r_mba;
- struct rdt_domain *dom_mba;
u32 cur_bw, user_bw, idx;
struct list_head *head;
struct rdtgroup *entry;
@@ -722,7 +722,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
}

-static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
+static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
u32 closid, u32 rmid)
{
struct rmid_read rr;
@@ -780,12 +780,12 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
void cqm_handle_limbo(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;

cpus_read_lock();
mutex_lock(&rdtgroup_mutex);

- d = container_of(work, struct rdt_domain, cqm_limbo.work);
+ d = container_of(work, struct rdt_mon_domain, cqm_limbo.work);

__check_limbo(d, false);

@@ -808,7 +808,7 @@ void cqm_handle_limbo(struct work_struct *work)
* @exclude_cpu: Which CPU the handler should not run on,
* RESCTRL_PICK_ANY_CPU to pick any CPU.
*/
-void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
+void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
@@ -825,9 +825,9 @@ void mbm_handle_overflow(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
struct rdtgroup *prgrp, *crgrp;
+ struct rdt_mon_domain *d;
struct list_head *head;
struct rdt_resource *r;
- struct rdt_domain *d;

cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
@@ -840,7 +840,7 @@ void mbm_handle_overflow(struct work_struct *work)
goto out_unlock;

r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- d = container_of(work, struct rdt_domain, mbm_over.work);
+ d = container_of(work, struct rdt_mon_domain, mbm_over.work);

list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
@@ -874,7 +874,7 @@ void mbm_handle_overflow(struct work_struct *work)
* @exclude_cpu: Which CPU the handler should not run on,
* RESCTRL_PICK_ANY_CPU to pick any CPU.
*/
-void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms,
+void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
int exclude_cpu)
{
unsigned long delay = msecs_to_jiffies(delay_ms);
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 2a48eedafe3d..e6e3ad2f8a2e 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -814,7 +814,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
* Return: true if @cbm overlaps with pseudo-locked region on @d, false
* otherwise.
*/
-bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
+bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
{
unsigned int cbm_len;
unsigned long cbm_b;
@@ -841,11 +841,11 @@ bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm
* if it is not possible to test due to memory allocation issue,
* false otherwise.
*/
-bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
+bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
{
+ struct rdt_ctrl_domain *d_i;
cpumask_var_t cpu_with_psl;
struct rdt_resource *r;
- struct rdt_domain *d_i;
bool ret = false;

/* Walking r->domains, ensure it can't race with cpuhp */
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 01ed7bab1002..821c32876cc1 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -92,8 +92,8 @@ void rdt_last_cmd_printf(const char *fmt, ...)

void rdt_staged_configs_clear(void)
{
+ struct rdt_ctrl_domain *dom;
struct rdt_resource *r;
- struct rdt_domain *dom;

lockdep_assert_held(&rdtgroup_mutex);

@@ -1012,7 +1012,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
unsigned long sw_shareable = 0, hw_shareable = 0;
unsigned long exclusive = 0, pseudo_locked = 0;
struct rdt_resource *r = s->res;
- struct rdt_domain *dom;
+ struct rdt_ctrl_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
bool sep = false;
@@ -1243,7 +1243,7 @@ static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
*
* Return: false if CBM does not overlap, true if it does.
*/
-static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
+static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid,
enum resctrl_conf_type type, bool exclusive)
{
@@ -1298,7 +1298,7 @@ static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d
*
* Return: true if CBM overlap detected, false if there is no overlap
*/
-bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
+bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
unsigned long cbm, int closid, bool exclusive)
{
enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
@@ -1329,10 +1329,10 @@ bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
int closid = rdtgrp->closid;
+ struct rdt_ctrl_domain *d;
struct resctrl_schema *s;
struct rdt_resource *r;
bool has_cache = false;
- struct rdt_domain *d;
u32 ctrl;

/* Walking r->domains, ensure it can't race with cpuhp */
@@ -1448,7 +1448,7 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
* bitmap functions work correctly.
*/
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
- struct rdt_domain *d, unsigned long cbm)
+ struct rdt_ctrl_domain *d, unsigned long cbm)
{
struct cpu_cacheinfo *ci;
unsigned int size = 0;
@@ -1480,9 +1480,9 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
{
struct resctrl_schema *schema;
enum resctrl_conf_type type;
+ struct rdt_ctrl_domain *d;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
- struct rdt_domain *d;
unsigned int size;
int ret = 0;
u32 closid;
@@ -1594,7 +1594,7 @@ static void mon_event_config_read(void *info)
mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
}

-static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
+static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info)
{
smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1);
}
@@ -1602,7 +1602,7 @@ static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mo
static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
{
struct mon_config_info mon_info = {0};
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
bool sep = false;

cpus_read_lock();
@@ -1661,7 +1661,7 @@ static void mon_event_config_write(void *info)
}

static void mbm_config_write_domain(struct rdt_resource *r,
- struct rdt_domain *d, u32 evtid, u32 val)
+ struct rdt_mon_domain *d, u32 evtid, u32 val)
{
struct mon_config_info mon_info = {0};

@@ -1702,7 +1702,7 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
char *dom_str = NULL, *id_str;
unsigned long dom_id, val;
- struct rdt_domain *d;
+ struct rdt_mon_domain *d;

/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();
@@ -2261,9 +2261,9 @@ static inline bool is_mba_linear(void)
static int set_cache_qos_cfg(int level, bool enable)
{
void (*update)(void *arg);
+ struct rdt_ctrl_domain *d;
struct rdt_resource *r_l;
cpumask_var_t cpu_mask;
- struct rdt_domain *d;
int cpu;

/* Walking r->domains, ensure it can't race with cpuhp */
@@ -2313,7 +2313,7 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
l3_qos_cfg_update(&hw_res->cdp_enabled);
}

-static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
+static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
u32 num_closid = resctrl_arch_get_num_closid(r);
int cpu = cpumask_any(&d->hdr.cpu_mask);
@@ -2331,7 +2331,7 @@ static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
}

static void mba_sc_domain_destroy(struct rdt_resource *r,
- struct rdt_domain *d)
+ struct rdt_ctrl_domain *d)
{
kfree(d->mbps_val);
d->mbps_val = NULL;
@@ -2357,7 +2357,7 @@ static int set_mba_sc(bool mba_sc)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
u32 num_closid = resctrl_arch_get_num_closid(r);
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int i;

if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
@@ -2629,7 +2629,7 @@ static int rdt_get_tree(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
unsigned long flags = RFTYPE_CTRL_BASE;
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
struct rdt_resource *r;
int ret;

@@ -2814,9 +2814,9 @@ static int rdt_init_fs_context(struct fs_context *fc)
static int reset_all_ctrls(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- struct rdt_hw_domain *hw_dom;
+ struct rdt_hw_ctrl_domain *hw_dom;
struct msr_param msr_param;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int cpu;
int i;

@@ -2833,7 +2833,7 @@ static int reset_all_ctrls(struct rdt_resource *r)
* from each domain to update the MSRs below.
*/
list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- hw_dom = resctrl_to_arch_dom(d);
+ hw_dom = resctrl_to_arch_ctrl_dom(d);
cpu = cpumask_any(&d->hdr.cpu_mask);

for (i = 0; i < hw_res->num_closid; i++)
@@ -3027,7 +3027,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
}

static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
- struct rdt_domain *d,
+ struct rdt_mon_domain *d,
struct rdt_resource *r, struct rdtgroup *prgrp)
{
union mon_data_bits priv;
@@ -3076,7 +3076,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
* and "monitor" groups with given domain id.
*/
static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
- struct rdt_domain *d)
+ struct rdt_mon_domain *d)
{
struct kernfs_node *parent_kn;
struct rdtgroup *prgrp, *crgrp;
@@ -3098,7 +3098,7 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
struct rdt_resource *r,
struct rdtgroup *prgrp)
{
- struct rdt_domain *dom;
+ struct rdt_mon_domain *dom;
int ret;

/* Walking r->domains, ensure it can't race with cpuhp */
@@ -3203,7 +3203,7 @@ static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
* Set the RDT domain up to start off with all usable allocations. That is,
* all shareable and unused bits. All-zero CBM is invalid.
*/
-static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
+static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
u32 closid)
{
enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
@@ -3283,7 +3283,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
*/
static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
{
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;
int ret;

list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
@@ -3299,7 +3299,7 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
{
struct resctrl_staged_config *cfg;
- struct rdt_domain *d;
+ struct rdt_ctrl_domain *d;

list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (is_mba_sc(r)) {
@@ -3925,14 +3925,14 @@ static void __init rdtgroup_setup_default(void)
mutex_unlock(&rdtgroup_mutex);
}

-static void domain_destroy_mon_state(struct rdt_domain *d)
+static void domain_destroy_mon_state(struct rdt_mon_domain *d)
{
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
}

-void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d)
+void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
mutex_lock(&rdtgroup_mutex);

@@ -3942,7 +3942,7 @@ void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d)
mutex_unlock(&rdtgroup_mutex);
}

-void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d)
+void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
{
mutex_lock(&rdtgroup_mutex);

@@ -3973,7 +3973,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d)
mutex_unlock(&rdtgroup_mutex);
}

-static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
+static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
{
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;
@@ -4004,7 +4004,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
return 0;
}

-int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d)
+int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
int err = 0;

@@ -4020,7 +4020,7 @@ int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d)
return err;
}

-int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d)
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
{
int err;

@@ -4075,8 +4075,8 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
void resctrl_offline_cpu(unsigned int cpu)
{
struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ struct rdt_mon_domain *d;
struct rdtgroup *rdtgrp;
- struct rdt_domain *d;

mutex_lock(&rdtgroup_mutex);
list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
--
2.43.0


2024-02-28 19:39:52

by Luck, Tony

[permalink] [raw]
Subject: [PATCH v4 1/2] x86/resctrl: Pass domain to target CPU

reset_all_ctrls() and resctrl_arch_update_domains() use on_each_cpu_mask()
to call rdt_ctrl_update() on potentially one CPU from each domain.

But this means rdt_ctrl_update() needs to figure out which domain to
apply changes to. Doing so requires a search of all domains in a resource,
which can only be done safely if cpus_lock is held. Both callers do hold
this lock, but there isn't a way for a function called on another CPU
via IPI to verify this.
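
In sketch form (simplified from the code removed below),
rdt_ctrl_update() ran as an IPI handler and had to find its own domain:

	d = get_domain_from_cpu(smp_processor_id(), r); /* needs cpus_lock */

but lockdep cannot verify in the handler that the *sender* holds
cpus_read_lock().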

Commit c0d848fcb09d ("x86/resctrl: Remove lockdep annotation that triggers
false positive") removed the incorrect assertions.

Add the target domain to the msr_param structure and
call rdt_ctrl_update() for each domain separately using
smp_call_function_single(). This means that rdt_ctrl_update() doesn't
need to search for the domain and get_domain_from_cpu() can safely assert
that the cpus_lock is held since the remaining callers do not use IPI.
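
A minimal sketch of the resulting update loop (simplified; the
msr_param range setup and CDP handling are omitted):

	list_for_each_entry(d, &r->domains, list) {
		msr_param.dom = d;	/* target domain chosen by the sender */
		cpu = cpumask_any(&d->cpu_mask);
		/* One IPI per domain; the handler no longer searches */
		smp_call_function_single(cpu, rdt_ctrl_update, &msr_param, 1);
	}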

Signed-off-by: Tony Luck <[email protected]>
---
arch/x86/kernel/cpu/resctrl/internal.h | 2 ++
arch/x86/kernel/cpu/resctrl/core.c | 17 ++++------
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 40 +++++------------------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 14 +++-----
4 files changed, 21 insertions(+), 52 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index c99f26ebe7a6..bc999471f072 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -378,11 +378,13 @@ static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
/**
* struct msr_param - set a range of MSRs from a domain
* @res: The resource to use
+ * @dom: The domain to update
* @low: Beginning index from base MSR
* @high: End index
*/
struct msr_param {
struct rdt_resource *res;
+ struct rdt_domain *dom;
u32 low;
u32 high;
};
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 83e40341583e..acf52aa185e0 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -362,6 +362,8 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
{
struct rdt_domain *d;

+ lockdep_assert_cpus_held();
+
list_for_each_entry(d, &r->domains, list) {
/* Find the domain that contains this CPU */
if (cpumask_test_cpu(cpu, &d->cpu_mask))
@@ -378,19 +380,11 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r)

void rdt_ctrl_update(void *arg)
{
+ struct rdt_hw_resource *hw_res;
struct msr_param *m = arg;
- struct rdt_hw_resource *hw_res = resctrl_to_arch_res(m->res);
- struct rdt_resource *r = m->res;
- int cpu = smp_processor_id();
- struct rdt_domain *d;

- d = get_domain_from_cpu(cpu, r);
- if (d) {
- hw_res->msr_update(d, m, r);
- return;
- }
- pr_warn_once("cpu %d not found in any domain for resource %s\n",
- cpu, r->name);
+ hw_res = resctrl_to_arch_res(m->res);
+ hw_res->msr_update(m->dom, m, m->res);
}

/*
@@ -463,6 +457,7 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
hw_dom->ctrl_val = dc;
setup_default_ctrlval(r, dc);

+ m.dom = d;
m.low = 0;
m.high = hw_res->num_closid;
hw_res->msr_update(d, &m, r);
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 7997b47743a2..a3a0fd80daa8 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -272,22 +272,6 @@ static u32 get_config_index(u32 closid, enum resctrl_conf_type type)
}
}

-static bool apply_config(struct rdt_hw_domain *hw_dom,
- struct resctrl_staged_config *cfg, u32 idx,
- cpumask_var_t cpu_mask)
-{
- struct rdt_domain *dom = &hw_dom->d_resctrl;
-
- if (cfg->new_ctrl != hw_dom->ctrl_val[idx]) {
- cpumask_set_cpu(cpumask_any(&dom->cpu_mask), cpu_mask);
- hw_dom->ctrl_val[idx] = cfg->new_ctrl;
-
- return true;
- }
-
- return false;
-}
-
int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, enum resctrl_conf_type t, u32 cfg_val)
{
@@ -302,6 +286,7 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
hw_dom->ctrl_val[idx] = cfg_val;

msr_param.res = r;
+ msr_param.dom = d;
msr_param.low = idx;
msr_param.high = idx + 1;
hw_res->msr_update(d, &msr_param, r);
@@ -315,27 +300,27 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
struct rdt_hw_domain *hw_dom;
struct msr_param msr_param;
enum resctrl_conf_type t;
- cpumask_var_t cpu_mask;
struct rdt_domain *d;
+ int cpu;
u32 idx;

/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

- if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
- return -ENOMEM;
-
- msr_param.res = NULL;
list_for_each_entry(d, &r->domains, list) {
hw_dom = resctrl_to_arch_dom(d);
+ msr_param.res = NULL;
+ msr_param.dom = d;
for (t = 0; t < CDP_NUM_TYPES; t++) {
cfg = &hw_dom->d_resctrl.staged_config[t];
if (!cfg->have_new_ctrl)
continue;

idx = get_config_index(closid, t);
- if (!apply_config(hw_dom, cfg, idx, cpu_mask))
+ if (cfg->new_ctrl == hw_dom->ctrl_val[idx])
continue;
+ hw_dom->ctrl_val[idx] = cfg->new_ctrl;
+ cpu = cpumask_any(&d->cpu_mask);

if (!msr_param.res) {
msr_param.low = idx;
@@ -346,17 +331,10 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
msr_param.high = max(msr_param.high, idx + 1);
}
}
+ if (msr_param.res)
+ smp_call_function_single(cpu, rdt_ctrl_update, &msr_param, 1);
}

- if (cpumask_empty(cpu_mask))
- goto done;
-
- /* Update resource control msr on all the CPUs. */
- on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1);
-
-done:
- free_cpumask_var(cpu_mask);
-
return 0;
}

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 011e17efb1a6..da4f13db4161 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2813,16 +2813,13 @@ static int reset_all_ctrls(struct rdt_resource *r)
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
struct rdt_hw_domain *hw_dom;
struct msr_param msr_param;
- cpumask_var_t cpu_mask;
struct rdt_domain *d;
+ int cpu;
int i;

/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

- if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
- return -ENOMEM;
-
msr_param.res = r;
msr_param.low = 0;
msr_param.high = hw_res->num_closid;
@@ -2834,17 +2831,14 @@ static int reset_all_ctrls(struct rdt_resource *r)
*/
list_for_each_entry(d, &r->domains, list) {
hw_dom = resctrl_to_arch_dom(d);
- cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+ cpu = cpumask_any(&d->cpu_mask);

for (i = 0; i < hw_res->num_closid; i++)
hw_dom->ctrl_val[i] = r->default_ctrl;
+ msr_param.dom = d;
+ smp_call_function_single(cpu, rdt_ctrl_update, &msr_param, 1);
}

- /* Update CBM on all the CPUs in cpu_mask */
- on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1);
-
- free_cpumask_var(cpu_mask);
-
return 0;
}

--
2.43.0


2024-02-28 19:40:29

by Luck, Tony

[permalink] [raw]
Subject: [PATCH v15 3/8] x86/resctrl: Prepare for different scope for control/monitor operations

Resctrl assumes that control and monitor operations on a resource are
performed at the same scope.

Prepare for systems that use a different scope for each (specifically,
Intel needs to split the RDT_RESOURCE_L3 resource to use L3 scope for
cache control and NODE scope for cache occupancy and memory bandwidth
monitoring).

Create separate domain lists for control and monitor operations.

Note that errors during initialization of either control or monitor
functions on a domain would previously result in that domain being
excluded from both control and monitor operations. Now that the domains
are allocated independently, it is no longer necessary to disable both
control and monitor operations if either fails.
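
The CPU hotplug path then dispatches to each list independently, as in
this sketch taken (in simplified form) from the hunks below:

	static void domain_add_cpu(int cpu, struct rdt_resource *r)
	{
		/* Each side is set up, and may fail, on its own */
		if (r->alloc_capable)
			domain_add_cpu_ctrl(cpu, r);
		if (r->mon_capable)
			domain_add_cpu_mon(cpu, r);
	}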

Signed-off-by: Tony Luck <[email protected]>
---
include/linux/resctrl.h | 25 ++-
arch/x86/kernel/cpu/resctrl/internal.h | 7 +-
arch/x86/kernel/cpu/resctrl/core.c | 228 ++++++++++++++++------
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 12 +-
arch/x86/kernel/cpu/resctrl/monitor.c | 4 +-
arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 4 +-
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 60 +++---
7 files changed, 240 insertions(+), 100 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index f63fcf17a3bc..8b4def50430a 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -58,15 +58,22 @@ struct resctrl_staged_config {
bool have_new_ctrl;
};

+enum resctrl_domain_type {
+ RESCTRL_CTRL_DOMAIN,
+ RESCTRL_MON_DOMAIN,
+};
+
/**
* struct rdt_domain_hdr - common header for different domain types
* @list: all instances of this resource
* @id: unique id for this instance
+ * @type: type of this instance
* @cpu_mask: which CPUs share this resource
*/
struct rdt_domain_hdr {
struct list_head list;
int id;
+ enum resctrl_domain_type type;
struct cpumask cpu_mask;
};

@@ -169,10 +176,12 @@ enum resctrl_scope {
* @alloc_capable: Is allocation available on this machine
* @mon_capable: Is monitor feature available on this machine
* @num_rmid: Number of RMIDs available
- * @scope: Scope of this resource
+ * @ctrl_scope: Scope of this resource for control functions
+ * @mon_scope: Scope of this resource for monitor functions
* @cache: Cache allocation related data
* @membw: If the component has bandwidth controls, their properties.
- * @domains: RCU list of all domains for this resource
+ * @ctrl_domains: Control domains for this resource
+ * @mon_domains: Monitor domains for this resource
* @name: Name to use in "schemata" file.
* @data_width: Character width of data when displaying
* @default_ctrl: Specifies default cache cbm or memory B/W percent.
@@ -187,10 +196,12 @@ struct rdt_resource {
bool alloc_capable;
bool mon_capable;
int num_rmid;
- enum resctrl_scope scope;
+ enum resctrl_scope ctrl_scope;
+ enum resctrl_scope mon_scope;
struct resctrl_cache cache;
struct resctrl_membw membw;
- struct list_head domains;
+ struct list_head ctrl_domains;
+ struct list_head mon_domains;
char *name;
int data_width;
u32 default_ctrl;
@@ -236,8 +247,10 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,

u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, enum resctrl_conf_type type);
-int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
-void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
+int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
void resctrl_online_cpu(unsigned int cpu);
void resctrl_offline_cpu(unsigned int cpu);

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 8f40fb35db78..1a251cb5f20f 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -557,8 +557,8 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn);
int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
umode_t mask);
-struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
- struct list_head **pos);
+struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
+ struct list_head **pos);
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
@@ -577,7 +577,8 @@ int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
-struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
+struct rdt_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r);
+struct rdt_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(u32 closid);
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index e7f8ab271d86..66a5a270d66f 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -60,7 +60,8 @@ static void mba_wrmsr_intel(struct msr_param *m);
static void cat_wrmsr(struct msr_param *m);
static void mba_wrmsr_amd(struct msr_param *m);

-#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)
+#define ctrl_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.ctrl_domains)
+#define mon_domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.mon_domains)

struct rdt_hw_resource rdt_resources_all[] = {
[RDT_RESOURCE_L3] =
@@ -68,8 +69,10 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_L3,
.name = "L3",
- .scope = RESCTRL_L3_CACHE,
- .domains = domain_init(RDT_RESOURCE_L3),
+ .ctrl_scope = RESCTRL_L3_CACHE,
+ .mon_scope = RESCTRL_L3_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L3),
+ .mon_domains = mon_domain_init(RDT_RESOURCE_L3),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
.fflags = RFTYPE_RES_CACHE,
@@ -82,8 +85,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_L2,
.name = "L2",
- .scope = RESCTRL_L2_CACHE,
- .domains = domain_init(RDT_RESOURCE_L2),
+ .ctrl_scope = RESCTRL_L2_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_L2),
.parse_ctrlval = parse_cbm,
.format_str = "%d=%0*x",
.fflags = RFTYPE_RES_CACHE,
@@ -96,8 +99,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_MBA,
.name = "MB",
- .scope = RESCTRL_L3_CACHE,
- .domains = domain_init(RDT_RESOURCE_MBA),
+ .ctrl_scope = RESCTRL_L3_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_MBA),
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
@@ -108,8 +111,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
.r_resctrl = {
.rid = RDT_RESOURCE_SMBA,
.name = "SMBA",
- .scope = RESCTRL_L3_CACHE,
- .domains = domain_init(RDT_RESOURCE_SMBA),
+ .ctrl_scope = RESCTRL_L3_CACHE,
+ .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA),
.parse_ctrlval = parse_bw,
.format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
@@ -349,13 +352,28 @@ static void cat_wrmsr(struct msr_param *m)
wrmsrl(hw_res->msr_base + i, hw_dom->ctrl_val[i]);
}

-struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
+struct rdt_domain *get_ctrl_domain_from_cpu(int cpu, struct rdt_resource *r)
{
struct rdt_domain *d;

lockdep_assert_cpus_held();

- list_for_each_entry(d, &r->domains, hdr.list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
+ /* Find the domain that contains this CPU */
+ if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
+ return d;
+ }
+
+ return NULL;
+}
+
+struct rdt_domain *get_mon_domain_from_cpu(int cpu, struct rdt_resource *r)
+{
+ struct rdt_domain *d;
+
+ lockdep_assert_cpus_held();
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
/* Find the domain that contains this CPU */
if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
return d;
@@ -379,26 +397,26 @@ void rdt_ctrl_update(void *arg)
}

/*
- * rdt_find_domain - Find a domain in a resource that matches input resource id
+ * rdt_find_domain - Search for a domain id in a resource domain list.
*
- * Search resource r's domain list to find the resource id. If the resource
- * id is found in a domain, return the domain. Otherwise, if requested by
- * caller, return the first domain whose id is bigger than the input id.
- * The domain list is sorted by id in ascending order.
+ * Search the domain list to find the domain id. If the domain id is
+ * found, return the domain. NULL otherwise. If the domain id is not
+ * found (and NULL returned) then the first domain with id bigger than
+ * the input id can be returned to the caller via @pos.
*/
-struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
- struct list_head **pos)
+struct rdt_domain_hdr *rdt_find_domain(struct list_head *h, int id,
+ struct list_head **pos)
{
- struct rdt_domain *d;
+ struct rdt_domain_hdr *d;
struct list_head *l;

- list_for_each(l, &r->domains) {
- d = list_entry(l, struct rdt_domain, hdr.list);
+ list_for_each(l, h) {
+ d = list_entry(l, struct rdt_domain_hdr, list);
/* When id is found, return its domain. */
- if (id == d->hdr.id)
+ if (id == d->id)
return d;
/* Stop searching when finding id's position in sorted list. */
- if (id < d->hdr.id)
+ if (id < d->id)
break;
}

@@ -494,42 +512,29 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
return -EINVAL;
}

-/*
- * domain_add_cpu - Add a cpu to a resource's domain list.
- *
- * If an existing domain in the resource r's domain list matches the cpu's
- * resource id, add the cpu in the domain.
- *
- * Otherwise, a new domain is allocated and inserted into the right position
- * in the domain list sorted by id in ascending order.
- *
- * The order in the domain list is visible to users when we print entries
- * in the schemata file and schemata input is validated to have the same order
- * as this list.
- */
-static void domain_add_cpu(int cpu, struct rdt_resource *r)
+static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
{
- int id = get_domain_id_from_scope(cpu, r->scope);
+ int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
struct list_head *add_pos = NULL;
struct rdt_hw_domain *hw_dom;
+ struct rdt_domain_hdr *hdr;
struct rdt_domain *d;
int err;

lockdep_assert_held(&domain_list_lock);

if (id < 0) {
- pr_warn_once("Can't find domain id for CPU:%d scope:%d for resource %s\n",
- cpu, r->scope, r->name);
+ pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->ctrl_scope, r->name);
return;
}

- d = rdt_find_domain(r, id, &add_pos);
- if (IS_ERR(d)) {
- pr_warn("Couldn't find cache id for CPU %d\n", cpu);
- return;
- }
+ hdr = rdt_find_domain(&r->ctrl_domains, id, &add_pos);
+ if (hdr) {
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
+ return;
+ d = container_of(hdr, struct rdt_domain, hdr);

- if (d) {
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
if (r->cache.arch_has_per_cpu_cfg)
rdt_domain_reconfigure_cdp(r);
@@ -542,23 +547,70 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)

d = &hw_dom->d_resctrl;
d->hdr.id = id;
+ d->hdr.type = RESCTRL_CTRL_DOMAIN;
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);

rdt_domain_reconfigure_cdp(r);

- if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
+ if (domain_setup_ctrlval(r, d)) {
domain_free(hw_dom);
return;
}

- if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
+ list_add_tail_rcu(&d->hdr.list, add_pos);
+
+ err = resctrl_online_ctrl_domain(r, d);
+ if (err) {
+ list_del_rcu(&d->hdr.list);
+ synchronize_rcu();
+ domain_free(hw_dom);
+ }
+}
+
+static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
+{
+ int id = get_domain_id_from_scope(cpu, r->mon_scope);
+ struct list_head *add_pos = NULL;
+ struct rdt_hw_domain *hw_dom;
+ struct rdt_domain_hdr *hdr;
+ struct rdt_domain *d;
+ int err;
+
+ lockdep_assert_held(&domain_list_lock);
+
+ if (id < 0) {
+ pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->mon_scope, r->name);
+ return;
+ }
+
+ hdr = rdt_find_domain(&r->mon_domains, id, &add_pos);
+ if (hdr) {
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
+ return;
+ d = container_of(hdr, struct rdt_domain, hdr);
+
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+ return;
+ }
+
+ hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
+ if (!hw_dom)
+ return;
+
+ d = &hw_dom->d_resctrl;
+ d->hdr.id = id;
+ d->hdr.type = RESCTRL_MON_DOMAIN;
+ cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
+
+ if (arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
domain_free(hw_dom);
return;
}

list_add_tail_rcu(&d->hdr.list, add_pos);

- err = resctrl_online_domain(r, d);
+ err = resctrl_online_mon_domain(r, d);
if (err) {
list_del_rcu(&d->hdr.list);
synchronize_rcu();
@@ -566,30 +618,45 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
}
}

-static void domain_remove_cpu(int cpu, struct rdt_resource *r)
+static void domain_add_cpu(int cpu, struct rdt_resource *r)
+{
+ if (r->alloc_capable)
+ domain_add_cpu_ctrl(cpu, r);
+ if (r->mon_capable)
+ domain_add_cpu_mon(cpu, r);
+}
+
+static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
{
- int id = get_domain_id_from_scope(cpu, r->scope);
+ int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
struct rdt_hw_domain *hw_dom;
+ struct rdt_domain_hdr *hdr;
struct rdt_domain *d;

lockdep_assert_held(&domain_list_lock);

if (id < 0) {
- pr_warn_once("Can't find domain id for CPU:%d scope:%d for resource %s\n",
- cpu, r->scope, r->name);
+ pr_warn_once("Can't find control domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->ctrl_scope, r->name);
return;
}

- d = rdt_find_domain(r, id, NULL);
- if (!d) {
- pr_warn("Couldn't find domain with id=%d for CPU %d\n", id, cpu);
+ hdr = rdt_find_domain(&r->ctrl_domains, id, NULL);
+ if (!hdr) {
+ pr_warn("Can't find control domain for id=%d for CPU %d for resource %s\n",
+ id, cpu, r->name);
return;
}
+
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN))
+ return;
+
+ d = container_of(hdr, struct rdt_domain, hdr);
hw_dom = resctrl_to_arch_dom(d);

cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
if (cpumask_empty(&d->hdr.cpu_mask)) {
- resctrl_offline_domain(r, d);
+ resctrl_offline_ctrl_domain(r, d);
list_del_rcu(&d->hdr.list);
synchronize_rcu();

@@ -605,6 +672,53 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
}
}

+static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
+{
+ int id = get_domain_id_from_scope(cpu, r->mon_scope);
+ struct rdt_hw_domain *hw_dom;
+ struct rdt_domain_hdr *hdr;
+ struct rdt_domain *d;
+
+ lockdep_assert_held(&domain_list_lock);
+
+ if (id < 0) {
+ pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n",
+ cpu, r->mon_scope, r->name);
+ return;
+ }
+
+ hdr = rdt_find_domain(&r->mon_domains, id, NULL);
+ if (!hdr) {
+ pr_warn("Can't find monitor domain for id=%d for CPU %d for resource %s\n",
+ id, cpu, r->name);
+ return;
+ }
+
+ if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN))
+ return;
+
+ d = container_of(hdr, struct rdt_domain, hdr);
+ hw_dom = resctrl_to_arch_dom(d);
+
+ cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
+ if (cpumask_empty(&d->hdr.cpu_mask)) {
+ resctrl_offline_mon_domain(r, d);
+ list_del_rcu(&d->hdr.list);
+ synchronize_rcu();
+ domain_free(hw_dom);
+
+ return;
+ }
+}
+
+static void domain_remove_cpu(int cpu, struct rdt_resource *r)
+{
+ if (r->alloc_capable)
+ domain_remove_cpu_ctrl(cpu, r);
+ if (r->mon_capable)
+ domain_remove_cpu_mon(cpu, r);
+}
+
static void clear_closid_rmid(int cpu)
{
struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index f49a96b81346..6a899e22b84e 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -231,7 +231,7 @@ static int parse_line(char *line, struct resctrl_schema *s,
return -EINVAL;
}
dom = strim(dom);
- list_for_each_entry(d, &r->domains, hdr.list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (d->hdr.id == dom_id) {
data.buf = dom;
data.rdtgrp = rdtgrp;
@@ -307,7 +307,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

- list_for_each_entry(d, &r->domains, hdr.list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
hw_dom = resctrl_to_arch_dom(d);
msr_param.res = NULL;
msr_param.dom = d;
@@ -452,7 +452,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
lockdep_assert_cpus_held();

seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(dom, &r->domains, hdr.list) {
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
if (sep)
seq_puts(s, ";");

@@ -558,6 +558,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
int rdtgroup_mondata_show(struct seq_file *m, void *arg)
{
struct kernfs_open_file *of = m->private;
+ struct rdt_domain_hdr *hdr;
u32 resid, evtid, domid;
struct rdtgroup *rdtgrp;
struct rdt_resource *r;
@@ -578,11 +579,12 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
evtid = md.u.evtid;

r = &rdt_resources_all[resid].r_resctrl;
- d = rdt_find_domain(r, domid, NULL);
- if (!d) {
+ hdr = rdt_find_domain(&r->mon_domains, domid, NULL);
+ if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) {
ret = -ENOENT;
goto out;
}
+ d = container_of(hdr, struct rdt_domain, hdr);

mon_event_read(&rr, r, d, rdtgrp, evtid, false);

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 54f7688ee447..b7d831712dc4 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -479,7 +479,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);

entry->busy = 0;
- list_for_each_entry(d, &r->domains, hdr.list) {
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
/*
* For the first limbo RMID in the domain,
* setup up the limbo worker.
@@ -676,7 +676,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
idx = resctrl_arch_rmid_idx_encode(closid, rmid);
pmbm_data = &dom_mbm->mbm_local[idx];

- dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
+ dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
pr_warn_once("Failure to get domain for MBA update\n");
return;
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 52e6935e4c55..2a48eedafe3d 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -292,7 +292,7 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
*/
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
- enum resctrl_scope scope = plr->s->res->scope;
+ enum resctrl_scope scope = plr->s->res->ctrl_scope;
struct cpu_cacheinfo *ci;
int ret;
int i;
@@ -859,7 +859,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
* associated with them.
*/
for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(d_i, &r->domains, hdr.list) {
+ list_for_each_entry(d_i, &r->ctrl_domains, hdr.list) {
if (d_i->plr)
cpumask_or(cpu_with_psl, cpu_with_psl,
&d_i->hdr.cpu_mask);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index ee8a028e6f97..01ed7bab1002 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -98,7 +98,7 @@ void rdt_staged_configs_clear(void)
lockdep_assert_held(&rdtgroup_mutex);

for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(dom, &r->domains, hdr.list)
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
memset(dom->staged_config, 0, sizeof(dom->staged_config));
}
}
@@ -1021,7 +1021,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
- list_for_each_entry(dom, &r->domains, hdr.list) {
+ list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
if (sep)
seq_putc(seq, ';');
sw_shareable = 0;
@@ -1343,7 +1343,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
continue;
has_cache = true;
- list_for_each_entry(d, &r->domains, hdr.list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
ctrl = resctrl_arch_get_config(r, d, closid,
s->conf_type);
if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
@@ -1454,13 +1454,13 @@ unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
unsigned int size = 0;
int num_b, i;

- if (WARN_ON_ONCE(r->scope != RESCTRL_L2_CACHE && r->scope != RESCTRL_L3_CACHE))
+ if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
return size;

num_b = bitmap_weight(&cbm, r->cache.cbm_len);
ci = get_cpu_cacheinfo(cpumask_any(&d->hdr.cpu_mask));
for (i = 0; i < ci->num_leaves; i++) {
- if (ci->info_list[i].level == r->scope) {
+ if (ci->info_list[i].level == r->ctrl_scope) {
size = ci->info_list[i].size / r->cache.cbm_len * num_b;
break;
}
@@ -1518,7 +1518,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
type = schema->conf_type;
sep = false;
seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(d, &r->domains, hdr.list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (sep)
seq_putc(s, ';');
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
@@ -1608,7 +1608,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);

- list_for_each_entry(dom, &r->domains, hdr.list) {
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
if (sep)
seq_puts(s, ";");

@@ -1732,7 +1732,7 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
return -EINVAL;
}

- list_for_each_entry(d, &r->domains, hdr.list) {
+ list_for_each_entry(d, &r->mon_domains, hdr.list) {
if (d->hdr.id == dom_id) {
mbm_config_write_domain(r, d, evtid, val);
goto next;
@@ -2280,7 +2280,7 @@ static int set_cache_qos_cfg(int level, bool enable)
return -ENOMEM;

r_l = &rdt_resources_all[level].r_resctrl;
- list_for_each_entry(d, &r_l->domains, hdr.list) {
+ list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) {
if (r_l->cache.arch_has_per_cpu_cfg)
/* Pick all the CPUs in the domain instance */
for_each_cpu(cpu, &d->hdr.cpu_mask)
@@ -2365,7 +2365,7 @@ static int set_mba_sc(bool mba_sc)

r->membw.mba_sc = mba_sc;

- list_for_each_entry(d, &r->domains, hdr.list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
for (i = 0; i < num_closid; i++)
d->mbps_val[i] = MBA_MAX_MBPS;
}
@@ -2704,7 +2704,7 @@ static int rdt_get_tree(struct fs_context *fc)

if (is_mbm_enabled()) {
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- list_for_each_entry(dom, &r->domains, hdr.list)
+ list_for_each_entry(dom, &r->mon_domains, hdr.list)
mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
RESCTRL_PICK_ANY_CPU);
}
@@ -2829,10 +2829,10 @@ static int reset_all_ctrls(struct rdt_resource *r)

/*
* Disable resource control for this resource by setting all
- * CBMs in all domains to the maximum mask value. Pick one CPU
+ * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU
* from each domain to update the MSRs below.
*/
- list_for_each_entry(d, &r->domains, hdr.list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
hw_dom = resctrl_to_arch_dom(d);
cpu = cpumask_any(&d->hdr.cpu_mask);

@@ -3104,7 +3104,7 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
/* Walking r->domains, ensure it can't race with cpuhp */
lockdep_assert_cpus_held();

- list_for_each_entry(dom, &r->domains, hdr.list) {
+ list_for_each_entry(dom, &r->mon_domains, hdr.list) {
ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
if (ret)
return ret;
@@ -3286,7 +3286,7 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
struct rdt_domain *d;
int ret;

- list_for_each_entry(d, &s->res->domains, hdr.list) {
+ list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
ret = __init_one_rdt_domain(d, s, closid);
if (ret < 0)
return ret;
@@ -3301,7 +3301,7 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
struct resctrl_staged_config *cfg;
struct rdt_domain *d;

- list_for_each_entry(d, &r->domains, hdr.list) {
+ list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
if (is_mba_sc(r)) {
d->mbps_val[closid] = MBA_MAX_MBPS;
continue;
@@ -3932,15 +3932,19 @@ static void domain_destroy_mon_state(struct rdt_domain *d)
kfree(d->mbm_local);
}

-void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
+void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d)
{
mutex_lock(&rdtgroup_mutex);

if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
mba_sc_domain_destroy(r, d);

- if (!r->mon_capable)
- goto out_unlock;
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d)
+{
+ mutex_lock(&rdtgroup_mutex);

/*
* If resctrl is mounted, remove all the
@@ -3966,7 +3970,6 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)

domain_destroy_mon_state(d);

-out_unlock:
mutex_unlock(&rdtgroup_mutex);
}

@@ -4001,7 +4004,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
return 0;
}

-int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
+int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d)
{
int err = 0;

@@ -4010,11 +4013,18 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
/* RDT_RESOURCE_MBA is never mon_capable */
err = mba_sc_domain_allocate(r, d);
- goto out_unlock;
}

- if (!r->mon_capable)
- goto out_unlock;
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
+}
+
+int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d)
+{
+ int err;
+
+ mutex_lock(&rdtgroup_mutex);

err = domain_setup_mon_state(r, d);
if (err)
@@ -4079,7 +4089,7 @@ void resctrl_offline_cpu(unsigned int cpu)
if (!l3->mon_capable)
goto out_unlock;

- d = get_domain_from_cpu(cpu, l3);
+ d = get_mon_domain_from_cpu(cpu, l3);
if (d) {
if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
cancel_delayed_work(&d->mbm_over);
--
2.43.0


2024-03-04 23:07:56

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/resctrl: Pass domain to target CPU

Hi Tony,

On 2/28/2024 11:36 AM, Tony Luck wrote:
> reset_all_ctrls() and resctrl_arch_update_domains() use on_each_cpu_mask()
> to call rdt_ctrl_update() on potentially one CPU from each domain.
>
> But this means rdt_ctrl_update() needs to figure out which domain to
> apply changes to. Doing so requires a search of all domains in a resource,
> which can only be done safely if cpus_lock is held. Both callers do hold
> this lock, but there isn't a way for a function called on another CPU
> via IPI to verify this.
>
> Commit c0d848fcb09d ("x86/resctrl: Remove lockdep annotation that triggers
> false positive") removed the incorrect assertions.
>
> Add the target domain to the msr_param structure and
> call rdt_ctrl_update() for each domain separately using
> smp_call_function_single(). This means that rdt_ctrl_update() doesn't
> need to search for the domain and get_domain_from_cpu() can safely assert
> that the cpus_lock is held since the remaining callers do not use IPI.
>
> Signed-off-by: Tony Luck <[email protected]>
> ---

..

> @@ -315,27 +300,27 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
> struct rdt_hw_domain *hw_dom;
> struct msr_param msr_param;
> enum resctrl_conf_type t;
> - cpumask_var_t cpu_mask;
> struct rdt_domain *d;
> + int cpu;
> u32 idx;
>
> /* Walking r->domains, ensure it can't race with cpuhp */
> lockdep_assert_cpus_held();
>
> - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
> - return -ENOMEM;
> -
> - msr_param.res = NULL;
> list_for_each_entry(d, &r->domains, list) {
> hw_dom = resctrl_to_arch_dom(d);
> + msr_param.res = NULL;
> + msr_param.dom = d;
> for (t = 0; t < CDP_NUM_TYPES; t++) {
> cfg = &hw_dom->d_resctrl.staged_config[t];
> if (!cfg->have_new_ctrl)
> continue;
>
> idx = get_config_index(closid, t);
> - if (!apply_config(hw_dom, cfg, idx, cpu_mask))
> + if (cfg->new_ctrl == hw_dom->ctrl_val[idx])
> continue;
> + hw_dom->ctrl_val[idx] = cfg->new_ctrl;
> + cpu = cpumask_any(&d->cpu_mask);
>

cpu only needs to be assigned once. How about initializing cpu to
nr_cpu_ids at the same time msr_param.res and msr_param.dom are
initialized, and only assigning it when msr_param.res is assigned?
I think that will be more robust.

If you agree and do this then please feel free to add:
Reviewed-by: Reinette Chatre <[email protected]>

Reinette


2024-03-04 23:08:25

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH v4 2/2] x86/resctrl: Simplify call convention for MSR update functions

Hi Tony,

On 2/28/2024 11:36 AM, Tony Luck wrote:
> The per-resource MSR update functions cat_wrmsr(), mba_wrmsr_intel(),
> and mba_wrmsr_amd() all take three arguments:
>
> (struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
>
> struct msr_param contains pointers to both struct rdt_resource and struct
> rdt_domain, thus only struct msr_param is necessary.
>
> Pass struct msr_param as a single parameter. Clean up formatting and
> fix some fir tree declaration ordering.
>
> No functional change.
>
> Suggested-by: Reinette Chatre <[email protected]>
> Signed-off-by: Tony Luck <[email protected]>
> ---

Thank you.

Reviewed-by: Reinette Chatre <[email protected]>

Reinette

2024-03-04 23:15:43

by Reinette Chatre

[permalink] [raw]
Subject: Re: Cover-cover letter for two resctrl patch sets

Hi Tony,

On 2/28/2024 11:36 AM, Tony Luck wrote:
> Hook these two series together in threaded mail clients
> since the SNC series is based on top of the two patch
> cleanup.

Could you please instead send the two series separately
and note the dependency in the cover letter?

Thank you very much

Reinette

2024-03-05 00:18:50

by Luck, Tony

[permalink] [raw]
Subject: RE: [PATCH v4 1/2] x86/resctrl: Pass domain to target CPU

> > + cpu = cpumask_any(&d->cpu_mask);
> >
>
> cpu only needs to be assigned once. How about initializing cpu to
> nr_cpu_ids at the same time msr_param.res and msr_param.dom are
> initialized, and only assigning it when msr_param.res is assigned?
> I think that will be more robust.
>
> If you agree and do this then please feel free to add:
> Reviewed-by: Reinette Chatre <[email protected]>

Reinette,

I agree. I'll move the assignment to "cpu" outside the CDP_NUM_TYPES
loop.

Thanks for the review.

-Tony

2024-03-05 01:31:56

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/resctrl: Pass domain to target CPU

Hi Tony,

On 3/4/2024 4:17 PM, Luck, Tony wrote:
>>> + cpu = cpumask_any(&d->cpu_mask);
>>>
>>
>> cpu only needs to be assigned once. How about initializing cpu to
>> nr_cpu_ids at the same time msr_param.res and msr_param.dom are
>> initialized, and only assigning it when msr_param.res is assigned?
>> I think that will be more robust.
>>
>> If you agree and do this then please feel free to add:
>> Reviewed-by: Reinette Chatre <[email protected]>
>
> Reinette,
>
> I agree. I'll move the assignment to "cpu" outside the CDP_NUM_TYPES
> loop.
>

If I understand correctly, that would always look for a valid "cpu"
even when none is needed. Not quite what I proposed, but it should
work. Just some wasted cycles in a non-critical path.

Reinette

2024-03-05 07:28:33

by Maciej Wieczor-Retman

[permalink] [raw]
Subject: Re: Cover-cover letter for two resctrl patch sets

On 2024-02-28 at 11:36:51 -0800, Tony Luck wrote:
>Hook these two series together in threaded mail clients
>since the SNC series is based on top of the two patch
>cleanup.
>
>-Tony

Hi, I wanted to ask a technical question about mail tooling.

My mailing setup with b4 is capable of downloading series by their message id.
With these two series tied together it for some reason doesn't want to download
the smaller series (even when I pass the smaller series ID it ignores it and
fetches the SNC one).

Do you have some trick or method to download a series with this type of
formatting more easily than manually saving emails?

--
Kind regards
Maciej Wieczór-Retman

2024-03-05 16:52:25

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/resctrl: Pass domain to target CPU

Hi Tony,

On 3/5/2024 8:37 AM, Luck, Tony wrote:
>> If I understand correctly, that would always look for a valid "cpu"
>> even when none is needed. Not quite what I proposed, but it should
>> work. Just some wasted cycles in a non-critical path.
>
> Reinette,
>
> Sorry for misunderstanding. You are right. Assignment to msr_param.dom
> can also be deferred to the same point. Like this:
>
> list_for_each_entry(d, &r->domains, list) {
> hw_dom = resctrl_to_arch_dom(d);
> msr_param.res = NULL;
> for (t = 0; t < CDP_NUM_TYPES; t++) {
> cfg = &hw_dom->d_resctrl.staged_config[t];
> if (!cfg->have_new_ctrl)
> continue;
>
> idx = get_config_index(closid, t);
> if (cfg->new_ctrl == hw_dom->ctrl_val[idx])
> continue;
> hw_dom->ctrl_val[idx] = cfg->new_ctrl;
>
> if (!msr_param.res) {
> msr_param.low = idx;
> msr_param.high = msr_param.low + 1;
> msr_param.res = r;
> msr_param.dom = d;
> cpu = cpumask_any(&d->cpu_mask);
> } else {
> msr_param.low = min(msr_param.low, idx);
> msr_param.high = max(msr_param.high, idx + 1);
> }
> }
> if (msr_param.res)
> smp_call_function_single(cpu, rdt_ctrl_update, &msr_param, 1);
> }

This looks good to me.

Thank you very much.

Reinette


2024-03-05 16:55:19

by Luck, Tony

[permalink] [raw]
Subject: RE: [PATCH v4 1/2] x86/resctrl: Pass domain to target CPU

> If I understand correctly, that would always look for a valid "cpu"
> even when none is needed. Not quite what I proposed, but it should
> work. Just some wasted cycles in a non-critical path.

Reinette,

Sorry for misunderstanding. You are right. Assignment to msr_param.dom
can also be deferred to the same point. Like this:

list_for_each_entry(d, &r->domains, list) {
hw_dom = resctrl_to_arch_dom(d);
msr_param.res = NULL;
for (t = 0; t < CDP_NUM_TYPES; t++) {
cfg = &hw_dom->d_resctrl.staged_config[t];
if (!cfg->have_new_ctrl)
continue;

idx = get_config_index(closid, t);
if (cfg->new_ctrl == hw_dom->ctrl_val[idx])
continue;
hw_dom->ctrl_val[idx] = cfg->new_ctrl;

if (!msr_param.res) {
msr_param.low = idx;
msr_param.high = msr_param.low + 1;
msr_param.res = r;
msr_param.dom = d;
cpu = cpumask_any(&d->cpu_mask);
} else {
msr_param.low = min(msr_param.low, idx);
msr_param.high = max(msr_param.high, idx + 1);
}
}
if (msr_param.res)
smp_call_function_single(cpu, rdt_ctrl_update, &msr_param, 1);
}

-Tony

2024-03-05 17:52:51

by Luck, Tony

[permalink] [raw]
Subject: RE: Cover-cover letter for two resctrl patch sets

> Hi, I wanted to ask a technical question about mail tooling.
>
> My mailing setup with b4 is capable of downloading series by their message id.
> With these two series tied together it for some reason doesn't want to download
> the smaller series (even when I pass the smaller series ID it ignores it and
> fetches the SNC one).
>
> Do you have some trick or method to download a series with this type of
> formatting more easily than manually saving emails?

The cover-cover letter seemed like a neat idea, but since it is breaking other
tools and workflows, I'm not going to do that again.

I tried asking b4 to specifically pick up version 4:

$ b4 am -v4 [email protected]

which worked for me (though it did apply Reinette's conditional
"Reviewed-by" tag without the suggested change).

-Tony

2024-03-05 22:02:56

by Konstantin Ryabitsev

[permalink] [raw]
Subject: Re: Cover-cover letter for two resctrl patch sets

On Mon, Mar 04, 2024 at 03:07:38PM -0800, Reinette Chatre wrote:
> Could you please instead send the two series separately
> and note the dependency in the cover letter?

FWIW, we're adding the ability to auto-retrieve and auto-apply dependencies
with b4. It already works with prerequisite-patch-id trailers (as documented
by git-format-patch), and we'll also add prerequisite-change-id in the near
future.
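
For reference, "git format-patch --base=<commit>" emits trailers of this
shape at the end of a series (the values below are placeholders, not real
IDs):

base-commit: <commit the series applies on top of>
prerequisite-patch-id: <stable patch-id of each patch the series needs>

The idea is that b4 can follow the prerequisite-patch-id trailers back to
the series they belong to and apply everything in the right order.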

-K

2024-03-05 22:28:03

by Luck, Tony

[permalink] [raw]
Subject: RE: Cover-cover letter for two resctrl patch sets

>> Could you please instead send the two series separately
>> and note the dependency in the cover letter?
>
> FWIW, we're adding the ability to auto-retrieve and auto-apply dependencies
> with b4. It already works with prerequisite-patch-id trailers (as documented
> by git-format-patch), and we'll also add prerequisite-change-id in the near
> future.

Other interesting interactions with b4 in these patches:

1) Because of significant rebase changes, I dropped all the Reviewed/Tested tags
from v15. But b4 seems to have noticed that part 5/8 wasn't changed at all since
v14, and picked up some (but not all) of the tags for that patch:

✓ [PATCH v15 5/8] x86/resctrl: Add node-scope to the options for feature scope
+ Reviewed-by: Peter Newman <[email protected]> (✗ DKIM/google.com)
+ Reviewed-by: Reinette Chatre <[email protected]> (✓ DKIM/intel.com)
+ Reviewed-by: Babu Moger <[email protected]> (✓ DKIM/amd.com)

2) My cover-cover letter created some linkage between the two patch series.
Trying to retrieve the 2-part v4 series with "b4 am" would pick up the 8-part
v15. Using an explicit "-v4" does work to pick up just the 2-part series.
$ b4 am -v4 [email protected]

3) That b4 am picks up Reinette's "if you make these changes" Reviewed-by from:
https://lore.kernel.org/all/[email protected]/
[though I can see why it did and realize that we'll need "b4GPT" to parse
surrounding text to figure out that it should skip that].

Should folks that offer a conditional tag mark that in the text somewhere to
let b4 know not to auto-pick?

-Tony

2024-03-06 03:11:56

by Konstantin Ryabitsev

[permalink] [raw]
Subject: Re: Cover-cover letter for two resctrl patch sets

On Tue, Mar 05, 2024 at 10:27:47PM +0000, Luck, Tony wrote:
> 1) Because of significant rebase changes, I dropped all the Reviewed/Tested tags
> from v15. But b4 seems to have noticed that part 5/8 wasn't changed at all since
> v14, and picked up some (but not all) of the tags for that patch:
>
> ✓ [PATCH v15 5/8] x86/resctrl: Add node-scope to the options for feature scope
> + Reviewed-by: Peter Newman <[email protected]> (✗ DKIM/google.com)
> + Reviewed-by: Reinette Chatre <[email protected]> (✓ DKIM/intel.com)
> + Reviewed-by: Babu Moger <[email protected]> (✓ DKIM/amd.com)

This is a feature -- any trailers sent to a patch that hasn't changed between
series will be applied to the new series. We don't apply this logic to any
trailers sent to the cover letter, which is probably why you aren't seeing all
the trailers show up.

> 2) My cover-cover letter created some linkage between the two patch series.

That's not an expected situation, which is why b4 doesn't do the right thing,
sorry.

> Trying to retrieve the 2-part v4 series with "b4 am" would pick up the 8-part
> v15. Using an explicit "-v4" does work to pick up just the 2-part series.

Another option is to use --no-parent to break the thread at the cover letter
of the series you want:

b4 am --no-parent [email protected]

This option exists for these exact situations when unrelated series show up in
the same thread.
> 3) That b4 am picks up Reinette's "if you make these changes" Reviewed-by from:
> https://lore.kernel.org/all/[email protected]/
> [though I can see why it did and realize that we'll need "b4GPT" to parse
> surrounding text to figure out that it should skip that].

Correct, we can't possibly do the right thing there, which is why the usual
advice is not to give conditional trailers, or to prepend them with a quote, a
pipe, a dot, etc., e.g.:

If you fix these, then you can apply:
| Reviewed-by: ...

> Should folks that offer a conditional tag mark that in the text somewhere to
> let b4 know not to auto-pick?

Using the above strategy effectively achieves the same result.

Best regards,
-K

2024-03-06 07:29:12

by Maciej Wieczor-Retman

[permalink] [raw]
Subject: Re: Cover-cover letter for two resctrl patch sets

On 2024-02-28 at 11:36:51 -0800, Tony Luck wrote:
>Hook these two series together in threaded mail clients
>since the SNC series is based on top of the two patch
>cleanup.
>
>-Tony

Tested with resctrl selftests and printks on an Ice Lake server with a SNC-2
configuration.

Tested-by: Maciej Wieczor-Retman <[email protected]>

--
Kind regards
Maciej Wieczór-Retman

2024-03-06 07:32:49

by Maciej Wieczor-Retman

[permalink] [raw]
Subject: Re: Cover-cover letter for two resctrl patch sets

On 2024-03-05 at 18:51:55 +0100, Luck, Tony wrote:
>> Hi, I wanted to ask a technical question about mail tooling.
>>
>> My mailing setup with b4 is capable of downloading series by their message id.
>> With these two series tied together it for some reason doesn't want to download
>> the smaller series (even when I pass the smaller series ID it ignores it and
>> fetches the SNC one).
>>
>> Do you have some trick or method to download a series with this type of
>> formatting more easily than manually saving emails?
>
>The cover-cover letter seemed like a neat idea, but since it is breaking other
>tools and workflows, I'm not going to do that again.

I just made my setup with the assumption that the cover letter is the root of
the thread, but that's easy to correct for cases such as this. And this
cover-cover letter is easy to browse on the lore page.

>
>I tried asking b4 to specifically pick up version 4:
>
>$ b4 am -v4 [email protected]

Thanks, this worked! I don't know why it didn't before.

>
>which worked for me (though it did apply Reinette's conditional
>"Reviewed-by" tag without the suggested change).
>
>-Tony

--
Kind regards
Maciej Wieczór-Retman

2024-03-08 18:27:45

by James Morse

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/resctrl: Pass domain to target CPU

Hi Tony,

On 28/02/2024 19:36, Tony Luck wrote:
> reset_all_ctrls() and resctrl_arch_update_domains() use on_each_cpu_mask()
> to call rdt_ctrl_update() on potentially one CPU from each domain.
>
> But this means rdt_ctrl_update() needs to figure out which domain to
> apply changes to. Doing so requires a search of all domains in a resource,
> which can only be done safely if cpus_lock is held. Both callers do hold
> this lock, but there isn't a way for a function called on another CPU
> via IPI to verify this.
>
> Commit c0d848fcb09d ("x86/resctrl: Remove lockdep annotation that triggers
> false positive") removed the incorrect assertions.
>
> Add the target domain to the msr_param structure and
> call rdt_ctrl_update() for each domain separately using
> smp_call_function_single(). This means that rdt_ctrl_update() doesn't
> need to search for the domain and get_domain_from_cpu() can safely assert
> that the cpus_lock is held since the remaining callers do not use IPI.

> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 83e40341583e..acf52aa185e0 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -362,6 +362,8 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
> {
> struct rdt_domain *d;
>
> + lockdep_assert_cpus_held();


(Huzzah!)


> list_for_each_entry(d, &r->domains, list) {
> /* Find the domain that contains this CPU */
> if (cpumask_test_cpu(cpu, &d->cpu_mask))

> diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> index 7997b47743a2..a3a0fd80daa8 100644
> --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> @@ -315,27 +300,27 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)

[..]

> - msr_param.res = NULL;
> list_for_each_entry(d, &r->domains, list) {
> hw_dom = resctrl_to_arch_dom(d);
> + msr_param.res = NULL;
> + msr_param.dom = d;
> for (t = 0; t < CDP_NUM_TYPES; t++) {
> cfg = &hw_dom->d_resctrl.staged_config[t];
> if (!cfg->have_new_ctrl)
> continue;
>
> idx = get_config_index(closid, t);
> - if (!apply_config(hw_dom, cfg, idx, cpu_mask))
> + if (cfg->new_ctrl == hw_dom->ctrl_val[idx])
> continue;
> + hw_dom->ctrl_val[idx] = cfg->new_ctrl;
> + cpu = cpumask_any(&d->cpu_mask);

If this CPU is part of the domain, then cpumask_any() could choose a different CPU, leading
to an IPI, when this CPU could have done the work itself.
smp_call_function_any() has the logic to try and run on this CPU if possible.

As d->cpu_mask is still valid when you call smp_call_function_single(), could we use
smp_call_function_any() instead?
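
For illustration, the tail of the loop could then look something like this
(untested sketch):

	/* Run on this CPU when possible, IPI a domain CPU otherwise */
	if (msr_param.res)
		smp_call_function_any(&d->cpu_mask, rdt_ctrl_update,
				      &msr_param, 1);

smp_call_function_any() prefers the current CPU when it is in the mask, then
tries a CPU on the same node, so the common case avoids the IPI entirely.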


> if (!msr_param.res) {
> msr_param.low = idx;
> @@ -346,17 +331,10 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
> msr_param.high = max(msr_param.high, idx + 1);
> }
> }
> + if (msr_param.res)
> + smp_call_function_single(cpu, rdt_ctrl_update, &msr_param, 1);
> }

> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 011e17efb1a6..da4f13db4161 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -2813,16 +2813,13 @@ static int reset_all_ctrls(struct rdt_resource *r)
> @@ -2834,17 +2831,14 @@ static int reset_all_ctrls(struct rdt_resource *r)
> */
> list_for_each_entry(d, &r->domains, list) {
> hw_dom = resctrl_to_arch_dom(d);
> - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
> + cpu = cpumask_any(&d->cpu_mask);

Same comment about picking an unlucky CPU here; smp_call_function_any() can improve our
chances.


> for (i = 0; i < hw_res->num_closid; i++)
> hw_dom->ctrl_val[i] = r->default_ctrl;
> + msr_param.dom = d;
> + smp_call_function_single(cpu, rdt_ctrl_update, &msr_param, 1);
> }

Still:
Reviewed-by: James Morse <[email protected]>


Thanks!

James


2024-03-08 18:51:01

by Luck, Tony

[permalink] [raw]
Subject: RE: [PATCH v4 1/2] x86/resctrl: Pass domain to target CPU

> > + hw_dom->ctrl_val[idx] = cfg->new_ctrl;
> > + cpu = cpumask_any(&d->cpu_mask);
>
> If this CPU is part of the domain, then cpumask_any() could chose a different CPU leading
> to an IPI, where this CPU could have done the work itself.
> smp_call_function_any() has the logic to try and run on this CPU if possible.
>
> As d->cpu_mask is still valid when you call smp_call_function_single(), could we use
> smp_call_function_any() instead?

I'd thought that cpumask_any() was smarter. But I see that it dumbly just picks the first
CPU in the mask. Great suggestion, will add to next version.

>> - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
>> + cpu = cpumask_any(&d->cpu_mask);
>
> Same comment about picking an unlucky CPU here; smp_call_function_any() can improve our
> chances.

Will fix here too.

> Still:
> Reviewed-by: James Morse <[email protected]>

Thanks!

-Tony