2023-09-26 06:06:14

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH v5 3/8] x86/resctrl: Split the rdt_domain structure

Hi Tony,

Subject:

x86/resctrl: Split the rdt_domain and rdt_hw_domain structures


On 8/29/2023 4:44 PM, Tony Luck wrote:
> The same rdt_domain structure is used for both control an monitor

"control an monitor" -> "control and monitor"

> functions. But this results in wasted memory as some of the fields
> are only used by control functions, while most are only used for monitor
> functions.
>
> Create a new rdt_mondomain structure tailored explicitly for use in
> monitor parts of the core. Slim down the rdt_domain structure by
> removing the unused monitor fields.
>

Similar to the previous patch I think it will make the code
easier to understand if the naming is clear for both
monitoring and control structures. Why not rdt_mondomain
and rdt_ctrldomain instead?


> Similar breakout of struct rdt_hw_mondomain from struct rdt_hw_domain.

rdt_hw_mondomain and rdt_hw_ctrldomain?

>
> Signed-off-by: Tony Luck <[email protected]>
> ---
> include/linux/resctrl.h | 46 +++++++++++++++--------
> arch/x86/kernel/cpu/resctrl/internal.h | 38 +++++++++++++------
> arch/x86/kernel/cpu/resctrl/core.c | 18 ++++-----
> arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 4 +-
> arch/x86/kernel/cpu/resctrl/monitor.c | 40 ++++++++++----------
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 24 ++++++------
> 6 files changed, 101 insertions(+), 69 deletions(-)
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 33856943a787..08382548571e 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -53,7 +53,29 @@ struct resctrl_staged_config {
> };
>
> /**
> - * struct rdt_domain - group of CPUs sharing a resctrl resource
> + * struct rdt_domain - group of CPUs sharing a resctrl control resource
> + * @list: all instances of this resource
> + * @id: unique id for this instance
> + * @cpu_mask: which CPUs share this resource
> + * @plr: pseudo-locked region (if any) associated with domain
> + * @staged_config: parsed configuration to be applied
> + * @mbps_val: When mba_sc is enabled, this holds the array of user
> + * specified control values for mba_sc in MBps, indexed
> + * by closid
> + */
> +struct rdt_domain {
> + // First three fields must match struct rdt_mondomain below.

Please avoid comments within declarations. Even so, could you please
elaborate what the above means? Why do the first three fields have to
match? I understand there is common code, for example, __rdt_find_domain()
that operates on the same members of the two structs but does that
require the members be in the same position in the struct?
I understand that a comment may be required if position in the struct
is important but I cannot see that it is.

> + struct list_head list;
> + int id;
> + struct cpumask cpu_mask;
> +
> + struct pseudo_lock_region *plr;
> + struct resctrl_staged_config staged_config[CDP_NUM_TYPES];
> + u32 *mbps_val;
> +};
> +
> +/**
> + * struct rdt_mondomain - group of CPUs sharing a resctrl monitor resource
> * @list: all instances of this resource
> * @id: unique id for this instance
> * @cpu_mask: which CPUs share this resource
> @@ -64,16 +86,13 @@ struct resctrl_staged_config {
> * @cqm_limbo: worker to periodically read CQM h/w counters
> * @mbm_work_cpu: worker CPU for MBM h/w counters
> * @cqm_work_cpu: worker CPU for CQM h/w counters
> - * @plr: pseudo-locked region (if any) associated with domain
> - * @staged_config: parsed configuration to be applied
> - * @mbps_val: When mba_sc is enabled, this holds the array of user
> - * specified control values for mba_sc in MBps, indexed
> - * by closid
> */
> -struct rdt_domain {
> +struct rdt_mondomain {
> + // First three fields must match struct rdt_domain above.

Same comment.

> struct list_head list;
> int id;
> struct cpumask cpu_mask;
> +
> unsigned long *rmid_busy_llc;
> struct mbm_state *mbm_total;
> struct mbm_state *mbm_local;
> @@ -81,9 +100,6 @@ struct rdt_domain {
> struct delayed_work cqm_limbo;
> int mbm_work_cpu;
> int cqm_work_cpu;
> - struct pseudo_lock_region *plr;
> - struct resctrl_staged_config staged_config[CDP_NUM_TYPES];
> - u32 *mbps_val;
> };
>
> /**

...

> diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> index 468c1815edfd..5167ac9cbe98 100644
> --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> @@ -521,7 +521,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
> }
>
> void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
> - struct rdt_domain *d, struct rdtgroup *rdtgrp,
> + struct rdt_mondomain *d, struct rdtgroup *rdtgrp,
> int evtid, int first)
> {
> /*
> @@ -544,7 +544,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
> struct rdtgroup *rdtgrp;
> struct rdt_resource *r;
> union mon_data_bits md;
> - struct rdt_domain *d;
> + struct rdt_mondomain *d;

Reverse fir tree order.

> struct rmid_read rr;
> int ret = 0;
>
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index 66beca785535..42262d59ef9b 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -170,7 +170,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
> return 0;
> }
>
> -static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
> +static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mondomain *hw_dom,
> u32 rmid,
> enum resctrl_event_id eventid)
> {
> @@ -189,10 +189,10 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
> return NULL;
> }
>
> -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
> +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mondomain *d,
> u32 rmid, enum resctrl_event_id eventid)
> {
> - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> + struct rdt_hw_mondomain *hw_dom = resctrl_to_arch_mondom(d);
> struct arch_mbm_state *am;
>
> am = get_arch_mbm_state(hw_dom, rmid, eventid);
> @@ -208,9 +208,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
> * Assumes that hardware counters are also reset and thus that there is
> * no need to record initial non-zero counts.
> */
> -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d)
> +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mondomain *d)
> {
> - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> + struct rdt_hw_mondomain *hw_dom = resctrl_to_arch_mondom(d);
>
> if (is_mbm_total_enabled())
> memset(hw_dom->arch_mbm_total, 0,
> @@ -229,11 +229,11 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
> return chunks >> shift;
> }
>
> -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mondomain *d,
> u32 rmid, enum resctrl_event_id eventid, u64 *val)
> {
> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
> - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> + struct rdt_hw_mondomain *hw_mondom = resctrl_to_arch_mondom(d);

Reverse fir tree order.

> struct arch_mbm_state *am;
> u64 msr_val, chunks;
> int ret;
> @@ -245,7 +245,7 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> if (ret)
> return ret;
>
> - am = get_arch_mbm_state(hw_dom, rmid, eventid);
> + am = get_arch_mbm_state(hw_mondom, rmid, eventid);
> if (am) {
> am->chunks += mbm_overflow_count(am->prev_msr, msr_val,
> hw_res->mbm_width);
> @@ -266,7 +266,7 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> * decrement the count. If the busy count gets to zero on an RMID, we
> * free the RMID
> */
> -void __check_limbo(struct rdt_domain *d, bool force_free)
> +void __check_limbo(struct rdt_mondomain *d, bool force_free)
> {
> struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> struct rmid_entry *entry;
> @@ -305,7 +305,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
> }
> }
>
> -bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
> +bool has_busy_rmid(struct rdt_resource *r, struct rdt_mondomain *d)
> {
> return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
> }
> @@ -334,7 +334,7 @@ int alloc_rmid(void)
> static void add_rmid_to_limbo(struct rmid_entry *entry)
> {
> struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> - struct rdt_domain *d;
> + struct rdt_mondomain *d;
> int cpu, err;
> u64 val = 0;
>
> @@ -383,7 +383,7 @@ void free_rmid(u32 rmid)
> list_add_tail(&entry->list, &rmid_free_lru);
> }
>
> -static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 rmid,
> +static struct mbm_state *get_mbm_state(struct rdt_mondomain *d, u32 rmid,
> enum resctrl_event_id evtid)
> {
> switch (evtid) {
> @@ -516,7 +516,7 @@ void mon_event_count(void *info)
> * throttle MSRs already have low percentage values. To avoid
> * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
> */
> -static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
> +static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mondomain *dom_mbm)
> {
> u32 closid, rmid, cur_msr_val, new_msr_val;
> struct mbm_state *pmbm_data, *cmbm_data;
> @@ -600,7 +600,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
> }
> }
>
> -static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
> +static void mbm_update(struct rdt_resource *r, struct rdt_mondomain *d, int rmid)
> {
> struct rmid_read rr;
>
> @@ -641,12 +641,12 @@ void cqm_handle_limbo(struct work_struct *work)
> unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
> int cpu = smp_processor_id();
> struct rdt_resource *r;
> - struct rdt_domain *d;
> + struct rdt_mondomain *d;

Reverse fir tree order (Please check all code).

Reinette


2023-09-26 18:59:36

by Luck, Tony

[permalink] [raw]
Subject: Re: [PATCH v5 3/8] x86/resctrl: Split the rdt_domain structure

On Mon, Sep 25, 2023 at 04:25:15PM -0700, Reinette Chatre wrote:
> > +struct rdt_domain {
> > + // First three fields must match struct rdt_mondomain below.
>
> Please avoid comments within declarations. Even so, could you please
> elaborate what the above means? Why do the first three fields have to
> match? I understand there is common code, for example, __rdt_find_domain()
> that operated on the same members of the two structs but does that
> require the members be in the same position in the struct?
> I understand that a comment may be required if position in the struct
> is important but I cannot see that it is.

[Just replying to this one point in your message to get guidance. I'll
address all the rest in other replies]

I'm wrong about the first *three* fields ... but the first *two* fields
(the "list" and the "id") do need to be at the same offsets in different
structures if a common routine is going to be used to access those
fields.

If the "id" were at offset 0x10 in the control version of the domain
structure, and at offset 0x20 in the monitor version of the domain
structure, there would be no hope for a common routine to access the
"id" field when searching a list that could be either control or
monitor domains.

I'm looking at making this far more explicit with a new patch between
0001 and 0002 that pulls the two fields into a common substructure that
will be included in each of the control and monitor versions of the
structure.

Patch included below.

But this seems like it is a lot of churn to avoid having separate
functions to search control and monitor lists. Each a clone of
the existing ~24 line rdt_find_domain() with just the type changed
for the return value and the list traversal.

What do you think?

-Tony

commit 08992b4be1f53a3144f8aadd821f815a40a05e75
Author: Tony Luck <[email protected]>
Date: Tue Sep 26 11:07:12 2023 -0700

x86/resctrl: Prepare to split rdt_domain structure

The rdt_domain structure is used for both control and monitor features.
It is about to be split into separate structures for these two usages
because the scope for control and monitoring features for a resource
will be different for future resources.

To allow for common code that scans a list of domains looking for a
specific domain id, move the "list" and "id" fields into their own
structure within the rdt_domain structure.

Signed-off-by: Tony Luck <[email protected]>

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 618735e396cb..a583fa88ea5a 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -53,9 +53,18 @@ struct resctrl_staged_config {
};

/**
- * struct rdt_domain - group of CPUs sharing a resctrl resource
+ * struct rdt_domain_hdr - common header for different domain types
* @list: all instances of this resource
* @id: unique id for this instance
+ */
+struct rdt_domain_hdr {
+ struct list_head list;
+ int id;
+};
+
+/**
+ * struct rdt_domain - group of CPUs sharing a resctrl resource
+ * @hdr: common header for different domain types
* @cpu_mask: which CPUs share this resource
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
* @mbm_total: saved state for MBM total bandwidth
@@ -71,8 +80,7 @@ struct resctrl_staged_config {
* by closid
*/
struct rdt_domain {
- struct list_head list;
- int id;
+ struct rdt_domain_hdr hdr;
struct cpumask cpu_mask;
unsigned long *rmid_busy_llc;
struct mbm_state *mbm_total;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 3b1837e1fb6b..05369add4578 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -352,7 +352,7 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
{
struct rdt_domain *d;

- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
/* Find the domain that contains this CPU */
if (cpumask_test_cpu(cpu, &d->cpu_mask))
return d;
@@ -401,12 +401,12 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
return ERR_PTR(-ENODEV);

list_for_each(l, &r->domains) {
- d = list_entry(l, struct rdt_domain, list);
+ d = list_entry(l, struct rdt_domain, hdr.list);
/* When id is found, return its domain. */
- if (id == d->id)
+ if (id == d->hdr.id)
return d;
/* Stop searching when finding id's position in sorted list. */
- if (id < d->id)
+ if (id < d->hdr.id)
break;
}

@@ -544,7 +544,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;

d = &hw_dom->d_resctrl;
- d->id = id;
+ d->hdr.id = id;
cpumask_set_cpu(cpu, &d->cpu_mask);

rdt_domain_reconfigure_cdp(r);
@@ -559,11 +559,11 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
return;
}

- list_add_tail(&d->list, add_pos);
+ list_add_tail(&d->hdr.list, add_pos);

err = resctrl_online_domain(r, d);
if (err) {
- list_del(&d->list);
+ list_del(&d->hdr.list);
domain_free(hw_dom);
}
}
@@ -587,7 +587,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cpumask_clear_cpu(cpu, &d->cpu_mask);
if (cpumask_empty(&d->cpu_mask)) {
resctrl_offline_domain(r, d);
- list_del(&d->list);
+ list_del(&d->hdr.list);

/*
* rdt_domain "d" is going to be freed below, so clear
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index b44c487727d4..8bce591a1018 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -67,7 +67,7 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,

cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
return -EINVAL;
}

@@ -144,7 +144,7 @@ int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,

cfg = &d->staged_config[s->conf_type];
if (cfg->have_new_ctrl) {
- rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
+ rdt_last_cmd_printf("Duplicate domain %d\n", d->hdr.id);
return -EINVAL;
}

@@ -224,8 +224,8 @@ static int parse_line(char *line, struct resctrl_schema *s,
return -EINVAL;
}
dom = strim(dom);
- list_for_each_entry(d, &r->domains, list) {
- if (d->id == dom_id) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
data.buf = dom;
data.rdtgrp = rdtgrp;
if (r->parse_ctrlval(&data, s, d))
@@ -316,7 +316,7 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
return -ENOMEM;

msr_param.res = NULL;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
hw_dom = resctrl_to_arch_dom(d);
for (t = 0; t < CDP_NUM_TYPES; t++) {
cfg = &hw_dom->d_resctrl.staged_config[t];
@@ -464,7 +464,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
u32 ctrl_val;

seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->domains, hdr.list) {
if (sep)
seq_puts(s, ";");

@@ -474,7 +474,7 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
ctrl_val = resctrl_arch_get_config(r, dom, closid,
schema->conf_type);

- seq_printf(s, r->format_str, dom->id, max_data_width,
+ seq_printf(s, r->format_str, dom->hdr.id, max_data_width,
ctrl_val);
sep = true;
}
@@ -503,7 +503,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
} else {
seq_printf(s, "%s:%d=%x\n",
rdtgrp->plr->s->res->name,
- rdtgrp->plr->d->id,
+ rdtgrp->plr->d->hdr.id,
rdtgrp->plr->cbm);
}
} else {
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index ded1fc7cb7cb..27cda5988d7f 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -340,7 +340,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)

entry->busy = 0;
cpu = get_cpu();
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
err = resctrl_arch_rmid_read(r, d, entry->rmid,
QOS_L3_OCCUP_EVENT_ID,
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 8c5f932bc00b..18b6183a1b48 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -856,7 +856,7 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
* associated with them.
*/
for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(d_i, &r->domains, list) {
+ list_for_each_entry(d_i, &r->domains, hdr.list) {
if (d_i->plr)
cpumask_or(cpu_with_psl, cpu_with_psl,
&d_i->cpu_mask);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 1cf2b36f5bf8..42adf17ea6fa 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -86,7 +86,7 @@ void rdt_staged_configs_clear(void)
lockdep_assert_held(&rdtgroup_mutex);

for_each_alloc_capable_rdt_resource(r) {
- list_for_each_entry(dom, &r->domains, list)
+ list_for_each_entry(dom, &r->domains, hdr.list)
memset(dom->staged_config, 0, sizeof(dom->staged_config));
}
}
@@ -928,12 +928,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,

mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->domains, hdr.list) {
if (sep)
seq_putc(seq, ';');
sw_shareable = 0;
exclusive = 0;
- seq_printf(seq, "%d=", dom->id);
+ seq_printf(seq, "%d=", dom->hdr.id);
for (i = 0; i < closids_supported(); i++) {
if (!closid_allocated(i))
continue;
@@ -1233,7 +1233,7 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
continue;
has_cache = true;
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
ctrl = resctrl_arch_get_config(r, d, closid,
s->conf_type);
if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
@@ -1398,7 +1398,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
- seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+ seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
}
goto out;
}
@@ -1410,7 +1410,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
type = schema->conf_type;
sep = false;
seq_printf(s, "%*s:", max_name_width, schema->name);
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
if (sep)
seq_putc(s, ';');
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
@@ -1428,7 +1428,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
else
size = rdtgroup_cbm_to_size(r, d, ctrl);
}
- seq_printf(s, "%d=%u", d->id, size);
+ seq_printf(s, "%d=%u", d->hdr.id, size);
sep = true;
}
seq_putc(s, '\n');
@@ -1499,7 +1499,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid

mutex_lock(&rdtgroup_mutex);

- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->domains, hdr.list) {
if (sep)
seq_puts(s, ";");

@@ -1507,7 +1507,7 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid
mon_info.evtid = evtid;
mondata_config_read(dom, &mon_info);

- seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
+ seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
sep = true;
}
seq_puts(s, "\n");
@@ -1622,8 +1622,8 @@ static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
return -EINVAL;
}

- list_for_each_entry(d, &r->domains, list) {
- if (d->id == dom_id) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
+ if (d->hdr.id == dom_id) {
ret = mbm_config_write_domain(r, d, evtid, val);
if (ret)
return -EINVAL;
@@ -2141,7 +2141,7 @@ static int set_cache_qos_cfg(int level, bool enable)
return -ENOMEM;

r_l = &rdt_resources_all[level].r_resctrl;
- list_for_each_entry(d, &r_l->domains, list) {
+ list_for_each_entry(d, &r_l->domains, hdr.list) {
if (r_l->cache.arch_has_per_cpu_cfg)
/* Pick all the CPUs in the domain instance */
for_each_cpu(cpu, &d->cpu_mask)
@@ -2226,7 +2226,7 @@ static int set_mba_sc(bool mba_sc)

r->membw.mba_sc = mba_sc;

- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
for (i = 0; i < num_closid; i++)
d->mbps_val[i] = MBA_MAX_MBPS;
}
@@ -2528,7 +2528,7 @@ static int rdt_get_tree(struct fs_context *fc)

if (is_mbm_enabled()) {
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
- list_for_each_entry(dom, &r->domains, list)
+ list_for_each_entry(dom, &r->domains, hdr.list)
mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
}

@@ -2652,7 +2652,7 @@ static int reset_all_ctrls(struct rdt_resource *r)
* CBMs in all domains to the maximum mask value. Pick one CPU
* from each domain to update the MSRs below.
*/
- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
hw_dom = resctrl_to_arch_dom(d);
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);

@@ -2858,7 +2858,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
char name[32];
int ret;

- sprintf(name, "mon_%s_%02d", r->name, d->id);
+ sprintf(name, "mon_%s_%02d", r->name, d->hdr.id);
/* create the directory */
kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
if (IS_ERR(kn))
@@ -2874,7 +2874,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
}

priv.u.rid = r->rid;
- priv.u.domid = d->id;
+ priv.u.domid = d->hdr.id;
list_for_each_entry(mevt, &r->evt_list, list) {
priv.u.evtid = mevt->evtid;
ret = mon_addfile(kn, mevt->name, priv.priv);
@@ -2922,7 +2922,7 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
struct rdt_domain *dom;
int ret;

- list_for_each_entry(dom, &r->domains, list) {
+ list_for_each_entry(dom, &r->domains, hdr.list) {
ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
if (ret)
return ret;
@@ -3081,7 +3081,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
*/
tmp_cbm = cfg->new_ctrl;
if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
+ rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
return -ENOSPC;
}
cfg->have_new_ctrl = true;
@@ -3104,7 +3104,7 @@ static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
struct rdt_domain *d;
int ret;

- list_for_each_entry(d, &s->res->domains, list) {
+ list_for_each_entry(d, &s->res->domains, hdr.list) {
ret = __init_one_rdt_domain(d, s, closid);
if (ret < 0)
return ret;
@@ -3119,7 +3119,7 @@ static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
struct resctrl_staged_config *cfg;
struct rdt_domain *d;

- list_for_each_entry(d, &r->domains, list) {
+ list_for_each_entry(d, &r->domains, hdr.list) {
if (is_mba_sc(r)) {
d->mbps_val[closid] = MBA_MAX_MBPS;
continue;
@@ -3726,7 +3726,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
* per domain monitor data directories.
*/
if (static_branch_unlikely(&rdt_mon_enable_key))
- rmdir_mondata_subdir_allrdtgrp(r, d->id);
+ rmdir_mondata_subdir_allrdtgrp(r, d->hdr.id);

if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);

2023-09-27 02:02:27

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH v5 3/8] x86/resctrl: Split the rdt_domain structure

Hi Tony,

On 9/26/2023 11:46 AM, Tony Luck wrote:
> On Mon, Sep 25, 2023 at 04:25:15PM -0700, Reinette Chatre wrote:
>>> +struct rdt_domain {
>>> + // First three fields must match struct rdt_mondomain below.
>>
>> Please avoid comments within declarations. Even so, could you please
>> elaborate what the above means? Why do the first three fields have to
>> match? I understand there is common code, for example, __rdt_find_domain()
>> that operated on the same members of the two structs but does that
>> require the members be in the same position in the struct?
>> I understand that a comment may be required if position in the struct
>> is important but I cannot see that it is.
>
> [Just replying to this one point in your message to get guidance. I'll
> address all the rest in other replies]
>
> I'm wrong about the first *three* fields ... but the first *two* fields
> (the "list" and the "id") do need to be at the same offsets in different
> structures if a common routine is going to be used to access those
> fields.
>
> If the "id" were at offset 0x10 in the control version of the domain
> structure, and at offset 0x20 in the monitor version of the domain
> structure, there would be no hope for a common routine to access the
> "id" field when searching a list that could be either control or
> monitor domains.
>
> I'm looking at making this far more explicit with a new patch between
> 0001 and 0002 that pulls the two fields into a common substructure that
> will be included in each of the control and monitor versions of the
> structure.
>
> Patch included below.
>
> But this seems like it is a lot of churn to avoid having separate
> functions to search control and monitor lists. Each a clone of
> the existing ~24 line rdt_find_domain() with just the type changed
> for the return value and the list traversal.

Yes. Sorry, I did not realize this implication during the earlier
discussions.

>
> What do you think?
>

It sounds to me as though you are advocating for open coding
rdt_find_ctrl_domain() and rdt_find_mon_domain()? That sounds good
to me.

Sorry for the noise.

Reinette

2023-09-27 10:24:31

by Luck, Tony

[permalink] [raw]
Subject: RE: [PATCH v5 3/8] x86/resctrl: Split the rdt_domain structure

> > But this seems like it is a lot of churn to avoid having separate
> > functions to search control and monitor lists. Each a clone of
> > the existing ~24 line rdt_find_domain() with just the type changed
> > for the return value and the list traversal.
>
> Yes. Sorry, I did not realize this implication during the earlier
> discussions.
>
> >
> > What do you think?
> >
>
> It sounds to me as though you are advocating for open coding
> rdt_find_ctrl_domain() and rdt_find_mon_domain()? That sounds good
> to me.

Reinette,

While there is some churn, it maybe isn't all that bad. I also ran the open
coding case and having a pair of 24-line functions one after the other with
just two trivial lines changed between them is unlikely to get past the x86
maintainers without running this same conversation again.

-Tony

2023-09-28 19:53:28

by Luck, Tony

[permalink] [raw]
Subject: Re: [PATCH v5 3/8] x86/resctrl: Split the rdt_domain structure

On Mon, Sep 25, 2023 at 04:25:15PM -0700, Reinette Chatre wrote:
> Hi Tony,
>
> Subject:
>
> x86/resctrl: Split the rdt_domain and rdt_hw_domain structures

Fixed subject as suggested.

>
>
> On 8/29/2023 4:44 PM, Tony Luck wrote:
> > The same rdt_domain structure is used for both control an monitor
>
> "control an monitor" -> "control and monitor"

Fixed.

>
> > functions. But this results in wasted memory as some of the fields
> > are only used by control functions, while most are only used for monitor
> > functions.
> >
> > Create a new rdt_mondomain structure tailored explicitly for use in
> > monitor parts of the core. Slim down the rdt_domain structure by
> > removing the unused monitor fields.
> >
>
> Similar to the previous patch I think it will make the code
> easier to understand if the naming is clear for both
> monitoring and control structures. Why not rdt_mondomain
> and rdt_ctrldomain instead?

Done.

>
>
> > Similar breakout of struct rdt_hw_mondomain from struct rdt_hw_domain.
>
> rdt_hw_mondomain and rdt_hw_ctrldomain?

Yes. rdt_hw_domain is split two ways and each gets an appropriate name.
I added an extra "_" to match shape of other names. So these are now
"rdt_hw_ctrl_domain" and "rdt_hw_mon_domain"

> >
> > Signed-off-by: Tony Luck <[email protected]>
> > ---
> > include/linux/resctrl.h | 46 +++++++++++++++--------
> > arch/x86/kernel/cpu/resctrl/internal.h | 38 +++++++++++++------
> > arch/x86/kernel/cpu/resctrl/core.c | 18 ++++-----
> > arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 4 +-
> > arch/x86/kernel/cpu/resctrl/monitor.c | 40 ++++++++++----------
> > arch/x86/kernel/cpu/resctrl/rdtgroup.c | 24 ++++++------
> > 6 files changed, 101 insertions(+), 69 deletions(-)
> >
> > diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> > index 33856943a787..08382548571e 100644
> > --- a/include/linux/resctrl.h
> > +++ b/include/linux/resctrl.h
> > @@ -53,7 +53,29 @@ struct resctrl_staged_config {
> > };
> >
> > /**
> > - * struct rdt_domain - group of CPUs sharing a resctrl resource
> > + * struct rdt_domain - group of CPUs sharing a resctrl control resource
> > + * @list: all instances of this resource
> > + * @id: unique id for this instance
> > + * @cpu_mask: which CPUs share this resource
> > + * @plr: pseudo-locked region (if any) associated with domain
> > + * @staged_config: parsed configuration to be applied
> > + * @mbps_val: When mba_sc is enabled, this holds the array of user
> > + * specified control values for mba_sc in MBps, indexed
> > + * by closid
> > + */
> > +struct rdt_domain {
> > + // First three fields must match struct rdt_mondomain below.
>
> Please avoid comments within declarations. Even so, could you please
> elaborate what the above means? Why do the first three fields have to
> match? I understand there is common code, for example, __rdt_find_domain()
> that operated on the same members of the two structs but does that
> require the members be in the same position in the struct?
> I understand that a comment may be required if position in the struct
> is important but I cannot see that it is.

Discussed in other e-mail thread. Comments go away. The TWO (not three)
fields that must be common are now in an embedded "rdt_domain_hdr"
structure.

>
> > + struct list_head list;
> > + int id;
> > + struct cpumask cpu_mask;
> > +
> > + struct pseudo_lock_region *plr;
> > + struct resctrl_staged_config staged_config[CDP_NUM_TYPES];
> > + u32 *mbps_val;
> > +};
> > +
> > +/**
> > + * struct rdt_mondomain - group of CPUs sharing a resctrl monitor resource
> > * @list: all instances of this resource
> > * @id: unique id for this instance
> > * @cpu_mask: which CPUs share this resource
> > @@ -64,16 +86,13 @@ struct resctrl_staged_config {
> > * @cqm_limbo: worker to periodically read CQM h/w counters
> > * @mbm_work_cpu: worker CPU for MBM h/w counters
> > * @cqm_work_cpu: worker CPU for CQM h/w counters
> > - * @plr: pseudo-locked region (if any) associated with domain
> > - * @staged_config: parsed configuration to be applied
> > - * @mbps_val: When mba_sc is enabled, this holds the array of user
> > - * specified control values for mba_sc in MBps, indexed
> > - * by closid
> > */
> > -struct rdt_domain {
> > +struct rdt_mondomain {
> > + // First three fields must match struct rdt_domain above.
>
> Same comment.

Same solution.
>
> > struct list_head list;
> > int id;
> > struct cpumask cpu_mask;
> > +
> > unsigned long *rmid_busy_llc;
> > struct mbm_state *mbm_total;
> > struct mbm_state *mbm_local;
> > @@ -81,9 +100,6 @@ struct rdt_domain {
> > struct delayed_work cqm_limbo;
> > int mbm_work_cpu;
> > int cqm_work_cpu;
> > - struct pseudo_lock_region *plr;
> > - struct resctrl_staged_config staged_config[CDP_NUM_TYPES];
> > - u32 *mbps_val;
> > };
> >
> > /**
>
> ...
>
> > diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> > index 468c1815edfd..5167ac9cbe98 100644
> > --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> > +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> > @@ -521,7 +521,7 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
> > }
> >
> > void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
> > - struct rdt_domain *d, struct rdtgroup *rdtgrp,
> > + struct rdt_mondomain *d, struct rdtgroup *rdtgrp,
> > int evtid, int first)
> > {
> > /*
> > @@ -544,7 +544,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
> > struct rdtgroup *rdtgrp;
> > struct rdt_resource *r;
> > union mon_data_bits md;
> > - struct rdt_domain *d;
> > + struct rdt_mondomain *d;
>
> Reverse fir order.

Fixed.

>
> > struct rmid_read rr;
> > int ret = 0;
> >
> > diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> > index 66beca785535..42262d59ef9b 100644
> > --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> > +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> > @@ -170,7 +170,7 @@ static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
> > return 0;
> > }
> >
> > -static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
> > +static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mondomain *hw_dom,
> > u32 rmid,
> > enum resctrl_event_id eventid)
> > {
> > @@ -189,10 +189,10 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
> > return NULL;
> > }
> >
> > -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
> > +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mondomain *d,
> > u32 rmid, enum resctrl_event_id eventid)
> > {
> > - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> > + struct rdt_hw_mondomain *hw_dom = resctrl_to_arch_mondom(d);
> > struct arch_mbm_state *am;
> >
> > am = get_arch_mbm_state(hw_dom, rmid, eventid);
> > @@ -208,9 +208,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
> > * Assumes that hardware counters are also reset and thus that there is
> > * no need to record initial non-zero counts.
> > */
> > -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d)
> > +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mondomain *d)
> > {
> > - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> > + struct rdt_hw_mondomain *hw_dom = resctrl_to_arch_mondom(d);
> >
> > if (is_mbm_total_enabled())
> > memset(hw_dom->arch_mbm_total, 0,
> > @@ -229,11 +229,11 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
> > return chunks >> shift;
> > }
> >
> > -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> > +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mondomain *d,
> > u32 rmid, enum resctrl_event_id eventid, u64 *val)
> > {
> > struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
> > - struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> > + struct rdt_hw_mondomain *hw_mondom = resctrl_to_arch_mondom(d);
>
> Reverse fir.

Fixed.

>
> > struct arch_mbm_state *am;
> > u64 msr_val, chunks;
> > int ret;
> > @@ -245,7 +245,7 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> > if (ret)
> > return ret;
> >
> > - am = get_arch_mbm_state(hw_dom, rmid, eventid);
> > + am = get_arch_mbm_state(hw_mondom, rmid, eventid);
> > if (am) {
> > am->chunks += mbm_overflow_count(am->prev_msr, msr_val,
> > hw_res->mbm_width);
> > @@ -266,7 +266,7 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> > * decrement the count. If the busy count gets to zero on an RMID, we
> > * free the RMID
> > */
> > -void __check_limbo(struct rdt_domain *d, bool force_free)
> > +void __check_limbo(struct rdt_mondomain *d, bool force_free)
> > {
> > struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> > struct rmid_entry *entry;
> > @@ -305,7 +305,7 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
> > }
> > }
> >
> > -bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
> > +bool has_busy_rmid(struct rdt_resource *r, struct rdt_mondomain *d)
> > {
> > return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
> > }
> > @@ -334,7 +334,7 @@ int alloc_rmid(void)
> > static void add_rmid_to_limbo(struct rmid_entry *entry)
> > {
> > struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> > - struct rdt_domain *d;
> > + struct rdt_mondomain *d;
> > int cpu, err;
> > u64 val = 0;
> >
> > @@ -383,7 +383,7 @@ void free_rmid(u32 rmid)
> > list_add_tail(&entry->list, &rmid_free_lru);
> > }
> >
> > -static struct mbm_state *get_mbm_state(struct rdt_domain *d, u32 rmid,
> > +static struct mbm_state *get_mbm_state(struct rdt_mondomain *d, u32 rmid,
> > enum resctrl_event_id evtid)
> > {
> > switch (evtid) {
> > @@ -516,7 +516,7 @@ void mon_event_count(void *info)
> > * throttle MSRs already have low percentage values. To avoid
> > * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
> > */
> > -static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
> > +static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mondomain *dom_mbm)
> > {
> > u32 closid, rmid, cur_msr_val, new_msr_val;
> > struct mbm_state *pmbm_data, *cmbm_data;
> > @@ -600,7 +600,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
> > }
> > }
> >
> > -static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
> > +static void mbm_update(struct rdt_resource *r, struct rdt_mondomain *d, int rmid)
> > {
> > struct rmid_read rr;
> >
> > @@ -641,12 +641,12 @@ void cqm_handle_limbo(struct work_struct *work)
> > unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
> > int cpu = smp_processor_id();
> > struct rdt_resource *r;
> > - struct rdt_domain *d;
> > + struct rdt_mondomain *d;
>
> Reverse fir (Please check all code).

Fixed. I wrote a simple awk script to find these (as I'm obviously bad
at noticing them); it is included at the end of this message.

>
> Reinette


#!/usr/bin/awk -f

# Build keyw[], the set of words that mark a line as a local variable
# declaration when they appear as the first field of a line inside a
# function body.
BEGIN {
	nkw = split("bool char enum int long short static struct u16 u32 u64 u8 unsigned void", kwlist, " ")
	for (k = 1; k <= nkw; k++)
		keyw[kwlist[k]] = 1
}

# Main rule, run for every input line.
#
# Tracks function bodies (a "{" alone on a line opens one, a "}" alone on a
# line closes one) and, inside a body, runs of lines whose first word is in
# keyw[].  Within such a run, a line that is longer than the line before it
# marks the run as bad.  When the run ends (first empty line, or the end of
# the function) a bad run is reported together with the lines leading up
# to it.
{
	# Keep every line so the offending region can be printed later.
	source[NR] = $0

	if ($0 == "{") {
		infunction = NR
		next
	}

	if ($0 == "}") {
		# Report and reset here as well, so that state from a
		# declaration run that was never followed by an empty line
		# cannot leak into the next function.
		flush_locals()
		infunction = 0
		next
	}

	if (infunction && ($1 in keyw)) {
		inlocals++
		# The first declaration has nothing to be compared with.
		if (inlocals > 1 && length(source[NR - 1]) < length($0))
			badfir = 1
		next
	}

	if (inlocals && NF == 0)
		flush_locals()
}

# Print the pending report, if any, and reset the per-run state.
# Output starts two lines above the opening "{" (clamped to the first
# input line) and stops at the line before the current one.
function flush_locals(    first, i) {
	if (badfir) {
		print "==== BAD FIR ===="
		first = infunction - 2
		if (first < 1)
			first = 1
		for (i = first; i < NR; i++)
			printf("%4d: %s\n", i, source[i])
	}
	inlocals = 0
	badfir = 0
}