2022-10-21 14:26:34

by James Morse

Subject: [PATCH 01/18] x86/resctrl: Track the closid with the rmid

x86's RMID are independent of the CLOSID. An RMID can be allocated,
used and freed without considering the CLOSID.

MPAM's equivalent feature is PMG, which is not an independent number,
it extends the CLOSID/PARTID space. For MPAM, only PMG-bits worth of
'RMID' can be allocated for a single CLOSID.
i.e. if there is 1 bit of PMG space, then each CLOSID can have two
monitor groups.

To allow rescrl to disambiguate RMID values for different CLOSID,
everything in resctrl that keeps an RMID value needs to know the CLOSID
too. This will always be ignored on x86.
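
As an illustration only (a sketch with a hypothetical helper, not part
of this patch): with PMG-bits of PMG space, the monitor is selected by
the PARTID/PMG pair, so an 'RMID' value is only unique within one
CLOSID:

	/*
	 * Sketch: combine the CLOSID with the per-closid 'RMID' to get
	 * the monitor index an MPAM-like system would actually use.
	 */
	static inline u32 mpam_monitor_idx(u32 closid, u32 rmid, u32 pmg_bits)
	{
		return (closid << pmg_bits) | (rmid & ((1u << pmg_bits) - 1));
	}

e.g. with pmg_bits = 1, CLOSID 3 owns exactly monitor indexes 6 and 7,
which is why an RMID value alone cannot identify a monitor group.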

Signed-off-by: James Morse <[email protected]>

---
Is there a better term for 'the unique identifier for a monitor group'.
Using RMID for that here may be confusing...
---
arch/x86/kernel/cpu/resctrl/internal.h | 2 +-
arch/x86/kernel/cpu/resctrl/monitor.c | 54 +++++++++++++----------
arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 4 +-
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 ++---
include/linux/resctrl.h | 11 ++++-
5 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 5f7128686cfd..4b243ba88882 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -519,7 +519,7 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(void);
-void free_rmid(u32 rmid);
+void free_rmid(u32 closid, u32 rmid);
int rdt_get_mon_l3_config(struct rdt_resource *r);
void mon_event_count(void *info);
int rdtgroup_mondata_show(struct seq_file *m, void *arg);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index efe0c30d3a12..f1f66c9942a5 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -25,6 +25,7 @@
#include "internal.h"

struct rmid_entry {
+ u32 closid;
u32 rmid;
int busy;
struct list_head list;
@@ -136,7 +137,7 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
return val;
}

-static inline struct rmid_entry *__rmid_entry(u32 rmid)
+static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
{
struct rmid_entry *entry;

@@ -166,7 +167,8 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
}

void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
- u32 rmid, enum resctrl_event_id eventid)
+ u32 closid, u32 rmid,
+ enum resctrl_event_id eventid)
{
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
struct arch_mbm_state *am;
@@ -185,7 +187,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
}

int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
- u32 rmid, enum resctrl_event_id eventid, u64 *val)
+ u32 closid, u32 rmid, enum resctrl_event_id eventid,
+ u64 *val)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
@@ -251,9 +254,9 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
if (nrmid >= r->num_rmid)
break;

- entry = __rmid_entry(nrmid);
+ entry = __rmid_entry(~0, nrmid); // temporary

- if (resctrl_arch_rmid_read(r, d, entry->rmid,
+ if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
QOS_L3_OCCUP_EVENT_ID, &val)) {
rmid_dirty = true;
} else {
@@ -308,7 +311,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
cpu = get_cpu();
list_for_each_entry(d, &r->domains, list) {
if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
- err = resctrl_arch_rmid_read(r, d, entry->rmid,
+ err = resctrl_arch_rmid_read(r, d, entry->closid,
+ entry->rmid,
QOS_L3_OCCUP_EVENT_ID,
&val);
if (err || val <= resctrl_rmid_realloc_threshold)
@@ -332,7 +336,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
list_add_tail(&entry->list, &rmid_free_lru);
}

-void free_rmid(u32 rmid)
+void free_rmid(u32 closid, u32 rmid)
{
struct rmid_entry *entry;

@@ -341,7 +345,7 @@ void free_rmid(u32 rmid)

lockdep_assert_held(&rdtgroup_mutex);

- entry = __rmid_entry(rmid);
+ entry = __rmid_entry(closid, rmid);

if (is_llc_occupancy_enabled())
add_rmid_to_limbo(entry);
@@ -349,15 +353,16 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}

-static int __mon_event_count(u32 rmid, struct rmid_read *rr)
+static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
struct mbm_state *m;
u64 tval = 0;

if (rr->first)
- resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid);
+ resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);

- rr->err = resctrl_arch_rmid_read(rr->r, rr->d, rmid, rr->evtid, &tval);
+ rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid,
+ &tval);
if (rr->err)
return rr->err;

@@ -400,7 +405,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
* __mon_event_count() is compared with the chunks value from the previous
* invocation. This must be called once per second to maintain values in MBps.
*/
-static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
+static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
struct mbm_state *m = &rr->d->mbm_local[rmid];
u64 cur_bw, bytes, cur_bytes;
@@ -430,7 +435,7 @@ void mon_event_count(void *info)

rdtgrp = rr->rgrp;

- ret = __mon_event_count(rdtgrp->mon.rmid, rr);
+ ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);

/*
* For Ctrl groups read data from child monitor groups and
@@ -441,7 +446,8 @@ void mon_event_count(void *info)

if (rdtgrp->type == RDTCTRL_GROUP) {
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- if (__mon_event_count(entry->mon.rmid, rr) == 0)
+ if (__mon_event_count(rdtgrp->closid, entry->mon.rmid,
+ rr) == 0)
ret = 0;
}
}
@@ -571,7 +577,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
}
}

-static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
+static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
+ u32 closid, u32 rmid)
{
struct rmid_read rr;

@@ -586,12 +593,12 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
if (is_mbm_total_enabled()) {
rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
rr.val = 0;
- __mon_event_count(rmid, &rr);
+ __mon_event_count(closid, rmid, &rr);
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
rr.val = 0;
- __mon_event_count(rmid, &rr);
+ __mon_event_count(closid, rmid, &rr);

/*
* Call the MBA software controller only for the
@@ -599,7 +606,7 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
* the software controller explicitly.
*/
if (is_mba_sc(NULL))
- mbm_bw_count(rmid, &rr);
+ mbm_bw_count(closid, rmid, &rr);
}
}

@@ -656,11 +663,11 @@ void mbm_handle_overflow(struct work_struct *work)
d = container_of(work, struct rdt_domain, mbm_over.work);

list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- mbm_update(r, d, prgrp->mon.rmid);
+ mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);

head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list)
- mbm_update(r, d, crgrp->mon.rmid);
+ mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);

if (is_mba_sc(NULL))
update_mba_bw(prgrp, d);
@@ -703,10 +710,11 @@ static int dom_data_init(struct rdt_resource *r)
}

/*
- * RMID 0 is special and is always allocated. It's used for all
- * tasks that are not monitored.
+ * RMID 0 is special and is always allocated. It's used for the
+ * default_rdtgroup control group, which will be setup later. See
+ * rdtgroup_setup_root().
*/
- entry = __rmid_entry(0);
+ entry = __rmid_entry(0, 0);
list_del(&entry->list);

return 0;
diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index d961ae3ed96e..4d3706f71ee3 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -738,7 +738,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
* anymore when this group would be used for pseudo-locking. This
* is safe to call on platforms not capable of monitoring.
*/
- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);

ret = 0;
goto out;
@@ -773,7 +773,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)

ret = rdtgroup_locksetup_user_restore(rdtgrp);
if (ret) {
- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
return ret;
}

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index e5a48f05e787..f3b739c52e42 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2422,7 +2422,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)

head = &rdtgrp->mon.crdtgrp_list;
list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
- free_rmid(sentry->mon.rmid);
+ free_rmid(sentry->closid, sentry->mon.rmid);
list_del(&sentry->mon.crdtgrp_list);

if (atomic_read(&sentry->waitcount) != 0)
@@ -2462,7 +2462,7 @@ static void rmdir_all_sub(void)
cpumask_or(&rdtgroup_default.cpu_mask,
&rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);

- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);

kernfs_remove(rdtgrp->kn);
list_del(&rdtgrp->rdtgroup_list);
@@ -2955,7 +2955,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
return 0;

out_idfree:
- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
out_destroy:
kernfs_put(rdtgrp->kn);
kernfs_remove(rdtgrp->kn);
@@ -2969,7 +2969,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
{
kernfs_remove(rgrp->kn);
- free_rmid(rgrp->mon.rmid);
+ free_rmid(rgrp->closid, rgrp->mon.rmid);
rdtgroup_remove(rgrp);
}

@@ -3118,7 +3118,7 @@ static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
update_closid_rmid(tmpmask, NULL);

rdtgrp->flags = RDT_DELETED;
- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);

/*
* Remove the rdtgrp from the parent ctrl_mon group's list
@@ -3164,8 +3164,8 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
update_closid_rmid(tmpmask, NULL);

+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
closid_free(rdtgrp->closid);
- free_rmid(rdtgrp->mon.rmid);

rdtgroup_ctrl_remove(rdtgrp);

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 0cf5b20c6ddf..641aea580a1f 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -225,6 +225,8 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
* for this resource and domain.
* @r: resource that the counter should be read from.
* @d: domain that the counter should be read from.
+ * @closid: closid that matches the rmid. The counter may
+ * match traffic of both closid and rmid, or rmid only.
* @rmid: rmid of the counter to read.
* @eventid: eventid to read, e.g. L3 occupancy.
* @val: result of the counter read in bytes.
@@ -235,20 +237,25 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
* 0 on success, or -EIO, -EINVAL etc on error.
*/
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
- u32 rmid, enum resctrl_event_id eventid, u64 *val);
+ u32 closid, u32 rmid, enum resctrl_event_id eventid,
+ u64 *val);
+

/**
* resctrl_arch_reset_rmid() - Reset any private state associated with rmid
* and eventid.
* @r: The domain's resource.
* @d: The rmid's domain.
+ * @closid: The closid that matches the rmid. Counters may match both
+ * closid and rmid, or rmid only.
* @rmid: The rmid whose counter values should be reset.
* @eventid: The eventid whose counter values should be reset.
*
* This can be called from any CPU.
*/
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
- u32 rmid, enum resctrl_event_id eventid);
+ u32 closid, u32 rmid,
+ enum resctrl_event_id eventid);

extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;
--
2.30.2


2022-10-31 14:50:07

by Peter Newman

Subject: Re: [PATCH 01/18] x86/resctrl: Track the closid with the rmid

Hi James,

On Fri, Oct 21, 2022 at 3:12 PM James Morse <[email protected]> wrote:
>
> x86's RMID are independent of the CLOSID. An RMID can be allocated,
> used and freed without considering the CLOSID.
>
> MPAM's equivalent feature is PMG, which is not an independent number,
> it extends the CLOSID/PARTID space. For MPAM, only PMG-bits worth of
> 'RMID' can be allocated for a single CLOSID.
> i.e. if there is 1 bit of PMG space, then each CLOSID can have two
> monitor groups.
>
> To allow rescrl to disambiguate RMID values for different CLOSID,
> everything in resctrl that keeps an RMID value needs to know the CLOSID
> too. This will always be ignored on x86.

I recall that the allocation of RMIDs on MPAM was an open topic of
discussion on the other thread. Can we finish that discussion first?

https://lore.kernel.org/lkml/[email protected]/

Thanks!
-Peter

2022-11-25 05:05:38

by haoxin

Subject: Re: [PATCH 01/18] x86/resctrl: Track the closid with the rmid


On 2022/10/21 9:11 PM, James Morse wrote:
> x86's RMID are independent of the CLOSID. An RMID can be allocated,
> used and freed without considering the CLOSID.
>
> MPAM's equivalent feature is PMG, which is not an independent number,
> it extends the CLOSID/PARTID space. For MPAM, only PMG-bits worth of
> 'RMID' can be allocated for a single CLOSID.
> i.e. if there is 1 bit of PMG space, then each CLOSID can have two
> monitor groups.
>
> To allow rescrl to disambiguate RMID values for different CLOSID,
> everything in resctrl that keeps an RMID value needs to know the CLOSID
> too. This will always be ignored on x86.
>
> Signed-off-by: James Morse <[email protected]>
>
> ---
> Is there a better term for 'the unique identifier for a monitor group'.
> Using RMID for that here may be confusing...
> ---
> arch/x86/kernel/cpu/resctrl/internal.h | 2 +-
> arch/x86/kernel/cpu/resctrl/monitor.c | 54 +++++++++++++----------
> arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 4 +-
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 ++---
> include/linux/resctrl.h | 11 ++++-
> 5 files changed, 49 insertions(+), 34 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 5f7128686cfd..4b243ba88882 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -519,7 +519,7 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
> int closids_supported(void);
> void closid_free(int closid);
> int alloc_rmid(void);
> -void free_rmid(u32 rmid);
> +void free_rmid(u32 closid, u32 rmid);
> int rdt_get_mon_l3_config(struct rdt_resource *r);
> void mon_event_count(void *info);
> int rdtgroup_mondata_show(struct seq_file *m, void *arg);
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index efe0c30d3a12..f1f66c9942a5 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -25,6 +25,7 @@
> #include "internal.h"
>
> struct rmid_entry {
> + u32 closid;

We had better add a comment there for the closid variable.

Reviewed-by: Xin Hao <[email protected]>

> u32 rmid;
> int busy;
> struct list_head list;
> @@ -136,7 +137,7 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
> return val;
> }
>
> -static inline struct rmid_entry *__rmid_entry(u32 rmid)
> +static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
> {
> struct rmid_entry *entry;
>
> @@ -166,7 +167,8 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
> }
>
> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
> - u32 rmid, enum resctrl_event_id eventid)
> + u32 closid, u32 rmid,
> + enum resctrl_event_id eventid)
> {
> struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> struct arch_mbm_state *am;
> @@ -185,7 +187,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
> }
>
> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> - u32 rmid, enum resctrl_event_id eventid, u64 *val)
> + u32 closid, u32 rmid, enum resctrl_event_id eventid,
> + u64 *val)
> {
> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
> struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> @@ -251,9 +254,9 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
> if (nrmid >= r->num_rmid)
> break;
>
> - entry = __rmid_entry(nrmid);
> + entry = __rmid_entry(~0, nrmid); // temporary
>
> - if (resctrl_arch_rmid_read(r, d, entry->rmid,
> + if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
> QOS_L3_OCCUP_EVENT_ID, &val)) {
> rmid_dirty = true;
> } else {
> @@ -308,7 +311,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> cpu = get_cpu();
> list_for_each_entry(d, &r->domains, list) {
> if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
> - err = resctrl_arch_rmid_read(r, d, entry->rmid,
> + err = resctrl_arch_rmid_read(r, d, entry->closid,
> + entry->rmid,
> QOS_L3_OCCUP_EVENT_ID,
> &val);
> if (err || val <= resctrl_rmid_realloc_threshold)
> @@ -332,7 +336,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> list_add_tail(&entry->list, &rmid_free_lru);
> }
>
> -void free_rmid(u32 rmid)
> +void free_rmid(u32 closid, u32 rmid)
> {
> struct rmid_entry *entry;
>
> @@ -341,7 +345,7 @@ void free_rmid(u32 rmid)
>
> lockdep_assert_held(&rdtgroup_mutex);
>
> - entry = __rmid_entry(rmid);
> + entry = __rmid_entry(closid, rmid);
>
> if (is_llc_occupancy_enabled())
> add_rmid_to_limbo(entry);
> @@ -349,15 +353,16 @@ void free_rmid(u32 rmid)
> list_add_tail(&entry->list, &rmid_free_lru);
> }
>
> -static int __mon_event_count(u32 rmid, struct rmid_read *rr)
> +static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
> {
> struct mbm_state *m;
> u64 tval = 0;
>
> if (rr->first)
> - resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid);
> + resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
>
> - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, rmid, rr->evtid, &tval);
> + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid,
> + &tval);
> if (rr->err)
> return rr->err;
>
> @@ -400,7 +405,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
> * __mon_event_count() is compared with the chunks value from the previous
> * invocation. This must be called once per second to maintain values in MBps.
> */
> -static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
> +static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
> {
> struct mbm_state *m = &rr->d->mbm_local[rmid];
> u64 cur_bw, bytes, cur_bytes;
> @@ -430,7 +435,7 @@ void mon_event_count(void *info)
>
> rdtgrp = rr->rgrp;
>
> - ret = __mon_event_count(rdtgrp->mon.rmid, rr);
> + ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);
>
> /*
> * For Ctrl groups read data from child monitor groups and
> @@ -441,7 +446,8 @@ void mon_event_count(void *info)
>
> if (rdtgrp->type == RDTCTRL_GROUP) {
> list_for_each_entry(entry, head, mon.crdtgrp_list) {
> - if (__mon_event_count(entry->mon.rmid, rr) == 0)
> + if (__mon_event_count(rdtgrp->closid, entry->mon.rmid,
> + rr) == 0)
> ret = 0;
> }
> }
> @@ -571,7 +577,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
> }
> }
>
> -static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
> +static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
> + u32 closid, u32 rmid)
> {
> struct rmid_read rr;
>
> @@ -586,12 +593,12 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
> if (is_mbm_total_enabled()) {
> rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
> rr.val = 0;
> - __mon_event_count(rmid, &rr);
> + __mon_event_count(closid, rmid, &rr);
> }
> if (is_mbm_local_enabled()) {
> rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
> rr.val = 0;
> - __mon_event_count(rmid, &rr);
> + __mon_event_count(closid, rmid, &rr);
>
> /*
> * Call the MBA software controller only for the
> @@ -599,7 +606,7 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
> * the software controller explicitly.
> */
> if (is_mba_sc(NULL))
> - mbm_bw_count(rmid, &rr);
> + mbm_bw_count(closid, rmid, &rr);
> }
> }
>
> @@ -656,11 +663,11 @@ void mbm_handle_overflow(struct work_struct *work)
> d = container_of(work, struct rdt_domain, mbm_over.work);
>
> list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
> - mbm_update(r, d, prgrp->mon.rmid);
> + mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
>
> head = &prgrp->mon.crdtgrp_list;
> list_for_each_entry(crgrp, head, mon.crdtgrp_list)
> - mbm_update(r, d, crgrp->mon.rmid);
> + mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);
>
> if (is_mba_sc(NULL))
> update_mba_bw(prgrp, d);
> @@ -703,10 +710,11 @@ static int dom_data_init(struct rdt_resource *r)
> }
>
> /*
> - * RMID 0 is special and is always allocated. It's used for all
> - * tasks that are not monitored.
> + * RMID 0 is special and is always allocated. It's used for the
> + * default_rdtgroup control group, which will be setup later. See
> + * rdtgroup_setup_root().
> */
> - entry = __rmid_entry(0);
> + entry = __rmid_entry(0, 0);
> list_del(&entry->list);
>
> return 0;
> diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> index d961ae3ed96e..4d3706f71ee3 100644
> --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> @@ -738,7 +738,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
> * anymore when this group would be used for pseudo-locking. This
> * is safe to call on platforms not capable of monitoring.
> */
> - free_rmid(rdtgrp->mon.rmid);
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
>
> ret = 0;
> goto out;
> @@ -773,7 +773,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
>
> ret = rdtgroup_locksetup_user_restore(rdtgrp);
> if (ret) {
> - free_rmid(rdtgrp->mon.rmid);
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
> return ret;
> }
>
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index e5a48f05e787..f3b739c52e42 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -2422,7 +2422,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
>
> head = &rdtgrp->mon.crdtgrp_list;
> list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
> - free_rmid(sentry->mon.rmid);
> + free_rmid(sentry->closid, sentry->mon.rmid);
> list_del(&sentry->mon.crdtgrp_list);
>
> if (atomic_read(&sentry->waitcount) != 0)
> @@ -2462,7 +2462,7 @@ static void rmdir_all_sub(void)
> cpumask_or(&rdtgroup_default.cpu_mask,
> &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
>
> - free_rmid(rdtgrp->mon.rmid);
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
>
> kernfs_remove(rdtgrp->kn);
> list_del(&rdtgrp->rdtgroup_list);
> @@ -2955,7 +2955,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
> return 0;
>
> out_idfree:
> - free_rmid(rdtgrp->mon.rmid);
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
> out_destroy:
> kernfs_put(rdtgrp->kn);
> kernfs_remove(rdtgrp->kn);
> @@ -2969,7 +2969,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
> static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
> {
> kernfs_remove(rgrp->kn);
> - free_rmid(rgrp->mon.rmid);
> + free_rmid(rgrp->closid, rgrp->mon.rmid);
> rdtgroup_remove(rgrp);
> }
>
> @@ -3118,7 +3118,7 @@ static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
> update_closid_rmid(tmpmask, NULL);
>
> rdtgrp->flags = RDT_DELETED;
> - free_rmid(rdtgrp->mon.rmid);
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
>
> /*
> * Remove the rdtgrp from the parent ctrl_mon group's list
> @@ -3164,8 +3164,8 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
> cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
> update_closid_rmid(tmpmask, NULL);
>
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
> closid_free(rdtgrp->closid);
> - free_rmid(rdtgrp->mon.rmid);
>
> rdtgroup_ctrl_remove(rdtgrp);
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 0cf5b20c6ddf..641aea580a1f 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -225,6 +225,8 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
> * for this resource and domain.
> * @r: resource that the counter should be read from.
> * @d: domain that the counter should be read from.
> + * @closid: closid that matches the rmid. The counter may
> + * match traffic of both closid and rmid, or rmid only.
> * @rmid: rmid of the counter to read.
> * @eventid: eventid to read, e.g. L3 occupancy.
> * @val: result of the counter read in bytes.
> @@ -235,20 +237,25 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
> * 0 on success, or -EIO, -EINVAL etc on error.
> */
> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> - u32 rmid, enum resctrl_event_id eventid, u64 *val);
> + u32 closid, u32 rmid, enum resctrl_event_id eventid,
> + u64 *val);
> +
>
> /**
> * resctrl_arch_reset_rmid() - Reset any private state associated with rmid
> * and eventid.
> * @r: The domain's resource.
> * @d: The rmid's domain.
> + * @closid: The closid that matches the rmid. Counters may match both
> + * closid and rmid, or rmid only.
> * @rmid: The rmid whose counter values should be reset.
> * @eventid: The eventid whose counter values should be reset.
> *
> * This can be called from any CPU.
> */
> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
> - u32 rmid, enum resctrl_event_id eventid);
> + u32 closid, u32 rmid,
> + enum resctrl_event_id eventid);
>
> extern unsigned int resctrl_rmid_realloc_threshold;
> extern unsigned int resctrl_rmid_realloc_limit;

Reviewed-by: Xin Hao <[email protected]>

2023-01-06 03:42:44

by Fenghua Yu

Subject: RE: [PATCH 01/18] x86/resctrl: Track the closid with the rmid

Hi, James,

> James Morse <[email protected]> writes:
>
> x86's RMID are independent of the CLOSID. An RMID can be allocated, used and
> freed without considering the CLOSID.
>
> MPAM's equivalent feature is PMG, which is not an independent number, it
> extends the CLOSID/PARTID space. For MPAM, only PMG-bits worth of 'RMID'
> can be allocated for a single CLOSID.
> i.e. if there is 1 bit of PMG space, then each CLOSID can have two monitor
> groups.
>
> To allow rescrl to disambiguate RMID values for different CLOSID, everything in

s/rescrl/resctrl/

> resctrl that keeps an RMID value needs to know the CLOSID too. This will always
> be ignored on x86.
>
> Signed-off-by: James Morse <[email protected]>
>
> ---
> Is there a better term for 'the unique identifier for a monitor group'.
> Using RMID for that here may be confusing...
> ---
> arch/x86/kernel/cpu/resctrl/internal.h | 2 +-
> arch/x86/kernel/cpu/resctrl/monitor.c | 54 +++++++++++++----------
> arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 4 +-
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 ++---
> include/linux/resctrl.h | 11 ++++-
> 5 files changed, 49 insertions(+), 34 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 5f7128686cfd..4b243ba88882 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -519,7 +519,7 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
> int closids_supported(void);
> void closid_free(int closid);
> int alloc_rmid(void);
> -void free_rmid(u32 rmid);
> +void free_rmid(u32 closid, u32 rmid);
> int rdt_get_mon_l3_config(struct rdt_resource *r);
> void mon_event_count(void *info);
> int rdtgroup_mondata_show(struct seq_file *m, void *arg);
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index efe0c30d3a12..f1f66c9942a5 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -25,6 +25,7 @@
> #include "internal.h"
>
> struct rmid_entry {
> + u32 closid;

Could you please add a comment for this closid field? It's expected to be form x86 RMID entry.
So it's deserved a comment to explain a bit more on this field.

> u32 rmid;
> int busy;
> struct list_head list;
> @@ -136,7 +137,7 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
> return val;
> }
>
> -static inline struct rmid_entry *__rmid_entry(u32 rmid)
> +static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
> {
> struct rmid_entry *entry;
>
> @@ -166,7 +167,8 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
> }
>
> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
> - u32 rmid, enum resctrl_event_id eventid)
> + u32 closid, u32 rmid,
> + enum resctrl_event_id eventid)
> {
> struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> struct arch_mbm_state *am;
> @@ -185,7 +187,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
> }
>
> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> - u32 rmid, enum resctrl_event_id eventid, u64 *val)
> + u32 closid, u32 rmid, enum resctrl_event_id eventid,
> + u64 *val)
> {
> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
> struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> @@ -251,9 +254,9 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
> if (nrmid >= r->num_rmid)
> break;
>
> - entry = __rmid_entry(nrmid);
> + entry = __rmid_entry(~0, nrmid); // temporary
> - if (resctrl_arch_rmid_read(r, d, entry->rmid,
> + if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
> QOS_L3_OCCUP_EVENT_ID, &val)) {
> rmid_dirty = true;
> } else {
> @@ -308,7 +311,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> cpu = get_cpu();
> list_for_each_entry(d, &r->domains, list) {
> if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
> - err = resctrl_arch_rmid_read(r, d, entry->rmid,
> + err = resctrl_arch_rmid_read(r, d, entry->closid,
> + entry->rmid,
> QOS_L3_OCCUP_EVENT_ID,
> &val);
> if (err || val <= resctrl_rmid_realloc_threshold)
> @@ -332,7 +336,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> list_add_tail(&entry->list, &rmid_free_lru);
> }
>
> -void free_rmid(u32 rmid)
> +void free_rmid(u32 closid, u32 rmid)
> {
> struct rmid_entry *entry;
>
> @@ -341,7 +345,7 @@ void free_rmid(u32 rmid)
>
> lockdep_assert_held(&rdtgroup_mutex);
>
> - entry = __rmid_entry(rmid);
> + entry = __rmid_entry(closid, rmid);
>
> if (is_llc_occupancy_enabled())
> add_rmid_to_limbo(entry);
> @@ -349,15 +353,16 @@ void free_rmid(u32 rmid)
> list_add_tail(&entry->list, &rmid_free_lru);
> }
>
> -static int __mon_event_count(u32 rmid, struct rmid_read *rr)
> +static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
> {
> struct mbm_state *m;
> u64 tval = 0;
>
> if (rr->first)
> - resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid);
> + resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
>
> - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, rmid, rr->evtid, &tval);
> + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid,
> + &tval);
> if (rr->err)
> return rr->err;
>
> @@ -400,7 +405,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
> * __mon_event_count() is compared with the chunks value from the previous
> * invocation. This must be called once per second to maintain values in MBps.
> */
> -static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
> +static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
> {
> struct mbm_state *m = &rr->d->mbm_local[rmid];
> u64 cur_bw, bytes, cur_bytes;
> @@ -430,7 +435,7 @@ void mon_event_count(void *info)
>
> rdtgrp = rr->rgrp;
>
> - ret = __mon_event_count(rdtgrp->mon.rmid, rr);
> + ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);
>
> /*
> * For Ctrl groups read data from child monitor groups and
> @@ -441,7 +446,8 @@ void mon_event_count(void *info)
>
> if (rdtgrp->type == RDTCTRL_GROUP) {
> list_for_each_entry(entry, head, mon.crdtgrp_list) {
> - if (__mon_event_count(entry->mon.rmid, rr) == 0)
> + if (__mon_event_count(rdtgrp->closid, entry->mon.rmid,
> + rr) == 0)
> ret = 0;
> }
> }
> @@ -571,7 +577,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
> }
> }
>
> -static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
> +static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
> + u32 closid, u32 rmid)
> {
> struct rmid_read rr;
>
> @@ -586,12 +593,12 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
> if (is_mbm_total_enabled()) {
> rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
> rr.val = 0;
> - __mon_event_count(rmid, &rr);
> + __mon_event_count(closid, rmid, &rr);
> }
> if (is_mbm_local_enabled()) {
> rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
> rr.val = 0;
> - __mon_event_count(rmid, &rr);
> + __mon_event_count(closid, rmid, &rr);
>
> /*
> * Call the MBA software controller only for the
> @@ -599,7 +606,7 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
> * the software controller explicitly.
> */
> if (is_mba_sc(NULL))
> - mbm_bw_count(rmid, &rr);
> + mbm_bw_count(closid, rmid, &rr);
> }
> }
>
> @@ -656,11 +663,11 @@ void mbm_handle_overflow(struct work_struct *work)
> d = container_of(work, struct rdt_domain, mbm_over.work);
>
> list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
> - mbm_update(r, d, prgrp->mon.rmid);
> + mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);
>
> head = &prgrp->mon.crdtgrp_list;
> list_for_each_entry(crgrp, head, mon.crdtgrp_list)
> - mbm_update(r, d, crgrp->mon.rmid);
> + mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);
>
> if (is_mba_sc(NULL))
> update_mba_bw(prgrp, d);
> @@ -703,10 +710,11 @@ static int dom_data_init(struct rdt_resource *r)
> }
>
> /*
> - * RMID 0 is special and is always allocated. It's used for all
> - * tasks that are not monitored.
> + * RMID 0 is special and is always allocated. It's used for the
> + * default_rdtgroup control group, which will be setup later. See
> + * rdtgroup_setup_root().
> */
> - entry = __rmid_entry(0);
> + entry = __rmid_entry(0, 0);
> list_del(&entry->list);
>
> return 0;
> diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> index d961ae3ed96e..4d3706f71ee3 100644
> --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> @@ -738,7 +738,7 @@ int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
> * anymore when this group would be used for pseudo-locking. This
> * is safe to call on platforms not capable of monitoring.
> */
> - free_rmid(rdtgrp->mon.rmid);
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
>
> ret = 0;
> goto out;
> @@ -773,7 +773,7 @@ int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
>
> ret = rdtgroup_locksetup_user_restore(rdtgrp);
> if (ret) {
> - free_rmid(rdtgrp->mon.rmid);
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
> return ret;
> }
>
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index e5a48f05e787..f3b739c52e42 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -2422,7 +2422,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
>
> head = &rdtgrp->mon.crdtgrp_list;
> list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
> - free_rmid(sentry->mon.rmid);
> + free_rmid(sentry->closid, sentry->mon.rmid);
> list_del(&sentry->mon.crdtgrp_list);
>
> if (atomic_read(&sentry->waitcount) != 0)
> @@ -2462,7 +2462,7 @@ static void rmdir_all_sub(void)
> cpumask_or(&rdtgroup_default.cpu_mask,
> &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
>
> - free_rmid(rdtgrp->mon.rmid);
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
>
> kernfs_remove(rdtgrp->kn);
> list_del(&rdtgrp->rdtgroup_list);
> @@ -2955,7 +2955,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
> return 0;
>
> out_idfree:
> - free_rmid(rdtgrp->mon.rmid);
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
> out_destroy:
> kernfs_put(rdtgrp->kn);
> kernfs_remove(rdtgrp->kn);
> @@ -2969,7 +2969,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
> static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
> {
> kernfs_remove(rgrp->kn);
> - free_rmid(rgrp->mon.rmid);
> + free_rmid(rgrp->closid, rgrp->mon.rmid);
> rdtgroup_remove(rgrp);
> }
>
> @@ -3118,7 +3118,7 @@ static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
> update_closid_rmid(tmpmask, NULL);
>
> rdtgrp->flags = RDT_DELETED;
> - free_rmid(rdtgrp->mon.rmid);
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
>
> /*
> * Remove the rdtgrp from the parent ctrl_mon group's list
> @@ -3164,8 +3164,8 @@ static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
> cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
> update_closid_rmid(tmpmask, NULL);
>
> + free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
> closid_free(rdtgrp->closid);
> - free_rmid(rdtgrp->mon.rmid);
>
> rdtgroup_ctrl_remove(rdtgrp);
>
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 0cf5b20c6ddf..641aea580a1f 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -225,6 +225,8 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
> * for this resource and domain.
> * @r: resource that the counter should be read from.
> * @d: domain that the counter should be read from.
> + * @closid: closid that matches the rmid. The counter may
> + * match traffic of both closid and rmid, or rmid only.
> * @rmid: rmid of the counter to read.
> * @eventid: eventid to read, e.g. L3 occupancy.
> * @val: result of the counter read in bytes.
> @@ -235,20 +237,25 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
> * 0 on success, or -EIO, -EINVAL etc on error.
> */
> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> - u32 rmid, enum resctrl_event_id eventid, u64 *val);
> + u32 closid, u32 rmid, enum resctrl_event_id eventid,
> + u64 *val);
> +
>
> /**
> * resctrl_arch_reset_rmid() - Reset any private state associated with rmid
> * and eventid.
> * @r: The domain's resource.
> * @d: The rmid's domain.
> + * @closid: The closid that matches the rmid. Counters may match both
> + * closid and rmid, or rmid only.
> * @rmid: The rmid whose counter values should be reset.
> * @eventid: The eventid whose counter values should be reset.
> *
> * This can be called from any CPU.
> */
> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
> - u32 rmid, enum resctrl_event_id eventid);
> + u32 closid, u32 rmid,
> + enum resctrl_event_id eventid);
>
> extern unsigned int resctrl_rmid_realloc_threshold;
> extern unsigned int resctrl_rmid_realloc_limit;
> --
> 2.30.2

Thanks.

-Fenghua

2023-01-10 18:07:27

by Fenghua Yu

Subject: RE: [PATCH 01/18] x86/resctrl: Track the closid with the rmid

Hi, James,

> >> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> >> index efe0c30d3a12..f1f66c9942a5 100644
> >> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> >> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> >> @@ -25,6 +25,7 @@
> >> #include "internal.h"
> >>
> >> struct rmid_entry {
> >> + u32 closid;
> >
> > Could you please add a comment for this closid field? It's expected to be form
> x86 RMID entry.
> > So it's deserved a comment to explain a bit more on this field.
>
> Sure ... what does it need to convey?
> I'm not sure what you mean by "expected to be form x86 RMID entry".

My typo, should be "not expected".

>
> My medium verbosity version looks like this:
> | /*
> | * Some architectures' resctrl_arch_rmid_read() needs the CLOSID value
> | * in order to access the correct monitor. This field provides the
> | * value to list walkers like __check_limbo(). On x86 this is ignored.
> | */
>
>
> Does this cover it?

Looks good to me.

Thanks.

-Fenghua

2023-01-10 18:25:23

by James Morse

Subject: Re: [PATCH 01/18] x86/resctrl: Track the closid with the rmid

Hi Fenghua,

On 06/01/2023 02:57, Yu, Fenghua wrote:
>> James Morse <[email protected]> writes:
>>
>> x86's RMID are independent of the CLOSID. An RMID can be allocated, used and
>> freed without considering the CLOSID.
>>
>> MPAM's equivalent feature is PMG, which is not an independent number, it
>> extends the CLOSID/PARTID space. For MPAM, only PMG-bits worth of 'RMID'
>> can be allocated for a single CLOSID.
>> i.e. if there is 1 bit of PMG space, then each CLOSID can have two monitor
>> groups.
>>
>> To allow rescrl to disambiguate RMID values for different CLOSID, everything in
>
> s/rescrl/resctrl/
>
>> resctrl that keeps an RMID value needs to know the CLOSID too. This will always
>> be ignored on x86.


>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index 5f7128686cfd..4b243ba88882 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -519,7 +519,7 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
>> int closids_supported(void);
>> void closid_free(int closid);
>> int alloc_rmid(void);
>> -void free_rmid(u32 rmid);
>> +void free_rmid(u32 closid, u32 rmid);
>> int rdt_get_mon_l3_config(struct rdt_resource *r);
>> void mon_event_count(void *info);
>> int rdtgroup_mondata_show(struct seq_file *m, void *arg);
>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index efe0c30d3a12..f1f66c9942a5 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -25,6 +25,7 @@
>> #include "internal.h"
>>
>> struct rmid_entry {
>> + u32 closid;
>
> Could you please add a comment for this closid field? It's expected to be form x86 RMID entry.
> So it's deserved a comment to explain a bit more on this field.

Sure ... what does it need to convey?
I'm not sure what you mean by "expected to be form x86 RMID entry".

My medium verbosity version looks like this:
| /*
| * Some architectures' resctrl_arch_rmid_read() needs the CLOSID value
| * in order to access the correct monitor. This field provides the
| * value to list walkers like __check_limbo(). On x86 this is ignored.
| */
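
In context, with the fields from this patch (just a sketch):

| struct rmid_entry {
| 	/*
| 	 * Some architectures' resctrl_arch_rmid_read() needs the
| 	 * CLOSID value in order to access the correct monitor. This
| 	 * field provides the value to list walkers like
| 	 * __check_limbo(). On x86 this is ignored.
| 	 */
| 	u32			closid;
| 	u32			rmid;
| 	int			busy;
| 	struct list_head	list;
| };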


Does this cover it?


Thanks,

James


>> u32 rmid;
>> int busy;
>> struct list_head list;