LinuxLists.cc - [PATCH 02/18] x86/resctrl: Access per-rmid structures by index

2022-10-21 14:26:46

Subject: [PATCH 02/18] x86/resctrl: Access per-rmid structures by index

Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
monitors, RMID values on arm64 are not unique unless the CLOSID is
also included. Bitmaps like rmid_busy_llc need to be sized by the
number of unique entries for this resource.

Add helpers to encode/decode the CLOSID and RMID to an index. The
domain's rmid_busy_llc and the rmid_ptrs[] array are then sized by
index. On x86, this is always just the RMID. This gives resctrl a
unique value it can use to store monitor values, and allows MPAM to
decode the closid when reading the hardware counters.

Signed-off-by: James Morse <[email protected]>
---
arch/x86/include/asm/resctrl.h | 17 ++++++
arch/x86/kernel/cpu/resctrl/internal.h | 2 +
arch/x86/kernel/cpu/resctrl/monitor.c | 75 +++++++++++++++++---------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 +--
4 files changed, 72 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index d24b04ebf950..523eabfa3193 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -96,6 +96,23 @@ static inline void resctrl_sched_in(void)
__resctrl_sched_in();
}

+static inline u32 resctrl_arch_system_num_rmid_idx(void)
+{
+ /* RMID are independent numbers for x86. num_rmid_idx==num_rmid */
+ return boot_cpu_data.x86_cache_max_rmid + 1;
+}
+
+static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
+{
+ *rmid = idx;
+ *closid = ~0;
+}
+
+static inline u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
+{
+ return rmid;
+}
+
void resctrl_cpu_detect(struct cpuinfo_x86 *c);

#else
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 4b243ba88882..cb94c3e3fe36 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -8,6 +8,8 @@
#include <linux/fs_context.h>
#include <linux/jump_label.h>

+#include <asm/resctrl.h>
+
#define MSR_IA32_L3_QOS_CFG 0xc81
#define MSR_IA32_L2_QOS_CFG 0xc82
#define MSR_IA32_L3_CBM_BASE 0xc90
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f1f66c9942a5..c95d259476d4 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -137,11 +137,24 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
return val;
}

-static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
+/*
+ * x86 and arm64 differ in their handling of monitoring.
+ * x86's RMID are an independent number, there is one RMID '1'.
+ * arm64's PMG extend the PARTID/CLOSID space, there is one RMID '1' for each
+ * CLOSID. The RMID is no longer unique.
+ * To account for this, resctrl uses an index. On x86 this is just the RMID,
+ * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
+ *
+ * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The arch code
+ * must accept an attempt to read every index.
+ */
+static inline struct rmid_entry *__rmid_entry(u32 idx)
{
struct rmid_entry *entry;
+ u32 closid, rmid;

- entry = &rmid_ptrs[rmid];
+ entry = &rmid_ptrs[idx];
+ resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
WARN_ON(entry->rmid != rmid);

return entry;
@@ -238,8 +251,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
void __check_limbo(struct rdt_domain *d, bool force_free)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry;
- u32 crmid = 1, nrmid;
+ u32 idx, cur_idx = 1;
bool rmid_dirty;
u64 val = 0;

@@ -250,12 +264,11 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
* RMID and move it to the free list when the counter reaches 0.
*/
for (;;) {
- nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
- if (nrmid >= r->num_rmid)
+ idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
+ if (idx >= idx_limit)
break;

- entry = __rmid_entry(~0, nrmid); // temporary
-
+ entry = __rmid_entry(idx);
if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
QOS_L3_OCCUP_EVENT_ID, &val)) {
rmid_dirty = true;
@@ -264,19 +277,21 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
}

if (force_free || !rmid_dirty) {
- clear_bit(entry->rmid, d->rmid_busy_llc);
+ clear_bit(idx, d->rmid_busy_llc);
if (!--entry->busy) {
rmid_limbo_count--;
list_add_tail(&entry->list, &rmid_free_lru);
}
}
- crmid = nrmid + 1;
+ cur_idx = idx + 1;
}
}

bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
{
- return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+
+ return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
}

/*
@@ -306,6 +321,9 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
struct rdt_domain *d;
int cpu, err;
u64 val = 0;
+ u32 idx;
+
+ idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);

entry->busy = 0;
cpu = get_cpu();
@@ -325,7 +343,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
*/
if (!has_busy_rmid(r, d))
cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
- set_bit(entry->rmid, d->rmid_busy_llc);
+ set_bit(idx, d->rmid_busy_llc);
entry->busy++;
}
put_cpu();
@@ -338,14 +356,16 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)

void free_rmid(u32 closid, u32 rmid)
{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
struct rmid_entry *entry;

- if (!rmid)
- return;
-
lockdep_assert_held(&rdtgroup_mutex);

- entry = __rmid_entry(closid, rmid);
+ /* do not allow the default rmid to be free'd */
+ if (!idx)
+ return;
+
+ entry = __rmid_entry(idx);

if (is_llc_occupancy_enabled())
add_rmid_to_limbo(entry);
@@ -355,6 +375,7 @@ void free_rmid(u32 closid, u32 rmid)

static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
struct mbm_state *m;
u64 tval = 0;

@@ -371,10 +392,10 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
rr->val += tval;
return 0;
case QOS_L3_MBM_TOTAL_EVENT_ID:
- m = &rr->d->mbm_total[rmid];
+ m = &rr->d->mbm_total[idx];
break;
case QOS_L3_MBM_LOCAL_EVENT_ID:
- m = &rr->d->mbm_local[rmid];
+ m = &rr->d->mbm_local[idx];
break;
default:
/*
@@ -407,7 +428,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
- struct mbm_state *m = &rr->d->mbm_local[rmid];
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ struct mbm_state *m = &rr->d->mbm_local[idx];
u64 cur_bw, bytes, cur_bytes;

cur_bytes = rr->val;
@@ -497,7 +519,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
- u32 cur_bw, delta_bw, user_bw;
+ u32 cur_bw, delta_bw, user_bw, idx;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
struct list_head *head;
@@ -510,7 +532,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)

closid = rgrp->closid;
rmid = rgrp->mon.rmid;
- pmbm_data = &dom_mbm->mbm_local[rmid];
+ idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ pmbm_data = &dom_mbm->mbm_local[idx];

dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
@@ -693,19 +716,19 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)

static int dom_data_init(struct rdt_resource *r)
{
+ u32 nr_idx = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry = NULL;
- int i, nr_rmids;
+ int i;

- nr_rmids = r->num_rmid;
- rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
+ rmid_ptrs = kcalloc(nr_idx, sizeof(struct rmid_entry), GFP_KERNEL);
if (!rmid_ptrs)
return -ENOMEM;

- for (i = 0; i < nr_rmids; i++) {
+ for (i = 0; i < nr_idx; i++) {
entry = &rmid_ptrs[i];
INIT_LIST_HEAD(&entry->list);

- entry->rmid = i;
+ resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
list_add_tail(&entry->list, &rmid_free_lru);
}

@@ -714,7 +737,7 @@ static int dom_data_init(struct rdt_resource *r)
* default_rdtgroup control group, which will be setup later. See
* rdtgroup_setup_root().
*/
- entry = __rmid_entry(0, 0);
+ entry = __rmid_entry(resctrl_arch_rmid_idx_encode(0, 0));
list_del(&entry->list);

return 0;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index f3b739c52e42..9ce4746778f4 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3320,16 +3320,17 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)

static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;

if (is_llc_occupancy_enabled()) {
- d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
+ d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
}
if (is_mbm_total_enabled()) {
tsize = sizeof(*d->mbm_total);
- d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_total) {
bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
@@ -3337,7 +3338,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
}
if (is_mbm_local_enabled()) {
tsize = sizeof(*d->mbm_local);
- d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_local) {
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
--
2.30.2

2023-01-06 03:43:54

by Fenghua Yu

[permalink] [raw]

Subject: RE: [PATCH 02/18] x86/resctrl: Access per-rmid structures by index

Hi, James,

> James Morse <[email protected]> writes:
> Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
> monitors, RMID values on arm64 are not unique unless the CLOSID is also
> included. Bitmaps like rmid_busy_llc need to be sized by the number of unique
> entries for this resource.
>
> Add helpers to encode/decode the CLOSID and RMID to an index. The domain's
> busy_rmid_llc and the rmid_ptrs[] array are then sized by index. On x86, this is
> always just the RMID. This gives resctrl a unique value it can use to store
> monitor values, and allows MPAM to decode the closid when reading the
> hardware counters.
>
> Signed-off-by: James Morse <[email protected]>
> ---
> arch/x86/include/asm/resctrl.h | 17 ++++++
> arch/x86/kernel/cpu/resctrl/internal.h | 2 +
> arch/x86/kernel/cpu/resctrl/monitor.c | 75 +++++++++++++++++---------
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 +--
> 4 files changed, 72 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
> index d24b04ebf950..523eabfa3193 100644
> --- a/arch/x86/include/asm/resctrl.h
> +++ b/arch/x86/include/asm/resctrl.h
> @@ -96,6 +96,23 @@ static inline void resctrl_sched_in(void)
> __resctrl_sched_in();
> }
>
> +static inline u32 resctrl_arch_system_num_rmid_idx(void)
> +{
> + /* RMID are independent numbers for x86. num_rmid_idx==num_rmid
> */
> + return boot_cpu_data.x86_cache_max_rmid + 1; }
> +
> +static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid,
> +u32 *rmid) {
> + *rmid = idx;
> + *closid = ~0;

Should closid be 0 or ~0 on X86? Any special reason for closid to be ~0?
Seems 0 is a natural value so that it's ignored on X86. And the value should
be consistent on x86 and documented.

> +}
> +
> +static inline u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid) {
> + return rmid;
> +}
> +
> void resctrl_cpu_detect(struct cpuinfo_x86 *c);
>
> #else
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h
> b/arch/x86/kernel/cpu/resctrl/internal.h
> index 4b243ba88882..cb94c3e3fe36 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -8,6 +8,8 @@
> #include <linux/fs_context.h>
> #include <linux/jump_label.h>
>
> +#include <asm/resctrl.h>
> +
> #define MSR_IA32_L3_QOS_CFG 0xc81
> #define MSR_IA32_L2_QOS_CFG 0xc82
> #define MSR_IA32_L3_CBM_BASE 0xc90
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c
> b/arch/x86/kernel/cpu/resctrl/monitor.c
> index f1f66c9942a5..c95d259476d4 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -137,11 +137,24 @@ static inline u64 get_corrected_mbm_count(u32 rmid,
> unsigned long val)
> return val;
> }
>
> -static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
> +/*
> + * x86 and arm64 differ in their handling of monitoring.
> + * x86's RMID are an independent number, there is one RMID '1'.
> + * arm64's PMG extend the PARTID/CLOSID space, there is one RMID '1'
> +for each
> + * CLOSID. The RMID is no longer unique.
> + * To account for this, resctrl uses an index. On x86 this is just the
> +RMID,
> + * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
> + *
> + * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The
> +arch code
> + * must accept an attempt to read every index.
> + */
> +static inline struct rmid_entry *__rmid_entry(u32 idx)
> {
> struct rmid_entry *entry;
> + u32 closid, rmid;
>
> - entry = &rmid_ptrs[rmid];
> + entry = &rmid_ptrs[idx];
> + resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
> WARN_ON(entry->rmid != rmid);

Will __rmid_entry() be moved to fs/?
Should add WARN_ON(entry->closid!=rmid) here?

>
> return entry;
> @@ -238,8 +251,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct
> rdt_domain *d, void __check_limbo(struct rdt_domain *d, bool force_free) {
> struct rdt_resource *r =
> &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> + u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> struct rmid_entry *entry;
> - u32 crmid = 1, nrmid;
> + u32 idx, cur_idx = 1;
> bool rmid_dirty;
> u64 val = 0;
>
> @@ -250,12 +264,11 @@ void __check_limbo(struct rdt_domain *d, bool
> force_free)
> * RMID and move it to the free list when the counter reaches 0.
> */
> for (;;) {
> - nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
> - if (nrmid >= r->num_rmid)
> + idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
> + if (idx >= idx_limit)
> break;
>
> - entry = __rmid_entry(~0, nrmid); // temporary
> -
> + entry = __rmid_entry(idx);
> if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
> QOS_L3_OCCUP_EVENT_ID, &val)) {
> rmid_dirty = true;
> @@ -264,19 +277,21 @@ void __check_limbo(struct rdt_domain *d, bool
> force_free)
> }
>
> if (force_free || !rmid_dirty) {
> - clear_bit(entry->rmid, d->rmid_busy_llc);
> + clear_bit(idx, d->rmid_busy_llc);
> if (!--entry->busy) {
> rmid_limbo_count--;
> list_add_tail(&entry->list, &rmid_free_lru);
> }
> }
> - crmid = nrmid + 1;
> + cur_idx = idx + 1;
> }
> }
>
> bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d) {
> - return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
> + u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> +
> + return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
> }
>
> /*
> @@ -306,6 +321,9 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> struct rdt_domain *d;
> int cpu, err;
> u64 val = 0;
> + u32 idx;
> +
> + idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
>
> entry->busy = 0;
> cpu = get_cpu();
> @@ -325,7 +343,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> */
> if (!has_busy_rmid(r, d))
> cqm_setup_limbo_handler(d,
> CQM_LIMBOCHECK_INTERVAL);
> - set_bit(entry->rmid, d->rmid_busy_llc);
> + set_bit(idx, d->rmid_busy_llc);
> entry->busy++;
> }
> put_cpu();
> @@ -338,14 +356,16 @@ static void add_rmid_to_limbo(struct rmid_entry
> *entry)
>
> void free_rmid(u32 closid, u32 rmid)
> {
> + u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
> struct rmid_entry *entry;
>
> - if (!rmid)
> - return;
> -
> lockdep_assert_held(&rdtgroup_mutex);
>
> - entry = __rmid_entry(closid, rmid);
> + /* do not allow the default rmid to be free'd */
> + if (!idx)
> + return;
> +
> + entry = __rmid_entry(idx);
>
> if (is_llc_occupancy_enabled())
> add_rmid_to_limbo(entry);
> @@ -355,6 +375,7 @@ void free_rmid(u32 closid, u32 rmid)
>
> static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) {
> + u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
> struct mbm_state *m;
> u64 tval = 0;
>
> @@ -371,10 +392,10 @@ static int __mon_event_count(u32 closid, u32 rmid,
> struct rmid_read *rr)
> rr->val += tval;
> return 0;
> case QOS_L3_MBM_TOTAL_EVENT_ID:
> - m = &rr->d->mbm_total[rmid];
> + m = &rr->d->mbm_total[idx];
> break;
> case QOS_L3_MBM_LOCAL_EVENT_ID:
> - m = &rr->d->mbm_local[rmid];
> + m = &rr->d->mbm_local[idx];
> break;
> default:
> /*
> @@ -407,7 +428,8 @@ static int __mon_event_count(u32 closid, u32 rmid,
> struct rmid_read *rr)
> */
> static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr) {
> - struct mbm_state *m = &rr->d->mbm_local[rmid];
> + u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
> + struct mbm_state *m = &rr->d->mbm_local[idx];
> u64 cur_bw, bytes, cur_bytes;
>
> cur_bytes = rr->val;
> @@ -497,7 +519,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct
> rdt_domain *dom_mbm) {
> u32 closid, rmid, cur_msr_val, new_msr_val;
> struct mbm_state *pmbm_data, *cmbm_data;
> - u32 cur_bw, delta_bw, user_bw;
> + u32 cur_bw, delta_bw, user_bw, idx;
> struct rdt_resource *r_mba;
> struct rdt_domain *dom_mba;
> struct list_head *head;
> @@ -510,7 +532,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct
> rdt_domain *dom_mbm)
>
> closid = rgrp->closid;
> rmid = rgrp->mon.rmid;
> - pmbm_data = &dom_mbm->mbm_local[rmid];
> + idx = resctrl_arch_rmid_idx_encode(closid, rmid);
> + pmbm_data = &dom_mbm->mbm_local[idx];
>
> dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
> if (!dom_mba) {
> @@ -693,19 +716,19 @@ void mbm_setup_overflow_handler(struct
> rdt_domain *dom, unsigned long delay_ms)
>
> static int dom_data_init(struct rdt_resource *r) {
> + u32 nr_idx = resctrl_arch_system_num_rmid_idx();
> struct rmid_entry *entry = NULL;
> - int i, nr_rmids;
> + int i;
>
> - nr_rmids = r->num_rmid;
> - rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
> + rmid_ptrs = kcalloc(nr_idx, sizeof(struct rmid_entry), GFP_KERNEL);
> if (!rmid_ptrs)
> return -ENOMEM;
>
> - for (i = 0; i < nr_rmids; i++) {
> + for (i = 0; i < nr_idx; i++) {
> entry = &rmid_ptrs[i];
> INIT_LIST_HEAD(&entry->list);
>
> - entry->rmid = i;
> + resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
> list_add_tail(&entry->list, &rmid_free_lru);
> }
>
> @@ -714,7 +737,7 @@ static int dom_data_init(struct rdt_resource *r)
> * default_rdtgroup control group, which will be setup later. See
> * rdtgroup_setup_root().
> */
> - entry = __rmid_entry(0, 0);
> + entry = __rmid_entry(resctrl_arch_rmid_idx_encode(0, 0));

Closid is 0 here on x86. We need to have a consistent closid value on x86.
Maybe even define a macro for x86 closid value e.g. #define RMID_FIELD_CLOSID_IGNORED 0.
So the macro value is used on X86 always.

> list_del(&entry->list);
>
> return 0;
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index f3b739c52e42..9ce4746778f4 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -3320,16 +3320,17 @@ void resctrl_offline_domain(struct rdt_resource *r,
> struct rdt_domain *d)
>
> static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain
> *d) {
> + u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> size_t tsize;
>
> if (is_llc_occupancy_enabled()) {
> - d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
> + d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
> if (!d->rmid_busy_llc)
> return -ENOMEM;
> }
> if (is_mbm_total_enabled()) {
> tsize = sizeof(*d->mbm_total);
> - d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
> + d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
> if (!d->mbm_total) {
> bitmap_free(d->rmid_busy_llc);
> return -ENOMEM;
> @@ -3337,7 +3338,7 @@ static int domain_setup_mon_state(struct
> rdt_resource *r, struct rdt_domain *d)
> }
> if (is_mbm_local_enabled()) {
> tsize = sizeof(*d->mbm_local);
> - d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
> + d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
> if (!d->mbm_local) {
> bitmap_free(d->rmid_busy_llc);
> kfree(d->mbm_total);
> --
> 2.30.2

Thanks.

-Fenghua

2023-01-10 18:25:19

by James Morse

[permalink] [raw]

Subject: Re: [PATCH 02/18] x86/resctrl: Access per-rmid structures by index

Hi Fenghua,

On 06/01/2023 03:12, Yu, Fenghua wrote:
>> James Morse <[email protected]> writes:
>> Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
>> monitors, RMID values on arm64 are not unique unless the CLOSID is also
>> included. Bitmaps like rmid_busy_llc need to be sized by the number of unique
>> entries for this resource.
>>
>> Add helpers to encode/decode the CLOSID and RMID to an index. The domain's
>> busy_rmid_llc and the rmid_ptrs[] array are then sized by index. On x86, this is
>> always just the RMID. This gives resctrl a unique value it can use to store
>> monitor values, and allows MPAM to decode the closid when reading the
>> hardware counters.

>> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
>> index d24b04ebf950..523eabfa3193 100644
>> --- a/arch/x86/include/asm/resctrl.h
>> +++ b/arch/x86/include/asm/resctrl.h
>> @@ -96,6 +96,23 @@ static inline void resctrl_sched_in(void)
>> __resctrl_sched_in();
>> }
>>
>> +static inline u32 resctrl_arch_system_num_rmid_idx(void)
>> +{
>> + /* RMID are independent numbers for x86. num_rmid_idx==num_rmid
>> */
>> + return boot_cpu_data.x86_cache_max_rmid + 1; }
>> +
>> +static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid,
>> +u32 *rmid) {
>> + *rmid = idx;
>> + *closid = ~0;

> Should closid be 0 or ~0 on X86? Any special reason for closid to be ~0?

I picked ~0 as it's not a valid CLOSID, so anything that consumes it should choke quickly.

> Seems 0 is a natural value so that it's ignored on X86. And the value should
> be consistent on x86 and documented.

Well 0 is a valid CLOSID value, you can't ignore it based on the value - that's done at
compile time as the helpers don't use the value. Doing nothing here would leave an
uninitialized value on the stack, which would then get passed to the next function (which
should ignore it). I assume this is the sort of thing future compilers will complain about
if they can't see that the next function doesn't use the value.

I'll give it a #define value to make it clear it's a deliberate choice to initialise it
with an out-of-range value.

>
>> +}
>> +
>> +static inline u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid) {
>> + return rmid;
>> +}
>> +
>> void resctrl_cpu_detect(struct cpuinfo_x86 *c);
>>
>> #else

>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c
>> b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index f1f66c9942a5..c95d259476d4 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -137,11 +137,24 @@ static inline u64 get_corrected_mbm_count(u32 rmid,
>> unsigned long val)
>> return val;
>> }
>>
>> -static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
>> +/*
>> + * x86 and arm64 differ in their handling of monitoring.
>> + * x86's RMID are an independent number, there is one RMID '1'.
>> + * arm64's PMG extend the PARTID/CLOSID space, there is one RMID '1'
>> +for each
>> + * CLOSID. The RMID is no longer unique.
>> + * To account for this, resctrl uses an index. On x86 this is just the
>> +RMID,
>> + * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
>> + *
>> + * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The
>> +arch code
>> + * must accept an attempt to read every index.
>> + */
>> +static inline struct rmid_entry *__rmid_entry(u32 idx)
>> {
>> struct rmid_entry *entry;
>> + u32 closid, rmid;
>>
>> - entry = &rmid_ptrs[rmid];
>> + entry = &rmid_ptrs[idx];
>> + resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
>> WARN_ON(entry->rmid != rmid);

> Will __rmid_entry() be moved to fs/?

It does, as it's not related to accessing the hardware. I hope to move as much as possible
to avoid duplication.

> Should add WARN_ON(entry->closid!=rmid) here?

Presumably WARN_ON(entry->closid != closid)?
That would force everything to be initialised to the official 'uninitialised value', which is
probably not a bad thing.

If I'm touching these, I'll change it to WARN_ON_ONCE(), as if it breaks, chances are it's
going to trigger a few hundred times a second, which wouldn't help anyone trying to debug it.

>> return entry;

>> @@ -714,7 +737,7 @@ static int dom_data_init(struct rdt_resource *r)
>> * default_rdtgroup control group, which will be setup later. See
>> * rdtgroup_setup_root().
>> */
>> - entry = __rmid_entry(0, 0);
>> + entry = __rmid_entry(resctrl_arch_rmid_idx_encode(0, 0));

> Closid is 0 here on x86.

It's supposed to be zero. This code is ensuring the monitors for the default control group
will always be available, by removing it from rmid_free_lru, effectively allocating it.

It needs to be 0 for architectures where the CLOSID matters to
resctrl_arch_rmid_idx_encode(), to ensure it allocates/reserves/leaks the correct hardware
monitor.

Changing __rmid_entry() to take a struct rdtgroup might make the whole thing clearer, I
didn't go that far as I thought it would be more churn.

> We need to have a consistent closid value on x86.
> Maybe even define a macro for x86 closid value e.g. #define RMID_FIELD_CLOSID_IGNORED 0.
> So the macro value is used on X86 always.

The value is ignored on encode by x86. You only need to provide a magic value for decode,
and only because deterministic bugs and values you can grep for are good things!

Thanks,

James