2023-01-13 18:04:38

by James Morse

[permalink] [raw]
Subject: [PATCH v2 02/18] x86/resctrl: Access per-rmid structures by index

Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
monitors, RMID values on arm64 are not unique unless the CLOSID is
also included. Bitmaps like rmid_busy_llc need to be sized by the
number of unique entries for this resource.

Add helpers to encode/decode the CLOSID and RMID to an index. The
domain's busy_rmid_llc and the rmid_ptrs[] array are then sized by
index. On x86, this is always just the RMID. This gives resctrl a
unique value it can use to store monitor values, and allows MPAM to
decode the closid when reading the hardware counters.

Tested-by: Shaopeng Tan <[email protected]>
Signed-off-by: James Morse <[email protected]>
---
Changes since v1:
* Added X86_BAD_CLOSID macro to make it clear what this value means
* Added second WARN_ON() for closid checking, and made both _ONCE()
---
arch/x86/include/asm/resctrl.h | 24 ++++++++
arch/x86/kernel/cpu/resctrl/internal.h | 2 +
arch/x86/kernel/cpu/resctrl/monitor.c | 79 +++++++++++++++++---------
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 ++-
4 files changed, 82 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 52788f79786f..44d568f3577c 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -7,6 +7,13 @@
#include <linux/sched.h>
#include <linux/jump_label.h>

+/*
+ * This value can never be a valid CLOSID, and is used when mapping a
+ * (closid, rmid) pair to an index and back. On x86 only the RMID is
+ * needed.
+ */
+#define X86_RESCTRL_BAD_CLOSID ~0
+
/**
* struct resctrl_pqr_state - State cache for the PQR MSR
* @cur_rmid: The cached Resource Monitoring ID
@@ -94,6 +101,23 @@ static inline void resctrl_sched_in(void)
__resctrl_sched_in();
}

+static inline u32 resctrl_arch_system_num_rmid_idx(void)
+{
+ /* RMID are independent numbers for x86. num_rmid_idx==num_rmid */
+ return boot_cpu_data.x86_cache_max_rmid + 1;
+}
+
+static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
+{
+ *rmid = idx;
+ *closid = X86_RESCTRL_BAD_CLOSID;
+}
+
+static inline u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
+{
+ return rmid;
+}
+
void resctrl_cpu_detect(struct cpuinfo_x86 *c);

#else
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 5dbff3035723..af71401c57e2 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -8,6 +8,8 @@
#include <linux/fs_context.h>
#include <linux/jump_label.h>

+#include <asm/resctrl.h>
+
#define L3_QOS_CDP_ENABLE 0x01ULL

#define L2_QOS_CDP_ENABLE 0x01ULL
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 13673cab175a..dbae380e3d1c 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -142,12 +142,27 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
return val;
}

-static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
+/*
+ * x86 and arm64 differ in their handling of monitoring.
+ * x86's RMID are an independent number, there is one RMID '1'.
+ * arm64's PMG extend the PARTID/CLOSID space, there is one RMID '1' for each
+ * CLOSID. The RMID is no longer unique.
+ * To account for this, resctrl uses an index. On x86 this is just the RMID,
+ * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
+ *
+ * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The arch code
+ * must accept an attempt to read every index.
+ */
+static inline struct rmid_entry *__rmid_entry(u32 idx)
{
struct rmid_entry *entry;
+ u32 closid, rmid;

- entry = &rmid_ptrs[rmid];
- WARN_ON(entry->rmid != rmid);
+ entry = &rmid_ptrs[idx];
+ resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
+
+ WARN_ON_ONCE(entry->closid != closid);
+ WARN_ON_ONCE(entry->rmid != rmid);

return entry;
}
@@ -243,8 +258,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
void __check_limbo(struct rdt_domain *d, bool force_free)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry;
- u32 crmid = 1, nrmid;
+ u32 idx, cur_idx = 1;
bool rmid_dirty;
u64 val = 0;

@@ -255,12 +271,11 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
* RMID and move it to the free list when the counter reaches 0.
*/
for (;;) {
- nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
- if (nrmid >= r->num_rmid)
+ idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
+ if (idx >= idx_limit)
break;

- entry = __rmid_entry(~0, nrmid); // temporary
-
+ entry = __rmid_entry(idx);
if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
QOS_L3_OCCUP_EVENT_ID, &val)) {
rmid_dirty = true;
@@ -269,19 +284,21 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
}

if (force_free || !rmid_dirty) {
- clear_bit(entry->rmid, d->rmid_busy_llc);
+ clear_bit(idx, d->rmid_busy_llc);
if (!--entry->busy) {
rmid_limbo_count--;
list_add_tail(&entry->list, &rmid_free_lru);
}
}
- crmid = nrmid + 1;
+ cur_idx = idx + 1;
}
}

bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
{
- return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+
+ return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
}

/*
@@ -311,6 +328,9 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
struct rdt_domain *d;
int cpu, err;
u64 val = 0;
+ u32 idx;
+
+ idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);

entry->busy = 0;
cpu = get_cpu();
@@ -330,7 +350,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
*/
if (!has_busy_rmid(r, d))
cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL);
- set_bit(entry->rmid, d->rmid_busy_llc);
+ set_bit(idx, d->rmid_busy_llc);
entry->busy++;
}
put_cpu();
@@ -343,14 +363,16 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)

void free_rmid(u32 closid, u32 rmid)
{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
struct rmid_entry *entry;

- if (!rmid)
- return;
-
lockdep_assert_held(&rdtgroup_mutex);

- entry = __rmid_entry(closid, rmid);
+ /* do not allow the default rmid to be free'd */
+ if (!idx)
+ return;
+
+ entry = __rmid_entry(idx);

if (is_llc_occupancy_enabled())
add_rmid_to_limbo(entry);
@@ -360,6 +382,7 @@ void free_rmid(u32 closid, u32 rmid)

static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
struct mbm_state *m;
u64 tval = 0;

@@ -376,10 +399,10 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
rr->val += tval;
return 0;
case QOS_L3_MBM_TOTAL_EVENT_ID:
- m = &rr->d->mbm_total[rmid];
+ m = &rr->d->mbm_total[idx];
break;
case QOS_L3_MBM_LOCAL_EVENT_ID:
- m = &rr->d->mbm_local[rmid];
+ m = &rr->d->mbm_local[idx];
break;
default:
/*
@@ -412,7 +435,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
- struct mbm_state *m = &rr->d->mbm_local[rmid];
+ u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ struct mbm_state *m = &rr->d->mbm_local[idx];
u64 cur_bw, bytes, cur_bytes;

cur_bytes = rr->val;
@@ -502,7 +526,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
- u32 cur_bw, delta_bw, user_bw;
+ u32 cur_bw, delta_bw, user_bw, idx;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
struct list_head *head;
@@ -515,7 +539,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)

closid = rgrp->closid;
rmid = rgrp->mon.rmid;
- pmbm_data = &dom_mbm->mbm_local[rmid];
+ idx = resctrl_arch_rmid_idx_encode(closid, rmid);
+ pmbm_data = &dom_mbm->mbm_local[idx];

dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
@@ -698,19 +723,19 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)

static int dom_data_init(struct rdt_resource *r)
{
+ u32 nr_idx = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry = NULL;
- int i, nr_rmids;
+ int i;

- nr_rmids = r->num_rmid;
- rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
+ rmid_ptrs = kcalloc(nr_idx, sizeof(struct rmid_entry), GFP_KERNEL);
if (!rmid_ptrs)
return -ENOMEM;

- for (i = 0; i < nr_rmids; i++) {
+ for (i = 0; i < nr_idx; i++) {
entry = &rmid_ptrs[i];
INIT_LIST_HEAD(&entry->list);

- entry->rmid = i;
+ resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
list_add_tail(&entry->list, &rmid_free_lru);
}

@@ -719,7 +744,7 @@ static int dom_data_init(struct rdt_resource *r)
* default_rdtgroup control group, which will be setup later. See
* rdtgroup_setup_root().
*/
- entry = __rmid_entry(0, 0);
+ entry = __rmid_entry(resctrl_arch_rmid_idx_encode(0, 0));
list_del(&entry->list);

return 0;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index f3b739c52e42..9ce4746778f4 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3320,16 +3320,17 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)

static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;

if (is_llc_occupancy_enabled()) {
- d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
+ d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
}
if (is_mbm_total_enabled()) {
tsize = sizeof(*d->mbm_total);
- d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_total) {
bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
@@ -3337,7 +3338,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
}
if (is_mbm_local_enabled()) {
tsize = sizeof(*d->mbm_local);
- d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_local) {
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
--
2.30.2


2023-01-17 20:37:04

by Fenghua Yu

[permalink] [raw]
Subject: RE: [PATCH v2 02/18] x86/resctrl: Access per-rmid structures by index

Hi, James,

> Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
> monitors, RMID values on arm64 are not unique unless the CLOSID is also
> included. Bitmaps like rmid_busy_llc need to be sized by the number of unique
> entries for this resource.
>
> Add helpers to encode/decode the CLOSID and RMID to an index. The domain's
> busy_rmid_llc and the rmid_ptrs[] array are then sized by index. On x86, this is
> always just the RMID. This gives resctrl a unique value it can use to store
> monitor values, and allows MPAM to decode the closid when reading the
> hardware counters.
>
> Tested-by: Shaopeng Tan <[email protected]>
> Signed-off-by: James Morse <[email protected]>
> ---
> Changes since v1:
> * Added X86_BAD_CLOSID macro to make it clear what this value means
> * Added second WARN_ON() for closid checking, and made both _ONCE()
> ---
> arch/x86/include/asm/resctrl.h | 24 ++++++++
> arch/x86/kernel/cpu/resctrl/internal.h | 2 +
> arch/x86/kernel/cpu/resctrl/monitor.c | 79 +++++++++++++++++---------
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 ++-
> 4 files changed, 82 insertions(+), 30 deletions(-)
>
> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
> index 52788f79786f..44d568f3577c 100644
> --- a/arch/x86/include/asm/resctrl.h
> +++ b/arch/x86/include/asm/resctrl.h
> @@ -7,6 +7,13 @@
> #include <linux/sched.h>
> #include <linux/jump_label.h>
>
> +/*
> + * This value can never be a valid CLOSID, and is used when mapping a
> + * (closid, rmid) pair to an index and back. On x86 only the RMID is
> + * needed.
> + */
> +#define X86_RESCTRL_BAD_CLOSID ~0
> +
> /**
> * struct resctrl_pqr_state - State cache for the PQR MSR
> * @cur_rmid: The cached Resource Monitoring ID
> @@ -94,6 +101,23 @@ static inline void resctrl_sched_in(void)
> __resctrl_sched_in();
> }
>
> +static inline u32 resctrl_arch_system_num_rmid_idx(void)
> +{
> + /* RMID are independent numbers for x86. num_rmid_idx==num_rmid
> */

Is it better to change the comment to something like:
+ /* RMIDs are independent of CLOSIDs and number of RMIDs is fixed. */

> + return boot_cpu_data.x86_cache_max_rmid + 1; }
> +
> +static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid,
> +u32 *rmid) {
> + *rmid = idx;
> + *closid = X86_RESCTRL_BAD_CLOSID;
> +}
> +
> +static inline u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid) {
> + return rmid;
> +}
> +
> void resctrl_cpu_detect(struct cpuinfo_x86 *c);
>
> #else
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h
> b/arch/x86/kernel/cpu/resctrl/internal.h
> index 5dbff3035723..af71401c57e2 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -8,6 +8,8 @@
> #include <linux/fs_context.h>
> #include <linux/jump_label.h>
>
Please remove this blank line.

> +#include <asm/resctrl.h>
> +
> #define L3_QOS_CDP_ENABLE 0x01ULL
>
> #define L2_QOS_CDP_ENABLE 0x01ULL
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c
> b/arch/x86/kernel/cpu/resctrl/monitor.c
> index 13673cab175a..dbae380e3d1c 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -142,12 +142,27 @@ static inline u64 get_corrected_mbm_count(u32 rmid,
> unsigned long val)
> return val;
> }
>
> -static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
> +/*
> + * x86 and arm64 differ in their handling of monitoring.
> + * x86's RMID are an independent number, there is one RMID '1'.
What do you mean by "one RMID '1'"?
Should this sentence be "x86's RMID is an independent number."?

> + * arm64's PMG extend the PARTID/CLOSID space, there is one RMID '1'
> +for each
> + * CLOSID. The RMID is no longer unique.
> + * To account for this, resctrl uses an index. On x86 this is just the
> +RMID,
> + * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
> + *
> + * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The
> +arch code
> + * must accept an attempt to read every index.
> + */
> +static inline struct rmid_entry *__rmid_entry(u32 idx)
> {
> struct rmid_entry *entry;
> + u32 closid, rmid;
>
> - entry = &rmid_ptrs[rmid];
> - WARN_ON(entry->rmid != rmid);
> + entry = &rmid_ptrs[idx];
> + resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
> +
> + WARN_ON_ONCE(entry->closid != closid);
> + WARN_ON_ONCE(entry->rmid != rmid);
>
> return entry;
> }
> @@ -243,8 +258,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct
> rdt_domain *d, void __check_limbo(struct rdt_domain *d, bool force_free) {
> struct rdt_resource *r =
> &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> + u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> struct rmid_entry *entry;
> - u32 crmid = 1, nrmid;
> + u32 idx, cur_idx = 1;
> bool rmid_dirty;
> u64 val = 0;
>
> @@ -255,12 +271,11 @@ void __check_limbo(struct rdt_domain *d, bool
> force_free)
> * RMID and move it to the free list when the counter reaches 0.
> */
> for (;;) {
> - nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
> - if (nrmid >= r->num_rmid)
> + idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
> + if (idx >= idx_limit)
> break;
>
> - entry = __rmid_entry(~0, nrmid); // temporary
> -
> + entry = __rmid_entry(idx);
> if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
> QOS_L3_OCCUP_EVENT_ID, &val)) {
> rmid_dirty = true;
> @@ -269,19 +284,21 @@ void __check_limbo(struct rdt_domain *d, bool
> force_free)
> }
>
> if (force_free || !rmid_dirty) {
> - clear_bit(entry->rmid, d->rmid_busy_llc);
> + clear_bit(idx, d->rmid_busy_llc);
> if (!--entry->busy) {
> rmid_limbo_count--;
> list_add_tail(&entry->list, &rmid_free_lru);
> }
> }
> - crmid = nrmid + 1;
> + cur_idx = idx + 1;
> }
> }
>
> bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d) {
> - return find_first_bit(d->rmid_busy_llc, r->num_rmid) != r->num_rmid;
> + u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> +
> + return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
> }
>
> /*
> @@ -311,6 +328,9 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> struct rdt_domain *d;
> int cpu, err;
> u64 val = 0;
> + u32 idx;
> +
> + idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
>
> entry->busy = 0;
> cpu = get_cpu();
> @@ -330,7 +350,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
> */
> if (!has_busy_rmid(r, d))
> cqm_setup_limbo_handler(d,
> CQM_LIMBOCHECK_INTERVAL);
> - set_bit(entry->rmid, d->rmid_busy_llc);
> + set_bit(idx, d->rmid_busy_llc);
> entry->busy++;
> }
> put_cpu();
> @@ -343,14 +363,16 @@ static void add_rmid_to_limbo(struct rmid_entry
> *entry)
>
> void free_rmid(u32 closid, u32 rmid)
> {
> + u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
> struct rmid_entry *entry;
>
> - if (!rmid)
> - return;
> -
> lockdep_assert_held(&rdtgroup_mutex);
>
> - entry = __rmid_entry(closid, rmid);
> + /* do not allow the default rmid to be free'd */
> + if (!idx)
> + return;
> +
> + entry = __rmid_entry(idx);
>
> if (is_llc_occupancy_enabled())
> add_rmid_to_limbo(entry);
> @@ -360,6 +382,7 @@ void free_rmid(u32 closid, u32 rmid)
>
> static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) {
> + u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
> struct mbm_state *m;
> u64 tval = 0;
>
> @@ -376,10 +399,10 @@ static int __mon_event_count(u32 closid, u32 rmid,
> struct rmid_read *rr)
> rr->val += tval;
> return 0;
> case QOS_L3_MBM_TOTAL_EVENT_ID:
> - m = &rr->d->mbm_total[rmid];
> + m = &rr->d->mbm_total[idx];
> break;
> case QOS_L3_MBM_LOCAL_EVENT_ID:
> - m = &rr->d->mbm_local[rmid];
> + m = &rr->d->mbm_local[idx];
> break;
> default:
> /*
> @@ -412,7 +435,8 @@ static int __mon_event_count(u32 closid, u32 rmid,
> struct rmid_read *rr)
> */
> static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr) {
> - struct mbm_state *m = &rr->d->mbm_local[rmid];
> + u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
> + struct mbm_state *m = &rr->d->mbm_local[idx];
> u64 cur_bw, bytes, cur_bytes;
>
> cur_bytes = rr->val;
> @@ -502,7 +526,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct
> rdt_domain *dom_mbm) {
> u32 closid, rmid, cur_msr_val, new_msr_val;
> struct mbm_state *pmbm_data, *cmbm_data;
> - u32 cur_bw, delta_bw, user_bw;
> + u32 cur_bw, delta_bw, user_bw, idx;
> struct rdt_resource *r_mba;
> struct rdt_domain *dom_mba;
> struct list_head *head;
> @@ -515,7 +539,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct
> rdt_domain *dom_mbm)
>
> closid = rgrp->closid;
> rmid = rgrp->mon.rmid;
> - pmbm_data = &dom_mbm->mbm_local[rmid];
> + idx = resctrl_arch_rmid_idx_encode(closid, rmid);
> + pmbm_data = &dom_mbm->mbm_local[idx];
>
> dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
> if (!dom_mba) {
> @@ -698,19 +723,19 @@ void mbm_setup_overflow_handler(struct
> rdt_domain *dom, unsigned long delay_ms)
>
> static int dom_data_init(struct rdt_resource *r) {
> + u32 nr_idx = resctrl_arch_system_num_rmid_idx();
> struct rmid_entry *entry = NULL;
> - int i, nr_rmids;
> + int i;
>
> - nr_rmids = r->num_rmid;
> - rmid_ptrs = kcalloc(nr_rmids, sizeof(struct rmid_entry), GFP_KERNEL);
> + rmid_ptrs = kcalloc(nr_idx, sizeof(struct rmid_entry), GFP_KERNEL);
> if (!rmid_ptrs)
> return -ENOMEM;
>
> - for (i = 0; i < nr_rmids; i++) {
> + for (i = 0; i < nr_idx; i++) {
> entry = &rmid_ptrs[i];
> INIT_LIST_HEAD(&entry->list);
>
> - entry->rmid = i;
> + resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
> list_add_tail(&entry->list, &rmid_free_lru);
> }
>
> @@ -719,7 +744,7 @@ static int dom_data_init(struct rdt_resource *r)
> * default_rdtgroup control group, which will be setup later. See
> * rdtgroup_setup_root().
> */
> - entry = __rmid_entry(0, 0);
> + entry = __rmid_entry(resctrl_arch_rmid_idx_encode(0, 0));

Better change to:
+ entry = __rmid_entry(resctrl_arch_rmid_idx_encode(RESCTRL_BAD_CLOSID, 0));
because this explicitly tells CLOSID is invalid here on X86.

> list_del(&entry->list);
>
> return 0;
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index f3b739c52e42..9ce4746778f4 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -3320,16 +3320,17 @@ void resctrl_offline_domain(struct rdt_resource *r,
> struct rdt_domain *d)
>
> static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain
> *d) {
> + u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> size_t tsize;
>
> if (is_llc_occupancy_enabled()) {
> - d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
> + d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
> if (!d->rmid_busy_llc)
> return -ENOMEM;
> }
> if (is_mbm_total_enabled()) {
> tsize = sizeof(*d->mbm_total);
> - d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
> + d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
> if (!d->mbm_total) {
> bitmap_free(d->rmid_busy_llc);
> return -ENOMEM;
> @@ -3337,7 +3338,7 @@ static int domain_setup_mon_state(struct
> rdt_resource *r, struct rdt_domain *d)
> }
> if (is_mbm_local_enabled()) {
> tsize = sizeof(*d->mbm_local);
> - d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
> + d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
> if (!d->mbm_local) {
> bitmap_free(d->rmid_busy_llc);
> kfree(d->mbm_total);
> --
> 2.30.2

Thanks.

-Fenghua

2023-01-17 20:42:40

by Fenghua Yu

[permalink] [raw]
Subject: RE: [PATCH v2 02/18] x86/resctrl: Access per-rmid structures by index

Hi, James,

> Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
> monitors, RMID values on arm64 are not unique unless the CLOSID is also
> included. Bitmaps like rmid_busy_llc need to be sized by the number of unique
> entries for this resource.
>
> Add helpers to encode/decode the CLOSID and RMID to an index. The domain's
> busy_rmid_llc and the rmid_ptrs[] array are then sized by index. On x86, this is
> always just the RMID. This gives resctrl a unique value it can use to store
> monitor values, and allows MPAM to decode the closid when reading the
> hardware counters.
>
> Tested-by: Shaopeng Tan <[email protected]>
> Signed-off-by: James Morse <[email protected]>
> ---
> Changes since v1:
> * Added X86_BAD_CLOSID macro to make it clear what this value means
> * Added second WARN_ON() for closid checking, and made both _ONCE()
> ---
> arch/x86/include/asm/resctrl.h | 24 ++++++++
> arch/x86/kernel/cpu/resctrl/internal.h | 2 +
> arch/x86/kernel/cpu/resctrl/monitor.c | 79 +++++++++++++++++---------
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 ++-
> 4 files changed, 82 insertions(+), 30 deletions(-)
>
> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
> index 52788f79786f..44d568f3577c 100644
> --- a/arch/x86/include/asm/resctrl.h
> +++ b/arch/x86/include/asm/resctrl.h
> @@ -7,6 +7,13 @@
> #include <linux/sched.h>
> #include <linux/jump_label.h>
>
> +/*
> + * This value can never be a valid CLOSID, and is used when mapping a
> + * (closid, rmid) pair to an index and back. On x86 only the RMID is
> + * needed.
> + */
> +#define X86_RESCTRL_BAD_CLOSID ~0
> +
> /**
> * struct resctrl_pqr_state - State cache for the PQR MSR
> * @cur_rmid: The cached Resource Monitoring ID
> @@ -94,6 +101,23 @@ static inline void resctrl_sched_in(void)
> __resctrl_sched_in();
> }
>
> +static inline u32 resctrl_arch_system_num_rmid_idx(void)
> +{
> + /* RMID are independent numbers for x86. num_rmid_idx==num_rmid
> */
> + return boot_cpu_data.x86_cache_max_rmid + 1; }
> +
> +static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid,
> +u32 *rmid) {
> + *rmid = idx;
> + *closid = X86_RESCTRL_BAD_CLOSID;
> +}
> +
> +static inline u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid) {

s/u32 closid/u32 ignored/, please.

> + return rmid;
> +}
> +

Thanks.

-Fenghua

2023-02-02 23:44:59

by Reinette Chatre

[permalink] [raw]
Subject: Re: [PATCH v2 02/18] x86/resctrl: Access per-rmid structures by index

Hi James,

On 1/13/2023 9:54 AM, James Morse wrote:
> Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
> monitors, RMID values on arm64 are not unique unless the CLOSID is
> also included. Bitmaps like rmid_busy_llc need to be sized by the
> number of unique entries for this resource.
>
> Add helpers to encode/decode the CLOSID and RMID to an index. The
> domain's busy_rmid_llc and the rmid_ptrs[] array are then sized by

busy_rmid_llc -> rmid_busy_llc ?

Could you please also mention the MBM state impacted?

> index. On x86, this is always just the RMID. This gives resctrl a
> unique value it can use to store monitor values, and allows MPAM to
> decode the closid when reading the hardware counters.
>
> Tested-by: Shaopeng Tan <[email protected]>
> Signed-off-by: James Morse <[email protected]>
> ---
> Changes since v1:
> * Added X86_BAD_CLOSID macro to make it clear what this value means
> * Added second WARN_ON() for closid checking, and made both _ONCE()
> ---
> arch/x86/include/asm/resctrl.h | 24 ++++++++
> arch/x86/kernel/cpu/resctrl/internal.h | 2 +
> arch/x86/kernel/cpu/resctrl/monitor.c | 79 +++++++++++++++++---------
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 ++-
> 4 files changed, 82 insertions(+), 30 deletions(-)
>
> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
> index 52788f79786f..44d568f3577c 100644
> --- a/arch/x86/include/asm/resctrl.h
> +++ b/arch/x86/include/asm/resctrl.h
> @@ -7,6 +7,13 @@
> #include <linux/sched.h>
> #include <linux/jump_label.h>
>
> +/*
> + * This value can never be a valid CLOSID, and is used when mapping a
> + * (closid, rmid) pair to an index and back. On x86 only the RMID is
> + * needed.
> + */
> +#define X86_RESCTRL_BAD_CLOSID ~0

Should this be moved to previous patch where first usage of ~0 appears?

Also, not having a size creates opportunity for inconsistencies. How
about ((u32)~0) ?

> +
> /**
> * struct resctrl_pqr_state - State cache for the PQR MSR
> * @cur_rmid: The cached Resource Monitoring ID
> @@ -94,6 +101,23 @@ static inline void resctrl_sched_in(void)
> __resctrl_sched_in();
> }
>
> +static inline u32 resctrl_arch_system_num_rmid_idx(void)
> +{
> + /* RMID are independent numbers for x86. num_rmid_idx==num_rmid */
> + return boot_cpu_data.x86_cache_max_rmid + 1;
> +}

It seems that this helper and its subsequent usage eliminates the
need for struct rdt_resource::num_rmid? Are any users left?

Reinette

2023-03-03 18:33:39

by James Morse

[permalink] [raw]
Subject: Re: [PATCH v2 02/18] x86/resctrl: Access per-rmid structures by index

Hi Fenghua,

On 17/01/2023 18:28, Yu, Fenghua wrote:
>> Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
>> monitors, RMID values on arm64 are not unique unless the CLOSID is also
>> included. Bitmaps like rmid_busy_llc need to be sized by the number of unique
>> entries for this resource.
>>
>> Add helpers to encode/decode the CLOSID and RMID to an index. The domain's
>> busy_rmid_llc and the rmid_ptrs[] array are then sized by index. On x86, this is
>> always just the RMID. This gives resctrl a unique value it can use to store
>> monitor values, and allows MPAM to decode the closid when reading the
>> hardware counters.

>> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
>> index 52788f79786f..44d568f3577c 100644
>> --- a/arch/x86/include/asm/resctrl.h
>> +++ b/arch/x86/include/asm/resctrl.h
>> @@ -94,6 +101,23 @@ static inline void resctrl_sched_in(void)
>> __resctrl_sched_in();
>> }
>>
>> +static inline u32 resctrl_arch_system_num_rmid_idx(void)
>> +{
>> + /* RMID are independent numbers for x86. num_rmid_idx==num_rmid
>> */

> Is it better to change the comment to something like:
> + /* RMIDs are independent of CLOSIDs and number of RMIDs is fixed. */

I agree the reference to x86 looks a bit funny, but its to explain why this 'idx'
concept exists before the MPAM code is merged that needs it.

I don't think it helps to say the number of RMID is fixed on x86, for MPAM there is no
direct equivalent, but everything there is fixed too. Its not the fixed property that this
is needed for, but the 'no equivalent'.


>> + return boot_cpu_data.x86_cache_max_rmid + 1; }
>> +

>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c
>> b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index 13673cab175a..dbae380e3d1c 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -142,12 +142,27 @@ static inline u64 get_corrected_mbm_count(u32 rmid,
>> unsigned long val)
>> return val;
>> }
>>
>> -static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
>> +/*
>> + * x86 and arm64 differ in their handling of monitoring.
>> + * x86's RMID are an independent number, there is one RMID '1'.

> What do you mean by "one RMID '1'"?

If I could snoop the traffic on the interconnect, there would only be one source of
traffic with RMID value 1.

In contrast, if you did this on an arm machine, there would be numerous sources of traffic
with PMG value 1. This is because PMG isn't an independent number, it isn't equivalent to
RMID.


> Should this sentence be "x86's RMID is an independent number."?

Does it help to rephrase it as:
| * x86's RMID are an independent number, there is only one source of traffic
| * an RMID value of '1'.
| * arm64's PMG extend the PARTID/CLOSID space, there are multiple sources of
| * traffic with a PMG value of '1', one for each CLOSID, meaining the RMID
| * value is no longer unique.

?

>> + * arm64's PMG extend the PARTID/CLOSID space, there is one RMID '1'
>> +for each
>> + * CLOSID. The RMID is no longer unique.
>> + * To account for this, resctrl uses an index. On x86 this is just the
>> +RMID,
>> + * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
>> + *
>> + * The domain's rmid_busy_llc and rmid_ptrs are sized by index. The
>> +arch code
>> + * must accept an attempt to read every index.
>> + */
>> +static inline struct rmid_entry *__rmid_entry(u32 idx)
>> {
>> struct rmid_entry *entry;
>> + u32 closid, rmid;
>>
>> - entry = &rmid_ptrs[rmid];
>> - WARN_ON(entry->rmid != rmid);
>> + entry = &rmid_ptrs[idx];
>> + resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
>> +
>> + WARN_ON_ONCE(entry->closid != closid);
>> + WARN_ON_ONCE(entry->rmid != rmid);
>>
>> return entry;
>> }

>> @@ -719,7 +744,7 @@ static int dom_data_init(struct rdt_resource *r)
>> * default_rdtgroup control group, which will be setup later. See
>> * rdtgroup_setup_root().
>> */
>> - entry = __rmid_entry(0, 0);
>> + entry = __rmid_entry(resctrl_arch_rmid_idx_encode(0, 0));

> Better change to:
> + entry = __rmid_entry(resctrl_arch_rmid_idx_encode(RESCTRL_BAD_CLOSID, 0));
> because this explicitly tells CLOSID is invalid here on X86.

Its not invalid, its the reserved value that MSR_IA32_PQR_ASSOC is set to when a task is
in the default control group, and when a CPU first comes online.

Reserving CLOSID ~0 would be ignored on x86, but hit a WARN_ON_ONCE() on arm64 because ~0
isn't a valid closid. The X86 in 'X86_RESCTRL_BAD_CLOSID' is the hint it should only
appear in x86 specific code!

How about:
| idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, 0);
| entry = __rmid_entry(idx);

?

>
>> list_del(&entry->list);
>>
>> return 0;


Thanks,

James

2023-03-03 18:33:52

by James Morse

[permalink] [raw]
Subject: Re: [PATCH v2 02/18] x86/resctrl: Access per-rmid structures by index

Hi Reinette,

On 02/02/2023 23:44, Reinette Chatre wrote:
> On 1/13/2023 9:54 AM, James Morse wrote:
>> Because of the differences between Intel RDT/AMD QoS and Arm's MPAM
>> monitors, RMID values on arm64 are not unique unless the CLOSID is
>> also included. Bitmaps like rmid_busy_llc need to be sized by the
>> number of unique entries for this resource.
>>
>> Add helpers to encode/decode the CLOSID and RMID to an index. The
>> domain's busy_rmid_llc and the rmid_ptrs[] array are then sized by
>
> busy_rmid_llc -> rmid_busy_llc ?
>
> Could you please also mention the MBM state impacted?

Yup, this paragraph reworded as:
| Add helpers to encode/decode the CLOSID and RMID to an index. The
| domain's rmid_busy__llc and the rmid_ptrs[] array are then sized by
| index, as are the domain mbm_local and mbm_total arrays.
| On x86, the index is always just the RMID, so all these structures
| remain the same size.
|
| The index gives resctrl a unique value it can use to store monitor
| values, and allows MPAM to decode the closid when reading the hardware
| counters.

>> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
>> index 52788f79786f..44d568f3577c 100644
>> --- a/arch/x86/include/asm/resctrl.h
>> +++ b/arch/x86/include/asm/resctrl.h
>> @@ -7,6 +7,13 @@
>> #include <linux/sched.h>
>> #include <linux/jump_label.h>
>>
>> +/*
>> + * This value can never be a valid CLOSID, and is used when mapping a
>> + * (closid, rmid) pair to an index and back. On x86 only the RMID is
>> + * needed.
>> + */
>> +#define X86_RESCTRL_BAD_CLOSID ~0

> Should this be moved to previous patch where first usage of ~0 appears?

Makes sense,


> Also, not having a size creates opportunity for inconsistencies. How
> about ((u32)~0) ?

Yes, the compilers secret expectations on sizes always catch me out!


>> +
>> /**
>> * struct resctrl_pqr_state - State cache for the PQR MSR
>> * @cur_rmid: The cached Resource Monitoring ID
>> @@ -94,6 +101,23 @@ static inline void resctrl_sched_in(void)
>> __resctrl_sched_in();
>> }
>>
>> +static inline u32 resctrl_arch_system_num_rmid_idx(void)
>> +{
>> + /* RMID are independent numbers for x86. num_rmid_idx==num_rmid */
>> + return boot_cpu_data.x86_cache_max_rmid + 1;
>> +}

> It seems that this helper and its subsequent usage eliminates the
> need for struct rdt_resource::num_rmid? Are any users left?

The only user in the filesystem parts of resctrl is rdt_num_rmids_show(), which exposes
the value to user-space. The value is unfortunately meaningless on MPAM systems, but as
its user-space ABI, it has to stay.

The remaining users in the x86 specific code:
domain_add_cpu() continues to use r->num_rmid as the arch code can know its the same
number as num_idx, and rdt_get_mon_l3_config() calculates the value.


Thanks,

James