2024-01-26 22:20:42

by Ilkka Koskinen

[permalink] [raw]
Subject: [PATCH 0/3] perf/arm-cmn: Add support for tertiary match group

Tertiary match group was added to CMN-650 and newer implementations.
This patchset makes the parameters model specific to support properly
various models. Also, wp_config registers and filter groups numbers are
decoupled to enable the new match group.

Ilkka Koskinen (3):
perf/arm-cmn: Decouple wp_config registers from filter group number
perf/arm-cmn: Add support for model specific parameters
perf/arm-cmn: Enable support for tertiary match group

drivers/perf/arm-cmn.c | 118 ++++++++++++++++++++++++++++++++---------
1 file changed, 92 insertions(+), 26 deletions(-)

--
2.40.1



2024-01-26 22:21:00

by Ilkka Koskinen

[permalink] [raw]
Subject: [PATCH 1/3] perf/arm-cmn: Decouple wp_config registers from filter group number

Previously, wp_config0/2 registers were used for primary match group and
wp_config1/3 registers for secondary match group. In order to support
tertiary match group, this patch decouples the registers and the groups.

Allocation is changed to dynamic but it's still per mesh instance rather
than per node.

Signed-off-by: Ilkka Koskinen <[email protected]>
---
drivers/perf/arm-cmn.c | 52 ++++++++++++++++++++++++++++++++++--------
1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index c584165b13ba..93eb47ea7e25 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -591,6 +591,7 @@ struct arm_cmn_hw_event {
u8 dtm_offset;
bool wide_sel;
enum cmn_filter_select filter_sel;
+ int wp_idx;
};

#define for_each_hw_dn(hw, dn, i) \
@@ -1337,7 +1338,35 @@ static const struct attribute_group *arm_cmn_attr_groups[] = {

static int arm_cmn_wp_idx(struct perf_event *event)
{
- return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event);
+ struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+
+ return hw->wp_idx;
+}
+
+static int arm_cmn_wp_idx_unused(struct perf_event *event, struct arm_cmn_dtm *dtm,
+ struct arm_cmn_dtc *dtc)
+{
+ struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+ int idx, tmp, direction = CMN_EVENT_EVENTID(event);
+
+ /*
+ * Examine wp 0 & 1 for the up direction,
+ * examine wp 2 & 3 for the down direction
+ */
+ for (idx = direction; idx < direction + 2; idx++)
+ if (dtm->wp_event[idx] < 0)
+ break;
+
+ if (idx == direction + 2)
+ return -ENOSPC;
+
+ tmp = dtm->wp_event[idx ^ 1];
+ if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
+ CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
+ return -ENOSPC;
+
+ hw->wp_idx = idx;
+ return hw->wp_idx;
}

static u32 arm_cmn_wp_config(struct perf_event *event)
@@ -1785,6 +1814,8 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,

for_each_hw_dtc_idx(hw, j, idx)
cmn->dtc[j].counters[idx] = NULL;
+
+ hw->wp_idx = -1;
}

static int arm_cmn_event_add(struct perf_event *event, int flags)
@@ -1794,6 +1825,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
struct arm_cmn_node *dn;
enum cmn_node_type type = CMN_EVENT_TYPE(event);
unsigned int input_sel, i = 0;
+ int wp_idx;

if (type == CMN_TYPE_DTC) {
while (cmn->dtc[i].cycles)
@@ -1822,6 +1854,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
}

/* ...then the local counters to feed them */
+ wp_idx = -1;
for_each_hw_dn(hw, dn, i) {
struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
@@ -1835,16 +1868,17 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
if (type == CMN_TYPE_XP) {
input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx;
} else if (type == CMN_TYPE_WP) {
- int tmp, wp_idx = arm_cmn_wp_idx(event);
u32 cfg = arm_cmn_wp_config(event);

- if (dtm->wp_event[wp_idx] >= 0)
- goto free_dtms;
-
- tmp = dtm->wp_event[wp_idx ^ 1];
- if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
- CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
- goto free_dtms;
+ /*
+ * wp_config register index is currently allocated per
+ * mesh instance rather than per node.
+ */
+ if (wp_idx < 0) {
+ wp_idx = arm_cmn_wp_idx_unused(event, dtm, &cmn->dtc[d]);
+ if (wp_idx < 0)
+ goto free_dtms;
+ }

input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
dtm->wp_event[wp_idx] = hw->dtc_idx[d];
--
2.40.1


2024-01-26 22:21:11

by Ilkka Koskinen

[permalink] [raw]
Subject: [PATCH 2/3] perf/arm-cmn: Add support for model specific parameters

Newer models have slightly different parameter fields or may introduce
completely new ones. Thus, prepare for it by making also the parameters
model specific.

Signed-off-by: Ilkka Koskinen <[email protected]>
---
drivers/perf/arm-cmn.c | 45 +++++++++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 93eb47ea7e25..dc6370396ad0 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -628,6 +628,7 @@ struct arm_cmn_event_attr {

struct arm_cmn_format_attr {
struct device_attribute attr;
+ enum cmn_model model;
u64 field;
int config;
};
@@ -1265,29 +1266,44 @@ static ssize_t arm_cmn_format_show(struct device *dev,
return sysfs_emit(buf, "config%d:%d-%d\n", fmt->config, lo, hi);
}

-#define _CMN_FORMAT_ATTR(_name, _cfg, _fld) \
+static umode_t arm_cmn_format_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr,
+ int unused)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+ struct arm_cmn_format_attr *fmt = container_of(attr, typeof(*fmt), attr.attr);
+
+ if (!(fmt->model & arm_cmn_model(cmn)))
+ return 0;
+
+ return attr->mode;
+}
+
+#define _CMN_FORMAT_ATTR(_model, _name, _cfg, _fld) \
(&((struct arm_cmn_format_attr[]) {{ \
.attr = __ATTR(_name, 0444, arm_cmn_format_show, NULL), \
+ .model = _model, \
.config = _cfg, \
.field = _fld, \
}})[0].attr.attr)
-#define CMN_FORMAT_ATTR(_name, _fld) _CMN_FORMAT_ATTR(_name, 0, _fld)
+#define CMN_FORMAT_ATTR(_model, _name, _fld) _CMN_FORMAT_ATTR(_model, _name, 0, _fld)

static struct attribute *arm_cmn_format_attrs[] = {
- CMN_FORMAT_ATTR(type, CMN_CONFIG_TYPE),
- CMN_FORMAT_ATTR(eventid, CMN_CONFIG_EVENTID),
- CMN_FORMAT_ATTR(occupid, CMN_CONFIG_OCCUPID),
- CMN_FORMAT_ATTR(bynodeid, CMN_CONFIG_BYNODEID),
- CMN_FORMAT_ATTR(nodeid, CMN_CONFIG_NODEID),
+ CMN_FORMAT_ATTR(CMN_ANY, type, CMN_CONFIG_TYPE),
+ CMN_FORMAT_ATTR(CMN_ANY, eventid, CMN_CONFIG_EVENTID),
+ CMN_FORMAT_ATTR(CMN_ANY, occupid, CMN_CONFIG_OCCUPID),
+ CMN_FORMAT_ATTR(CMN_ANY, bynodeid, CMN_CONFIG_BYNODEID),
+ CMN_FORMAT_ATTR(CMN_ANY, nodeid, CMN_CONFIG_NODEID),

- CMN_FORMAT_ATTR(wp_dev_sel, CMN_CONFIG_WP_DEV_SEL),
- CMN_FORMAT_ATTR(wp_chn_sel, CMN_CONFIG_WP_CHN_SEL),
- CMN_FORMAT_ATTR(wp_grp, CMN_CONFIG_WP_GRP),
- CMN_FORMAT_ATTR(wp_exclusive, CMN_CONFIG_WP_EXCLUSIVE),
- CMN_FORMAT_ATTR(wp_combine, CMN_CONFIG_WP_COMBINE),
+ CMN_FORMAT_ATTR(CMN_ANY, wp_dev_sel, CMN_CONFIG_WP_DEV_SEL),
+ CMN_FORMAT_ATTR(CMN_ANY, wp_chn_sel, CMN_CONFIG_WP_CHN_SEL),
+ CMN_FORMAT_ATTR(CMN_ANY, wp_grp, CMN_CONFIG_WP_GRP),
+ CMN_FORMAT_ATTR(CMN_ANY, wp_exclusive, CMN_CONFIG_WP_EXCLUSIVE),
+ CMN_FORMAT_ATTR(CMN_ANY, wp_combine, CMN_CONFIG_WP_COMBINE),

- _CMN_FORMAT_ATTR(wp_val, 1, CMN_CONFIG1_WP_VAL),
- _CMN_FORMAT_ATTR(wp_mask, 2, CMN_CONFIG2_WP_MASK),
+ _CMN_FORMAT_ATTR(CMN_ANY, wp_val, 1, CMN_CONFIG1_WP_VAL),
+ _CMN_FORMAT_ATTR(CMN_ANY, wp_mask, 2, CMN_CONFIG2_WP_MASK),

NULL
};
@@ -1295,6 +1311,7 @@ static struct attribute *arm_cmn_format_attrs[] = {
static const struct attribute_group arm_cmn_format_attrs_group = {
.name = "format",
.attrs = arm_cmn_format_attrs,
+ .is_visible = arm_cmn_format_attr_is_visible,
};

static ssize_t arm_cmn_cpumask_show(struct device *dev,
--
2.40.1


2024-01-26 22:21:30

by Ilkka Koskinen

[permalink] [raw]
Subject: [PATCH 3/3] perf/arm-cmn: Enable support for tertiary match group

Add support for tertiary match group.

Signed-off-by: Ilkka Koskinen <[email protected]>
---
drivers/perf/arm-cmn.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index dc6370396ad0..ce9fbdcf6144 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -91,10 +91,13 @@
#define CMN600_WPn_CONFIG_WP_COMBINE BIT(6)
#define CMN600_WPn_CONFIG_WP_EXCLUSIVE BIT(5)
#define CMN_DTM_WPn_CONFIG_WP_GRP GENMASK_ULL(5, 4)
+#define CMN600_WPn_CONFIG_WP_GRP BIT(4)
#define CMN_DTM_WPn_CONFIG_WP_CHN_SEL GENMASK_ULL(3, 1)
#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL BIT(0)
#define CMN_DTM_WPn_VAL(n) (CMN_DTM_WPn(n) + 0x08)
#define CMN_DTM_WPn_MASK(n) (CMN_DTM_WPn(n) + 0x10)
+#define CMN_DTM_WP_CHN_SEL_REQ_VC 0
+#define CMN_DTM_WP_GRP_TERTIARY 0x2

#define CMN_DTM_PMU_CONFIG 0x210
#define CMN__PMEVCNT0_INPUT_SEL GENMASK_ULL(37, 32)
@@ -175,8 +178,8 @@
#define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48)
#define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51)
/* Note that we don't yet support the tertiary match group on newer IPs */
-#define CMN_CONFIG_WP_GRP BIT_ULL(56)
-#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57)
+#define CMN_CONFIG_WP_GRP GENMASK_ULL(57, 56)
+#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(58)
#define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0)
#define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0)

@@ -1298,7 +1301,9 @@ static struct attribute *arm_cmn_format_attrs[] = {

CMN_FORMAT_ATTR(CMN_ANY, wp_dev_sel, CMN_CONFIG_WP_DEV_SEL),
CMN_FORMAT_ATTR(CMN_ANY, wp_chn_sel, CMN_CONFIG_WP_CHN_SEL),
- CMN_FORMAT_ATTR(CMN_ANY, wp_grp, CMN_CONFIG_WP_GRP),
+ CMN_FORMAT_ATTR(CMN600, wp_grp, CMN600_WPn_CONFIG_WP_GRP),
+ CMN_FORMAT_ATTR(NOT_CMN600, wp_grp, CMN_CONFIG_WP_GRP),
+
CMN_FORMAT_ATTR(CMN_ANY, wp_exclusive, CMN_CONFIG_WP_EXCLUSIVE),
CMN_FORMAT_ATTR(CMN_ANY, wp_combine, CMN_CONFIG_WP_COMBINE),

@@ -1398,8 +1403,11 @@ static u32 arm_cmn_wp_config(struct perf_event *event)

config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
- FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1);
+
+ if (grp)
+ config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_GRP :
+ FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp);
if (exc)
config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE :
CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE;
@@ -1764,6 +1772,13 @@ static int arm_cmn_event_init(struct perf_event *event)
/* ...and we need a "real" direction */
if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN)
return -EINVAL;
+
+ if (cmn->part != PART_CMN600)
+ if (CMN_EVENT_WP_GRP(event) > CMN_DTM_WP_GRP_TERTIARY ||
+ (CMN_EVENT_WP_GRP(event) == CMN_DTM_WP_GRP_TERTIARY &&
+ CMN_EVENT_WP_CHN_SEL(event) != CMN_DTM_WP_CHN_SEL_REQ_VC))
+ return -EINVAL;
+
/* ...but the DTM may depend on which port we're watching */
if (cmn->multi_dtm)
hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
--
2.40.1


2024-01-29 17:08:56

by Robin Murphy

[permalink] [raw]
Subject: Re: [PATCH 1/3] perf/arm-cmn: Decouple wp_config registers from filter group number

Hi Ilkka,

On 2024-01-26 10:12 pm, Ilkka Koskinen wrote:
> Previously, wp_config0/2 registers were used for primary match group and
> wp_config1/3 registers for secondary match group. In order to support
> tertiary match group, this patch decouples the registers and the groups.

Happy to see you having a stab at this, however I fear I you're in for a
fair dose of "if it were this simple I might have already done it" :)

> Allocation is changed to dynamic but it's still per mesh instance rather
> than per node.
>
> Signed-off-by: Ilkka Koskinen <[email protected]>
> ---
> drivers/perf/arm-cmn.c | 52 ++++++++++++++++++++++++++++++++++--------
> 1 file changed, 43 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index c584165b13ba..93eb47ea7e25 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -591,6 +591,7 @@ struct arm_cmn_hw_event {
> u8 dtm_offset;
> bool wide_sel;
> enum cmn_filter_select filter_sel;
> + int wp_idx;
> };
>
> #define for_each_hw_dn(hw, dn, i) \
> @@ -1337,7 +1338,35 @@ static const struct attribute_group *arm_cmn_attr_groups[] = {
>
> static int arm_cmn_wp_idx(struct perf_event *event)
> {
> - return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event);
> + struct arm_cmn_hw_event *hw = to_cmn_hw(event);
> +
> + return hw->wp_idx;

Sorry, this breaks group validation.

> +}
> +
> +static int arm_cmn_wp_idx_unused(struct perf_event *event, struct arm_cmn_dtm *dtm,
> + struct arm_cmn_dtc *dtc)
> +{
> + struct arm_cmn_hw_event *hw = to_cmn_hw(event);
> + int idx, tmp, direction = CMN_EVENT_EVENTID(event);
> +
> + /*
> + * Examine wp 0 & 1 for the up direction,
> + * examine wp 2 & 3 for the down direction
> + */
> + for (idx = direction; idx < direction + 2; idx++)
> + if (dtm->wp_event[idx] < 0)
> + break;
> +
> + if (idx == direction + 2)
> + return -ENOSPC;
> +
> + tmp = dtm->wp_event[idx ^ 1];
> + if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
> + CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
> + return -ENOSPC;
> +
> + hw->wp_idx = idx;

I don't really get this logic either - we can allocate a potentially
different index for every DTM, but only store the most recent one?

> + return hw->wp_idx;
> }
>
> static u32 arm_cmn_wp_config(struct perf_event *event)
> @@ -1785,6 +1814,8 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
>
> for_each_hw_dtc_idx(hw, j, idx)
> cmn->dtc[j].counters[idx] = NULL;
> +
> + hw->wp_idx = -1;
> }
>
> static int arm_cmn_event_add(struct perf_event *event, int flags)
> @@ -1794,6 +1825,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
> struct arm_cmn_node *dn;
> enum cmn_node_type type = CMN_EVENT_TYPE(event);
> unsigned int input_sel, i = 0;
> + int wp_idx;
>
> if (type == CMN_TYPE_DTC) {
> while (cmn->dtc[i].cycles)
> @@ -1822,6 +1854,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
> }
>
> /* ...then the local counters to feed them */
> + wp_idx = -1;

Oh, I guess this trying to avoid some of that issue, but I still don't
think it works - say we add an event targeted to XP B, which sees WP0 is
free on DTM B so allocates index 0; then we add another event
aggregating across XPs A and B, which sees WP0 is free on DTM A,
allocates index 0, then goes on to stomp WP0 on DTM B as well - oops.

I don't think it's going to be feasible to do this without tracking the
full allocation state with a wp_idx bitmap in the hw_event - at least it
only needs to be half the size of dtm_idx, so I think there's still room.

Thanks,
Robin.

> for_each_hw_dn(hw, dn, i) {
> struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
> unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
> @@ -1835,16 +1868,17 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
> if (type == CMN_TYPE_XP) {
> input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx;
> } else if (type == CMN_TYPE_WP) {
> - int tmp, wp_idx = arm_cmn_wp_idx(event);
> u32 cfg = arm_cmn_wp_config(event);
>
> - if (dtm->wp_event[wp_idx] >= 0)
> - goto free_dtms;
> -
> - tmp = dtm->wp_event[wp_idx ^ 1];
> - if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
> - CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
> - goto free_dtms;
> + /*
> + * wp_config register index is currently allocated per
> + * mesh instance rather than per node.
> + */
> + if (wp_idx < 0) {
> + wp_idx = arm_cmn_wp_idx_unused(event, dtm, &cmn->dtc[d]);
> + if (wp_idx < 0)
> + goto free_dtms;
> + }
>
> input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
> dtm->wp_event[wp_idx] = hw->dtc_idx[d];

2024-01-29 17:13:12

by Robin Murphy

[permalink] [raw]
Subject: Re: [PATCH 3/3] perf/arm-cmn: Enable support for tertiary match group

On 2024-01-26 10:12 pm, Ilkka Koskinen wrote:
> Add support for tertiary match group.
>
> Signed-off-by: Ilkka Koskinen <[email protected]>
> ---
> drivers/perf/arm-cmn.c | 23 +++++++++++++++++++----
> 1 file changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
> index dc6370396ad0..ce9fbdcf6144 100644
> --- a/drivers/perf/arm-cmn.c
> +++ b/drivers/perf/arm-cmn.c
> @@ -91,10 +91,13 @@
> #define CMN600_WPn_CONFIG_WP_COMBINE BIT(6)
> #define CMN600_WPn_CONFIG_WP_EXCLUSIVE BIT(5)
> #define CMN_DTM_WPn_CONFIG_WP_GRP GENMASK_ULL(5, 4)
> +#define CMN600_WPn_CONFIG_WP_GRP BIT(4)
> #define CMN_DTM_WPn_CONFIG_WP_CHN_SEL GENMASK_ULL(3, 1)
> #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL BIT(0)
> #define CMN_DTM_WPn_VAL(n) (CMN_DTM_WPn(n) + 0x08)
> #define CMN_DTM_WPn_MASK(n) (CMN_DTM_WPn(n) + 0x10)
> +#define CMN_DTM_WP_CHN_SEL_REQ_VC 0
> +#define CMN_DTM_WP_GRP_TERTIARY 0x2
>
> #define CMN_DTM_PMU_CONFIG 0x210
> #define CMN__PMEVCNT0_INPUT_SEL GENMASK_ULL(37, 32)
> @@ -175,8 +178,8 @@
> #define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48)
> #define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51)
> /* Note that we don't yet support the tertiary match group on newer IPs */
> -#define CMN_CONFIG_WP_GRP BIT_ULL(56)
> -#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57)
> +#define CMN_CONFIG_WP_GRP GENMASK_ULL(57, 56)
> +#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(58)
> #define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0)
> #define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0)
>
> @@ -1298,7 +1301,9 @@ static struct attribute *arm_cmn_format_attrs[] = {
>
> CMN_FORMAT_ATTR(CMN_ANY, wp_dev_sel, CMN_CONFIG_WP_DEV_SEL),
> CMN_FORMAT_ATTR(CMN_ANY, wp_chn_sel, CMN_CONFIG_WP_CHN_SEL),
> - CMN_FORMAT_ATTR(CMN_ANY, wp_grp, CMN_CONFIG_WP_GRP),
> + CMN_FORMAT_ATTR(CMN600, wp_grp, CMN600_WPn_CONFIG_WP_GRP),

Perhaps an easy confusion, but 4 != 56: CMN_CONFIG_WP_* represent
perf_event->config{,1,2} attribute fields per the CMN_CONFIG_* pattern,
whereas CMN*_WPn_CONFIG_* are hardware register fields where "config" is
just annoygingly part of the register name.

> + CMN_FORMAT_ATTR(NOT_CMN600, wp_grp, CMN_CONFIG_WP_GRP),

Hmm, I'm sure last time I tried something like this, sysfs wouldn't let
two attributes with the same name exist, regardless of whether one was
meant to be hidden :/

TBH I think that either we change ABI for everyone consistently, or we
extend the field in a backwards-compatible way. If you think an ABI
break would affect existing CMN-600 users, then surely at stands to
affect existing CMN-650 and CMN-700 users just as much?

> +
> CMN_FORMAT_ATTR(CMN_ANY, wp_exclusive, CMN_CONFIG_WP_EXCLUSIVE),
> CMN_FORMAT_ATTR(CMN_ANY, wp_combine, CMN_CONFIG_WP_COMBINE),
>
> @@ -1398,8 +1403,11 @@ static u32 arm_cmn_wp_config(struct perf_event *event)
>
> config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
> FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
> - FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
> FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1);
> +
> + if (grp)
> + config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_GRP :
> + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp);

FWIW I think something more like "if (is_cmn600) grp &= 1;" before the
existing assignent might be clearer. Note that that *is* effectively how
this works already since CMN_DTM_WPn_CONFIG_WP_GRP was updated, it's
just currently implicit in CMN_EVENT_WP_GRP().

> if (exc)
> config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE :
> CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE;

You've missed the "(combine && !grp)" logic below this point, which also
needs to get rather more involved if a combined match across groups 1
and 2 is going to work correctly.

> @@ -1764,6 +1772,13 @@ static int arm_cmn_event_init(struct perf_event *event)
> /* ...and we need a "real" direction */
> if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN)
> return -EINVAL;
> +
> + if (cmn->part != PART_CMN600)
> + if (CMN_EVENT_WP_GRP(event) > CMN_DTM_WP_GRP_TERTIARY ||
> + (CMN_EVENT_WP_GRP(event) == CMN_DTM_WP_GRP_TERTIARY &&
> + CMN_EVENT_WP_CHN_SEL(event) != CMN_DTM_WP_CHN_SEL_REQ_VC))
> + return -EINVAL;
> +

We already don't attempt to sanity-check watchpoint arguments (e.g.
chn>3 or chn=1,grp=1), so I'm not really inclined to start. The aim here
has always been not to try to understand watchpoints at all, and
effectively just pass through the register interface to the user.

Thanks,
Robin.

> /* ...but the DTM may depend on which port we're watching */
> if (cmn->multi_dtm)
> hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;

2024-01-30 05:29:21

by Ilkka Koskinen

[permalink] [raw]
Subject: Re: [PATCH 1/3] perf/arm-cmn: Decouple wp_config registers from filter group number


Hi Robin,

Thanks for the review.

On Mon, 29 Jan 2024, Robin Murphy wrote:
> Hi Ilkka,
>
> On 2024-01-26 10:12 pm, Ilkka Koskinen wrote:
>> Previously, wp_config0/2 registers were used for primary match group and
>> wp_config1/3 registers for secondary match group. In order to support
>> tertiary match group, this patch decouples the registers and the groups.
>
> Happy to see you having a stab at this, however I fear I you're in for a fair
> dose of "if it were this simple I might have already done it" :)

Uh, for a little while I thought it seemed too easy but decided to
continue nevertheless

>
>> Allocation is changed to dynamic but it's still per mesh instance rather
>> than per node.
>>
>> Signed-off-by: Ilkka Koskinen <[email protected]>
>> ---
>> drivers/perf/arm-cmn.c | 52 ++++++++++++++++++++++++++++++++++--------
>> 1 file changed, 43 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
>> index c584165b13ba..93eb47ea7e25 100644
>> --- a/drivers/perf/arm-cmn.c
>> +++ b/drivers/perf/arm-cmn.c
>> @@ -591,6 +591,7 @@ struct arm_cmn_hw_event {
>> u8 dtm_offset;
>> bool wide_sel;
>> enum cmn_filter_select filter_sel;
>> + int wp_idx;
>> };
>> #define for_each_hw_dn(hw, dn, i) \
>> @@ -1337,7 +1338,35 @@ static const struct attribute_group
>> *arm_cmn_attr_groups[] = {
>> static int arm_cmn_wp_idx(struct perf_event *event)
>> {
>> - return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event);
>> + struct arm_cmn_hw_event *hw = to_cmn_hw(event);
>> +
>> + return hw->wp_idx;
>
> Sorry, this breaks group validation.

Clearly, watchpoint group validation was missing from my tests :(

>
>> +}
>> +
>> +static int arm_cmn_wp_idx_unused(struct perf_event *event, struct
>> arm_cmn_dtm *dtm,
>> + struct arm_cmn_dtc *dtc)
>> +{
>> + struct arm_cmn_hw_event *hw = to_cmn_hw(event);
>> + int idx, tmp, direction = CMN_EVENT_EVENTID(event);
>> +
>> + /*
>> + * Examine wp 0 & 1 for the up direction,
>> + * examine wp 2 & 3 for the down direction
>> + */
>> + for (idx = direction; idx < direction + 2; idx++)
>> + if (dtm->wp_event[idx] < 0)
>> + break;
>> +
>> + if (idx == direction + 2)
>> + return -ENOSPC;
>> +
>> + tmp = dtm->wp_event[idx ^ 1];
>> + if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
>> + CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
>> + return -ENOSPC;
>> +
>> + hw->wp_idx = idx;
>
> I don't really get this logic either - we can allocate a potentially
> different index for every DTM, but only store the most recent one?
>
>> + return hw->wp_idx;
>> }
>> static u32 arm_cmn_wp_config(struct perf_event *event)
>> @@ -1785,6 +1814,8 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn,
>> struct perf_event *event,
>> for_each_hw_dtc_idx(hw, j, idx)
>> cmn->dtc[j].counters[idx] = NULL;
>> +
>> + hw->wp_idx = -1;
>> }
>> static int arm_cmn_event_add(struct perf_event *event, int flags)
>> @@ -1794,6 +1825,7 @@ static int arm_cmn_event_add(struct perf_event
>> *event, int flags)
>> struct arm_cmn_node *dn;
>> enum cmn_node_type type = CMN_EVENT_TYPE(event);
>> unsigned int input_sel, i = 0;
>> + int wp_idx;
>> if (type == CMN_TYPE_DTC) {
>> while (cmn->dtc[i].cycles)
>> @@ -1822,6 +1854,7 @@ static int arm_cmn_event_add(struct perf_event
>> *event, int flags)
>> }
>> /* ...then the local counters to feed them */
>> + wp_idx = -1;
>
> Oh, I guess this trying to avoid some of that issue, but I still don't think
> it works - say we add an event targeted to XP B, which sees WP0 is free on
> DTM B so allocates index 0; then we add another event aggregating across XPs
> A and B, which sees WP0 is free on DTM A, allocates index 0, then goes on to
> stomp WP0 on DTM B as well - oops.
>
> I don't think it's going to be feasible to do this without tracking the full
> allocation state with a wp_idx bitmap in the hw_event - at least it only
> needs to be half the size of dtm_idx, so I think there's still room.

Yeah, it was supposed to be simple and stupid version until I'd have time
to make the allocation per node. But the more I think about this, the more
I start to believe that the bitmap solution would be the better option
right away.

I'll take a look at better solution although it might take a while as I'll
probably get other more urgent tasks soon. If you find yourself with too
much free time, feel free to take this task ;)

Cheers, Ilkka

>
> Thanks,
> Robin.
>
>> for_each_hw_dn(hw, dn, i) {
>> struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] +
>> hw->dtm_offset;
>> unsigned int dtm_idx, shift, d = max_t(int, dn->dtc, 0);
>> @@ -1835,16 +1868,17 @@ static int arm_cmn_event_add(struct perf_event
>> *event, int flags)
>> if (type == CMN_TYPE_XP) {
>> input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx;
>> } else if (type == CMN_TYPE_WP) {
>> - int tmp, wp_idx = arm_cmn_wp_idx(event);
>> u32 cfg = arm_cmn_wp_config(event);
>> - if (dtm->wp_event[wp_idx] >= 0)
>> - goto free_dtms;
>> -
>> - tmp = dtm->wp_event[wp_idx ^ 1];
>> - if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
>> -
>> CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
>> - goto free_dtms;
>> + /*
>> + * wp_config register index is currently allocated
>> per
>> + * mesh instance rather than per node.
>> + */
>> + if (wp_idx < 0) {
>> + wp_idx = arm_cmn_wp_idx_unused(event, dtm,
>> &cmn->dtc[d]);
>> + if (wp_idx < 0)
>> + goto free_dtms;
>> + }
>> input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
>> dtm->wp_event[wp_idx] = hw->dtc_idx[d];
>

2024-01-30 05:36:10

by Ilkka Koskinen

[permalink] [raw]
Subject: Re: [PATCH 3/3] perf/arm-cmn: Enable support for tertiary match group



On Mon, 29 Jan 2024, Robin Murphy wrote:

> On 2024-01-26 10:12 pm, Ilkka Koskinen wrote:
>> Add support for tertiary match group.
>>
>> Signed-off-by: Ilkka Koskinen <[email protected]>
>> ---
>> drivers/perf/arm-cmn.c | 23 +++++++++++++++++++----
>> 1 file changed, 19 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
>> index dc6370396ad0..ce9fbdcf6144 100644
>> --- a/drivers/perf/arm-cmn.c
>> +++ b/drivers/perf/arm-cmn.c
>> @@ -91,10 +91,13 @@
>> #define CMN600_WPn_CONFIG_WP_COMBINE BIT(6)
>> #define CMN600_WPn_CONFIG_WP_EXCLUSIVE BIT(5)
>> #define CMN_DTM_WPn_CONFIG_WP_GRP GENMASK_ULL(5, 4)
>> +#define CMN600_WPn_CONFIG_WP_GRP BIT(4)
>> #define CMN_DTM_WPn_CONFIG_WP_CHN_SEL GENMASK_ULL(3, 1)
>> #define CMN_DTM_WPn_CONFIG_WP_DEV_SEL BIT(0)
>> #define CMN_DTM_WPn_VAL(n) (CMN_DTM_WPn(n) + 0x08)
>> #define CMN_DTM_WPn_MASK(n) (CMN_DTM_WPn(n) + 0x10)
>> +#define CMN_DTM_WP_CHN_SEL_REQ_VC 0
>> +#define CMN_DTM_WP_GRP_TERTIARY 0x2
>> #define CMN_DTM_PMU_CONFIG 0x210
>> #define CMN__PMEVCNT0_INPUT_SEL GENMASK_ULL(37, 32)
>> @@ -175,8 +178,8 @@
>> #define CMN_CONFIG_WP_DEV_SEL GENMASK_ULL(50, 48)
>> #define CMN_CONFIG_WP_CHN_SEL GENMASK_ULL(55, 51)
>> /* Note that we don't yet support the tertiary match group on newer IPs
>> */
>> -#define CMN_CONFIG_WP_GRP BIT_ULL(56)
>> -#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(57)
>> +#define CMN_CONFIG_WP_GRP GENMASK_ULL(57, 56)
>> +#define CMN_CONFIG_WP_EXCLUSIVE BIT_ULL(58)
>> #define CMN_CONFIG1_WP_VAL GENMASK_ULL(63, 0)
>> #define CMN_CONFIG2_WP_MASK GENMASK_ULL(63, 0)
>> @@ -1298,7 +1301,9 @@ static struct attribute *arm_cmn_format_attrs[] = {
>> CMN_FORMAT_ATTR(CMN_ANY, wp_dev_sel, CMN_CONFIG_WP_DEV_SEL),
>> CMN_FORMAT_ATTR(CMN_ANY, wp_chn_sel, CMN_CONFIG_WP_CHN_SEL),
>> - CMN_FORMAT_ATTR(CMN_ANY, wp_grp, CMN_CONFIG_WP_GRP),
>> + CMN_FORMAT_ATTR(CMN600, wp_grp, CMN600_WPn_CONFIG_WP_GRP),
>
> Perhaps an easy confusion, but 4 != 56: CMN_CONFIG_WP_* represent
> perf_event->config{,1,2} attribute fields per the CMN_CONFIG_* pattern,
> whereas CMN*_WPn_CONFIG_* are hardware register fields where "config" is just
> annoygingly part of the register name.

Ah, true.

>
>> + CMN_FORMAT_ATTR(NOT_CMN600, wp_grp, CMN_CONFIG_WP_GRP),
>
> Hmm, I'm sure last time I tried something like this, sysfs wouldn't let two
> attributes with the same name exist, regardless of whether one was meant to
> be hidden :/
>
> TBH I think that either we change ABI for everyone consistently, or we extend
> the field in a backwards-compatible way. If you think an ABI break would
> affect existing CMN-600 users, then surely at stands to affect existing
> CMN-650 and CMN-700 users just as much?

Well, I doubt it would really affect. Sounds like extending would be just
fine.

>> +
>> CMN_FORMAT_ATTR(CMN_ANY, wp_exclusive, CMN_CONFIG_WP_EXCLUSIVE),
>> CMN_FORMAT_ATTR(CMN_ANY, wp_combine, CMN_CONFIG_WP_COMBINE),
>> @@ -1398,8 +1403,11 @@ static u32 arm_cmn_wp_config(struct perf_event
>> *event)
>> config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
>> FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
>> - FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
>> FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL2, dev >> 1);
>> +
>> + if (grp)
>> + config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_GRP :
>> + FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP,
>> grp);
>
> FWIW I think something more like "if (is_cmn600) grp &= 1;" before the
> existing assignent might be clearer. Note that that *is* effectively how this
> works already since CMN_DTM_WPn_CONFIG_WP_GRP was updated, it's just
> currently implicit in CMN_EVENT_WP_GRP().

Seems reasonable

>
>> if (exc)
>> config |= is_cmn600 ? CMN600_WPn_CONFIG_WP_EXCLUSIVE :
>> CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE;
>
> You've missed the "(combine && !grp)" logic below this point, which also
> needs to get rather more involved if a combined match across groups 1 and 2
> is going to work correctly.

Ah, that's right

>
>> @@ -1764,6 +1772,13 @@ static int arm_cmn_event_init(struct perf_event
>> *event)
>> /* ...and we need a "real" direction */
>> if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN)
>> return -EINVAL;
>> +
>> + if (cmn->part != PART_CMN600)
>> + if (CMN_EVENT_WP_GRP(event) > CMN_DTM_WP_GRP_TERTIARY
>> ||
>> + (CMN_EVENT_WP_GRP(event) ==
>> CMN_DTM_WP_GRP_TERTIARY &&
>> + CMN_EVENT_WP_CHN_SEL(event) !=
>> CMN_DTM_WP_CHN_SEL_REQ_VC))
>> + return -EINVAL;
>> +
>
> We already don't attempt to sanity-check watchpoint arguments (e.g. chn>3 or
> chn=1,grp=1), so I'm not really inclined to start. The aim here has always
> been not to try to understand watchpoints at all, and effectively just pass
> through the register interface to the user.

Yep, I noticed that. I'm fine with either way

Cheers, Ilkka

>
> Thanks,
> Robin.
>
>> /* ...but the DTM may depend on which port we're watching */
>> if (cmn->multi_dtm)
>> hw->dtm_offset = CMN_EVENT_WP_DEV_SEL(event) / 2;
>