2024-01-22 12:54:09

by Gowthami Thiagarajan

[permalink] [raw]
Subject: [PATCH v3 0/2] Marvell Odyssey uncore performance monitor support

Odyssey is a 64 bit ARM based SoC with multiple performance monitor
units for various blocks.

This series of patches introduces support for uncore performance monitor
units (PMUs) on the Marvell Odyssey platform. The PMUs covered in this
series include the DDR PMU and LLC-TAD PMU.

v2->v3:
- Dropped PEM PMU support, will be sent as a separate patch.
- Dropped device tree support for the DDR PMU and LLC-TAD PMU, as ACPI
table based probing is used.
- Added support for the Odyssey TAD PMU in the existing driver.

Gowthami Thiagarajan (2):
perf/marvell: Odyssey DDR Performance monitor support
perf/marvell : Odyssey LLC-TAD performance monitor support

drivers/perf/marvell_cn10k_ddr_pmu.c | 421 +++++++++++++++++++++++----
drivers/perf/marvell_cn10k_tad_pmu.c | 41 ++-
2 files changed, 399 insertions(+), 63 deletions(-)

--
2.25.1



2024-01-22 12:54:17

by Gowthami Thiagarajan

[permalink] [raw]
Subject: [PATCH v3 2/2] perf/marvell : Odyssey LLC-TAD performance monitor support

Each TAD provides eight 64-bit counters for monitoring
cache behavior. The driver always configures the same counter for
all the TADs. The user would end up effectively reserving one of
eight counters in every TAD to look across all TADs.
The occurrences of events are aggregated and presented to the user
at the end of running the workload. The driver does not provide a
way for the user to partition TADs so that different TADs are used for
different applications.

The performance events reflect various internal or interface activities.
By combining the values from multiple performance counters, cache
performance can be measured in terms such as: cache miss rate, cache
allocations, interface retry rate, internal resource occupancy, etc.

Each supported counter's event and formatting information is exposed
to sysfs at /sys/devices/tad/. Use the perf tool's stat command to measure
the PMU events. For instance:

perf stat -e tad_hit_ltg,tad_hit_dtg <workload>

Signed-off-by: Gowthami Thiagarajan <[email protected]>
---
drivers/perf/marvell_cn10k_tad_pmu.c | 41 +++++++++++++++++++++++++++-
1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
index fec8e82edb95..b5786fcec0ec 100644
--- a/drivers/perf/marvell_cn10k_tad_pmu.c
+++ b/drivers/perf/marvell_cn10k_tad_pmu.c
@@ -214,6 +214,24 @@ static const struct attribute_group tad_pmu_events_attr_group = {
.attrs = tad_pmu_event_attrs,
};

+static struct attribute *ody_tad_pmu_event_attrs[] = {
+ TAD_PMU_EVENT_ATTR(tad_req_msh_in_exlmn, 0x3),
+ TAD_PMU_EVENT_ATTR(tad_alloc_dtg, 0x1a),
+ TAD_PMU_EVENT_ATTR(tad_alloc_ltg, 0x1b),
+ TAD_PMU_EVENT_ATTR(tad_alloc_any, 0x1c),
+ TAD_PMU_EVENT_ATTR(tad_hit_dtg, 0x1d),
+ TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e),
+ TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f),
+ TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20),
+ TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xFF),
+ NULL
+};
+
+static const struct attribute_group ody_tad_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = ody_tad_pmu_event_attrs,
+};
+
PMU_FORMAT_ATTR(event, "config:0-7");

static struct attribute *tad_pmu_format_attrs[] = {
@@ -252,11 +270,19 @@ static const struct attribute_group *tad_pmu_attr_groups[] = {
NULL
};

+static const struct attribute_group *ody_tad_pmu_attr_groups[] = {
+ &ody_tad_pmu_events_attr_group,
+ &tad_pmu_format_attr_group,
+ &tad_pmu_cpumask_attr_group,
+ NULL
+};
+
static int tad_pmu_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct tad_region *regions;
struct tad_pmu *tad_pmu;
+ const char *compatible;
struct resource *res;
u32 tad_pmu_page_size;
u32 tad_page_size;
@@ -276,6 +302,12 @@ static int tad_pmu_probe(struct platform_device *pdev)
return -ENODEV;
}

+ ret = device_property_read_string(dev, "compatible", &compatible);
+ if (ret) {
+ dev_err(&pdev->dev, "compatible property not found\n");
+ return ret;
+ }
+
ret = device_property_read_u32(dev, "marvell,tad-page-size",
&tad_page_size);
if (ret) {
@@ -319,7 +351,6 @@ static int tad_pmu_probe(struct platform_device *pdev)
tad_pmu->pmu = (struct pmu) {

.module = THIS_MODULE,
- .attr_groups = tad_pmu_attr_groups,
.capabilities = PERF_PMU_CAP_NO_EXCLUDE |
PERF_PMU_CAP_NO_INTERRUPT,
.task_ctx_nr = perf_invalid_context,
@@ -332,6 +363,13 @@ static int tad_pmu_probe(struct platform_device *pdev)
.read = tad_pmu_event_counter_read,
};

+ if ((strncmp("marvell,cn10k-ddr-pmu", compatible,
+ strlen(compatible)) == 0)) {
+ tad_pmu->pmu.attr_groups = tad_pmu_attr_groups;
+ } else {
+ tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups;
+ }
+
tad_pmu->cpu = raw_smp_processor_id();

/* Register pmu instance for cpu hotplug */
@@ -372,6 +410,7 @@ static const struct of_device_id tad_pmu_of_match[] = {
#ifdef CONFIG_ACPI
static const struct acpi_device_id tad_pmu_acpi_match[] = {
{"MRVL000B", 0},
+ {"MRVL000D", 0},
{},
};
MODULE_DEVICE_TABLE(acpi, tad_pmu_acpi_match);
--
2.25.1


2024-01-29 12:08:02

by Jonathan Cameron

[permalink] [raw]
Subject: Re: [PATCH v3 2/2] perf/marvell : Odyssey LLC-TAD performance monitor support

On Mon, 22 Jan 2024 18:19:33 +0530
Gowthami Thiagarajan <[email protected]> wrote:

> Each TAD provides eight 64-bit counters for monitoring
> cache behavior. The driver always configures the same counter for
> all the TADs. The user would end up effectively reserving one of
> eight counters in every TAD to look across all TADs.
> The occurrences of events are aggregated and presented to the user
> at the end of running the workload. The driver does not provide a
> way for the user to partition TADs so that different TADs are used for
> different applications.
>
> The performance events reflect various internal or interface activities.
> By combining the values from multiple performance counters, cache
> performance can be measured in terms such as: cache miss rate, cache
> allocations, interface retry rate, internal resource occupancy, etc.
>
> Each supported counter's event and formatting information is exposed
> to sysfs at /sys/devices/tad/. Use perf tool stat command to measure
> the pmu events. For instance:
>
> perf stat -e tad_hit_ltg,tad_hit_dtg <workload>
>
> Signed-off-by: Gowthami Thiagarajan <[email protected]>
Hi Gowthami,

A few quick comments inline

Jonathan

> ---
> drivers/perf/marvell_cn10k_tad_pmu.c | 41 +++++++++++++++++++++++++++-
> 1 file changed, 40 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
> index fec8e82edb95..b5786fcec0ec 100644
> --- a/drivers/perf/marvell_cn10k_tad_pmu.c
> +++ b/drivers/perf/marvell_cn10k_tad_pmu.c
> @@ -214,6 +214,24 @@ static const struct attribute_group tad_pmu_events_attr_group = {
> .attrs = tad_pmu_event_attrs,
> };
>
> +static struct attribute *ody_tad_pmu_event_attrs[] = {
> + TAD_PMU_EVENT_ATTR(tad_req_msh_in_exlmn, 0x3),
> + TAD_PMU_EVENT_ATTR(tad_alloc_dtg, 0x1a),
> + TAD_PMU_EVENT_ATTR(tad_alloc_ltg, 0x1b),
> + TAD_PMU_EVENT_ATTR(tad_alloc_any, 0x1c),
> + TAD_PMU_EVENT_ATTR(tad_hit_dtg, 0x1d),
> + TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e),
> + TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f),
> + TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20),
> + TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xFF),
> + NULL
> +};
> +
> +static const struct attribute_group ody_tad_pmu_events_attr_group = {
> + .name = "events",
> + .attrs = ody_tad_pmu_event_attrs,
> +};
> +
> PMU_FORMAT_ATTR(event, "config:0-7");
>
> static struct attribute *tad_pmu_format_attrs[] = {
> @@ -252,11 +270,19 @@ static const struct attribute_group *tad_pmu_attr_groups[] = {
> NULL
> };
>
> +static const struct attribute_group *ody_tad_pmu_attr_groups[] = {
> + &ody_tad_pmu_events_attr_group,
> + &tad_pmu_format_attr_group,
> + &tad_pmu_cpumask_attr_group,
> + NULL
> +};
> +
> static int tad_pmu_probe(struct platform_device *pdev)
> {
> struct device *dev = &pdev->dev;
> struct tad_region *regions;
> struct tad_pmu *tad_pmu;
> + const char *compatible;
> struct resource *res;
> u32 tad_pmu_page_size;
> u32 tad_page_size;
> @@ -276,6 +302,12 @@ static int tad_pmu_probe(struct platform_device *pdev)
> return -ENODEV;
> }
>
> + ret = device_property_read_string(dev, "compatible", &compatible);
It is unusual to find a compatible property in an ACPI DSDT table unless PRP0001
is being used, and if that is being used, I'd not expect an ACPI ID as below.

Maybe give a DSDT blob (disassembled) in the patch intro?

> + if (ret) {
> + dev_err(&pdev->dev, "compatible property not found\n");
> + return ret;
> + }
> +
> ret = device_property_read_u32(dev, "marvell,tad-page-size",
> &tad_page_size);
> if (ret) {
> @@ -319,7 +351,6 @@ static int tad_pmu_probe(struct platform_device *pdev)
> tad_pmu->pmu = (struct pmu) {
>
> .module = THIS_MODULE,
> - .attr_groups = tad_pmu_attr_groups,
> .capabilities = PERF_PMU_CAP_NO_EXCLUDE |
> PERF_PMU_CAP_NO_INTERRUPT,
> .task_ctx_nr = perf_invalid_context,
> @@ -332,6 +363,13 @@ static int tad_pmu_probe(struct platform_device *pdev)
> .read = tad_pmu_event_counter_read,
> };
>
> + if ((strncmp("marvell,cn10k-ddr-pmu", compatible,
> + strlen(compatible)) == 0)) {

How does this work with the ACPI ID added below? Also, just
put this in the tables so device_get_match_data() can retrieve it
instead of string matching in here.


> + tad_pmu->pmu.attr_groups = tad_pmu_attr_groups;
> + } else {
> + tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups;
> + }
> +
> tad_pmu->cpu = raw_smp_processor_id();
>
> /* Register pmu instance for cpu hotplug */
> @@ -372,6 +410,7 @@ static const struct of_device_id tad_pmu_of_match[] = {
> #ifdef CONFIG_ACPI
> static const struct acpi_device_id tad_pmu_acpi_match[] = {
> {"MRVL000B", 0},
> + {"MRVL000D", 0},
> {},
> };
> MODULE_DEVICE_TABLE(acpi, tad_pmu_acpi_match);