PCI Express Interface PMU includes various performance counters to monitor
the data that is transmitted over the PCIe link. The counters track various
inbound and outbound transactions which includes separate counters for
posted/non-posted/completion TLPs. Also, inbound and outbound memory read
requests along with their latencies can also be monitored. Address
Translation Services(ATS)events such as ATS Translation, ATS Page Request,
ATS Invalidation along with their corresponding latencies are also
supported.
The performance counters are 64 bits wide.
For instance,
perf stat -e ib_tlp_pr <workload>
tracks the inbound posted TLPs for the workload.
Signed-off-by: Linu Cherian <[email protected]>
Signed-off-by: Gowthami Thiagarajan <[email protected]>
---
MAINTAINERS | 7 +
drivers/perf/Kconfig | 7 +
drivers/perf/Makefile | 1 +
drivers/perf/marvell_pem_pmu.c | 433 +++++++++++++++++++++++++++++++++
include/linux/cpuhotplug.h | 1 +
5 files changed, 449 insertions(+)
create mode 100644 drivers/perf/marvell_pem_pmu.c
diff --git a/MAINTAINERS b/MAINTAINERS
index c6545eb54104..55a2a9b6f346 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12473,6 +12473,13 @@ S: Supported
F: Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst
F: drivers/net/ethernet/marvell/octeontx2/af/
+MARVELL PEM PMU DRIVER
+M: Linu Cherian <[email protected]>
+M: Gowthami Thiagarajan <[email protected]>
+S: Supported
+F: Documentation/devicetree/bindings/perf/marvell-odyssey-pem.yaml
+F: drivers/perf/marvell_pem_pmu.c
+
MARVELL PRESTERA ETHERNET SWITCH DRIVER
M: Taras Chornyi <[email protected]>
S: Supported
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 66c259000a44..1cd8d07ffefd 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -203,4 +203,11 @@ source "drivers/perf/arm_cspmu/Kconfig"
source "drivers/perf/amlogic/Kconfig"
+config MARVELL_PEM_PMU
+ tristate "MARVELL PEM PMU Support"
+ depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
+ help
+ Enable support for PCIe Interface performance monitoring
+ on Marvell platform.
+
endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 13e45da61100..bf9fe9cacad9 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
+obj-$(CONFIG_MARVELL_PEM_PMU) += marvell_pem_pmu.o
obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
diff --git a/drivers/perf/marvell_pem_pmu.c b/drivers/perf/marvell_pem_pmu.c
new file mode 100644
index 000000000000..fb27112aa7d4
--- /dev/null
+++ b/drivers/perf/marvell_pem_pmu.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell PEM(PCIe RC) Performance Monitor Driver
+ *
+ * Copyright (C) 2023 Marvell.
+ */
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/perf_event.h>
+
+/* Each of these events maps to a free running 64 bit counter
+ * with no event control, but can be reset.
+ *
+ */
+enum pem_events {
+ IB_TLP_NPR,
+ IB_TLP_PR,
+ IB_TLP_CPL,
+ IB_TLP_DWORDS_NPR,
+ IB_TLP_DWORDS_PR,
+ IB_TLP_DWORDS_CPL,
+ IB_INFLIGHT,
+ IB_READS,
+ IB_REQ_NO_RO_NCB,
+ IB_REQ_NO_RO_EBUS,
+ OB_TLP_NPR,
+ OB_TLP_PR,
+ OB_TLP_CPL,
+ OB_TLP_DWORDS_NPR,
+ OB_TLP_DWORDS_PR,
+ OB_TLP_DWORDS_CPL,
+ OB_INFLIGHT,
+ OB_READS,
+ OB_MERGES_NPR,
+ OB_MERGES_PR,
+ OB_MERGES_CPL,
+ ATS_TRANS,
+ ATS_TRANS_LATENCY,
+ ATS_PRI,
+ ATS_PRI_LATENCY,
+ ATS_INV,
+ ATS_INV_LATENCY,
+ PEM_EVENTIDS_MAX,
+};
+
+static u64 eventid_to_offset_table[] = {
+ 0x0,
+ 0x8,
+ 0x10,
+ 0x100,
+ 0x108,
+ 0x110,
+ 0x200,
+ 0x300,
+ 0x400,
+ 0x408,
+ 0x500,
+ 0x508,
+ 0x510,
+ 0x600,
+ 0x608,
+ 0x610,
+ 0x700,
+ 0x800,
+ 0x900,
+ 0x908,
+ 0x910,
+ 0x2D18,
+ 0x2D20,
+ 0x2D28,
+ 0x2D30,
+ 0x2D38,
+ 0x2D40,
+};
+
+struct pem_pmu {
+ struct pmu pmu;
+ void __iomem *base;
+ unsigned int cpu;
+ struct device *dev;
+ struct hlist_node node;
+};
+
+#define to_pem_pmu(p) container_of(p, struct pem_pmu, pmu)
+
+static int eventid_to_offset(int eventid)
+{
+ return eventid_to_offset_table[eventid];
+}
+
+/* Events */
+static ssize_t pem_pmu_event_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define PEM_EVENT_ATTR(_name, _id) \
+ (&((struct perf_pmu_events_attr[]) { \
+ { .attr = __ATTR(_name, 0444, pem_pmu_event_show, NULL), \
+ .id = _id, } \
+ })[0].attr.attr)
+
+static struct attribute *pem_perf_events_attrs[] = {
+ PEM_EVENT_ATTR(ib_tlp_npr, IB_TLP_NPR),
+ PEM_EVENT_ATTR(ib_tlp_pr, IB_TLP_PR),
+ PEM_EVENT_ATTR(ib_tlp_cpl_partid, IB_TLP_CPL),
+ PEM_EVENT_ATTR(ib_tlp_dwords_npr, IB_TLP_DWORDS_NPR),
+ PEM_EVENT_ATTR(ib_tlp_dwords_pr, IB_TLP_DWORDS_PR),
+ PEM_EVENT_ATTR(ib_tlp_dwords_cpl_partid, IB_TLP_DWORDS_CPL),
+ PEM_EVENT_ATTR(ib_inflight, IB_INFLIGHT),
+ PEM_EVENT_ATTR(ib_reads, IB_READS),
+ PEM_EVENT_ATTR(ib_req_no_ro_ncb, IB_REQ_NO_RO_NCB),
+ PEM_EVENT_ATTR(ib_req_no_ro_ebus, IB_REQ_NO_RO_EBUS),
+ PEM_EVENT_ATTR(ob_tlp_npr_partid, OB_TLP_NPR),
+ PEM_EVENT_ATTR(ob_tlp_pr_partid, OB_TLP_PR),
+ PEM_EVENT_ATTR(ob_tlp_cpl_partid, OB_TLP_CPL),
+ PEM_EVENT_ATTR(ob_tlp_dwords_npr_partid, OB_TLP_DWORDS_NPR),
+ PEM_EVENT_ATTR(ob_tlp_dwords_pr_partid, OB_TLP_DWORDS_PR),
+ PEM_EVENT_ATTR(ob_tlp_dwords_cpl_partid, OB_TLP_DWORDS_CPL),
+ PEM_EVENT_ATTR(ob_inflight_partid, OB_INFLIGHT),
+ PEM_EVENT_ATTR(ob_reads_partid, OB_READS),
+ PEM_EVENT_ATTR(ob_merges_npr_partid, OB_MERGES_NPR),
+ PEM_EVENT_ATTR(ob_merges_pr_partid, OB_MERGES_PR),
+ PEM_EVENT_ATTR(ob_merges_cpl_partid, OB_MERGES_CPL),
+ PEM_EVENT_ATTR(ats_trans, ATS_TRANS),
+ PEM_EVENT_ATTR(ats_trans_latency, ATS_TRANS_LATENCY),
+ PEM_EVENT_ATTR(ats_pri, ATS_PRI),
+ PEM_EVENT_ATTR(ats_pri_latency, ATS_PRI_LATENCY),
+ PEM_EVENT_ATTR(ats_inv, ATS_INV),
+ PEM_EVENT_ATTR(ats_inv_latency, ATS_INV_LATENCY),
+ NULL
+};
+
+static struct attribute_group pem_perf_events_attr_group = {
+ .name = "events",
+ .attrs = pem_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-5");
+
+static struct attribute *pem_perf_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL
+};
+
+static struct attribute_group pem_perf_format_attr_group = {
+ .name = "format",
+ .attrs = pem_perf_format_attrs,
+};
+
+/* cpumask */
+static ssize_t pem_perf_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pem_pmu *pmu = dev_get_drvdata(dev);
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute pem_perf_cpumask_attr =
+ __ATTR(cpumask, 0444, pem_perf_cpumask_show, NULL);
+
+static struct attribute *pem_perf_cpumask_attrs[] = {
+ &pem_perf_cpumask_attr.attr,
+ NULL
+};
+
+static struct attribute_group pem_perf_cpumask_attr_group = {
+ .attrs = pem_perf_cpumask_attrs,
+};
+
+static const struct attribute_group *pem_perf_attr_groups[] = {
+ &pem_perf_events_attr_group,
+ &pem_perf_cpumask_attr_group,
+ &pem_perf_format_attr_group,
+ NULL
+};
+
+static int pem_perf_event_init(struct perf_event *event)
+{
+ struct pem_pmu *pmu = to_pem_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (is_sampling_event(event)) {
+ dev_info(pmu->dev, "Sampling not supported!\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (event->cpu < 0) {
+ dev_warn(pmu->dev, "Can't provide per-task data!\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* We must NOT create groups containing mixed PMUs */
+ if (event->group_leader->pmu != event->pmu &&
+ !is_software_event(event->group_leader))
+ return -EINVAL;
+
+ /* Set ownership of event to one CPU, same event can not be observed
+ * on multiple cpus at same time.
+ */
+ event->cpu = pmu->cpu;
+ hwc->idx = -1;
+ return 0;
+}
+
+static void pem_perf_counter_reset(struct pem_pmu *pmu,
+ struct perf_event *event, int eventid)
+{
+ writeq_relaxed(0x0, pmu->base + eventid_to_offset(eventid));
+}
+
+static u64 pem_perf_read_counter(struct pem_pmu *pmu,
+ struct perf_event *event, int eventid)
+{
+ return readq_relaxed(pmu->base + eventid_to_offset(eventid));
+}
+
+static void pem_perf_event_update(struct perf_event *event)
+{
+ struct pem_pmu *pmu = to_pem_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev_count, new_count;
+
+ do {
+ prev_count = local64_read(&hwc->prev_count);
+ new_count = pem_perf_read_counter(pmu, event, hwc->idx);
+ } while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
+
+ local64_add((new_count - prev_count), &event->count);
+}
+
+static void pem_perf_event_start(struct perf_event *event, int flags)
+{
+ struct pem_pmu *pmu = to_pem_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int eventid = hwc->idx;
+
+ local64_set(&hwc->prev_count, 0);
+
+ pem_perf_counter_reset(pmu, event, eventid);
+
+ hwc->state = 0;
+}
+
+static int pem_perf_event_add(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->idx = event->attr.config;
+ if (hwc->idx >= PEM_EVENTIDS_MAX)
+ return -EINVAL;
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ pem_perf_event_start(event, flags);
+
+ return 0;
+}
+
+static void pem_perf_event_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (flags & PERF_EF_UPDATE)
+ pem_perf_event_update(event);
+
+ hwc->state |= PERF_HES_STOPPED;
+}
+
+static void pem_perf_event_del(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ pem_perf_event_stop(event, PERF_EF_UPDATE);
+ hwc->idx = -1;
+}
+
+static int pem_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct pem_pmu *pmu = hlist_entry_safe(node, struct pem_pmu,
+ node);
+ unsigned int target;
+
+ if (cpu != pmu->cpu)
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+ pmu->cpu = target;
+ return 0;
+}
+
+static int pem_perf_probe(struct platform_device *pdev)
+{
+ struct pem_pmu *pem_pmu;
+ struct resource *res;
+ void __iomem *base;
+ char *name;
+ int ret;
+
+ pem_pmu = devm_kzalloc(&pdev->dev, sizeof(*pem_pmu), GFP_KERNEL);
+ if (!pem_pmu)
+ return -ENOMEM;
+
+ pem_pmu->dev = &pdev->dev;
+ platform_set_drvdata(pdev, pem_pmu);
+
+ base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ pem_pmu->base = base;
+
+ pem_pmu->pmu = (struct pmu) {
+ .module = THIS_MODULE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = pem_perf_attr_groups,
+ .event_init = pem_perf_event_init,
+ .add = pem_perf_event_add,
+ .del = pem_perf_event_del,
+ .start = pem_perf_event_start,
+ .stop = pem_perf_event_stop,
+ .read = pem_perf_event_update,
+ };
+
+ /* Choose this cpu to collect perf data */
+ pem_pmu->cpu = raw_smp_processor_id();
+
+ name = devm_kasprintf(pem_pmu->dev, GFP_KERNEL, "mrvl_pcie_rc_pmu_%llx",
+ res->start);
+ if (!name)
+ return -ENOMEM;
+
+ cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_MARVELL_PEM_ONLINE,
+ &pem_pmu->node);
+
+ ret = perf_pmu_register(&pem_pmu->pmu, name, -1);
+ if (ret)
+ goto error;
+
+ pr_info("Marvell PEM(PCIe RC) PMU Driver for pem@%llx\n", res->start);
+ return 0;
+error:
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MARVELL_PEM_ONLINE,
+ &pem_pmu->node);
+ return ret;
+}
+
+static int pem_perf_remove(struct platform_device *pdev)
+{
+ struct pem_pmu *pem_pmu = platform_get_drvdata(pdev);
+
+ cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MARVELL_PEM_ONLINE,
+ &pem_pmu->node);
+
+ perf_pmu_unregister(&pem_pmu->pmu);
+ return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id pem_pmu_of_match[] = {
+ { .compatible = "marvell,pem-pmu", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, pem_pmu_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id pem_pmu_acpi_match[] = {
+ {"MRVL000E", 0},
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, pem_pmu_acpi_match);
+#endif
+
+static struct platform_driver pem_pmu_driver = {
+ .driver = {
+ .name = "pem-pmu",
+ .of_match_table = of_match_ptr(pem_pmu_of_match),
+ .acpi_match_table = ACPI_PTR(pem_pmu_acpi_match),
+ .suppress_bind_attrs = true,
+ },
+ .probe = pem_perf_probe,
+ .remove = pem_perf_remove,
+};
+
+static int __init pem_pmu_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_MARVELL_PEM_ONLINE,
+ "perf/marvell/pem:online", NULL,
+ pem_pmu_offline_cpu);
+ if (ret)
+ return ret;
+
+ ret = platform_driver_register(&pem_pmu_driver);
+ if (ret)
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MARVELL_PEM_ONLINE);
+ return ret;
+}
+
+static void __exit pem_pmu_exit(void)
+{
+ platform_driver_unregister(&pem_pmu_driver);
+ cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MARVELL_PEM_ONLINE);
+}
+
+module_init(pem_pmu_init);
+module_exit(pem_pmu_exit);
+
+MODULE_DESCRIPTION("Marvell PEM Perf driver");
+MODULE_AUTHOR("Linu Cherian <[email protected]>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0f1001dca0e0..f7710c03d24e 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -235,6 +235,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+ CPUHP_AP_PERF_ARM_MARVELL_PEM_ONLINE,
CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
--
2.25.1
Hi,
On Fri, Jun 30, 2023 at 05:33:46PM +0530, Gowthami Thiagarajan wrote:
> PCI Express Interface PMU includes various performance counters to monitor
> the data that is transmitted over the PCIe link. The counters track various
> inbound and outbound transactions which includes separate counters for
> posted/non-posted/completion TLPs. Also, inbound and outbound memory read
> requests along with their latencies can also be monitored. Address
> Translation Services(ATS)events such as ATS Translation, ATS Page Request,
> ATS Invalidation along with their corresponding latencies are also
> supported.
>
> The performance counters are 64 bits wide.
>
> For instance,
> perf stat -e ib_tlp_pr <workload>
> tracks the inbound posted TLPs for the workload.
>
> Signed-off-by: Linu Cherian <[email protected]>
> Signed-off-by: Gowthami Thiagarajan <[email protected]>
This generally looks fine; I have a few comments below.
[...]
> diff --git a/drivers/perf/marvell_pem_pmu.c b/drivers/perf/marvell_pem_pmu.c
> new file mode 100644
> index 000000000000..fb27112aa7d4
> --- /dev/null
> +++ b/drivers/perf/marvell_pem_pmu.c
> @@ -0,0 +1,433 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Marvell PEM(PCIe RC) Performance Monitor Driver
> + *
> + * Copyright (C) 2023 Marvell.
> + */
Nit: please follow the preferred coding style for comments. This should have a
newline immediately after the '/*', e.g.
/*
* Marvell PEM(PCIe RC) Performance Monitor Driver
*
* Copyright (C) 2023 Marvell.
*/
Likewise for all other multi-line comments.
> +#include <linux/acpi.h>
> +#include <linux/init.h>
> +#include <linux/io.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/of_address.h>
> +#include <linux/of_device.h>
> +#include <linux/perf_event.h>
> +
> +/* Each of these events maps to a free running 64 bit counter
> + * with no event control, but can be reset.
> + *
> + */
> +enum pem_events {
> + IB_TLP_NPR,
> + IB_TLP_PR,
> + IB_TLP_CPL,
> +static u64 eventid_to_offset_table[] = {
> + 0x0,
> + 0x8,
> + 0x10,
I assume the event IDs are the values in the pem_events enum, so please use
array initalizers here to make that clear, e.g.
static u64 eventid_to_offset_table[] = {
[IB_TLP_NPR] = 0x0,
[IB_TLP_PR] = 0x8,
[IB_TLP_CPL] 0x10,
...
};
[...]
> +static int pem_perf_event_init(struct perf_event *event)
> +{
> + struct pem_pmu *pmu = to_pem_pmu(event->pmu);
> + struct hw_perf_event *hwc = &event->hw;
> +
> + if (event->attr.type != event->pmu->type)
> + return -ENOENT;
> +
> + if (is_sampling_event(event)) {
Don't we also need to check for:
event->attach_state & PERF_ATTACH_TASK
> + dev_info(pmu->dev, "Sampling not supported!\n");
> + return -EOPNOTSUPP;
> + }
Please delete this dev_info().
> +
> + if (event->cpu < 0) {
> + dev_warn(pmu->dev, "Can't provide per-task data!\n");
> + return -EOPNOTSUPP;
> + }
Likewise, please delete this dev_warn().
> +
> + /* We must NOT create groups containing mixed PMUs */
> + if (event->group_leader->pmu != event->pmu &&
> + !is_software_event(event->group_leader))
> + return -EINVAL;
> +
> + /* Set ownership of event to one CPU, same event can not be observed
> + * on multiple cpus at same time.
> + */
Please fix this comment style (or delete the comment).
> + event->cpu = pmu->cpu;
> + hwc->idx = -1;
> + return 0;
> +}
Thanks,
Mark.
Hi,
> -----Original Message-----
> From: Mark Rutland <[email protected]>
> Sent: Friday, July 28, 2023 8:32 PM
> To: Gowthami Thiagarajan <[email protected]>
> Cc: [email protected]; [email protected]; [email protected]; Sunil Kovvuri
> Goutham <[email protected]>; Bharat Bhushan <[email protected]>; George Cherian
> <[email protected]>; Linu Cherian <[email protected]>
> Subject: [EXT] Re: [PATCH 1/6] perf/marvell: Marvell PEM performance monitor support
>
> External Email
>
> ----------------------------------------------------------------------
> Hi,
>
> On Fri, Jun 30, 2023 at 05:33:46PM +0530, Gowthami Thiagarajan wrote:
> > PCI Express Interface PMU includes various performance counters to monitor
> > the data that is transmitted over the PCIe link. The counters track various
> > inbound and outbound transactions which includes separate counters for
> > posted/non-posted/completion TLPs. Also, inbound and outbound memory read
> > requests along with their latencies can also be monitored. Address
> > Translation Services(ATS)events such as ATS Translation, ATS Page Request,
> > ATS Invalidation along with their corresponding latencies are also
> > supported.
> >
> > The performance counters are 64 bits wide.
> >
> > For instance,
> > perf stat -e ib_tlp_pr <workload>
> > tracks the inbound posted TLPs for the workload.
> >
> > Signed-off-by: Linu Cherian <[email protected]>
> > Signed-off-by: Gowthami Thiagarajan <[email protected]>
>
> This generally looks fine; I have a few comments below.
>
> [...]
>
> > diff --git a/drivers/perf/marvell_pem_pmu.c b/drivers/perf/marvell_pem_pmu.c
> > new file mode 100644
> > index 000000000000..fb27112aa7d4
> > --- /dev/null
> > +++ b/drivers/perf/marvell_pem_pmu.c
> > @@ -0,0 +1,433 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/* Marvell PEM(PCIe RC) Performance Monitor Driver
> > + *
> > + * Copyright (C) 2023 Marvell.
> > + */
>
> Nit: please follow the preferred coding style for comments. This should have a
> newline immediately after the '/*', e.g.
>
> /*
> * Marvell PEM(PCIe RC) Performance Monitor Driver
> *
> * Copyright (C) 2023 Marvell.
> */
>
> Likewise for all other multi-line comments.
Ack.
>
> > +#include <linux/acpi.h>
> > +#include <linux/init.h>
> > +#include <linux/io.h>
> > +#include <linux/module.h>
> > +#include <linux/of.h>
> > +#include <linux/of_address.h>
> > +#include <linux/of_device.h>
> > +#include <linux/perf_event.h>
> > +
> > +/* Each of these events maps to a free running 64 bit counter
> > + * with no event control, but can be reset.
> > + *
> > + */
> > +enum pem_events {
> > + IB_TLP_NPR,
> > + IB_TLP_PR,
> > + IB_TLP_CPL,
>
> > +static u64 eventid_to_offset_table[] = {
> > + 0x0,
> > + 0x8,
> > + 0x10,
>
> I assume the event IDs are the values in the pem_events enum, so please use
> array initalizers here to make that clear, e.g.
>
> static u64 eventid_to_offset_table[] = {
> [IB_TLP_NPR] = 0x0,
> [IB_TLP_PR] = 0x8,
> [IB_TLP_CPL] 0x10,
> ...
> };
>
> [...]
Ack. Yes. IDs refer to the pem_events.
>
> > +static int pem_perf_event_init(struct perf_event *event)
> > +{
> > + struct pem_pmu *pmu = to_pem_pmu(event->pmu);
> > + struct hw_perf_event *hwc = &event->hw;
> > +
> > + if (event->attr.type != event->pmu->type)
> > + return -ENOENT;
> > +
> > + if (is_sampling_event(event)) {
>
> Don't we also need to check for:
>
> event->attach_state & PERF_ATTACH_TASK
Ack. Will add this check.
>
> > + dev_info(pmu->dev, "Sampling not supported!\n");
> > + return -EOPNOTSUPP;
> > + }
>
> Please delete this dev_info().
Ack.
>
> > +
> > + if (event->cpu < 0) {
> > + dev_warn(pmu->dev, "Can't provide per-task data!\n");
> > + return -EOPNOTSUPP;
> > + }
>
> Likewise, please delete this dev_warn().
Ack.
>
> > +
> > + /* We must NOT create groups containing mixed PMUs */
> > + if (event->group_leader->pmu != event->pmu &&
> > + !is_software_event(event->group_leader))
> > + return -EINVAL;
> > +
> > + /* Set ownership of event to one CPU, same event can not be observed
> > + * on multiple cpus at same time.
> > + */
>
> Please fix this comment style (or delete the comment).
Ack.
-Thanks,
Gowthami
>
> > + event->cpu = pmu->cpu;
> > + hwc->idx = -1;
> > + return 0;
> > +}
>
> Thanks,
> Mark.