Add support for various PMU counters found on the Cavium ThunderX and
OcteonTx SoC.
The driver provides common "uncore" functions to avoid code duplication and
support adding more device PMUs (like L2 cache) in the future.
Changes to v7:
- Check return code of kasprintf
- Check return code of ioremap
- Add some more comments
- Only call perf_pmu_register after event_init() assignment and list_add
- Various cosmetics
Changes to v6:
- Make driver stand-alone again without hooking into EDAC
- depend on ARCH_THUNDER
Changes to v5:
- Only allow built-in CONFIG_CAVIUM_PMU
- Drop unneeded export of perf_event_update_userpage()
- Simplify callbacks in edac code, move CONFIG_CAVIUM_PMU check
to header file
- Fix some sparse static warnings
- Add documentation
- Fix OCX TLK event_valid check
- Add group validation in event_init
- Add a guard variable to prevent calling init twice
- Use kasprintf and fix pmu name allocation
- Remove unneeded check for embedded pmu
- Loop around local64_cmpxchg
- Simplify cvm_pmu_lmc_event_valid
- Fix list_add error case
Jan Glauber (3):
perf: cavium: Support memory controller PMU counters
perf: cavium: Support transmit-link PMU counters
perf: cavium: Add Documentation
Documentation/perf/cavium-pmu.txt | 75 +++++
drivers/perf/Kconfig | 8 +
drivers/perf/Makefile | 1 +
drivers/perf/cavium_pmu.c | 646 ++++++++++++++++++++++++++++++++++++++
include/linux/cpuhotplug.h | 1 +
5 files changed, 731 insertions(+)
create mode 100644 Documentation/perf/cavium-pmu.txt
create mode 100644 drivers/perf/cavium_pmu.c
--
2.9.0.rc0.21.g7777322
Add support for the PMU counters on Cavium SOC memory controllers.
This patch also adds generic functions to allow supporting more
devices with PMU counters.
Properties of the LMC PMU counters:
- not stoppable
- fixed purpose
- read-only
- one PCI device per memory controller
Signed-off-by: Jan Glauber <[email protected]>
---
drivers/perf/Kconfig | 8 +
drivers/perf/Makefile | 1 +
drivers/perf/cavium_pmu.c | 424 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/cpuhotplug.h | 1 +
4 files changed, 434 insertions(+)
create mode 100644 drivers/perf/cavium_pmu.c
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index e5197ff..a46c3f0 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -43,4 +43,12 @@ config XGENE_PMU
help
Say y if you want to use APM X-Gene SoC performance monitors.
+config CAVIUM_PMU
+ bool "Cavium SOC PMU"
+ depends on ARCH_THUNDER
+ help
+ Provides access to various performance counters on Cavium's
+ ARM64 SoCs. Adds support for memory controller (LMC) and
+ interconnect link (OCX TLK) counters.
+
endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 6420bd4..b304646 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
+obj-$(CONFIG_CAVIUM_PMU) += cavium_pmu.o
diff --git a/drivers/perf/cavium_pmu.c b/drivers/perf/cavium_pmu.c
new file mode 100644
index 0000000..582eb41
--- /dev/null
+++ b/drivers/perf/cavium_pmu.c
@@ -0,0 +1,424 @@
+/*
+ * Cavium ARM SOC "uncore" PMU counters
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright Cavium, Inc. 2017
+ * Author(s): Jan Glauber <[email protected]>
+ *
+ */
+#include <linux/cpumask.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+/* device types carrying pmu counters handled by this driver */
+enum cvm_pmu_type {
+ CVM_PMU_LMC,
+};
+
+/* maximum number of parallel hardware counters for all pmu types */
+#define CVM_PMU_MAX_COUNTERS 64
+
+/* generic struct to cover the different pmu types */
+struct cvm_pmu_dev {
+ struct pmu pmu; /* perf pmu registered for this device */
+ const char *pmu_name; /* kasprintf'ed name, e.g. "lmc0" */
+ bool (*event_valid)(u64); /* pmu type specific config check */
+ void __iomem *map; /* mapped counter register window */
+ struct pci_dev *pdev; /* underlying PCI device */
+ int num_counters; /* number of usable slots in events[] */
+ struct perf_event *events[CVM_PMU_MAX_COUNTERS]; /* installed events, indexed by counter */
+ struct list_head entry; /* link in the per-type device list */
+ struct hlist_node cpuhp_node; /* cpu hotplug multi-instance node */
+ cpumask_t active_mask; /* the single CPU handling this pmu's events */
+};
+
+/* list of all probed LMC pmu devices, used for numbering the pmus */
+static struct list_head cvm_pmu_lmcs;
+
+/*
+ * Common Cavium PMU stuff
+ *
+ * Shared properties of the different PMU types:
+ * - all counters are 64 bit long
+ * - there are no overflow interrupts
+ * - all devices with PMU counters appear as PCI devices
+ *
+ * Counter control, access and device association depends on the
+ * PMU type.
+ */
+
+#define to_pmu_dev(x) container_of((x), struct cvm_pmu_dev, pmu)
+
+/*
+ * perf callback: validate and set up a new event for this PMU.
+ *
+ * Returns -ENOENT if the event belongs to a different PMU, -EINVAL for
+ * unsupported attributes (sampling, exclusion bits, invalid config) or
+ * for groups mixing hardware events from different PMUs.
+ */
+static int cvm_pmu_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct cvm_pmu_dev *pmu_dev;
+ struct perf_event *sibling;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* we do not support sampling */
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ /* PMU counters do not support any of these bits */
+ if (event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle)
+ return -EINVAL;
+
+ /* let the pmu type specific callback check the config value */
+ pmu_dev = to_pmu_dev(event->pmu);
+ if (!pmu_dev->event_valid(event->attr.config))
+ return -EINVAL;
+
+ /*
+ * Forbid groups containing mixed PMUs, software events are acceptable.
+ */
+ if (event->group_leader->pmu != event->pmu &&
+ !is_software_event(event->group_leader))
+ return -EINVAL;
+
+ list_for_each_entry(sibling, &event->group_leader->sibling_list,
+ group_entry)
+ if (sibling->pmu != event->pmu &&
+ !is_software_event(sibling))
+ return -EINVAL;
+
+ hwc->config = event->attr.config;
+ hwc->idx = -1; /* no counter assigned yet; done in add() */
+ return 0;
+}
+
+/*
+ * perf callback: update event->count from the 64 bit hardware counter.
+ *
+ * The local64_cmpxchg loop retries if another reader updated
+ * prev_count between our read of prev_count and the counter register,
+ * so the delta is always computed against a consistent snapshot.
+ */
+static void cvm_pmu_read(struct perf_event *event)
+{
+ struct cvm_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 prev, delta, new;
+
+again:
+ prev = local64_read(&hwc->prev_count);
+ new = readq(hwc->event_base + pmu_dev->map);
+
+ if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
+ goto again;
+
+ delta = new - prev;
+ local64_add(delta, &event->count);
+}
+
+/*
+ * perf callback: start counting. The hardware counters cannot be
+ * started per se, so just snapshot the current counter value as the
+ * new baseline for delta accounting.
+ */
+static void cvm_pmu_start(struct perf_event *event, int flags)
+{
+ struct cvm_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 new;
+
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ /* update prev_count always in order to support unstoppable counters */
+ new = readq(hwc->event_base + pmu_dev->map);
+ local64_set(&hwc->prev_count, new);
+
+ perf_event_update_userpage(event);
+}
+
+/*
+ * perf callback: stop the event. The hardware may keep counting (LMC
+ * counters are not stoppable); only the perf bookkeeping state changes
+ * and, on PERF_EF_UPDATE, the final counter value is folded in.
+ */
+static void cvm_pmu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+ cvm_pmu_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+ }
+}
+
+/*
+ * Common part of the perf add callback: install the event on a counter.
+ *
+ * Counters are fixed purpose, so hwc->config doubles as the counter
+ * index; cmpxchg atomically claims the events[] slot. config_base and
+ * event_base are the register offsets supplied by the pmu type
+ * specific wrapper. Returns -EBUSY if the counter is already taken.
+ */
+static int cvm_pmu_add(struct perf_event *event, int flags, u64 config_base,
+ u64 event_base)
+{
+ struct cvm_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!cmpxchg(&pmu_dev->events[hwc->config], NULL, event))
+ hwc->idx = hwc->config;
+
+ if (hwc->idx == -1)
+ return -EBUSY;
+
+ hwc->config_base = config_base;
+ hwc->event_base = event_base;
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ pmu_dev->pmu.start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+/* perf callback: stop the event and release its counter slot */
+static void cvm_pmu_del(struct perf_event *event, int flags)
+{
+ struct cvm_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int i;
+
+ event->pmu->stop(event, PERF_EF_UPDATE);
+
+ /*
+ * For programmable counters we need to check where we installed it.
+ * To keep this function generic always test the more complicated
+ * case (free running counters won't need the loop).
+ */
+ for (i = 0; i < pmu_dev->num_counters; i++)
+ if (cmpxchg(&pmu_dev->events[i], event, NULL) == event)
+ break;
+
+ perf_event_update_userpage(event);
+ hwc->idx = -1;
+}
+
+/*
+ * sysfs show routine for the "events" attributes: print the event
+ * template string ("lmc_event=<id>" etc.). A trailing newline is added
+ * per sysfs convention (matches the generic perf_event_sysfs_show).
+ */
+static ssize_t cvm_pmu_event_sysfs_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr =
+ container_of(attr, struct perf_pmu_events_attr, attr);
+
+ if (pmu_attr->event_str)
+ return sprintf(page, "%s\n", pmu_attr->event_str);
+
+ return 0;
+}
+
+/*
+ * The pmu events are independent from CPUs. Provide a cpumask
+ * nevertheless to prevent perf from adding the event per-cpu and just
+ * set the mask to one online CPU. Use the same cpumask for all "uncore"
+ * devices.
+ *
+ * There is a performance penalty for accessing a device from a CPU on
+ * another socket, but we do not care.
+ */
+/*
+ * cpuhp callback: if the outgoing CPU is the one owning this pmu,
+ * migrate the perf context to another online CPU and hand over
+ * ownership by updating active_mask.
+ */
+static int cvm_pmu_offline_cpu(unsigned int old_cpu, struct hlist_node *node)
+{
+ struct cvm_pmu_dev *pmu_dev;
+ int new_cpu;
+
+ pmu_dev = hlist_entry_safe(node, struct cvm_pmu_dev, cpuhp_node);
+ /* nothing to do if the outgoing CPU does not own this pmu */
+ if (!cpumask_test_and_clear_cpu(old_cpu, &pmu_dev->active_mask))
+ return 0;
+
+ /* no other CPU left online: give up, events stay where they are */
+ new_cpu = cpumask_any_but(cpu_online_mask, old_cpu);
+ if (new_cpu >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&pmu_dev->pmu, old_cpu, new_cpu);
+ cpumask_set_cpu(new_cpu, &pmu_dev->active_mask);
+
+ return 0;
+}
+
+/* sysfs "cpumask" attribute: show the single CPU handling this pmu's events */
+static ssize_t cvm_pmu_attr_show_cpumask(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct cvm_pmu_dev *pmu_dev = container_of(pmu, struct cvm_pmu_dev, pmu);
+
+ return cpumap_print_to_pagebuf(true, buf, &pmu_dev->active_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, cvm_pmu_attr_show_cpumask, NULL);
+
+static struct attribute *cvm_pmu_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group cvm_pmu_attr_group = {
+ .attrs = cvm_pmu_attrs,
+};
+
+/*
+ * LMC (memory controller) counters:
+ * - not stoppable, always on, read-only
+ * - one PCI device per memory controller
+ */
+#define LMC_CONFIG_OFFSET 0x188
+#define LMC_CONFIG_RESET_BIT BIT(17)
+
+/* LMC events */
+#define LMC_EVENT_IFB_CNT 0x1d0
+#define LMC_EVENT_OPS_CNT 0x1d8
+#define LMC_EVENT_DCLK_CNT 0x1e0
+#define LMC_EVENT_BANK_CONFLICT1 0x360
+#define LMC_EVENT_BANK_CONFLICT2 0x368
+
+#define CVM_PMU_LMC_EVENT_ATTR(_name, _id) \
+ &((struct perf_pmu_events_attr[]) { \
+ { \
+ __ATTR(_name, S_IRUGO, cvm_pmu_event_sysfs_show, NULL), \
+ _id, \
+ "lmc_event=" __stringify(_id), \
+ } \
+ })[0].attr.attr
+
+/* map counter numbers to register offsets */
+static int lmc_events[] = {
+ LMC_EVENT_IFB_CNT,
+ LMC_EVENT_OPS_CNT,
+ LMC_EVENT_DCLK_CNT,
+ LMC_EVENT_BANK_CONFLICT1,
+ LMC_EVENT_BANK_CONFLICT2,
+};
+
+/* perf add callback for LMC: map the event id to its register offset */
+static int cvm_pmu_lmc_add(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ return cvm_pmu_add(event, flags, LMC_CONFIG_OFFSET,
+ lmc_events[hwc->config]);
+}
+
+PMU_FORMAT_ATTR(lmc_event, "config:0-2");
+
+static struct attribute *cvm_pmu_lmc_format_attr[] = {
+ &format_attr_lmc_event.attr,
+ NULL,
+};
+
+static struct attribute_group cvm_pmu_lmc_format_group = {
+ .name = "format",
+ .attrs = cvm_pmu_lmc_format_attr,
+};
+
+static struct attribute *cvm_pmu_lmc_events_attr[] = {
+ CVM_PMU_LMC_EVENT_ATTR(ifb_cnt, 0),
+ CVM_PMU_LMC_EVENT_ATTR(ops_cnt, 1),
+ CVM_PMU_LMC_EVENT_ATTR(dclk_cnt, 2),
+ CVM_PMU_LMC_EVENT_ATTR(bank_conflict1, 3),
+ CVM_PMU_LMC_EVENT_ATTR(bank_conflict2, 4),
+ NULL,
+};
+
+static struct attribute_group cvm_pmu_lmc_events_group = {
+ .name = "events",
+ .attrs = cvm_pmu_lmc_events_attr,
+};
+
+static const struct attribute_group *cvm_pmu_lmc_attr_groups[] = {
+ &cvm_pmu_attr_group,
+ &cvm_pmu_lmc_format_group,
+ &cvm_pmu_lmc_events_group,
+ NULL,
+};
+
+/* an LMC config value is valid iff it indexes into lmc_events[] */
+static bool cvm_pmu_lmc_event_valid(u64 config)
+{
+ return (config < ARRAY_SIZE(lmc_events));
+}
+
+/*
+ * Probe one LMC PCI device and register a perf PMU ("lmcN") for it.
+ *
+ * Returns 0 on success or a negative errno; every intermediate step
+ * (ioremap, name allocation, cpuhp instance, list insertion) is undone
+ * on failure.
+ */
+static int cvm_pmu_lmc_probe(struct pci_dev *pdev)
+{
+ struct cvm_pmu_dev *next, *lmc;
+ int nr = 0, ret = -ENOMEM;
+
+ lmc = kzalloc(sizeof(*lmc), GFP_KERNEL);
+ if (!lmc)
+ return -ENOMEM;
+
+ lmc->map = ioremap(pci_resource_start(pdev, 0),
+ pci_resource_len(pdev, 0));
+ if (!lmc->map)
+ goto fail_ioremap;
+
+ /* number the pmu after the memory controllers probed before it */
+ list_for_each_entry(next, &cvm_pmu_lmcs, entry)
+ nr++;
+ lmc->pmu_name = kasprintf(GFP_KERNEL, "lmc%d", nr);
+ if (!lmc->pmu_name)
+ goto fail_kasprintf;
+
+ lmc->pdev = pdev;
+ lmc->num_counters = ARRAY_SIZE(lmc_events);
+ lmc->pmu = (struct pmu) {
+ .task_ctx_nr = perf_invalid_context,
+ .event_init = cvm_pmu_event_init,
+ .add = cvm_pmu_lmc_add,
+ .del = cvm_pmu_del,
+ .start = cvm_pmu_start,
+ .stop = cvm_pmu_stop,
+ .read = cvm_pmu_read,
+ .attr_groups = cvm_pmu_lmc_attr_groups,
+ };
+
+ cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CVM_ONLINE,
+ &lmc->cpuhp_node);
+
+ /*
+ * perf PMU is CPU dependent so pick a random CPU and migrate away
+ * if it goes offline.
+ */
+ cpumask_set_cpu(smp_processor_id(), &lmc->active_mask);
+
+ list_add(&lmc->entry, &cvm_pmu_lmcs);
+ lmc->event_valid = cvm_pmu_lmc_event_valid;
+
+ ret = perf_pmu_register(&lmc->pmu, lmc->pmu_name, -1);
+ if (ret)
+ goto fail_pmu;
+
+ dev_info(&pdev->dev, "Enabled %s PMU with %d counters\n",
+ lmc->pmu_name, lmc->num_counters);
+ return 0;
+
+fail_pmu:
+ /* undo the list_add, otherwise the list references freed memory */
+ list_del(&lmc->entry);
+ kfree(lmc->pmu_name);
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_CVM_ONLINE,
+ &lmc->cpuhp_node);
+fail_kasprintf:
+ iounmap(lmc->map);
+fail_ioremap:
+ kfree(lmc);
+ return ret;
+}
+
+/*
+ * Driver entry point: run only on Cavium parts, set up the cpu
+ * hotplug state and probe all LMC PCI devices (device id 0xa022).
+ */
+static int __init cvm_pmu_init(void)
+{
+ unsigned long implementor = read_cpuid_implementor();
+ unsigned int vendor_id = PCI_VENDOR_ID_CAVIUM;
+ struct pci_dev *pdev = NULL;
+ int rc;
+
+ if (implementor != ARM_CPU_IMP_CAVIUM)
+ return -ENODEV;
+
+ INIT_LIST_HEAD(&cvm_pmu_lmcs);
+
+ rc = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CVM_ONLINE,
+ "perf/arm/cvm:online", NULL,
+ cvm_pmu_offline_cpu);
+ /* the cpuhp state is required by every pmu instance, so bail out */
+ if (rc)
+ return rc;
+
+ /* detect LMC devices; pci_get_device() returns NULL when done */
+ while ((pdev = pci_get_device(vendor_id, 0xa022, pdev))) {
+ rc = cvm_pmu_lmc_probe(pdev);
+ if (rc) {
+ /* drop the reference pci_get_device() took */
+ pci_dev_put(pdev);
+ return rc;
+ }
+ }
+ return 0;
+}
+late_initcall(cvm_pmu_init); /* should come after PCI init */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index b56573b..78ac3d2 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -141,6 +141,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
+ CPUHP_AP_PERF_ARM_CVM_ONLINE,
CPUHP_AP_ONLINE_DYN,
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
CPUHP_AP_X86_HPET_ONLINE,
--
2.9.0.rc0.21.g7777322
Add support for the transmit-link (OCX TLK) PMU counters found
on Cavium SoCs with a processor interconnect.
Properties of the OCX TLK counters:
- per-unit control
- fixed purpose
- writable
- one PCI device with multiple TLK units
Signed-off-by: Jan Glauber <[email protected]>
---
drivers/perf/cavium_pmu.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 222 insertions(+)
diff --git a/drivers/perf/cavium_pmu.c b/drivers/perf/cavium_pmu.c
index 582eb41..d2dd111f 100644
--- a/drivers/perf/cavium_pmu.c
+++ b/drivers/perf/cavium_pmu.c
@@ -19,6 +19,7 @@
enum cvm_pmu_type {
CVM_PMU_LMC,
+ CVM_PMU_TLK,
};
/* maximum number of parallel hardware counters for all pmu types */
@@ -39,6 +40,7 @@ struct cvm_pmu_dev {
};
static struct list_head cvm_pmu_lmcs;
+static struct list_head cvm_pmu_tlks;
/*
* Common Cavium PMU stuff
@@ -395,6 +397,216 @@ static int cvm_pmu_lmc_probe(struct pci_dev *pdev)
return ret;
}
+/*
+ * CCPI interface controller (OCX) Transmit link (TLK) counters:
+ * - per-unit control
+ * - writable
+ * - one PCI device with multiple TLK units
+ */
+
+#define TLK_NR_UNITS 3
+#define TLK_UNIT_OFFSET 0x2000
+#define TLK_UNIT_LEN 0x7ff
+#define TLK_START_ADDR 0x10000
+#define TLK_STAT_CTL_OFFSET 0x40
+#define TLK_STAT_OFFSET 0x400
+
+#define TLK_STAT_ENABLE_BIT BIT(0)
+#define TLK_STAT_RESET_BIT BIT(1)
+
+#define CVM_PMU_TLK_EVENT_ATTR(_name, _id) \
+ &((struct perf_pmu_events_attr[]) { \
+ { \
+ __ATTR(_name, S_IRUGO, cvm_pmu_event_sysfs_show, NULL), \
+ _id, \
+ "tlk_event=" __stringify(_id), \
+ } \
+ })[0].attr.attr
+
+/* pmu_enable callback: turn on all counters of this TLK unit at once */
+static void cvm_pmu_tlk_enable_pmu(struct pmu *pmu)
+{
+ struct cvm_pmu_dev *pmu_dev = container_of(pmu, struct cvm_pmu_dev, pmu);
+
+ /* enable all counters */
+ writeb(TLK_STAT_ENABLE_BIT, pmu_dev->map + TLK_STAT_CTL_OFFSET);
+}
+
+/* pmu_disable callback: turn off all counters of this TLK unit at once */
+static void cvm_pmu_tlk_disable_pmu(struct pmu *pmu)
+{
+ struct cvm_pmu_dev *pmu_dev = container_of(pmu, struct cvm_pmu_dev, pmu);
+
+ /* disable all counters */
+ writeb(0, pmu_dev->map + TLK_STAT_CTL_OFFSET);
+}
+
+/* perf add callback for TLK: counters are 8 bytes apart from TLK_STAT_OFFSET */
+static int cvm_pmu_tlk_add(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ return cvm_pmu_add(event, flags, TLK_STAT_CTL_OFFSET,
+ TLK_STAT_OFFSET + hwc->config * 8);
+}
+
+PMU_FORMAT_ATTR(tlk_event, "config:0-5");
+
+static struct attribute *cvm_pmu_tlk_format_attr[] = {
+ &format_attr_tlk_event.attr,
+ NULL,
+};
+
+static struct attribute_group cvm_pmu_tlk_format_group = {
+ .name = "format",
+ .attrs = cvm_pmu_tlk_format_attr,
+};
+
+static struct attribute *cvm_pmu_tlk_events_attr[] = {
+ CVM_PMU_TLK_EVENT_ATTR(idle_cnt, 0x00),
+ CVM_PMU_TLK_EVENT_ATTR(data_cnt, 0x01),
+ CVM_PMU_TLK_EVENT_ATTR(sync_cnt, 0x02),
+ CVM_PMU_TLK_EVENT_ATTR(retry_cnt, 0x03),
+ CVM_PMU_TLK_EVENT_ATTR(err_cnt, 0x04),
+ CVM_PMU_TLK_EVENT_ATTR(mat0_cnt, 0x08),
+ CVM_PMU_TLK_EVENT_ATTR(mat1_cnt, 0x09),
+ CVM_PMU_TLK_EVENT_ATTR(mat2_cnt, 0x0a),
+ CVM_PMU_TLK_EVENT_ATTR(mat3_cnt, 0x0b),
+ CVM_PMU_TLK_EVENT_ATTR(vc0_cmd, 0x10),
+ CVM_PMU_TLK_EVENT_ATTR(vc1_cmd, 0x11),
+ CVM_PMU_TLK_EVENT_ATTR(vc2_cmd, 0x12),
+ CVM_PMU_TLK_EVENT_ATTR(vc3_cmd, 0x13),
+ CVM_PMU_TLK_EVENT_ATTR(vc4_cmd, 0x14),
+ CVM_PMU_TLK_EVENT_ATTR(vc5_cmd, 0x15),
+ CVM_PMU_TLK_EVENT_ATTR(vc0_pkt, 0x20),
+ CVM_PMU_TLK_EVENT_ATTR(vc1_pkt, 0x21),
+ CVM_PMU_TLK_EVENT_ATTR(vc2_pkt, 0x22),
+ CVM_PMU_TLK_EVENT_ATTR(vc3_pkt, 0x23),
+ CVM_PMU_TLK_EVENT_ATTR(vc4_pkt, 0x24),
+ CVM_PMU_TLK_EVENT_ATTR(vc5_pkt, 0x25),
+ CVM_PMU_TLK_EVENT_ATTR(vc6_pkt, 0x26),
+ CVM_PMU_TLK_EVENT_ATTR(vc7_pkt, 0x27),
+ CVM_PMU_TLK_EVENT_ATTR(vc8_pkt, 0x28),
+ CVM_PMU_TLK_EVENT_ATTR(vc9_pkt, 0x29),
+ CVM_PMU_TLK_EVENT_ATTR(vc10_pkt, 0x2a),
+ CVM_PMU_TLK_EVENT_ATTR(vc11_pkt, 0x2b),
+ CVM_PMU_TLK_EVENT_ATTR(vc12_pkt, 0x2c),
+ CVM_PMU_TLK_EVENT_ATTR(vc13_pkt, 0x2d),
+ CVM_PMU_TLK_EVENT_ATTR(vc0_con, 0x30),
+ CVM_PMU_TLK_EVENT_ATTR(vc1_con, 0x31),
+ CVM_PMU_TLK_EVENT_ATTR(vc2_con, 0x32),
+ CVM_PMU_TLK_EVENT_ATTR(vc3_con, 0x33),
+ CVM_PMU_TLK_EVENT_ATTR(vc4_con, 0x34),
+ CVM_PMU_TLK_EVENT_ATTR(vc5_con, 0x35),
+ CVM_PMU_TLK_EVENT_ATTR(vc6_con, 0x36),
+ CVM_PMU_TLK_EVENT_ATTR(vc7_con, 0x37),
+ CVM_PMU_TLK_EVENT_ATTR(vc8_con, 0x38),
+ CVM_PMU_TLK_EVENT_ATTR(vc9_con, 0x39),
+ CVM_PMU_TLK_EVENT_ATTR(vc10_con, 0x3a),
+ CVM_PMU_TLK_EVENT_ATTR(vc11_con, 0x3b),
+ CVM_PMU_TLK_EVENT_ATTR(vc12_con, 0x3c),
+ CVM_PMU_TLK_EVENT_ATTR(vc13_con, 0x3d),
+ NULL,
+};
+
+static struct attribute_group cvm_pmu_tlk_events_group = {
+ .name = "events",
+ .attrs = cvm_pmu_tlk_events_attr,
+};
+
+static const struct attribute_group *cvm_pmu_tlk_attr_groups[] = {
+ &cvm_pmu_attr_group,
+ &cvm_pmu_tlk_format_group,
+ &cvm_pmu_tlk_events_group,
+ NULL,
+};
+
+/*
+ * TLK event ids are sparse, so walk the (NULL terminated) events
+ * attribute table and accept only ids that are listed there.
+ */
+static bool cvm_pmu_tlk_event_valid(u64 config)
+{
+ struct perf_pmu_events_attr *attr;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cvm_pmu_tlk_events_attr) - 1; i++) {
+ attr = (struct perf_pmu_events_attr *)cvm_pmu_tlk_events_attr[i];
+ if (attr->id == config)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Set up and register one TLK unit ("ocx_tlkN") of the OCX PCI device.
+ *
+ * Returns 0 on success or a negative errno; every intermediate step
+ * (ioremap, name allocation, cpuhp instance, list insertion) is undone
+ * on failure.
+ */
+static int cvm_pmu_tlk_probe_unit(struct pci_dev *pdev, int nr)
+{
+ struct cvm_pmu_dev *tlk;
+ int ret = -ENOMEM;
+
+ tlk = kzalloc(sizeof(*tlk), GFP_KERNEL);
+ if (!tlk)
+ return -ENOMEM;
+
+ /* map only this unit's register window within BAR 0 */
+ tlk->map = ioremap(pci_resource_start(pdev, 0) + TLK_START_ADDR +
+ nr * TLK_UNIT_OFFSET, TLK_UNIT_LEN);
+ if (!tlk->map)
+ goto fail_ioremap;
+
+ tlk->pmu_name = kasprintf(GFP_KERNEL, "ocx_tlk%d", nr);
+ if (!tlk->pmu_name)
+ goto fail_kasprintf;
+
+ tlk->pdev = pdev;
+ tlk->num_counters = ARRAY_SIZE(cvm_pmu_tlk_events_attr) - 1;
+ tlk->pmu = (struct pmu) {
+ .task_ctx_nr = perf_invalid_context,
+ .pmu_enable = cvm_pmu_tlk_enable_pmu,
+ .pmu_disable = cvm_pmu_tlk_disable_pmu,
+ .event_init = cvm_pmu_event_init,
+ .add = cvm_pmu_tlk_add,
+ .del = cvm_pmu_del,
+ .start = cvm_pmu_start,
+ .stop = cvm_pmu_stop,
+ .read = cvm_pmu_read,
+ .attr_groups = cvm_pmu_tlk_attr_groups,
+ };
+
+ cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CVM_ONLINE,
+ &tlk->cpuhp_node);
+
+ /*
+ * perf PMU is CPU dependent so pick a random CPU and migrate away
+ * if it goes offline.
+ */
+ cpumask_set_cpu(smp_processor_id(), &tlk->active_mask);
+
+ list_add(&tlk->entry, &cvm_pmu_tlks);
+ tlk->event_valid = cvm_pmu_tlk_event_valid;
+
+ ret = perf_pmu_register(&tlk->pmu, tlk->pmu_name, -1);
+ if (ret)
+ goto fail_pmu;
+
+ dev_info(&pdev->dev, "Enabled %s PMU with %d counters\n",
+ tlk->pmu_name, tlk->num_counters);
+ return 0;
+
+fail_pmu:
+ /* undo the list_add, otherwise the list references freed memory */
+ list_del(&tlk->entry);
+ kfree(tlk->pmu_name);
+ cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_CVM_ONLINE,
+ &tlk->cpuhp_node);
+fail_kasprintf:
+ iounmap(tlk->map);
+fail_ioremap:
+ kfree(tlk);
+ return ret;
+}
+
+/*
+ * Probe the OCX PCI device: one device exposes TLK_NR_UNITS transmit
+ * link units, each registered as a separate PMU.
+ *
+ * NOTE(review): if a later unit fails, units registered earlier in
+ * this loop stay registered — confirm this partial-failure behavior
+ * is intended.
+ */
+static int cvm_pmu_tlk_probe(struct pci_dev *pdev)
+{
+ int rc, i;
+
+ for (i = 0; i < TLK_NR_UNITS; i++) {
+ rc = cvm_pmu_tlk_probe_unit(pdev, i);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
static int __init cvm_pmu_init(void)
{
unsigned long implementor = read_cpuid_implementor();
@@ -406,6 +618,7 @@ static int __init cvm_pmu_init(void)
return -ENODEV;
INIT_LIST_HEAD(&cvm_pmu_lmcs);
+ INIT_LIST_HEAD(&cvm_pmu_tlks);
rc = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CVM_ONLINE,
"perf/arm/cvm:online", NULL,
@@ -419,6 +632,15 @@ static int __init cvm_pmu_init(void)
if (rc)
return rc;
}
+
+ /* detect OCX TLK devices */
+ while ((pdev = pci_get_device(vendor_id, 0xa013, pdev))) {
+ if (!pdev)
+ break;
+ rc = cvm_pmu_tlk_probe(pdev);
+ if (rc)
+ return rc;
+ }
return 0;
}
late_initcall(cvm_pmu_init); /* should come after PCI init */
--
2.9.0.rc0.21.g7777322
Document Cavium SoC PMUs.
Signed-off-by: Jan Glauber <[email protected]>
---
Documentation/perf/cavium-pmu.txt | 75 +++++++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
create mode 100644 Documentation/perf/cavium-pmu.txt
diff --git a/Documentation/perf/cavium-pmu.txt b/Documentation/perf/cavium-pmu.txt
new file mode 100644
index 0000000..6fbf824
--- /dev/null
+++ b/Documentation/perf/cavium-pmu.txt
@@ -0,0 +1,75 @@
+Cavium ThunderX and OcteonTx Performance Monitoring Unit (PMU)
+==============================================================
+
+Cavium SoCs contain various system devices such as L2 caches, processor
+interconnect and memory controllers. Unfortunately the PMU counters do
+not follow a common design, so each device has a slightly different
+approach to controlling and using its PMU counters.
+
+Common properties of all devices carrying PMU counters:
+- The devices are PCI devices and the counters are embedded somewhere
+ in the PCI register space.
+- All counters are 64 bit wide.
+- There are no overflow interrupts (unnecessary because of the 64 bit wide
+ counters).
+
+Properties depending on the device type:
+- How to start/stop the counters
+- Programmable vs. fixed purpose counters
+- Stoppable vs. always running counters
+- Independent vs. grouped counters
+- Read-only vs. writable counters
+- PCI device to PMU group relationship
+
+
+Devices with PMU counters
+-------------------------
+
+Memory controller (LMC):
+- one PCI device per LMC
+- fixed-purpose counters
+- always running counters without start/stop/reset control
+- read-only counters
+
+CCPI interface controller (OCX) Transmit link (TLK) counters:
+- writable counters
+- only one PCI device exposes multiple TLK units (3 units on T88)
+- start/stop control per unit
+- only present on multi-socket systems
+
+PMU (perf) driver
+-----------------
+
+The cavium-pmu driver registers several perf PMU drivers. Each of the perf
+drivers provides a description of its available events and configuration
+options in sysfs, see /sys/devices/<lmcX/ocx_tlkX>/.
+
+The "format" directory describes the format of the config field (event ID).
+The "events" directory shows the names of the events and provides configuration
+templates for all supported event types that can be used with the perf tool. For
+example, "lmc0/dclk_cnt/" is an equivalent of "lmc0/config=2/".
+
+Each perf driver also provides a "cpumask" sysfs attribute, which contains a
+single CPU ID of the processor which will be used to handle all the PMU events.
+
+Example for perf tool use:
+
+ / # perf list | grep -e lmc
+ lmc0/bank_conflict1/ [Kernel PMU event]
+ lmc0/bank_conflict2/ [Kernel PMU event]
+ lmc0/dclk_cnt/ [Kernel PMU event]
+ lmc0/ifb_cnt/ [Kernel PMU event]
+ lmc0/ops_cnt/ [Kernel PMU event]
+
+ / # perf stat -a -e lmc0/ops_cnt/,lmc0/dclk_cnt/ -- sleep 1
+
+ Performance counter stats for 'system wide':
+
+ 176,133 lmc0/ops_cnt/
+ 670,243,653 lmc0/dclk_cnt/
+
+ 1.005479295 seconds time elapsed
+
+The driver does not support sampling, therefore "perf record" will
+not work. System wide mode ("-a") must be used as per-task (without "-a")
+perf sessions are not supported.
--
2.9.0.rc0.21.g7777322
On 25/07/17 16:04, Jan Glauber wrote:
> Add support for the PMU counters on Cavium SOC memory controllers.
>
> This patch also adds generic functions to allow supporting more
> devices with PMU counters.
>
> Properties of the LMC PMU counters:
> - not stoppable
> - fixed purpose
> - read-only
> - one PCI device per memory controller
>
> Signed-off-by: Jan Glauber <[email protected]>
> ---
> drivers/perf/Kconfig | 8 +
> drivers/perf/Makefile | 1 +
> drivers/perf/cavium_pmu.c | 424 +++++++++++++++++++++++++++++++++++++++++++++
> include/linux/cpuhotplug.h | 1 +
> 4 files changed, 434 insertions(+)
> create mode 100644 drivers/perf/cavium_pmu.c
>
> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
> index e5197ff..a46c3f0 100644
> --- a/drivers/perf/Kconfig
> +++ b/drivers/perf/Kconfig
> @@ -43,4 +43,12 @@ config XGENE_PMU
> help
> Say y if you want to use APM X-Gene SoC performance monitors.
>
> +config CAVIUM_PMU
> + bool "Cavium SOC PMU"
Is there any specific reason why this can't be built as a module ?
> +#define to_pmu_dev(x) container_of((x), struct cvm_pmu_dev, pmu)
> +
> +static int cvm_pmu_event_init(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> + struct cvm_pmu_dev *pmu_dev;
> + struct perf_event *sibling;
> +
> + if (event->attr.type != event->pmu->type)
> + return -ENOENT;
> +
> + /* we do not support sampling */
> + if (is_sampling_event(event))
> + return -EINVAL;
> +
> + /* PMU counters do not support any these bits */
> + if (event->attr.exclude_user ||
> + event->attr.exclude_kernel ||
> + event->attr.exclude_host ||
> + event->attr.exclude_guest ||
> + event->attr.exclude_hv ||
> + event->attr.exclude_idle)
> + return -EINVAL;
> +
> + pmu_dev = to_pmu_dev(event->pmu);
> + if (!pmu_dev->event_valid(event->attr.config))
> + return -EINVAL;
> +
> + /*
> + * Forbid groups containing mixed PMUs, software events are acceptable.
> + */
> + if (event->group_leader->pmu != event->pmu &&
> + !is_software_event(event->group_leader))
> + return -EINVAL;
> +
> + list_for_each_entry(sibling, &event->group_leader->sibling_list,
> + group_entry)
> + if (sibling->pmu != event->pmu &&
> + !is_software_event(sibling))
> + return -EINVAL;
Do we also need to check if the events in the same group can be scheduled
at once ? i.e, there is enough resources to schedule the requested events from
the group.
> +
> + hwc->config = event->attr.config;
> + hwc->idx = -1;
> + return 0;
> +}
> +
...
> +static int cvm_pmu_add(struct perf_event *event, int flags, u64 config_base,
> + u64 event_base)
> +{
> + struct cvm_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
> + struct hw_perf_event *hwc = &event->hw;
> +
> + if (!cmpxchg(&pmu_dev->events[hwc->config], NULL, event))
> + hwc->idx = hwc->config;
> +
> + if (hwc->idx == -1)
> + return -EBUSY;
> +
> + hwc->config_base = config_base;
> + hwc->event_base = event_base;
> + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> +
> + if (flags & PERF_EF_START)
> + pmu_dev->pmu.start(event, PERF_EF_RELOAD);
> +
> + return 0;
> +}
> +
> +static void cvm_pmu_del(struct perf_event *event, int flags)
> +{
> + struct cvm_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
> + struct hw_perf_event *hwc = &event->hw;
> + int i;
> +
> + event->pmu->stop(event, PERF_EF_UPDATE);
> +
> + /*
> + * For programmable counters we need to check where we installed it.
> + * To keep this function generic always test the more complicated
> + * case (free running counters won't need the loop).
> + */
> + for (i = 0; i < pmu_dev->num_counters; i++)
> + if (cmpxchg(&pmu_dev->events[i], event, NULL) == event)
> + break;
I couldn't see why hwc->config wouldn't give us the index where we installed
the event in pmu_dev->events. What am I missing ?
> +static int __init cvm_pmu_init(void)
> +{
> + unsigned long implementor = read_cpuid_implementor();
> + unsigned int vendor_id = PCI_VENDOR_ID_CAVIUM;
> + struct pci_dev *pdev = NULL;
> + int rc;
> +
> + if (implementor != ARM_CPU_IMP_CAVIUM)
> + return -ENODEV;
As I mentioned in the beginning, it would be better to modularize it right
from the start, when we can, than to come back to this at a later point in time.
Btw, perf_event_update_userpage() is being exported for use from module.
See [0].
[0] http://lists.infradead.org/pipermail/linux-arm-kernel/2017-July/520682.html
Cheers
Suzuki
On Tue, 25 Jul 2017 17:04:20 +0200
Jan Glauber <[email protected]> wrote:
> Add support for the PMU counters on Cavium SOC memory controllers.
>
> This patch also adds generic functions to allow supporting more
> devices with PMU counters.
>
> Properties of the LMC PMU counters:
> - not stoppable
> - fixed purpose
> - read-only
> - one PCI device per memory controller
>
> Signed-off-by: Jan Glauber <[email protected]>
One trivial point inline which, whilst it obviously makes no actual
difference, makes review a tiny bit easier.
Otherwise looks good to me, but I'm somewhat new to this area
so who knows what I've missed ;)
> ---
> drivers/perf/Kconfig | 8 +
> drivers/perf/Makefile | 1 +
> drivers/perf/cavium_pmu.c | 424 +++++++++++++++++++++++++++++++++++++++++++++
> include/linux/cpuhotplug.h | 1 +
> 4 files changed, 434 insertions(+)
> create mode 100644 drivers/perf/cavium_pmu.c
<snip>
> +static int cvm_pmu_lmc_probe(struct pci_dev *pdev)
> +{
> + struct cvm_pmu_dev *next, *lmc;
> + int nr = 0, ret = -ENOMEM;
> +
> + lmc = kzalloc(sizeof(*lmc), GFP_KERNEL);
> + if (!lmc)
> + return -ENOMEM;
> +
> + lmc->map = ioremap(pci_resource_start(pdev, 0),
> + pci_resource_len(pdev, 0));
> + if (!lmc->map)
> + goto fail_ioremap;
> +
> + list_for_each_entry(next, &cvm_pmu_lmcs, entry)
> + nr++;
> + lmc->pmu_name = kasprintf(GFP_KERNEL, "lmc%d", nr);
> + if (!lmc->pmu_name)
> + goto fail_kasprintf;
> +
> + lmc->pdev = pdev;
> + lmc->num_counters = ARRAY_SIZE(lmc_events);
> + lmc->pmu = (struct pmu) {
> + .task_ctx_nr = perf_invalid_context,
> + .event_init = cvm_pmu_event_init,
> + .add = cvm_pmu_lmc_add,
> + .del = cvm_pmu_del,
> + .start = cvm_pmu_start,
> + .stop = cvm_pmu_stop,
> + .read = cvm_pmu_read,
> + .attr_groups = cvm_pmu_lmc_attr_groups,
> + };
> +
> + cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CVM_ONLINE,
> + &lmc->cpuhp_node);
> +
> + /*
> + * perf PMU is CPU dependent so pick a random CPU and migrate away
> + * if it goes offline.
> + */
> + cpumask_set_cpu(smp_processor_id(), &lmc->active_mask);
> +
> + list_add(&lmc->entry, &cvm_pmu_lmcs);
> + lmc->event_valid = cvm_pmu_lmc_event_valid;
> +
> + ret = perf_pmu_register(&lmc->pmu, lmc->pmu_name, -1);
> + if (ret)
> + goto fail_pmu;
> +
> + dev_info(&pdev->dev, "Enabled %s PMU with %d counters\n",
> + lmc->pmu_name, lmc->num_counters);
> + return 0;
> +
> +fail_pmu:
> + kfree(lmc->pmu_name);
> + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_CVM_ONLINE,
> + &lmc->cpuhp_node);
Expected order to unwind the above would be the reverse of this.
> +fail_kasprintf:
> + iounmap(lmc->map);
> +fail_ioremap:
> + kfree(lmc);
> + return ret;
> +}
> +
> +static int __init cvm_pmu_init(void)
> +{
> + unsigned long implementor = read_cpuid_implementor();
> + unsigned int vendor_id = PCI_VENDOR_ID_CAVIUM;
> + struct pci_dev *pdev = NULL;
> + int rc;
> +
> + if (implementor != ARM_CPU_IMP_CAVIUM)
> + return -ENODEV;
> +
> + INIT_LIST_HEAD(&cvm_pmu_lmcs);
> +
> + rc = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CVM_ONLINE,
> + "perf/arm/cvm:online", NULL,
> + cvm_pmu_offline_cpu);
> +
> + /* detect LMC devices */
> + while ((pdev = pci_get_device(vendor_id, 0xa022, pdev))) {
> + if (!pdev)
> + break;
> + rc = cvm_pmu_lmc_probe(pdev);
> + if (rc)
> + return rc;
> + }
> + return 0;
> +}
> +late_initcall(cvm_pmu_init); /* should come after PCI init */
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index b56573b..78ac3d2 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -141,6 +141,7 @@ enum cpuhp_state {
> CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
> CPUHP_AP_WORKQUEUE_ONLINE,
> CPUHP_AP_RCUTREE_ONLINE,
> + CPUHP_AP_PERF_ARM_CVM_ONLINE,
> CPUHP_AP_ONLINE_DYN,
> CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
> CPUHP_AP_X86_HPET_ONLINE,
On Tue, Jul 25, 2017 at 04:39:18PM +0100, Suzuki K Poulose wrote:
> On 25/07/17 16:04, Jan Glauber wrote:
> >Add support for the PMU counters on Cavium SOC memory controllers.
> >
> >This patch also adds generic functions to allow supporting more
> >devices with PMU counters.
> >
> >Properties of the LMC PMU counters:
> >- not stoppable
> >- fixed purpose
> >- read-only
> >- one PCI device per memory controller
> >
> >Signed-off-by: Jan Glauber <[email protected]>
> >---
> > drivers/perf/Kconfig | 8 +
> > drivers/perf/Makefile | 1 +
> > drivers/perf/cavium_pmu.c | 424 +++++++++++++++++++++++++++++++++++++++++++++
> > include/linux/cpuhotplug.h | 1 +
> > 4 files changed, 434 insertions(+)
> > create mode 100644 drivers/perf/cavium_pmu.c
> >
> >diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
> >index e5197ff..a46c3f0 100644
> >--- a/drivers/perf/Kconfig
> >+++ b/drivers/perf/Kconfig
> >@@ -43,4 +43,12 @@ config XGENE_PMU
> > help
> > Say y if you want to use APM X-Gene SoC performance monitors.
> >
> >+config CAVIUM_PMU
> >+ bool "Cavium SOC PMU"
>
> Is there any specific reason why this can't be built as a module ?
Yes. I don't know how to load the module automatically. I can't make it
a pci driver as the EDAC driver "owns" the device (and having two
drivers for one device won't work as far as I know). I tried to hook
into the EDAC driver but the EDAC maintainer was not overly welcoming
of that approach.
And while it would be possible to have it as a module I think it is of
no use if it requires manual loading. But maybe there is a simple
solution I'm missing here?
>
> >+#define to_pmu_dev(x) container_of((x), struct cvm_pmu_dev, pmu)
> >+
> >+static int cvm_pmu_event_init(struct perf_event *event)
> >+{
> >+ struct hw_perf_event *hwc = &event->hw;
> >+ struct cvm_pmu_dev *pmu_dev;
> >+ struct perf_event *sibling;
> >+
> >+ if (event->attr.type != event->pmu->type)
> >+ return -ENOENT;
> >+
> >+ /* we do not support sampling */
> >+ if (is_sampling_event(event))
> >+ return -EINVAL;
> >+
> >+ /* PMU counters do not support any these bits */
> >+ if (event->attr.exclude_user ||
> >+ event->attr.exclude_kernel ||
> >+ event->attr.exclude_host ||
> >+ event->attr.exclude_guest ||
> >+ event->attr.exclude_hv ||
> >+ event->attr.exclude_idle)
> >+ return -EINVAL;
> >+
> >+ pmu_dev = to_pmu_dev(event->pmu);
> >+ if (!pmu_dev->event_valid(event->attr.config))
> >+ return -EINVAL;
> >+
> >+ /*
> >+ * Forbid groups containing mixed PMUs, software events are acceptable.
> >+ */
> >+ if (event->group_leader->pmu != event->pmu &&
> >+ !is_software_event(event->group_leader))
> >+ return -EINVAL;
> >+
> >+ list_for_each_entry(sibling, &event->group_leader->sibling_list,
> >+ group_entry)
> >+ if (sibling->pmu != event->pmu &&
> >+ !is_software_event(sibling))
> >+ return -EINVAL;
>
> Do we also need to check if the events in the same group can be scheduled
> at once ? i.e, there is enough resources to schedule the requested events from
> the group.
>
Not sure what you mean, do I need to check for programmable counters
that no more counters are programmed than available?
> >+
> >+ hwc->config = event->attr.config;
> >+ hwc->idx = -1;
> >+ return 0;
> >+}
> >+
> ...
>
> >+static int cvm_pmu_add(struct perf_event *event, int flags, u64 config_base,
> >+ u64 event_base)
> >+{
> >+ struct cvm_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
> >+ struct hw_perf_event *hwc = &event->hw;
> >+
> >+ if (!cmpxchg(&pmu_dev->events[hwc->config], NULL, event))
> >+ hwc->idx = hwc->config;
> >+
> >+ if (hwc->idx == -1)
> >+ return -EBUSY;
> >+
> >+ hwc->config_base = config_base;
> >+ hwc->event_base = event_base;
> >+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> >+
> >+ if (flags & PERF_EF_START)
> >+ pmu_dev->pmu.start(event, PERF_EF_RELOAD);
> >+
> >+ return 0;
> >+}
> >+
> >+static void cvm_pmu_del(struct perf_event *event, int flags)
> >+{
> >+ struct cvm_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
> >+ struct hw_perf_event *hwc = &event->hw;
> >+ int i;
> >+
> >+ event->pmu->stop(event, PERF_EF_UPDATE);
> >+
> >+ /*
> >+ * For programmable counters we need to check where we installed it.
> >+ * To keep this function generic always test the more complicated
> >+ * case (free running counters won't need the loop).
> >+ */
> >+ for (i = 0; i < pmu_dev->num_counters; i++)
> >+ if (cmpxchg(&pmu_dev->events[i], event, NULL) == event)
> >+ break;
>
> I couldn't see why hwc->config wouldn't give us the index where we installed
> the event in pmu_dev->events. What am I missing ?
Did you see the comment above? It is not yet needed but will be when I
add support for programmable counters. If it is still confusing I can
also remove that for now and add it back later when it is needed.
> >+static int __init cvm_pmu_init(void)
> >+{
> >+ unsigned long implementor = read_cpuid_implementor();
> >+ unsigned int vendor_id = PCI_VENDOR_ID_CAVIUM;
> >+ struct pci_dev *pdev = NULL;
> >+ int rc;
> >+
> >+ if (implementor != ARM_CPU_IMP_CAVIUM)
> >+ return -ENODEV;
>
> As I mentioned in the beginning, it would be better to modularize it right
> from the start, when we can, than coming back to this at a later point in time.
>
> Btw, perf_event_update_userpage() is being exported for use from module.
> See [0].
>
> [0] http://lists.infradead.org/pipermail/linux-arm-kernel/2017-July/520682.html
Nice, I think I proposed something similar :)
thanks,
Jan
> Cheers
>
> Suzuki
On 26/07/17 12:19, Jan Glauber wrote:
> On Tue, Jul 25, 2017 at 04:39:18PM +0100, Suzuki K Poulose wrote:
>> On 25/07/17 16:04, Jan Glauber wrote:
>>> Add support for the PMU counters on Cavium SOC memory controllers.
>>>
>>> This patch also adds generic functions to allow supporting more
>>> devices with PMU counters.
>>>
>>> Properties of the LMC PMU counters:
>>> - not stoppable
>>> - fixed purpose
>>> - read-only
>>> - one PCI device per memory controller
>>>
>>> Signed-off-by: Jan Glauber <[email protected]>
>>> ---
>>> drivers/perf/Kconfig | 8 +
>>> drivers/perf/Makefile | 1 +
>>> drivers/perf/cavium_pmu.c | 424 +++++++++++++++++++++++++++++++++++++++++++++
>>> include/linux/cpuhotplug.h | 1 +
>>> 4 files changed, 434 insertions(+)
>>> create mode 100644 drivers/perf/cavium_pmu.c
>>>
>>> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
>>> index e5197ff..a46c3f0 100644
>>> --- a/drivers/perf/Kconfig
>>> +++ b/drivers/perf/Kconfig
>>> @@ -43,4 +43,12 @@ config XGENE_PMU
>>> help
>>> Say y if you want to use APM X-Gene SoC performance monitors.
>>>
>>> +config CAVIUM_PMU
>>> + bool "Cavium SOC PMU"
>>
>> Is there any specific reason why this can't be built as a module ?
>
> Yes. I don't know how to load the module automatically. I can't make it
> a pci driver as the EDAC driver "owns" the device (and having two
> drivers for one device wont work as far as I know). I tried to hook
> into the EDAC driver but the EDAC maintainer was not overly welcoming
> that approach.
>
> And while it would be possible to have it a s a module I think it is of
> no use if it requires manualy loading. But maybe there is a simple
> solution I'm missing here?
If you are talking about a Cavium-specific EDAC driver, maybe we could
make that depend on this driver "at runtime" via symbols (maybe even
trigger the probe of the PMU), which will be referenced only when CONFIG_CAVIUM_PMU
is defined. It is not the perfect solution, but it should do the trick.
>>> + /*
>>> + * Forbid groups containing mixed PMUs, software events are acceptable.
>>> + */
>>> + if (event->group_leader->pmu != event->pmu &&
>>> + !is_software_event(event->group_leader))
>>> + return -EINVAL;
>>> +
>>> + list_for_each_entry(sibling, &event->group_leader->sibling_list,
>>> + group_entry)
>>> + if (sibling->pmu != event->pmu &&
>>> + !is_software_event(sibling))
>>> + return -EINVAL;
>>
>> Do we also need to check if the events in the same group can be scheduled
>> at once ? i.e, there is enough resources to schedule the requested events from
>> the group.
>>
>
> Not sure what you mean, do I need to check for programmable counters
> that no more counters are programmed than available?
>
Yes. What if there are two events, both trying to use the same counter (either
due to lack of programmable counters or duplicate events).
>>> +
>>> + hwc->config = event->attr.config;
>>> + hwc->idx = -1;
>>> + return 0;
>>> +}
>>> +
>> ...
>>
>>> +static int cvm_pmu_add(struct perf_event *event, int flags, u64 config_base,
>>> + u64 event_base)
>>> +{
>>> + struct cvm_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
>>> + struct hw_perf_event *hwc = &event->hw;
>>> +
>>> + if (!cmpxchg(&pmu_dev->events[hwc->config], NULL, event))
>>> + hwc->idx = hwc->config;
>>> +
>>> + if (hwc->idx == -1)
>>> + return -EBUSY;
>>> +
>>> + hwc->config_base = config_base;
>>> + hwc->event_base = event_base;
>>> + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
>>> +
>>> + if (flags & PERF_EF_START)
>>> + pmu_dev->pmu.start(event, PERF_EF_RELOAD);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static void cvm_pmu_del(struct perf_event *event, int flags)
>>> +{
>>> + struct cvm_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
>>> + struct hw_perf_event *hwc = &event->hw;
>>> + int i;
>>> +
>>> + event->pmu->stop(event, PERF_EF_UPDATE);
>>> +
>>> + /*
>>> + * For programmable counters we need to check where we installed it.
>>> + * To keep this function generic always test the more complicated
>>> + * case (free running counters won't need the loop).
>>> + */
>>> + for (i = 0; i < pmu_dev->num_counters; i++)
>>> + if (cmpxchg(&pmu_dev->events[i], event, NULL) == event)
>>> + break;
>>
>> I couldn't see why hwc->config wouldn't give us the index where we installed
>> the event in pmu_dev->events. What am I missing ?
>
> Did you see the comment above? It is not yet needed but will be when I
> add support for programmable counters.
Is it supported in this series ?
> If it is still confusing I can
> also remove that for now and add it back later when it is needed.
What is the hwc->idx for programmable counters? Is it going to be different
from hwc->config? If so, can we use hwc->idx to keep the idx where we installed
the event?
Suzuki
On Wed, Jul 26, 2017 at 01:47:35PM +0100, Suzuki K Poulose wrote:
> On 26/07/17 12:19, Jan Glauber wrote:
> >On Tue, Jul 25, 2017 at 04:39:18PM +0100, Suzuki K Poulose wrote:
> >>On 25/07/17 16:04, Jan Glauber wrote:
> >>>Add support for the PMU counters on Cavium SOC memory controllers.
> >>>
> >>>This patch also adds generic functions to allow supporting more
> >>>devices with PMU counters.
> >>>
> >>>Properties of the LMC PMU counters:
> >>>- not stoppable
> >>>- fixed purpose
> >>>- read-only
> >>>- one PCI device per memory controller
> >>>
> >>>Signed-off-by: Jan Glauber <[email protected]>
> >>>---
> >>>drivers/perf/Kconfig | 8 +
> >>>drivers/perf/Makefile | 1 +
> >>>drivers/perf/cavium_pmu.c | 424 +++++++++++++++++++++++++++++++++++++++++++++
> >>>include/linux/cpuhotplug.h | 1 +
> >>>4 files changed, 434 insertions(+)
> >>>create mode 100644 drivers/perf/cavium_pmu.c
> >>>
> >>>diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
> >>>index e5197ff..a46c3f0 100644
> >>>--- a/drivers/perf/Kconfig
> >>>+++ b/drivers/perf/Kconfig
> >>>@@ -43,4 +43,12 @@ config XGENE_PMU
> >>> help
> >>> Say y if you want to use APM X-Gene SoC performance monitors.
> >>>
> >>>+config CAVIUM_PMU
> >>>+ bool "Cavium SOC PMU"
> >>
> >>Is there any specific reason why this can't be built as a module ?
> >
> >Yes. I don't know how to load the module automatically. I can't make it
> >a pci driver as the EDAC driver "owns" the device (and having two
> >drivers for one device wont work as far as I know). I tried to hook
> >into the EDAC driver but the EDAC maintainer was not overly welcoming
> >that approach.
>
> >
> >And while it would be possible to have it a s a module I think it is of
> >no use if it requires manualy loading. But maybe there is a simple
> >solution I'm missing here?
>
>
> If you are talking about a Cavium specific EDAC driver, may be we could
> make that depend on this driver "at runtime" via symbols (may be even,
> trigger the probe of PMU), which will be referenced only when CONFIG_CAVIUM_PMU
> is defined. It is not the perfect solution, but that should do the trick.
I think that is roughly what I proposed in v6. Can you have a look at:
https://lkml.org/lkml/2017/6/23/333
https://patchwork.kernel.org/patch/9806427/
Probably there is a better way to do it. Or maybe we just keep it as
built-in for the time being.
--Jan
+To: Boris
Hi Boris,
On 26/07/17 14:10, Jan Glauber wrote:
> On Wed, Jul 26, 2017 at 01:47:35PM +0100, Suzuki K Poulose wrote:
>> On 26/07/17 12:19, Jan Glauber wrote:
>>> On Tue, Jul 25, 2017 at 04:39:18PM +0100, Suzuki K Poulose wrote:
>>>> On 25/07/17 16:04, Jan Glauber wrote:
>>>>> Add support for the PMU counters on Cavium SOC memory controllers.
>>>>>
>>>>> This patch also adds generic functions to allow supporting more
>>>>> devices with PMU counters.
>>>>>
>>>>> Properties of the LMC PMU counters:
>>>>> - not stoppable
>>>>> - fixed purpose
>>>>> - read-only
>>>>> - one PCI device per memory controller
>>>>>
>>>>> Signed-off-by: Jan Glauber <[email protected]>
>>>>> ---
>>>>> drivers/perf/Kconfig | 8 +
>>>>> drivers/perf/Makefile | 1 +
>>>>> drivers/perf/cavium_pmu.c | 424 +++++++++++++++++++++++++++++++++++++++++++++
>>>>> include/linux/cpuhotplug.h | 1 +
>>>>> 4 files changed, 434 insertions(+)
>>>>> create mode 100644 drivers/perf/cavium_pmu.c
>>>>>
>>>>> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
>>>>> index e5197ff..a46c3f0 100644
>>>>> --- a/drivers/perf/Kconfig
>>>>> +++ b/drivers/perf/Kconfig
>>>>> @@ -43,4 +43,12 @@ config XGENE_PMU
>>>>> help
>>>>> Say y if you want to use APM X-Gene SoC performance monitors.
>>>>>
>>>>> +config CAVIUM_PMU
>>>>> + bool "Cavium SOC PMU"
>>>>
>>>> Is there any specific reason why this can't be built as a module ?
>>>
>>> Yes. I don't know how to load the module automatically. I can't make it
>>> a pci driver as the EDAC driver "owns" the device (and having two
>>> drivers for one device wont work as far as I know). I tried to hook
>>> into the EDAC driver but the EDAC maintainer was not overly welcoming
>>> that approach.
>>
>>>
>>> And while it would be possible to have it a s a module I think it is of
>>> no use if it requires manualy loading. But maybe there is a simple
>>> solution I'm missing here?
>>
>>
>> If you are talking about a Cavium specific EDAC driver, may be we could
>> make that depend on this driver "at runtime" via symbols (may be even,
>> trigger the probe of PMU), which will be referenced only when CONFIG_CAVIUM_PMU
>> is defined. It is not the perfect solution, but that should do the trick.
>
> I think that is roughly what I proposed in v6. Can you have a look at:
>
> https://lkml.org/lkml/2017/6/23/333
> https://patchwork.kernel.org/patch/9806427/
So the Cavium EDACs, which appear as PCI devices, have a PMU attached to them.
In order to build this PMU driver as a module, we need a way to load the module
automatically based on the PCI id. However, since the EDAC driver already
registers with that PCI id, we cannot use the same for the PMU. Ideally,
the PMU driver should be loaded when the EDAC driver is loaded. But looking
at the links above, it looks like you don't like the idea of triggering a
probe of the PMU component from the EDAC driver. We may be able to get rid
of the PMU-specific information from the EDAC driver by maintaining the PCI
id of the device in the PMU driver. But we may still need to make sure that
the PMU driver gets a chance to probe the PMU when the device is available.
What do you think is the best option here?
Suzuki
On Wed, Jul 26, 2017 at 03:35:25PM +0100, Suzuki K Poulose wrote:
> So the Cavium EDACs, which appear as PCI devices have a PMU attached to it.
Cavium EDACs?
So let me set something straight first: An EDAC driver simply talks to
some RAS IP block and reports errors. It shouldn't have anything to do
with a PMU.
> In order to build this PMU driver as a module, we need a way to load the module
> automatically based on the PCI id. However, since the EDAC driver already
> registers with that PCI id, we cannot use the same for the PMU. Ideally,
So this is strange. There's a single PCI ID but multiple functionalities
behind it?
> the PMU driver should be loaded when the EDAC driver is loaded. But looking
> at the links above, it looks like you don't like the idea of triggering a
> probe of the PMU component from the EDAC driver. We may be able to get rid
> of the PMU specific information from the EDAC driver by maintaining the PCI
> id of the device in the PMU driver. But we may still need to make sure that
> the PMU driver gets a chance to probe the PMU when the device is available.
>
> What do you think is the best option here ?
Can either of the two - EDAC or PMU driver - use an alternate detection
method?
For example, we moved the main x86 EDAC drivers to x86 CPU
family, model, stepping detection from PCI IDs because the PCI IDs were
clumsy to use.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Wed, Jul 26, 2017 at 04:55:22PM +0200, Borislav Petkov wrote:
> On Wed, Jul 26, 2017 at 03:35:25PM +0100, Suzuki K Poulose wrote:
> > So the Cavium EDACs, which appear as PCI devices have a PMU attached to it.
>
> Cavium EDACs?
>
> So let me set something straight first: An EDAC driver simply talks to
> some RAS IP block and reports errors. It shouldn't have anything to do
> with a PMU.
>
> > In order to build this PMU driver as a module, we need a way to load the module
> > automatically based on the PCI id. However, since the EDAC driver already
> > registers with that PCI id, we cannot use the same for the PMU. Ideally,
>
> So this is strange. There's a single PCI ID but multiple functionalities
> behind it?
Yes, but I would still not call a memory controller a RAS IP block.
There are a number of registers on the memory controller (or on the OCX
TLK interconnect), and while some of them are RAS related there are also
other registers in the same device like the counters we want to access
via PMU code.
> > the PMU driver should be loaded when the EDAC driver is loaded. But looking
> > at the links above, it looks like you don't like the idea of triggering a
> > probe of the PMU component from the EDAC driver. We may be able to get rid
> > of the PMU specific information from the EDAC driver by maintaining the PCI
> > id of the device in the PMU driver. But we may still need to make sure that
> > the PMU driver gets a chance to probe the PMU when the device is available.
> >
> > What do you think is the best option here ?
>
> Can either of the two - EDAC or PMU driver - use an alternate detection
> method?
I'm currently using pci_get_device(vendor-ID, device-ID, ...) which
works fine.
> For example, we moved the main x86 EDAC drivers we moved to x86 CPU
> family, model, stepping detection from PCI IDs because the PCI IDs were
> clumsy to use.
I'm also looking for CPU implementor (MIDR), I could check for the model
too but I still need to detect devices based on PCI IDs as the model
check is not sufficient here (only multi-socket ThunderX has OCX TLK
devices).
--Jan
> --
> Regards/Gruss,
> Boris.
>
> ECO tip #101: Trim your mails when you reply.
> --
On 26/07/17 16:13, Jan Glauber wrote:
> On Wed, Jul 26, 2017 at 04:55:22PM +0200, Borislav Petkov wrote:
>> On Wed, Jul 26, 2017 at 03:35:25PM +0100, Suzuki K Poulose wrote:
>>> So the Cavium EDACs, which appear as PCI devices have a PMU attached to it.
>>
>> Cavium EDACs?
>>
>> So let me set something straight first: An EDAC driver simply talks to
>> some RAS IP block and reports errors. It shouldn't have anything to do
>> with a PMU.
>>
>>> In order to build this PMU driver as a module, we need a way to load the module
>>> automatically based on the PCI id. However, since the EDAC driver already
>>> registers with that PCI id, we cannot use the same for the PMU. Ideally,
>>
>> So this is strange. There's a single PCI ID but multiple functionalities
>> behind it?
>
> Yes, but I would still not call a memory controller a RAS IP block.
> There are a number of registers on the memory controller (or on the OCX
> TLK interconnect), and while some of them are RAS related there are also
> other registers in the same device like the counters we want to access
> via PMU code.
How about adding a soc specific (wrapper) driver for the memory controller, which
could use the PCI id and trigger EDAC and PMU drivers (based on what is
selected by configs) ?
Suzuki
On Wed, Jul 26, 2017 at 04:17:11PM +0100, Suzuki K Poulose wrote:
> How about adding a soc specific (wrapper) driver for the memory controller, which
> could use the PCI id and trigger EDAC and PMU drivers (based on what is
> selected by configs) ?
That was the last idea on my list if nothing slicker comes up. Having a
"multiplexer" driver just for the loading is meh.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Wed, Jul 26, 2017 at 05:13:14PM +0200, Jan Glauber wrote:
> I'm also looking for CPU implementor (MIDR), I could check for the model
> too but I still need to detect devices based on PCI IDs as the model
> check is not sufficient here (only multi-socket ThunderX has OCX TLK
> devices).
So what does that mean? The only way to load a PMU driver and an EDAC
driver is the PCI ID of the memory controller? No other way?
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Wed, Jul 26, 2017 at 05:35:02PM +0200, Borislav Petkov wrote:
> On Wed, Jul 26, 2017 at 05:13:14PM +0200, Jan Glauber wrote:
> > I'm also looking for CPU implementor (MIDR), I could check for the model
> > too but I still need to detect devices based on PCI IDs as the model
> > check is not sufficient here (only multi-socket ThunderX has OCX TLK
> > devices).
>
> So what does that mean? The only way to load a PMU driver and an EDAC
> driver is the PCI ID of the memory controller? No other way?
I already tried multiple ways to load the drivers, so far with limited
success :)
The PMU/EDAC devices are all PCI devices so I need the 'struct pci_dev *'.
I'm not aware of other ways to access these devices. Please enlighten
me if I'm missing something.
--Jan
On Wed, Jul 26, 2017 at 04:17:11PM +0100, Suzuki K Poulose wrote:
> How about adding a soc specific (wrapper) driver for the memory controller, which
> could use the PCI id and trigger EDAC and PMU drivers (based on what is
> selected by configs) ?
Sounds good to me. Is there a driver that already does this?
--Jan
On Wed, Jul 26, 2017 at 05:45:15PM +0200, Jan Glauber wrote:
> The PMU/EDAC devices are all PCI devices do I need the 'struct pci_dev *'.
> I'm not aware of other ways to access these devices. Please enlighten
> me if I'm missing something.
Me enlighten you on Cavium hardware?! You're funny.
So I don't know whether the PCI hotplug code can run more than one
function upon PCI ID detection. Probably Greg will say, write a
multiplexer wrapper. :-)
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Wed, Jul 26, 2017 at 05:55:48PM +0200, Borislav Petkov wrote:
> On Wed, Jul 26, 2017 at 05:45:15PM +0200, Jan Glauber wrote:
> > The PMU/EDAC devices are all PCI devices do I need the 'struct pci_dev *'.
> > I'm not aware of other ways to access these devices. Please enlighten
> > me if I'm missing something.
>
> Me enlighten you on Cavium hardware?! You're funny.
>
> So I don't know whether the PCI hotplug code can run more than one
> function upon PCI ID detection. Probably Greg will say, write a
> multiplexer wrapper. :-)
-ENOCONTEXT....
Anyway, pci questions are best asked on the linux-pci@vger list. And
yes, all PCI devices end up with a 'struct pci_dev *' automatically.
thanks,
greg k-h
On Wed, 26 Jul 2017 17:46:23 +0200
Jan Glauber <[email protected]> wrote:
> On Wed, Jul 26, 2017 at 04:17:11PM +0100, Suzuki K Poulose wrote:
> > How about adding a soc specific (wrapper) driver for the memory controller, which
> > could use the PCI id and trigger EDAC and PMU drivers (based on what is
> > selected by configs) ?
>
> Sounds good to me. Is there a driver that already does this?
Sounds like a classic MFD (multifunction device). There are quite a few pci
devices to be found under drivers/mfd/ that may provide some inspiration.
Jonathan
>
> --Jan
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
On Wed, Jul 26, 2017 at 09:19:49AM -0700, Greg KH wrote:
> On Wed, Jul 26, 2017 at 05:55:48PM +0200, Borislav Petkov wrote:
> > On Wed, Jul 26, 2017 at 05:45:15PM +0200, Jan Glauber wrote:
> > > The PMU/EDAC devices are all PCI devices do I need the 'struct pci_dev *'.
> > > I'm not aware of other ways to access these devices. Please enlighten
> > > me if I'm missing something.
> >
> > Me enlighten you on Cavium hardware?! You're funny.
> >
> > So I don't know whether the PCI hotplug code can run more than one
> > function upon PCI ID detection. Probably Greg will say, write a
> > multiplexer wrapper. :-)
>
> -ENOCONTEXT....
>
> Anyway, pci questions are best asked on the linux-pci@vger list. And
> yes, all PCI devices end up with a 'struct pci_dev *' automatically.
Simple: so they have a PCI ID of a memory controller and want to hotplug
two drivers for it. And those two drivers should remain independent from
each other.
Thanks.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Wed, Jul 26, 2017 at 05:25:15PM +0100, Jonathan Cameron wrote:
> On Wed, 26 Jul 2017 17:46:23 +0200
> Jan Glauber <[email protected]> wrote:
>
> > On Wed, Jul 26, 2017 at 04:17:11PM +0100, Suzuki K Poulose wrote:
> > > How about adding a soc specific (wrapper) driver for the memory controller, which
> > > could use the PCI id and trigger EDAC and PMU drivers (based on what is
> > > selected by configs) ?
> >
> > Sounds good to me. Is there a driver that already does this?
> Sounds like a classic MFD (multifunction device). There are quite a few pci
> devices to be found under drivers/mfd/ than may provide some inspiration.
I've looked into that before, from what I recall it did not fit my use
case. After all these are multi-fn devices.
> Jonathan
> >
> > --Jan
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > [email protected]
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
On Wed, Jul 26, 2017 at 06:30:49PM +0200, Borislav Petkov wrote:
> On Wed, Jul 26, 2017 at 09:19:49AM -0700, Greg KH wrote:
> > On Wed, Jul 26, 2017 at 05:55:48PM +0200, Borislav Petkov wrote:
> > > On Wed, Jul 26, 2017 at 05:45:15PM +0200, Jan Glauber wrote:
> > > > The PMU/EDAC devices are all PCI devices do I need the 'struct pci_dev *'.
> > > > I'm not aware of other ways to access these devices. Please enlighten
> > > > me if I'm missing something.
> > >
> > > Me enlighten you on Cavium hardware?! You're funny.
> > >
> > > So I don't know whether the PCI hotplug code can run more than one
> > > function upon PCI ID detection. Probably Greg will say, write a
> > > multiplexer wrapper. :-)
> >
> > -ENOCONTEXT....
> >
> > Anyway, pci questions are best asked on the linux-pci@vger list. And
> > yes, all PCI devices end up with a 'struct pci_dev *' automatically.
>
> Simple: so they have a PCI ID of a memory contoller and want to hotplug
> two drivers for it. And those two drivers should remain independent from
> each other.
Hahahahaha, no. That's crazy, you were right in guessing what my answer
was going to be :)
greg k-h
On 07/26/2017 10:33 AM, Greg KH wrote:
> On Wed, Jul 26, 2017 at 06:30:49PM +0200, Borislav Petkov wrote:
>> On Wed, Jul 26, 2017 at 09:19:49AM -0700, Greg KH wrote:
>>> On Wed, Jul 26, 2017 at 05:55:48PM +0200, Borislav Petkov wrote:
>>>> On Wed, Jul 26, 2017 at 05:45:15PM +0200, Jan Glauber wrote:
>>>>> The PMU/EDAC devices are all PCI devices do I need the 'struct pci_dev *'.
>>>>> I'm not aware of other ways to access these devices. Please enlighten
>>>>> me if I'm missing something.
>>>>
>>>> Me enlighten you on Cavium hardware?! You're funny.
>>>>
>>>> So I don't know whether the PCI hotplug code can run more than one
>>>> function upon PCI ID detection. Probably Greg will say, write a
>>>> multiplexer wrapper. :-)
>>>
>>> -ENOCONTEXT....
>>>
>>> Anyway, pci questions are best asked on the linux-pci@vger list. And
>>> yes, all PCI devices end up with a 'struct pci_dev *' automatically.
>>
>> Simple: so they have a PCI ID of a memory contoller and want to hotplug
>> two drivers for it. And those two drivers should remain independent from
>> each other.
>
> Hahahahaha, no. That's crazy, you were right in guessing what my answer
> was going to be :)
>
Just to be clear about the situation, the device is a memory controller.
It has two main behaviors we are interested in:
A) Error Detection And Correction (EDAC). This should be connected to
the kernel's EDAC subsystem. An existing driver
(drivers/edac/thunderx_edac.c) does exactly this.
B) Performance Counters for actions taken in the corresponding memory.
This should be connected to the kernel's perf framework as an uncore-PMU
(the subject of this patch set).
It is a single PCI device. What should the driver architecture look
like to connect it to two different kernel subsystems?
Thanks,
David Daney
On Wed, Jul 26, 2017 at 01:02:38PM -0700, David Daney wrote:
> On 07/26/2017 10:33 AM, Greg KH wrote:
> > On Wed, Jul 26, 2017 at 06:30:49PM +0200, Borislav Petkov wrote:
> > > On Wed, Jul 26, 2017 at 09:19:49AM -0700, Greg KH wrote:
> > > > On Wed, Jul 26, 2017 at 05:55:48PM +0200, Borislav Petkov wrote:
> > > > > On Wed, Jul 26, 2017 at 05:45:15PM +0200, Jan Glauber wrote:
> > > > > > The PMU/EDAC devices are all PCI devices do I need the 'struct pci_dev *'.
> > > > > > I'm not aware of other ways to access these devices. Please enlighten
> > > > > > me if I'm missing something.
> > > > >
> > > > > Me enlighten you on Cavium hardware?! You're funny.
> > > > >
> > > > > So I don't know whether the PCI hotplug code can run more than one
> > > > > function upon PCI ID detection. Probably Greg will say, write a
> > > > > multiplexer wrapper. :-)
> > > >
> > > > -ENOCONTEXT....
> > > >
> > > > Anyway, pci questions are best asked on the linux-pci@vger list. And
> > > > yes, all PCI devices end up with a 'struct pci_dev *' automatically.
> > >
> > > Simple: so they have a PCI ID of a memory contoller and want to hotplug
> > > two drivers for it. And those two drivers should remain independent from
> > > each other.
> >
> > Hahahahaha, no. That's crazy, you were right in guessing what my answer
> > was going to be :)
> >
>
>
> Just to be clear about the situation, the device is a memory controller. It
> has two main behaviors we are interested in:
>
> A) Error Detection And Correction (EDAC). This should be connected to the
> kernel's EDAC subsystem. An existing driver (drivers/edac/thunderx_edac.c)
> does exactly this.
>
> B) Performance Counters for actions taken in the corresponding memory. This
> should be connected to the kernel's perf framework as an uncore-PMU (the
> subject of this patch set).
>
> It is a single PCI device. What should the driver architecture look like to
> connect it to two different kernel subsystems?
Modify the drivers/edac/thunderx_edac.c code to add support for
performance counters.
On 07/26/2017 01:08 PM, Greg KH wrote:
> On Wed, Jul 26, 2017 at 01:02:38PM -0700, David Daney wrote:
>> On 07/26/2017 10:33 AM, Greg KH wrote:
>>> On Wed, Jul 26, 2017 at 06:30:49PM +0200, Borislav Petkov wrote:
>>>> On Wed, Jul 26, 2017 at 09:19:49AM -0700, Greg KH wrote:
>>>>> On Wed, Jul 26, 2017 at 05:55:48PM +0200, Borislav Petkov wrote:
>>>>>> On Wed, Jul 26, 2017 at 05:45:15PM +0200, Jan Glauber wrote:
>>>>>>> The PMU/EDAC devices are all PCI devices do I need the 'struct pci_dev *'.
>>>>>>> I'm not aware of other ways to access these devices. Please enlighten
>>>>>>> me if I'm missing something.
>>>>>>
>>>>>> Me enlighten you on Cavium hardware?! You're funny.
>>>>>>
>>>>>> So I don't know whether the PCI hotplug code can run more than one
>>>>>> function upon PCI ID detection. Probably Greg will say, write a
>>>>>> multiplexer wrapper. :-)
>>>>>
>>>>> -ENOCONTEXT....
>>>>>
>>>>> Anyway, pci questions are best asked on the linux-pci@vger list. And
>>>>> yes, all PCI devices end up with a 'struct pci_dev *' automatically.
>>>>
>>>> Simple: so they have a PCI ID of a memory contoller and want to hotplug
>>>> two drivers for it. And those two drivers should remain independent from
>>>> each other.
>>>
>>> Hahahahaha, no. That's crazy, you were right in guessing what my answer
>>> was going to be :)
>>>
>>
>>
>> Just to be clear about the situation, the device is a memory controller. It
>> has two main behaviors we are interested in:
>>
>> A) Error Detection And Correction (EDAC). This should be connected to the
>> kernel's EDAC subsystem. An existing driver (drivers/edac/thunderx_edac.c)
>> does exactly this.
>>
>> B) Performance Counters for actions taken in the corresponding memory. This
>> should be connected to the kernel's perf framework as an uncore-PMU (the
>> subject of this patch set).
>>
>> It is a single PCI device. What should the driver architecture look like to
>> connect it to two different kernel subsystems?
>
> Modify the drivers/edac/thunderx_edac.c code to add support for
> performance counters.
>
Thanks Greg. This adds some clarity to the situation.
This technique does slightly complicate the mapping of files and
directories in the kernel source tree to maintainers.
Also, if a given configuration disables CONFIG_EDAC there is some
hackery needed to get the perf portion of the driver included.
David Daney
On Wed, Jul 26, 2017 at 02:02:42PM -0700, David Daney wrote:
> On 07/26/2017 01:08 PM, Greg KH wrote:
> > On Wed, Jul 26, 2017 at 01:02:38PM -0700, David Daney wrote:
> > > On 07/26/2017 10:33 AM, Greg KH wrote:
> > > > On Wed, Jul 26, 2017 at 06:30:49PM +0200, Borislav Petkov wrote:
> > > > > On Wed, Jul 26, 2017 at 09:19:49AM -0700, Greg KH wrote:
> > > > > > On Wed, Jul 26, 2017 at 05:55:48PM +0200, Borislav Petkov wrote:
> > > > > > > On Wed, Jul 26, 2017 at 05:45:15PM +0200, Jan Glauber wrote:
> > > > > > > > The PMU/EDAC devices are all PCI devices do I need the 'struct pci_dev *'.
> > > > > > > > I'm not aware of other ways to access these devices. Please enlighten
> > > > > > > > me if I'm missing something.
> > > > > > >
> > > > > > > Me enlighten you on Cavium hardware?! You're funny.
> > > > > > >
> > > > > > > So I don't know whether the PCI hotplug code can run more than one
> > > > > > > function upon PCI ID detection. Probably Greg will say, write a
> > > > > > > multiplexer wrapper. :-)
> > > > > >
> > > > > > -ENOCONTEXT....
> > > > > >
> > > > > > Anyway, pci questions are best asked on the linux-pci@vger list. And
> > > > > > yes, all PCI devices end up with a 'struct pci_dev *' automatically.
> > > > >
> > > > > Simple: so they have a PCI ID of a memory contoller and want to hotplug
> > > > > two drivers for it. And those two drivers should remain independent from
> > > > > each other.
> > > >
> > > > Hahahahaha, no. That's crazy, you were right in guessing what my answer
> > > > was going to be :)
> > > >
> > >
> > >
> > > Just to be clear about the situation, the device is a memory controller. It
> > > has two main behaviors we are interested in:
> > >
> > > A) Error Detection And Correction (EDAC). This should be connected to the
> > > kernel's EDAC subsystem. An existing driver (drivers/edac/thunderx_edac.c)
> > > does exactly this.
> > >
> > > B) Performance Counters for actions taken in the corresponding memory. This
> > > should be connected to the kernel's perf framework as an uncore-PMU (the
> > > subject of this patch set).
> > >
> > > It is a single PCI device. What should the driver architecture look like to
> > > connect it to two different kernel subsystems?
> >
> > Modify the drivers/edac/thunderx_edac.c code to add support for
> > performance counters.
> >
>
> Thanks Greg. This adds some clarity to the situation.
>
> This technique does slightly complicate the mapping of files and directories
> in the kernel source tree to maintainers.
>
> Also, if a given configuration disables CONFIG_EDAC there is some hackery
> needed to get the perf portion of the driver included.
Well, you all deserve it for trying to have a single PCI device do
multiple things at the same time. There's no real good reason for
creating hardware that way, PCI devices are "free", you should go throw
a printed copy of the PCI spec at the firmware developers who did this
to you.
Then get them to fix the firmware so you have multiple PCI devices...
good luck!
greg k-h
On Wed, Jul 26, 2017 at 02:02:42PM -0700, David Daney wrote:
> Also, if a given configuration disables CONFIG_EDAC there is some hackery
> needed to get the perf portion of the driver included.
Yes, and we don't do performance counters in EDAC.
So you could add a small memory controller driver which does the
arbitration or fix the firmware.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Thu, Jul 27, 2017 at 07:11:57AM +0200, Borislav Petkov wrote:
> On Wed, Jul 26, 2017 at 02:02:42PM -0700, David Daney wrote:
> > Also, if a given configuration disables CONFIG_EDAC there is some hackery
> > needed to get the perf portion of the driver included.
>
> Yes, and we don't do performance counters in EDAC.
>
> So you could add a small memory controller driver which does the
> arbitration or fix the firmware.
OK. As fixing the firmware will take quite some time I'll go for the memory
controller driver that starts EDAC / PMU depending on their CONFIG_.
What would be the proper location for the multiplexer?
drivers/soc/cavium or drivers/misc?
--Jan
On Thu, Jul 27, 2017 at 11:08:56AM +0200, Jan Glauber wrote:
> OK. As fixing the firmware will take quite some time I'll go for the memory
> controller driver that starts EDAC / PMU depending on their CONFIG_.
>
> What would be the proper location for the multiplexer?
> drivers/soc/cavium or drivers/misc?
Uff, no idea. Greg? You're the special drivers guy. :-)
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On 07/26/2017 07:29 PM, Greg KH wrote:
> On Wed, Jul 26, 2017 at 02:02:42PM -0700, David Daney wrote:
>> On 07/26/2017 01:08 PM, Greg KH wrote:
>>> On Wed, Jul 26, 2017 at 01:02:38PM -0700, David Daney wrote:
>>>> On 07/26/2017 10:33 AM, Greg KH wrote:
>>>>> On Wed, Jul 26, 2017 at 06:30:49PM +0200, Borislav Petkov wrote:
>>>>>> On Wed, Jul 26, 2017 at 09:19:49AM -0700, Greg KH wrote:
>>>>>>> On Wed, Jul 26, 2017 at 05:55:48PM +0200, Borislav Petkov wrote:
>>>>>>>> On Wed, Jul 26, 2017 at 05:45:15PM +0200, Jan Glauber wrote:
>>>>>>>>> The PMU/EDAC devices are all PCI devices do I need the 'struct pci_dev *'.
>>>>>>>>> I'm not aware of other ways to access these devices. Please enlighten
>>>>>>>>> me if I'm missing something.
>>>>>>>>
>>>>>>>> Me enlighten you on Cavium hardware?! You're funny.
>>>>>>>>
>>>>>>>> So I don't know whether the PCI hotplug code can run more than one
>>>>>>>> function upon PCI ID detection. Probably Greg will say, write a
>>>>>>>> multiplexer wrapper. :-)
>>>>>>>
>>>>>>> -ENOCONTEXT....
>>>>>>>
>>>>>>> Anyway, pci questions are best asked on the linux-pci@vger list. And
>>>>>>> yes, all PCI devices end up with a 'struct pci_dev *' automatically.
>>>>>>
>>>>>> Simple: so they have a PCI ID of a memory contoller and want to hotplug
>>>>>> two drivers for it. And those two drivers should remain independent from
>>>>>> each other.
>>>>>
>>>>> Hahahahaha, no. That's crazy, you were right in guessing what my answer
>>>>> was going to be :)
>>>>>
>>>>
>>>>
>>>> Just to be clear about the situation, the device is a memory controller. It
>>>> has two main behaviors we are interested in:
>>>>
>>>> A) Error Detection And Correction (EDAC). This should be connected to the
>>>> kernel's EDAC subsystem. An existing driver (drivers/edac/thunderx_edac.c)
>>>> does exactly this.
>>>>
>>>> B) Performance Counters for actions taken in the corresponding memory. This
>>>> should be connected to the kernel's perf framework as an uncore-PMU (the
>>>> subject of this patch set).
>>>>
>>>> It is a single PCI device. What should the driver architecture look like to
>>>> connect it to two different kernel subsystems?
>>>
>>> Modify the drivers/edac/thunderx_edac.c code to add support for
>>> performance counters.
>>>
>>
>> Thanks Greg. This adds some clarity to the situation.
>>
>> This technique does slightly complicate the mapping of files and directories
>> in the kernel source tree to maintainers.
>>
>> Also, if a given configuration disables CONFIG_EDAC there is some hackery
>> needed to get the perf portion of the driver included.
>
> Well, you all deserve it for trying to have a single PCI device do
> multiple things at the same time. There's no real good reason for
> creating hardware that way, PCI devices are "free", you should go throw
> a printed copy of the PCI spec at the firmware developers who did this
> to you.
The problem lies in something even more congealed than the "firmware".
The PCI topology is etched into the very fabric of the silicon of the SoC.
>
> Then get them to fix the firmware so you have multiple PCI devices...
>
> good luck!
We'll use all of that we can get! Thanks.
>
> greg k-h
>
On Thu, Jul 27, 2017 at 10:29:53AM -0700, David Daney wrote:
> On 07/26/2017 07:29 PM, Greg KH wrote:
> > On Wed, Jul 26, 2017 at 02:02:42PM -0700, David Daney wrote:
> > > On 07/26/2017 01:08 PM, Greg KH wrote:
> > > > On Wed, Jul 26, 2017 at 01:02:38PM -0700, David Daney wrote:
> > > > > On 07/26/2017 10:33 AM, Greg KH wrote:
> > > > > > On Wed, Jul 26, 2017 at 06:30:49PM +0200, Borislav Petkov wrote:
> > > > > > > On Wed, Jul 26, 2017 at 09:19:49AM -0700, Greg KH wrote:
> > > > > > > > On Wed, Jul 26, 2017 at 05:55:48PM +0200, Borislav Petkov wrote:
> > > > > > > > > On Wed, Jul 26, 2017 at 05:45:15PM +0200, Jan Glauber wrote:
> > > > > > > > > > The PMU/EDAC devices are all PCI devices do I need the 'struct pci_dev *'.
> > > > > > > > > > I'm not aware of other ways to access these devices. Please enlighten
> > > > > > > > > > me if I'm missing something.
> > > > > > > > >
> > > > > > > > > Me enlighten you on Cavium hardware?! You're funny.
> > > > > > > > >
> > > > > > > > > So I don't know whether the PCI hotplug code can run more than one
> > > > > > > > > function upon PCI ID detection. Probably Greg will say, write a
> > > > > > > > > multiplexer wrapper. :-)
> > > > > > > >
> > > > > > > > -ENOCONTEXT....
> > > > > > > >
> > > > > > > > Anyway, pci questions are best asked on the linux-pci@vger list. And
> > > > > > > > yes, all PCI devices end up with a 'struct pci_dev *' automatically.
> > > > > > >
> > > > > > > Simple: so they have a PCI ID of a memory contoller and want to hotplug
> > > > > > > two drivers for it. And those two drivers should remain independent from
> > > > > > > each other.
> > > > > >
> > > > > > Hahahahaha, no. That's crazy, you were right in guessing what my answer
> > > > > > was going to be :)
> > > > > >
> > > > >
> > > > >
> > > > > Just to be clear about the situation, the device is a memory controller. It
> > > > > has two main behaviors we are interested in:
> > > > >
> > > > > A) Error Detection And Correction (EDAC). This should be connected to the
> > > > > kernel's EDAC subsystem. An existing driver (drivers/edac/thunderx_edac.c)
> > > > > does exactly this.
> > > > >
> > > > > B) Performance Counters for actions taken in the corresponding memory. This
> > > > > should be connected to the kernel's perf framework as an uncore-PMU (the
> > > > > subject of this patch set).
> > > > >
> > > > > It is a single PCI device. What should the driver architecture look like to
> > > > > connect it to two different kernel subsystems?
> > > >
> > > > Modify the drivers/edac/thunderx_edac.c code to add support for
> > > > performance counters.
> > > >
> > >
> > > Thanks Greg. This adds some clarity to the situation.
> > >
> > > This technique does slightly complicate the mapping of files and directories
> > > in the kernel source tree to maintainers.
> > >
> > > Also, if a given configuration disables CONFIG_EDAC there is some hackery
> > > needed to get the perf portion of the driver included.
> >
> > Well, you all deserve it for trying to have a single PCI device do
> > multiple things at the same time. There's no real good reason for
> > creating hardware that way, PCI devices are "free", you should go throw
> > a printed copy of the PCI spec at the firmware developers who did this
> > to you.
>
> The problem lies in something even more congealed than the "firmware". The
> PCI topology is etched in to the very fabric of the silicon of the SoC.
That's not very wise, as you can see here, most "modern" chips allow
stuff like this to be changeable at the firmware level. I strongly
suggest telling the hardware developers to fix this for your next
revision.
Oh well, fix it in the kernel, that's what it's there for :)
greg k-h
On Thu, Jul 27, 2017 at 06:11:30PM -0700, Greg KH wrote:
> Oh well, fix it in the kernel, that's what it's there for :)
Duude, don't put crazy ideas in people's heads!
:-)))
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Thu, Jul 27, 2017 at 03:15:15PM +0200, Borislav Petkov wrote:
> On Thu, Jul 27, 2017 at 11:08:56AM +0200, Jan Glauber wrote:
> > OK. As fixing the firmware will take quite some time I'll go for the memory
> > controller driver that starts EDAC / PMU depending on their CONFIG_.
> >
> > What would be the proper location for the multiplexer?
> > drivers/soc/cavium or drivers/misc?
>
> Uff, no idea. Greg? You're the special drivers guy. :-)
Start out in misc/ and let's see the code to determine if it should be
moved elsewhere.
thanks,
greg k-h
On 27/07/17 10:08, Jan Glauber wrote:
> On Thu, Jul 27, 2017 at 07:11:57AM +0200, Borislav Petkov wrote:
>> On Wed, Jul 26, 2017 at 02:02:42PM -0700, David Daney wrote:
>>> Also, if a given configuration disables CONFIG_EDAC there is some hackery
>>> needed to get the perf portion of the driver included.
>>
>> Yes, and we don't do performance counters in EDAC.
>>
>> So you could add a small memory controller driver which does the
>> arbitration or fix the firmware.
>
> OK. As fixing the firmware will take quite some time I'll go for the memory
> controller driver that starts EDAC / PMU depending on their CONFIG_.
Please could you also rename CAVIUM_PMU to something like CAVIUM_MMC_PMU or even
CAVIUM_UNCOR_PMU ? The symbol could easily be mistaken for a "cavium" CPU PMU.
Suzuki
>
> What would be the proper location for the multiplexer?
> drivers/soc/cavium or drivers/misc?
>
> --Jan
>
On Fri, Jul 28, 2017 at 04:12:33PM -0700, Greg KH wrote:
> On Thu, Jul 27, 2017 at 03:15:15PM +0200, Borislav Petkov wrote:
> > On Thu, Jul 27, 2017 at 11:08:56AM +0200, Jan Glauber wrote:
> > > OK. As fixing the firmware will take quite some time I'll go for the memory
> > > controller driver that starts EDAC / PMU depending on their CONFIG_.
> > >
> > > What would be the proper location for the multiplexer?
> > > drivers/soc/cavium or drivers/misc?
> >
> > Uff, no idea. Greg? You're the special drivers guy. :-)
>
> Start out in misc/ and let's see the code to determine if it should be
> moved elsewhere.
Jan, are you ok making this change, or are you waiting for anything else
from us? (Mark and I are just going through the pending PMU patches and
we're not clear on the status of this one).
Cheers,
Will
On Tue, Aug 08, 2017 at 02:25:10PM +0100, Will Deacon wrote:
> On Fri, Jul 28, 2017 at 04:12:33PM -0700, Greg KH wrote:
> > On Thu, Jul 27, 2017 at 03:15:15PM +0200, Borislav Petkov wrote:
> > > On Thu, Jul 27, 2017 at 11:08:56AM +0200, Jan Glauber wrote:
> > > > OK. As fixing the firmware will take quite some time I'll go for the memory
> > > > controller driver that starts EDAC / PMU depending on their CONFIG_.
> > > >
> > > > What would be the proper location for the multiplexer?
> > > > drivers/soc/cavium or drivers/misc?
> > >
> > > Uff, no idea. Greg? You're the special drivers guy. :-)
> >
> > Start out in misc/ and let's see the code to determine if it should be
> > moved elsewhere.
>
> Jan, are you ok making this change, or are you waiting for anything else
> from us? (Mark and I are just going through the pending PMU patches and
> we're not clear on the status of this one).
Hi Will,
it's my turn but I was on vacation. It was not completely trivial (well,
to me) to get the multiplexing working with modules but I'll look at it
again this week.
Cheers,
Jan