This patch contains the sysfs support.
We use the sysfs interface fot two reasons:
- perfmon2 administration
- user level information
Perfmon2 creates new directories in /sys:
- /sys/kernel/perfmon: for adminstration and global information
- /sys/devices/system/cpu/cpuXX/perfmon: per-cpu statistics (debugging)
In /sys/kernel/perfmon we find:
- arg_size_max (R/W): maximum size of vector arguments in bytes
- buf_size_max (R/W): maximum aggregated size of all smapling buffers
- smpl_buffer_mem (R): current consumption of buf_size_max
- counter_width (R): PMU HW counter width
- debug (R/W) : enable perfmon2 debugging messages
- debug_ovfl (R/W): enable perfmon2 interrupt debugging messages
- formats/ : information about available sampling formats
- pmc_max_fast_arg (R): how many vector arguments can be processed on the stack for pfarg_pmc_t
- pmd_max_fast_arg (R): how many vector arguments can be processed on the stack for pfarg_pmd_t
- pmu_desc/ : information about PMU register mapping
- pmu_model (R): name of active PMU description module
- reset_stats (W): reset statistics
- sys_group (R/W): which user group is allowed to create systemwide perfmon2 contexts
- task_group (R/W): which user group is allowed to create per-thread perfmon2 contexts
- sys_sessions_count (R): number of active per-thread contexts
- task_sessions_count (R): number of active system-wide contexts
- version (R): perfmon2 version
The statistics we maintained in /sys/system/devices/cpu/cpuXX are mostly
for debugging purposes at this point.
--- linux-2.6.17.9.base/perfmon/perfmon_sysfs.c 1969-12-31 16:00:00.000000000 -0800
+++ linux-2.6.17.9/perfmon/perfmon_sysfs.c 2006-08-21 03:37:46.000000000 -0700
@@ -0,0 +1,637 @@
+/*
+ * perfmon_proc.c: perfmon2 /proc interface
+ *
+ * This file implements the perfmon2 interface which
+ * provides access to the hardware performance counters
+ * of the host processor.
+ *
+ * The initial version of perfmon.c was written by
+ * Ganesh Venkitachalam, IBM Corp.
+ *
+ * Then it was modified for perfmon-1.x by Stephane Eranian and
+ * David Mosberger, Hewlett Packard Co.
+ *
+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
+ * by Stephane Eranian, Hewlett Packard Co.
+ *
+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
+ * Contributed by Stephane Eranian <[email protected]>
+ * David Mosberger-Tang <[email protected]>
+ *
+ * More information about perfmon available at:
+ * http://www.hpl.hp.com/research/linux/perfmon
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/smp_lock.h>
+#include <linux/proc_fs.h>
+#include <linux/list.h>
+#include <linux/version.h>
+#include <linux/perfmon.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+
+#include <asm/bitops.h>
+#include <asm/errno.h>
+#include <asm/processor.h>
+
+struct pfm_attribute {
+ struct attribute attr;
+ ssize_t (*show)(void *, char *);
+ ssize_t (*store)(void *, const char *, size_t);
+};
+#define to_attr(n) container_of(n, struct pfm_attribute, attr);
+
+#define PFM_RO_ATTR(_name) \
+struct pfm_attribute attr_##_name = __ATTR_RO(_name)
+
+#define PFM_RW_ATTR(_name,_mode,_show,_store) \
+struct pfm_attribute attr_##_name = __ATTR(_name,_mode,_show,_store);
+
+static int pfm_sysfs_init_done; /* true when pfm_sysfs_init() completed */
+
+static void pfm_sysfs_init_percpu(int i);
+
+int pfm_sysfs_add_pmu(struct _pfm_pmu_config *pmu);
+
+struct pfm_controls pfm_controls = {
+ .sys_group = PFM_GROUP_PERM_ANY,
+ .task_group = PFM_GROUP_PERM_ANY,
+ .arg_size_max = PAGE_SIZE,
+ .smpl_buf_size_max = ~0,
+};
+EXPORT_SYMBOL(pfm_controls);
+
+DECLARE_PER_CPU(struct pfm_stats, pfm_stats);
+
+static struct kobject pfm_kernel_kobj, pfm_kernel_fmt_kobj;
+
+static void pfm_reset_stats(void)
+{
+ struct pfm_stats *st;
+ int m;
+
+ for_each_online_cpu(m) {
+
+ st = &per_cpu(pfm_stats,m);
+ /*
+ * cannot use memset because of kobj member
+ */
+ st->pfm_ovfl_intr_replay_count = 0;
+ st->pfm_ovfl_intr_regular_count = 0;
+ st->pfm_ovfl_intr_all_count = 0;
+ st->pfm_ovfl_intr_cycles = 0;
+ st->pfm_ovfl_intr_phase1 = 0;
+ st->pfm_ovfl_intr_phase2 = 0;
+ st->pfm_ovfl_intr_phase3 = 0;
+ st->pfm_fmt_handler_calls = 0;
+ st->pfm_fmt_handler_cycles = 0;
+ st->pfm_set_switch_count = 0;
+ st->pfm_set_switch_cycles = 0;
+ st->pfm_ctxsw_count = 0;
+ st->pfm_ctxsw_cycles = 0;
+ st->pfm_handle_timeout_count = 0;
+ }
+}
+
+
+static ssize_t pfm_fmt_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct pfm_smpl_fmt *fmt = to_smpl_fmt(kobj);
+ struct pfm_attribute *attribute = to_attr(attr);
+ return attribute->show ? attribute->show(fmt, buf) : -EIO;
+}
+
+static ssize_t pfm_pmu_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct _pfm_pmu_config *pmu= to_pmu(kobj);
+ struct pfm_attribute *attribute = to_attr(attr);
+ return attribute->show ? attribute->show(pmu, buf) : -EIO;
+}
+
+static ssize_t pfm_stats_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct pfm_stats *st = to_stats(kobj);
+ struct pfm_attribute *attribute = to_attr(attr);
+ return attribute->show ? attribute->show(st, buf) : -EIO;
+}
+
+static ssize_t pfm_stats_attr_store(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t count)
+{
+ struct pfm_stats *st = to_stats(kobj);
+ struct pfm_attribute *attribute = to_attr(attr);
+ return attribute->store ? attribute->store(st, buf, count) : -EIO;
+}
+
+static struct sysfs_ops pfm_fmt_sysfs_ops = {
+ .show = pfm_fmt_attr_show,
+};
+
+static struct sysfs_ops pfm_pmu_sysfs_ops = {
+ .show = pfm_pmu_attr_show,
+};
+
+static struct sysfs_ops pfm_stats_sysfs_ops = {
+ .show = pfm_stats_attr_show,
+ .store = pfm_stats_attr_store,
+};
+
+static struct kobj_type pfm_fmt_ktype = {
+ .sysfs_ops = &pfm_fmt_sysfs_ops,
+};
+
+static struct kobj_type pfm_pmu_ktype = {
+ .sysfs_ops = &pfm_pmu_sysfs_ops,
+};
+
+static struct kobj_type pfm_stats_ktype = {
+ .sysfs_ops = &pfm_stats_sysfs_ops,
+};
+
+decl_subsys_name(pfm_fmt, pfm_fmt, &pfm_fmt_ktype, NULL);
+decl_subsys_name(pfm_pmu, pfm_pmu, &pfm_pmu_ktype, NULL);
+decl_subsys_name(pfm_stats, pfm_stats, &pfm_stats_ktype, NULL);
+
+static ssize_t version_show(void *info, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u.%u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN);
+}
+
+static ssize_t pmd_max_fast_arg_show(void *info, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", PFM_PMD_STK_ARG);
+}
+
+static ssize_t pmc_max_fast_arg_show(void *info, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", PFM_PMC_STK_ARG);
+}
+
+static ssize_t pmu_model_show(void *info, char *buf)
+{
+ return pfm_sysfs_session_show(buf, PAGE_SIZE, 0);
+}
+
+static ssize_t counter_width_show(void *info, char *buf)
+{
+ return pfm_sysfs_session_show(buf, PAGE_SIZE, 1);
+}
+
+static ssize_t task_sessions_count_show(void *info, char *buf)
+{
+ return pfm_sysfs_session_show(buf, PAGE_SIZE, 2);
+}
+
+static ssize_t sys_sessions_count_show(void *info, char *buf)
+{
+ return pfm_sysfs_session_show(buf, PAGE_SIZE, 3);
+}
+
+static ssize_t smpl_buffer_mem_show(void *info, char *buf)
+{
+ return pfm_sysfs_session_show(buf, PAGE_SIZE, 4);
+}
+
+static ssize_t debug_show(void *info, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.debug);
+}
+
+static ssize_t debug_store(void *info, const char *buf, size_t sz)
+{
+ int d;
+
+ if (sscanf(buf,"%d", &d) != 1)
+ return -EINVAL;
+
+ pfm_controls.debug = d;
+
+ if (d == 0)
+ pfm_reset_stats();
+ return strnlen(buf, PAGE_SIZE);
+}
+
+static ssize_t debug_ovfl_show(void *info, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.debug_ovfl);
+}
+
+static ssize_t debug_ovfl_store(void *info, const char *buf, size_t sz)
+{
+ int d;
+
+ if (sscanf(buf,"%d", &d) != 1)
+ return -EINVAL;
+
+ pfm_controls.debug_ovfl = d;
+
+ return strnlen(buf, PAGE_SIZE);
+}
+
+static ssize_t reset_stats_show(void *info, char *buf)
+{
+ buf[0]='0';
+ buf[1]='\0';
+ return strnlen(buf, PAGE_SIZE);
+}
+
+static ssize_t reset_stats_store(void *info, const char *buf, size_t count)
+{
+ pfm_reset_stats();
+ return count;
+}
+
+static ssize_t sys_group_show(void *info, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.sys_group);
+}
+
+static ssize_t sys_group_store(void *info, const char *buf, size_t sz)
+{
+ int d;
+
+ if (sscanf(buf,"%d", &d) != 1)
+ return -EINVAL;
+
+ pfm_controls.sys_group = d;
+
+ return strnlen(buf, PAGE_SIZE);
+}
+
+static ssize_t task_group_show(void *info, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.task_group);
+}
+
+static ssize_t task_group_store(void *info, const char *buf, size_t sz)
+{
+ int d;
+
+ if (sscanf(buf,"%d", &d) != 1)
+ return -EINVAL;
+
+ pfm_controls.task_group = d;
+
+ return strnlen(buf, PAGE_SIZE);
+}
+
+static ssize_t buf_size_show(void *info, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.smpl_buf_size_max);
+}
+
+static ssize_t buf_size_store(void *info, const char *buf, size_t sz)
+{
+ size_t d;
+
+ if (sscanf(buf,"%zu", &d) != 1)
+ return -EINVAL;
+ /*
+ * we impose a page as the minimum
+ */
+ if (d < PAGE_SIZE)
+ return -EINVAL;
+
+ pfm_controls.smpl_buf_size_max = d;
+
+ return strnlen(buf, PAGE_SIZE);
+}
+
+static ssize_t arg_size_show(void *info, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.arg_size_max);
+}
+
+static ssize_t arg_size_store(void *info, const char *buf, size_t sz)
+{
+ size_t d;
+
+ if (sscanf(buf,"%zu", &d) != 1)
+ return -EINVAL;
+
+ /*
+ * we impose a page as the minimum
+ */
+ if (d < PAGE_SIZE)
+ return -EINVAL;
+
+ pfm_controls.arg_size_max = d;
+
+ return strnlen(buf, PAGE_SIZE);
+}
+
+/*
+ * /sys/kernel/perfmon attributes
+ */
+static PFM_RO_ATTR(version);
+static PFM_RO_ATTR(pmu_model);
+static PFM_RO_ATTR(counter_width);
+static PFM_RO_ATTR(task_sessions_count);
+static PFM_RO_ATTR(sys_sessions_count);
+static PFM_RO_ATTR(smpl_buffer_mem);
+static PFM_RO_ATTR(pmd_max_fast_arg);
+static PFM_RO_ATTR(pmc_max_fast_arg);
+
+static PFM_RW_ATTR(debug, 0644, debug_show, debug_store);
+static PFM_RW_ATTR(debug_ovfl, 0644, debug_ovfl_show, debug_ovfl_store);
+static PFM_RW_ATTR(reset_stats, 0644, reset_stats_show, reset_stats_store);
+static PFM_RW_ATTR(sys_group, 0644, sys_group_show, sys_group_store);
+static PFM_RW_ATTR(task_group, 0644, task_group_show, task_group_store);
+static PFM_RW_ATTR(buf_size_max, 0644, buf_size_show, buf_size_store);
+static PFM_RW_ATTR(arg_size_max, 0644, arg_size_show, arg_size_store);
+
+static struct attribute *pfm_kernel_attrs[] = {
+ &attr_version.attr,
+ &attr_pmu_model.attr,
+ &attr_counter_width.attr,
+ &attr_task_sessions_count.attr,
+ &attr_sys_sessions_count.attr,
+ &attr_smpl_buffer_mem.attr,
+ &attr_pmd_max_fast_arg.attr,
+ &attr_pmc_max_fast_arg.attr,
+ &attr_debug.attr,
+ &attr_debug_ovfl.attr,
+ &attr_reset_stats.attr,
+ &attr_sys_group.attr,
+ &attr_task_group.attr,
+ &attr_buf_size_max.attr,
+ &attr_arg_size_max.attr,
+ NULL
+};
+
+static struct attribute_group pfm_kernel_attr_group = {
+ .attrs = pfm_kernel_attrs,
+};
+
+
+int pfm_sysfs_init(void)
+{
+ int i, ret;
+
+ ret = subsystem_register(&pfm_fmt_subsys);
+ if (ret) {
+ PFM_INFO("cannot register pfm_fmt_subsys: %d", ret);
+ return ret;
+ }
+
+ ret = subsystem_register(&pfm_pmu_subsys);
+ if (ret) {
+ PFM_INFO("cannot register pfm_pmu_subsys: %d", ret);
+ subsystem_unregister(&pfm_fmt_subsys);
+ return ret;
+ }
+
+ ret = subsystem_register(&pfm_stats_subsys);
+ if (ret) {
+ PFM_INFO("cannot register pfm_statssubsys: %d", ret);
+ subsystem_unregister(&pfm_fmt_subsys);
+ subsystem_unregister(&pfm_pmu_subsys);
+ return ret;
+ }
+
+ kobject_init(&pfm_kernel_kobj);
+ kobject_init(&pfm_kernel_fmt_kobj);
+
+ pfm_kernel_kobj.parent = &kernel_subsys.kset.kobj;
+ kobject_set_name(&pfm_kernel_kobj, "perfmon");
+
+ pfm_kernel_fmt_kobj.parent = &pfm_kernel_kobj;
+ kobject_set_name(&pfm_kernel_fmt_kobj, "formats");
+
+ kobject_add(&pfm_kernel_kobj);
+ kobject_add(&pfm_kernel_fmt_kobj);
+
+ sysfs_create_group(&pfm_kernel_kobj, &pfm_kernel_attr_group);
+
+ for_each_online_cpu(i) {
+ pfm_sysfs_init_percpu(i);
+ }
+
+ pfm_sysfs_init_done = 1;
+
+ pfm_builtin_fmt_sysfs_add();
+
+ if (pfm_pmu_conf)
+ pfm_sysfs_add_pmu(pfm_pmu_conf);
+
+ return 0;
+}
+
+#define PFM_DECL_ATTR(name) \
+static ssize_t name##_show(void *info, char *buf) \
+{ \
+ struct pfm_stats *st = info;\
+ return snprintf(buf, PAGE_SIZE, "%llu\n", \
+ (unsigned long long)st->pfm_##name); \
+} \
+static PFM_RO_ATTR(name)
+
+/*
+ * per-cpu perfmon stats attributes
+ */
+PFM_DECL_ATTR(ovfl_intr_replay_count);
+PFM_DECL_ATTR(ovfl_intr_all_count);
+PFM_DECL_ATTR(ovfl_intr_cycles);
+PFM_DECL_ATTR(fmt_handler_calls);
+PFM_DECL_ATTR(fmt_handler_cycles);
+PFM_DECL_ATTR(set_switch_count);
+PFM_DECL_ATTR(set_switch_cycles);
+PFM_DECL_ATTR(ctxsw_count);
+PFM_DECL_ATTR(ctxsw_cycles);
+PFM_DECL_ATTR(handle_timeout_count);
+
+static ssize_t ovfl_intr_spurious_count_show(void *info, char *buf)
+{
+ struct pfm_stats *st = info;
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)(st->pfm_ovfl_intr_all_count
+ - st->pfm_ovfl_intr_regular_count));
+}
+
+static ssize_t ovfl_intr_regular_count_show(void *info, char *buf)
+{
+ struct pfm_stats *st = info;
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)(st->pfm_ovfl_intr_regular_count
+ - st->pfm_ovfl_intr_replay_count));
+}
+
+static PFM_RO_ATTR(ovfl_intr_spurious_count);
+static PFM_RO_ATTR(ovfl_intr_regular_count);
+
+static struct attribute *pfm_cpu_attrs[] = {
+ &attr_ovfl_intr_spurious_count.attr,
+ &attr_ovfl_intr_replay_count.attr,
+ &attr_ovfl_intr_regular_count.attr,
+ &attr_ovfl_intr_all_count.attr,
+ &attr_ovfl_intr_cycles.attr,
+ &attr_fmt_handler_calls.attr,
+ &attr_fmt_handler_cycles.attr,
+ &attr_set_switch_count.attr,
+ &attr_set_switch_cycles.attr,
+ &attr_ctxsw_count.attr,
+ &attr_ctxsw_cycles.attr,
+ &attr_handle_timeout_count.attr,
+ NULL
+};
+
+static struct attribute_group pfm_cpu_attr_group = {
+ .attrs = pfm_cpu_attrs,
+};
+
+static void pfm_sysfs_init_percpu(int i)
+{
+ struct sys_device *cpudev;
+ struct pfm_stats *st;
+
+ cpudev = get_cpu_sysdev(i);
+ if (!cpudev)
+ return;
+
+ st = &per_cpu(pfm_stats, i);
+
+ kobject_init(&st->kobj);
+
+ st->kobj.parent = &cpudev->kobj;
+ kobject_set_name(&st->kobj, "perfmon");
+ kobj_set_kset_s(st, pfm_stats_subsys);
+
+ kobject_add(&st->kobj);
+
+ sysfs_create_group(&st->kobj, &pfm_cpu_attr_group);
+}
+
+static ssize_t uuid_show(void *data, char *buf)
+{
+ struct pfm_smpl_fmt *fmt = data;
+
+ return snprintf(buf, PAGE_SIZE, "%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x"
+ "-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x\n",
+ fmt->fmt_uuid[0],
+ fmt->fmt_uuid[1],
+ fmt->fmt_uuid[2],
+ fmt->fmt_uuid[3],
+ fmt->fmt_uuid[4],
+ fmt->fmt_uuid[5],
+ fmt->fmt_uuid[6],
+ fmt->fmt_uuid[7],
+ fmt->fmt_uuid[8],
+ fmt->fmt_uuid[9],
+ fmt->fmt_uuid[10],
+ fmt->fmt_uuid[11],
+ fmt->fmt_uuid[12],
+ fmt->fmt_uuid[13],
+ fmt->fmt_uuid[14],
+ fmt->fmt_uuid[15]);
+}
+
+PFM_RO_ATTR(uuid);
+
+int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt)
+{
+ int ret;
+
+ if (pfm_sysfs_init_done == 0)
+ return 0;
+
+ kobject_init(&fmt->kobj);
+ kobject_set_name(&fmt->kobj, fmt->fmt_name);
+ kobj_set_kset_s(fmt, pfm_fmt_subsys);
+ fmt->kobj.parent = &pfm_kernel_fmt_kobj;
+
+ ret = kobject_add(&fmt->kobj);
+
+ return sysfs_create_file(&fmt->kobj, &attr_uuid.attr);
+}
+
+int pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt)
+{
+ if (pfm_sysfs_init_done == 0)
+ return 0;
+
+ sysfs_remove_file(&fmt->kobj, &attr_uuid.attr);
+
+ kobject_del(&fmt->kobj);
+
+ return 0;
+}
+
+/*
+ * because the mappings can vary between one PMU and the other, we cannot really
+ * use an attribute per PMC to describe a mapping. Instead we have a single
+ * mappings attribute per PMU.
+ */
+static ssize_t mappings_show(void *data, char *buf)
+{
+ struct _pfm_pmu_config *pmu = data;
+ size_t s = PAGE_SIZE;
+ u32 n = 0, i;
+
+ for (i = 0; i < PFM_MAX_PMCS; i++) {
+
+ if ((pmu->pmc_desc[i].type & PFM_REG_I) == 0)
+ continue;
+
+ n = snprintf(buf, s, "PMC%u:0x%llx:0x%llx:%s\n",
+ i,
+ (unsigned long long)pfm_pmu_conf->pmc_desc[i].dfl_val,
+ (unsigned long long)pfm_pmu_conf->pmc_desc[i].rsvd_msk,
+ pfm_pmu_conf->pmc_desc[i].desc);
+ buf += n;
+ if (n > s)
+ goto skip;
+ s -= n;
+ }
+
+ for (i = 0; i < PFM_MAX_PMDS; i++) {
+
+ if ((pmu->pmd_desc[i].type & PFM_REG_I) == 0)
+ continue;
+
+ n = snprintf(buf, s, "PMD%u:0x%llx:0x%llx:%s\n",
+ i,
+ (unsigned long long)pfm_pmu_conf->pmd_desc[i].dfl_val,
+ (unsigned long long)pfm_pmu_conf->pmd_desc[i].rsvd_msk,
+ pfm_pmu_conf->pmd_desc[i].desc);
+ buf += n;
+ if (n > s)
+ break;
+ s -= n;
+ }
+skip:
+ return PAGE_SIZE - s;
+}
+
+PFM_RO_ATTR(mappings);
+
+int pfm_sysfs_add_pmu(struct _pfm_pmu_config *pmu)
+{
+ if (pfm_sysfs_init_done == 0)
+ return 0;
+
+ kobject_init(&pmu->kobj);
+ kobject_set_name(&pmu->kobj, "pmu_desc");
+ kobj_set_kset_s(pmu, pfm_pmu_subsys);
+ pmu->kobj.parent = &pfm_kernel_kobj;
+
+ kobject_add(&pmu->kobj);
+
+ return sysfs_create_file(&pmu->kobj, &attr_mappings.attr);
+}
+
+int pfm_sysfs_remove_pmu(struct _pfm_pmu_config *pmu)
+{
+ if (pfm_sysfs_init_done == 0)
+ return 0;
+
+ sysfs_remove_file(&pmu->kobj, &attr_mappings.attr);
+
+ kobject_del(&pmu->kobj);
+
+ return 0;
+}
On Wed, 23 Aug 2006 01:05:56 -0700
Stephane Eranian <[email protected]> wrote:
> This patch contains the sysfs support.
>
> We use the sysfs interface fot two reasons:
> - perfmon2 administration
> - user level information
>
> Perfmon2 creates new directories in /sys:
> - /sys/kernel/perfmon: for adminstration and global information
> - /sys/devices/system/cpu/cpuXX/perfmon: per-cpu statistics (debugging)
>
> In /sys/kernel/perfmon we find:
> - arg_size_max (R/W): maximum size of vector arguments in bytes
> - buf_size_max (R/W): maximum aggregated size of all smapling buffers
> - smpl_buffer_mem (R): current consumption of buf_size_max
> - counter_width (R): PMU HW counter width
> - debug (R/W) : enable perfmon2 debugging messages
> - debug_ovfl (R/W): enable perfmon2 interrupt debugging messages
> - formats/ : information about available sampling formats
> - pmc_max_fast_arg (R): how many vector arguments can be processed on the stack for pfarg_pmc_t
> - pmd_max_fast_arg (R): how many vector arguments can be processed on the stack for pfarg_pmd_t
> - pmu_desc/ : information about PMU register mapping
> - pmu_model (R): name of active PMU description module
> - reset_stats (W): reset statistics
> - sys_group (R/W): which user group is allowed to create systemwide perfmon2 contexts
> - task_group (R/W): which user group is allowed to create per-thread perfmon2 contexts
> - sys_sessions_count (R): number of active per-thread contexts
> - task_sessions_count (R): number of active system-wide contexts
> - version (R): perfmon2 version
>
> The statistics we maintained in /sys/system/devices/cpu/cpuXX are mostly
> for debugging purposes at this point.
>
>
>
>
> --- linux-2.6.17.9.base/perfmon/perfmon_sysfs.c 1969-12-31 16:00:00.000000000 -0800
> +++ linux-2.6.17.9/perfmon/perfmon_sysfs.c 2006-08-21 03:37:46.000000000 -0700
> @@ -0,0 +1,637 @@
> +/*
> + * perfmon_proc.c: perfmon2 /proc interface
> + *
> + * This file implements the perfmon2 interface which
> + * provides access to the hardware performance counters
> + * of the host processor.
> + *
> + * The initial version of perfmon.c was written by
> + * Ganesh Venkitachalam, IBM Corp.
> + *
> + * Then it was modified for perfmon-1.x by Stephane Eranian and
> + * David Mosberger, Hewlett Packard Co.
> + *
> + * Version Perfmon-2.x is a complete rewrite of perfmon-1.x
> + * by Stephane Eranian, Hewlett Packard Co.
> + *
> + * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P.
> + * Contributed by Stephane Eranian <[email protected]>
> + * David Mosberger-Tang <[email protected]>
> + *
> + * More information about perfmon available at:
> + * http://www.hpl.hp.com/research/linux/perfmon
> + */
> +#include <linux/config.h>
> +#include <linux/module.h>
> +#include <linux/kernel.h>
> +#include <linux/smp_lock.h>
> +#include <linux/proc_fs.h>
> +#include <linux/list.h>
> +#include <linux/version.h>
> +#include <linux/perfmon.h>
> +#include <linux/device.h>
> +#include <linux/cpu.h>
> +
> +#include <asm/bitops.h>
> +#include <asm/errno.h>
> +#include <asm/processor.h>
> +
> +struct pfm_attribute {
> + struct attribute attr;
> + ssize_t (*show)(void *, char *);
> + ssize_t (*store)(void *, const char *, size_t);
> +};
> +#define to_attr(n) container_of(n, struct pfm_attribute, attr);
> +
> +#define PFM_RO_ATTR(_name) \
> +struct pfm_attribute attr_##_name = __ATTR_RO(_name)
> +
> +#define PFM_RW_ATTR(_name,_mode,_show,_store) \
> +struct pfm_attribute attr_##_name = __ATTR(_name,_mode,_show,_store);
> +
> +static int pfm_sysfs_init_done; /* true when pfm_sysfs_init() completed */
> +
> +static void pfm_sysfs_init_percpu(int i);
>
> +int pfm_sysfs_add_pmu(struct _pfm_pmu_config *pmu);
This is effectively an extern-decl-in-a-C-file. Either it should be
static, or this declaration should be moved to a shared header file.
> +static ssize_t debug_store(void *info, const char *buf, size_t sz)
> +{
> + int d;
> +
> + if (sscanf(buf,"%d", &d) != 1)
> + return -EINVAL;
> +
> + pfm_controls.debug = d;
> +
> + if (d == 0)
> + pfm_reset_stats();
> + return strnlen(buf, PAGE_SIZE);
> +}
hm. Does the sysfs core zero-terminate the incoming buffer? iirc it does.
perhaps a `return sz;' would be simpler and sufficient here.
> +int pfm_sysfs_init(void)
> +{
> + int i, ret;
> +
> + ret = subsystem_register(&pfm_fmt_subsys);
> + if (ret) {
> + PFM_INFO("cannot register pfm_fmt_subsys: %d", ret);
> + return ret;
> + }
> +
> + ret = subsystem_register(&pfm_pmu_subsys);
> + if (ret) {
> + PFM_INFO("cannot register pfm_pmu_subsys: %d", ret);
> + subsystem_unregister(&pfm_fmt_subsys);
> + return ret;
> + }
> +
> + ret = subsystem_register(&pfm_stats_subsys);
> + if (ret) {
> + PFM_INFO("cannot register pfm_statssubsys: %d", ret);
> + subsystem_unregister(&pfm_fmt_subsys);
> + subsystem_unregister(&pfm_pmu_subsys);
> + return ret;
> + }
We prefer the `goto out;' version of error recovery.
> + kobject_init(&pfm_kernel_kobj);
> + kobject_init(&pfm_kernel_fmt_kobj);
> +
> + pfm_kernel_kobj.parent = &kernel_subsys.kset.kobj;
> + kobject_set_name(&pfm_kernel_kobj, "perfmon");
> +
> + pfm_kernel_fmt_kobj.parent = &pfm_kernel_kobj;
> + kobject_set_name(&pfm_kernel_fmt_kobj, "formats");
> +
> + kobject_add(&pfm_kernel_kobj);
> + kobject_add(&pfm_kernel_fmt_kobj);
> +
> + sysfs_create_group(&pfm_kernel_kobj, &pfm_kernel_attr_group);
> +
> + for_each_online_cpu(i) {
> + pfm_sysfs_init_percpu(i);
> + }
Does this code support CPU hotplug?
> + pfm_sysfs_init_done = 1;
> +
> + pfm_builtin_fmt_sysfs_add();
> +
> + if (pfm_pmu_conf)
> + pfm_sysfs_add_pmu(pfm_pmu_conf);
> +
> + return 0;
> +}
> +
>
> ...
>
> +
> +static ssize_t uuid_show(void *data, char *buf)
> +{
> + struct pfm_smpl_fmt *fmt = data;
> +
> + return snprintf(buf, PAGE_SIZE, "%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x"
> + "-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x\n",
> + fmt->fmt_uuid[0],
> + fmt->fmt_uuid[1],
> + fmt->fmt_uuid[2],
> + fmt->fmt_uuid[3],
> + fmt->fmt_uuid[4],
> + fmt->fmt_uuid[5],
> + fmt->fmt_uuid[6],
> + fmt->fmt_uuid[7],
> + fmt->fmt_uuid[8],
> + fmt->fmt_uuid[9],
> + fmt->fmt_uuid[10],
> + fmt->fmt_uuid[11],
> + fmt->fmt_uuid[12],
> + fmt->fmt_uuid[13],
> + fmt->fmt_uuid[14],
> + fmt->fmt_uuid[15]);
> +}
Why does perfmon play with UUIDs??
> +
> +int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt)
> +{
> + int ret;
> +
> + if (pfm_sysfs_init_done == 0)
> + return 0;
> +
> + kobject_init(&fmt->kobj);
> + kobject_set_name(&fmt->kobj, fmt->fmt_name);
> + kobj_set_kset_s(fmt, pfm_fmt_subsys);
> + fmt->kobj.parent = &pfm_kernel_fmt_kobj;
> +
> + ret = kobject_add(&fmt->kobj);
> +
> + return sysfs_create_file(&fmt->kobj, &attr_uuid.attr);
> +}
> +
> +int pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt)
> +{
> + if (pfm_sysfs_init_done == 0)
> + return 0;
> +
> + sysfs_remove_file(&fmt->kobj, &attr_uuid.attr);
> +
> + kobject_del(&fmt->kobj);
> +
> + return 0;
> +}
What do these do? (Some comments would be nice)
> +/*
> + * because the mappings can vary between one PMU and the other, we cannot really
> + * use an attribute per PMC to describe a mapping. Instead we have a single
> + * mappings attribute per PMU.
> + */
> +static ssize_t mappings_show(void *data, char *buf)
> +{
> + struct _pfm_pmu_config *pmu = data;
> + size_t s = PAGE_SIZE;
> + u32 n = 0, i;
> +
> + for (i = 0; i < PFM_MAX_PMCS; i++) {
> +
> + if ((pmu->pmc_desc[i].type & PFM_REG_I) == 0)
> + continue;
> +
> + n = snprintf(buf, s, "PMC%u:0x%llx:0x%llx:%s\n",
> + i,
> + (unsigned long long)pfm_pmu_conf->pmc_desc[i].dfl_val,
> + (unsigned long long)pfm_pmu_conf->pmc_desc[i].rsvd_msk,
> + pfm_pmu_conf->pmc_desc[i].desc);
> + buf += n;
> + if (n > s)
> + goto skip;
> + s -= n;
> + }
> +
> + for (i = 0; i < PFM_MAX_PMDS; i++) {
> +
> + if ((pmu->pmd_desc[i].type & PFM_REG_I) == 0)
> + continue;
> +
> + n = snprintf(buf, s, "PMD%u:0x%llx:0x%llx:%s\n",
> + i,
> + (unsigned long long)pfm_pmu_conf->pmd_desc[i].dfl_val,
> + (unsigned long long)pfm_pmu_conf->pmd_desc[i].rsvd_msk,
> + pfm_pmu_conf->pmd_desc[i].desc);
> + buf += n;
> + if (n > s)
> + break;
> + s -= n;
> + }
> +skip:
> + return PAGE_SIZE - s;
> +}
This breaks the one-value-per-sysfs-file rule.
> +PFM_RO_ATTR(mappings);
> +
> +int pfm_sysfs_add_pmu(struct _pfm_pmu_config *pmu)
> +{
> + if (pfm_sysfs_init_done == 0)
> + return 0;
> +
> + kobject_init(&pmu->kobj);
> + kobject_set_name(&pmu->kobj, "pmu_desc");
> + kobj_set_kset_s(pmu, pfm_pmu_subsys);
> + pmu->kobj.parent = &pfm_kernel_kobj;
> +
> + kobject_add(&pmu->kobj);
> +
> + return sysfs_create_file(&pmu->kobj, &attr_mappings.attr);
> +}
> +
> +int pfm_sysfs_remove_pmu(struct _pfm_pmu_config *pmu)
> +{
> + if (pfm_sysfs_init_done == 0)
> + return 0;
> +
> + sysfs_remove_file(&pmu->kobj, &attr_mappings.attr);
> +
> + kobject_del(&pmu->kobj);
> +
> + return 0;
> +}
Again - comments, please.