2015-07-09 07:50:42

by Adrian Hunter

[permalink] [raw]
Subject: [RFC PATCH] perf: Provide status of known PMUs

Known PMUs may not be present for various reasons.
Provide a way for the user to know what the reason
is.

A bus attribute is created for each known PMU beneath
a group "known_pmus". The attribute name is the same
as the PMU name. The value is a string consisting of
one or, optionally, two parts: a canonical part, and
a driver specific part. If there are two parts, they
are separated by " - ". The canonical part is one of:

Supported
Driver error
Driver not loaded
Driver not in kernel config
Not supported by kernel
Not supported by hardware
Wrong vendor
Wrong architecture
Unknown status

Example:

$ cat /sys/bus/event_source/known_pmus/intel_pt
Supported

Signed-off-by: Adrian Hunter <[email protected]>
---
arch/x86/kernel/cpu/perf_event.c | 3 +
arch/x86/kernel/cpu/perf_event_intel_bts.c | 20 +++-
arch/x86/kernel/cpu/perf_event_intel_pt.c | 18 ++-
include/linux/perf_event.h | 23 ++++
kernel/events/core.c | 171 +++++++++++++++++++++++++++++
5 files changed, 226 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3658de47900f..546163a42a30 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1670,12 +1670,15 @@ static int __init init_hw_perf_events(void)

switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
+ perf_pmu_vendor("Intel");
err = intel_pmu_init();
break;
case X86_VENDOR_AMD:
+ perf_pmu_vendor("AMD");
err = amd_pmu_init();
break;
default:
+ perf_pmu_vendor("Unknown");
err = -ENOTSUPP;
}
if (err != 0) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c
index 43dd672d788b..6eb1b637668a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_bts.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c
@@ -514,8 +514,19 @@ static void bts_event_read(struct perf_event *event)

static __init int bts_init(void)
{
- if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
- return -ENODEV;
+ const char *status_msg = NULL;
+ int ret;
+
+ if (!x86_pmu.bts) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (!boot_cpu_has(X86_FEATURE_DTES64)) {
+ status_msg = "requires 64-bit kernel";
+ ret = -ENOTSUPP;
+ goto out;
+ }

bts_pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
bts_pmu.task_ctx_nr = perf_sw_context;
@@ -528,6 +539,9 @@ static __init int bts_init(void)
bts_pmu.setup_aux = bts_buffer_setup_aux;
bts_pmu.free_aux = bts_buffer_free_aux;

- return perf_pmu_register(&bts_pmu, "intel_bts", -1);
+ ret = perf_pmu_register(&bts_pmu, "intel_bts", -1);
+out:
+ perf_pmu_error_status("intel_bts", ret, status_msg);
+ return ret;
}
arch_initcall(bts_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 183de719628d..e2dc697ab1a6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -1059,6 +1059,7 @@ static int pt_event_init(struct perf_event *event)
static __init int pt_init(void)
{
int ret, cpu, prior_warn = 0;
+ const char *status_msg = NULL;

BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);
get_online_cpus();
@@ -1073,18 +1074,19 @@ static __init int pt_init(void)

if (prior_warn) {
x86_add_exclusive(x86_lbr_exclusive_pt);
- pr_warn("PT is enabled at boot time, doing nothing\n");
-
- return -EBUSY;
+ status_msg = "PT is enabled at boot time, doing nothing";
+ ret = -EBUSY;
+ goto out;
}

ret = pt_pmu_hw_init();
if (ret)
- return ret;
+ goto out;

if (!pt_cap_get(PT_CAP_topa_output)) {
- pr_warn("ToPA output is not supported on this CPU\n");
- return -ENODEV;
+ status_msg = "ToPA output is not supported on this CPU";
+ ret = -ENODEV;
+ goto out;
}

if (!pt_cap_get(PT_CAP_topa_multiple_entries))
@@ -1103,6 +1105,10 @@ static __init int pt_init(void)
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;
ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
+out:
+ if (status_msg)
+ pr_warn("%s\n", status_msg);
+ perf_pmu_error_status("intel_pt", ret, status_msg);

return ret;
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2027809433b3..e1eb95ec3e23 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -179,6 +179,22 @@ struct perf_event;
#define PERF_PMU_CAP_ITRACE 0x20

/**
+ * enum perf_pmu_status - the status of known PMUs
+ */
+enum perf_pmu_status {
+ PERF_PMU_STATUS_SUPPORTED,
+ PERF_PMU_STATUS_ERROR,
+ PERF_PMU_STATUS_NOT_LOADED,
+ PERF_PMU_STATUS_NOT_CONFIG,
+ PERF_PMU_STATUS_NOT_SUPPORTED,
+ PERF_PMU_STATUS_WRONG_HW,
+ PERF_PMU_STATUS_WRONG_VENDOR,
+ PERF_PMU_STATUS_WRONG_ARCH,
+ PERF_PMU_STATUS_UNKNOWN,
+ PERF_PMU_STATUS_MAX,
+};
+
+/**
* struct pmu - generic performance monitoring unit
*/
struct pmu {
@@ -631,6 +647,13 @@ extern void *perf_get_aux(struct perf_output_handle *handle);
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
extern void perf_pmu_unregister(struct pmu *pmu);

+extern void perf_pmu_update_status(const char *name,
+ enum perf_pmu_status status,
+ const char *status_msg);
+extern void perf_pmu_error_status(const char *name, int err,
+ const char *status_msg);
+extern void perf_pmu_vendor(const char *vendor);
+
extern int perf_num_counters(void);
extern const char *perf_pmu_name(void);
extern void __perf_event_task_sched_in(struct task_struct *prev,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d3dae3419b99..ad3e9ec1f32b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7154,6 +7154,176 @@ out:
}
static struct idr pmu_idr;

+struct known_pmu {
+ const char *name;
+ const char *vendor;
+ const char *status_msg;
+ int status;
+};
+
+#define KNOWN_PMU(_name, _vendor, _status) { \
+ .name = _name, \
+ .vendor = _vendor, \
+ .status = _status, \
+}
+
+#if defined(CONFIG_X86)
+#define PERF_PMU_STATUS_ARCH_X86 PERF_PMU_STATUS_UNKNOWN
+#else
+#define PERF_PMU_STATUS_ARCH_X86 PERF_PMU_STATUS_WRONG_ARCH
+#endif
+
+static struct known_pmu known_pmus[] = {
+ KNOWN_PMU("intel_pt", "Intel", PERF_PMU_STATUS_ARCH_X86),
+ KNOWN_PMU("intel_bts", "Intel", PERF_PMU_STATUS_ARCH_X86),
+ KNOWN_PMU(NULL, NULL, 0),
+};
+
+static DEFINE_MUTEX(known_pmus_lock);
+
+static struct known_pmu *known_pmu_find(const char *name)
+{
+ struct known_pmu *known_pmu;
+
+ if (!name)
+ return NULL;
+
+ for (known_pmu = known_pmus; known_pmu->name; known_pmu++) {
+ if (!strcmp(known_pmu->name, name))
+ return known_pmu;
+ }
+ return NULL;
+}
+
+static const char *pmu_status_msg[] = {
+ [PERF_PMU_STATUS_SUPPORTED] = "Supported",
+ [PERF_PMU_STATUS_ERROR] = "Driver error",
+ [PERF_PMU_STATUS_NOT_LOADED] = "Driver not loaded",
+ [PERF_PMU_STATUS_NOT_CONFIG] = "Driver not in kernel config",
+ [PERF_PMU_STATUS_NOT_SUPPORTED] = "Not supported by kernel",
+ [PERF_PMU_STATUS_WRONG_HW] = "Not supported by hardware",
+ [PERF_PMU_STATUS_WRONG_VENDOR] = "Wrong vendor",
+ [PERF_PMU_STATUS_WRONG_ARCH] = "Wrong architecture",
+ [PERF_PMU_STATUS_UNKNOWN] = "Unknown status",
+};
+
+static ssize_t __known_pmu_show(struct known_pmu *known_pmu, char *buf)
+{
+ const char *msg, *vendor;
+ int status;
+
+ status = known_pmu->status;
+ msg = known_pmu->status_msg;
+ vendor = known_pmu->vendor;
+
+ if (status < 0 || status >= PERF_PMU_STATUS_MAX)
+ status = PERF_PMU_STATUS_UNKNOWN;
+
+ if (!msg && status == PERF_PMU_STATUS_WRONG_VENDOR && vendor)
+ return sprintf(buf, "%s - requires %s CPU\n",
+ pmu_status_msg[status], vendor);
+
+ if (!msg)
+ return sprintf(buf, "%s\n", pmu_status_msg[status]);
+
+ return sprintf(buf, "%s - %s\n", pmu_status_msg[status], msg);
+}
+
+static ssize_t known_pmu_show(const char *name, char *buf)
+{
+ struct known_pmu *known_pmu = known_pmu_find(name);
+ ssize_t ret;
+
+ if (!known_pmu)
+ return sprintf(buf, "Unknown PMU\n");
+
+ mutex_lock(&known_pmus_lock);
+ ret = __known_pmu_show(known_pmu, buf);
+ mutex_unlock(&known_pmus_lock);
+
+ return ret;
+}
+
+void perf_pmu_update_status(const char *name, enum perf_pmu_status status,
+ const char *status_msg)
+{
+ struct known_pmu *known_pmu = known_pmu_find(name);
+
+ if (known_pmu) {
+ mutex_lock(&known_pmus_lock);
+ known_pmu->status = status;
+ kfree_const(known_pmus->status_msg);
+ known_pmus->status_msg = kstrdup_const(status_msg, GFP_KERNEL);
+ mutex_unlock(&known_pmus_lock);
+ }
+}
+
+void perf_pmu_error_status(const char *name, int err, const char *status_msg)
+{
+ enum perf_pmu_status status;
+
+ switch (err) {
+ case 0:
+ status = PERF_PMU_STATUS_SUPPORTED;
+ break;
+ case -ENODEV:
+ status = PERF_PMU_STATUS_WRONG_HW;
+ break;
+ case -ENOTSUPP:
+ status = PERF_PMU_STATUS_NOT_SUPPORTED;
+ break;
+ default:
+ status = PERF_PMU_STATUS_ERROR;
+ }
+
+ perf_pmu_update_status(name, status, status_msg);
+}
+
+void perf_pmu_vendor(const char *vendor)
+{
+ struct known_pmu *known_pmu;
+
+ if (!vendor)
+ return;
+
+ mutex_lock(&known_pmus_lock);
+ for (known_pmu = known_pmus; known_pmu->name; known_pmu++) {
+ if (known_pmu->status == PERF_PMU_STATUS_UNKNOWN &&
+ known_pmu->vendor && strcmp(known_pmu->vendor, vendor)) {
+ known_pmu->status = PERF_PMU_STATUS_WRONG_VENDOR;
+ kfree_const(known_pmus->status_msg);
+ known_pmu->status_msg = NULL;
+ }
+ }
+ mutex_unlock(&known_pmus_lock);
+}
+
+#define KNOWN_PMU_ATTR(_name) \
+static ssize_t _name##_show(struct bus_type *bus, char *buf) \
+{ \
+ return known_pmu_show(__stringify(_name), buf); \
+} \
+static BUS_ATTR_RO(_name);
+
+KNOWN_PMU_ATTR(intel_pt);
+KNOWN_PMU_ATTR(intel_bts);
+
+static struct attribute *known_pmus_attrs[] = {
+ &bus_attr_intel_pt.attr,
+ &bus_attr_intel_bts.attr,
+ NULL,
+};
+
+static const struct attribute_group pmu_bus_known_pmus_group = {
+ .name = "known_pmus",
+ .attrs = known_pmus_attrs,
+};
+
+static const struct attribute_group *pmu_bus_groups[] = {
+ &pmu_bus_known_pmus_group,
+ NULL,
+};
+
static ssize_t
type_show(struct device *dev, struct device_attribute *attr, char *page)
{
@@ -7224,6 +7394,7 @@ ATTRIBUTE_GROUPS(pmu_dev);
static int pmu_bus_running;
static struct bus_type pmu_bus = {
.name = "event_source",
+ .bus_groups = pmu_bus_groups,
.dev_groups = pmu_dev_groups,
};

--
1.9.1


2015-07-09 08:10:27

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs


* Adrian Hunter <[email protected]> wrote:

> Known PMUs may not be present for various reasons.
> Provide a way for the user to know what the reason
> is.
>
> A bus attribute is created for each known PMU beneath
> a group "known_pmus". The attribute name is the same
> as the PMU name. The value is a string consisting of
> one or, optionally, two parts: a canonical part, and
> a driver specific part. If there are two parts, they
> are separated by " - ". The canonical part is one of:
>
> Supported
> Driver error
> Driver not loaded
> Driver not in kernel config
> Not supported by kernel
> Not supported by hardware
> Wrong vendor
> Wrong architecture
> Unknown status

Very nice!

> Example:
>
> $ cat /sys/bus/event_source/known_pmus/intel_pt
> Supported

So I only have naming nits. 'Supported' is a bit ambiguous, because it could mean
that the PMU is supported but the driver is not active. How about 'Enabled'?

I'd also make the strings more unambiguously structured, something like:

Enabled
Disabled: Driver error
Disabled: Driver not loaded
Disabled: Driver not in kernel config
Disabled: Not supported by the kernel
Disabled: Not supported by the hardware
Disabled: Not supported by the hardware vendor
Disabled: Not supported by the the architecture
Disabled: Unknown status

(Note the small changes I did to the text in some places.)

Also note that I'd suggest not enumerating all the error reasons rigidly - just
have a single error code, but a free flowing error string that is provided by the
low level driver (and maybe strdup()-ed by the core). That way you can provide
very specific error descriptions, without having to change the core every time you
need a new category. Agreed?

Thanks,

Ingo

2015-07-09 08:47:42

by Adrian Hunter

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs

On 09/07/15 11:10, Ingo Molnar wrote:
>
> * Adrian Hunter <[email protected]> wrote:
>
>> Known PMUs may not be present for various reasons.
>> Provide a way for the user to know what the reason
>> is.
>>
>> A bus attribute is created for each known PMU beneath
>> a group "known_pmus". The attribute name is the same
>> as the PMU name. The value is a string consisting of
>> one or, optionally, two parts: a canonical part, and
>> a driver specific part. If there are two parts, they
>> are separated by " - ". The canonical part is one of:
>>
>> Supported
>> Driver error
>> Driver not loaded
>> Driver not in kernel config
>> Not supported by kernel
>> Not supported by hardware
>> Wrong vendor
>> Wrong architecture
>> Unknown status
>
> Very nice!
>
>> Example:
>>
>> $ cat /sys/bus/event_source/known_pmus/intel_pt
>> Supported
>
> So I only have naming nits. 'Supported' is a bit ambiguous, because it could mean
> that the PMU is supported but the driver is not active. How about 'Enabled'?
>
> I'd also make the strings more unambiguously structured, something like:
>
> Enabled
> Disabled: Driver error
> Disabled: Driver not loaded
> Disabled: Driver not in kernel config
> Disabled: Not supported by the kernel
> Disabled: Not supported by the hardware
> Disabled: Not supported by the hardware vendor
> Disabled: Not supported by the the architecture
> Disabled: Unknown status
>
> (Note the small changes I did to the text in some places.)

OK

>
> Also note that I'd suggest not enumerating all the error reasons rigidly - just
> have a single error code, but a free flowing error string that is provided by the
> low level driver (and maybe strdup()-ed by the core). That way you can provide
> very specific error descriptions, without having to change the core every time you
> need a new category. Agreed?

Drivers can optionally provide a string - the optional second part described above.
For example:

$ cat /sys/bus/event_source/known_pmus/intel_pt
Disabled: Not supported by the hardware - ToPA output is not supported on this CPU

Having a finite set of categories allows software to interpret the string for
purposes other than displaying it. Having an optional second part allows
drivers to detail anything else.

2015-07-09 08:50:46

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs

On Thu, Jul 09, 2015 at 10:48:00AM +0300, Adrian Hunter wrote:
> Known PMUs may not be present for various reasons.
> Provide a way for the user to know what the reason
> is.

Not a bad idea, but I do wonder where we should draw the line on what is
'known'. The patch as proposed will have bts/pt listed as 'known' for
every arch out there.

By that logic, x86 should list the ppc/sparc/mips/arm/etc.. PMUs as
known and wrong_arch too, which might be a tad excessive.

Can we limit it to PMUs for which we've (attempted to) load the drivers?
That would obviously make a few of your status bits redundant, but then
you've not explained why we're interested in it.

> Supported
> Driver error
> Driver not loaded
> Not supported by hardware
> Wrong vendor
> Unknown status

There would work.

> Driver not in kernel config
> Not supported by kernel
> Wrong architecture

These will be hard, for if we don't load the driver we don't 'know' of
them.

2015-07-09 09:27:11

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs


* Peter Zijlstra <[email protected]> wrote:

> On Thu, Jul 09, 2015 at 10:48:00AM +0300, Adrian Hunter wrote:
>
> > Known PMUs may not be present for various reasons. Provide a way for the user
> > to know what the reason is.
>
> Not a bad idea, but I do wonder where we should draw the line on what is
> 'known'. The patch as proposed will have bts/pt listed as 'known' for every arch
> out there.
>
> By that logic, x86 should list the ppc/sparc/mips/arm/etc.. PMUs as known and
> wrong_arch too, which might be a tad excessive.

Absolutely x86 should list them as well - from a user POV arch dependent tooling
sucks in general. There's nothing more annoying than trying to figure out why a
particular tool does not work.

If memory usage becomes any concern in the future we can list them in a bit more
compressed form.

Thanks,

Ingo

2015-07-09 09:33:25

by Adrian Hunter

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs

On 09/07/15 11:50, Peter Zijlstra wrote:
> On Thu, Jul 09, 2015 at 10:48:00AM +0300, Adrian Hunter wrote:
>> Known PMUs may not be present for various reasons.
>> Provide a way for the user to know what the reason
>> is.
>
> Not a bad idea, but I do wonder where we should draw the line on what is
> 'known'. The patch as proposed will have bts/pt listed as 'known' for
> every arch out there.
>
> By that logic, x86 should list the ppc/sparc/mips/arm/etc.. PMUs as
> known and wrong_arch too, which might be a tad excessive.
>
> Can we limit it to PMUs for which we've (attempted to) load the drivers?
> That would obviously make a few of your status bits redundant, but then
> you've not explained why we're interested in it.
>
>> Supported
>> Driver error
>> Driver not loaded
>> Not supported by hardware
>> Wrong vendor
>> Unknown status
>
> There would work.
>
>> Driver not in kernel config
>> Not supported by kernel
>> Wrong architecture
>
> These will be hard, for if we don't load the driver we don't 'know' of
> them.

Are they that hard?

The architecture one is done by perf core at the moment when the initial
status is defined e.g.

#if defined(CONFIG_X86)
#define PERF_PMU_STATUS_ARCH_X86 PERF_PMU_STATUS_UNKNOWN
#else
#define PERF_PMU_STATUS_ARCH_X86 PERF_PMU_STATUS_WRONG_ARCH
#endif

static struct known_pmu known_pmus[] = {
KNOWN_PMU("intel_pt", "Intel", PERF_PMU_STATUS_ARCH_X86),
KNOWN_PMU("intel_bts", "Intel", PERF_PMU_STATUS_ARCH_X86),
KNOWN_PMU(NULL, NULL, 0),
};

For config and module the arch perf init code can check e.g.

#if !defined(CONFIG_MY_PMU) && !defined(CONFIG_MY_PMU_MODULE)
perf_pmu_update_status("my_pmu", PERF_PMU_STATUS_NOT_CONFIG, NULL);
#elif defined(CONFIG_MY_PMU_MODULE)
perf_pmu_update_status("my_pmu", PERF_PMU_STATUS_NOT_LOADED, NULL);
#endif

2015-07-09 11:44:53

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs

On Thu, Jul 09, 2015 at 12:30:30PM +0300, Adrian Hunter wrote:
> On 09/07/15 11:50, Peter Zijlstra wrote:

> > Can we limit it to PMUs for which we've (attempted to) load the drivers?
> > That would obviously make a few of your status bits redundant, but then
> > you've not explained why we're interested in it.

> >> Driver not in kernel config
> >> Not supported by kernel
> >> Wrong architecture
> >
> > These will be hard, for if we don't load the driver we don't 'know' of
> > them.
>
> Are they that hard?

Because if we limit known to be what we're tried to probe, you simply do
not know about PMUs for the wrong arch or not build by the kernel etc.

> static struct known_pmu known_pmus[] = {
> KNOWN_PMU("intel_pt", "Intel", PERF_PMU_STATUS_ARCH_X86),
> KNOWN_PMU("intel_bts", "Intel", PERF_PMU_STATUS_ARCH_X86),
> KNOWN_PMU(NULL, NULL, 0),
> };

So I really don't like this hard-coded table much, that's just going to
be a pain to maintain.

2015-07-09 11:59:57

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs

On Thu, Jul 09, 2015 at 11:26:56AM +0200, Ingo Molnar wrote:
>
> * Peter Zijlstra <[email protected]> wrote:
>
> > On Thu, Jul 09, 2015 at 10:48:00AM +0300, Adrian Hunter wrote:
> >
> > > Known PMUs may not be present for various reasons. Provide a way for the user
> > > to know what the reason is.
> >
> > Not a bad idea, but I do wonder where we should draw the line on what is
> > 'known'. The patch as proposed will have bts/pt listed as 'known' for every arch
> > out there.
> >
> > By that logic, x86 should list the ppc/sparc/mips/arm/etc.. PMUs as known and
> > wrong_arch too, which might be a tad excessive.
>
> Absolutely x86 should list them as well - from a user POV arch dependent tooling
> sucks in general. There's nothing more annoying than trying to figure out why a
> particular tool does not work.

But why would the tool care?

2015-07-09 12:32:17

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs


* Peter Zijlstra <[email protected]> wrote:

> On Thu, Jul 09, 2015 at 11:26:56AM +0200, Ingo Molnar wrote:
> >
> > * Peter Zijlstra <[email protected]> wrote:
> >
> > > On Thu, Jul 09, 2015 at 10:48:00AM +0300, Adrian Hunter wrote:
> > >
> > > > Known PMUs may not be present for various reasons. Provide a way for the user
> > > > to know what the reason is.
> > >
> > > Not a bad idea, but I do wonder where we should draw the line on what is
> > > 'known'. The patch as proposed will have bts/pt listed as 'known' for every arch
> > > out there.
> > >
> > > By that logic, x86 should list the ppc/sparc/mips/arm/etc.. PMUs as known
> > > and wrong_arch too, which might be a tad excessive.
> >
> > Absolutely x86 should list them as well - from a user POV arch dependent
> > tooling sucks in general. There's nothing more annoying than trying to figure
> > out why a particular tool does not work.
>
> But why would the tool care?

Yeah, but the user cares. If I type something like:

perf record -e bts// --per-thread sleep 1

... and it does not work, I expect the tool to print something informative and
productive, for example one of these:

perf record error: The 'bts' PMU is not available, because this architecture does not support it
perf record error: The 'bts' PMU is not available, because the CPU does not support it
perf record error: The 'bts' PMU is not available, because its driver is not built into the kernel

Because if it's the wrong architecture or CPU, I look for a box with the right
one, if it's simply the kernel not having the necessary PMU driver then I'll boot
a kernel with it enabled.

Thanks,

Ingo

2015-07-09 12:43:14

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs

On Thu, Jul 09, 2015 at 02:32:05PM +0200, Ingo Molnar wrote:
>
> perf record error: The 'bts' PMU is not available, because the CPU does not support it

This one makes sense.

> perf record error: The 'bts' PMU is not available, because this architecture does not support it
> perf record error: The 'bts' PMU is not available, because its driver is not built into the kernel
>
> Because if it's the wrong architecture or CPU, I look for a box with the right
> one, if it's simply the kernel not having the necessary PMU driver then I'll boot
> a kernel with it enabled.

These not so much; why won't a generic: "Unknown PMU, check arch/kernel"
do?

The thing is, I hate that hard-coded list, its pain I don't need.

2015-07-09 14:48:07

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs

Em Thu, Jul 09, 2015 at 02:42:57PM +0200, Peter Zijlstra escreveu:
> On Thu, Jul 09, 2015 at 02:32:05PM +0200, Ingo Molnar wrote:

> > perf record error: The 'bts' PMU is not available, because the CPU does not support it

> This one makes sense.

> > perf record error: The 'bts' PMU is not available, because this architecture does not support it
> > perf record error: The 'bts' PMU is not available, because its driver is not built into the kernel

> > Because if it's the wrong architecture or CPU, I look for a box with
> > the right one, if it's simply the kernel not having the necessary
> > PMU driver then I'll boot a kernel with it enabled.

> These not so much; why won't a generic: "Unknown PMU, check
> arch/kernel" do?

> The thing is, I hate that hard-coded list, its pain I don't need.

Well, then we need to have tooling looking around and having tables to
provide those more precise error messages, which we want to have.

I guess we can do parts of that by looking at info already exported by
the kernel, /proc/cpuinfo stuff, etc. But "Unknown PMU, check
arch/kernel" is way too vague :-\

- Arnaldo

2015-07-10 08:35:36

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs


* Peter Zijlstra <[email protected]> wrote:

> On Thu, Jul 09, 2015 at 02:32:05PM +0200, Ingo Molnar wrote:
> >
> > perf record error: The 'bts' PMU is not available, because the CPU does not support it
>
> This one makes sense.
>
> > perf record error: The 'bts' PMU is not available, because this architecture does not support it
> > perf record error: The 'bts' PMU is not available, because its driver is not built into the kernel
> >
> > Because if it's the wrong architecture or CPU, I look for a box with the right
> > one, if it's simply the kernel not having the necessary PMU driver then I'll boot
> > a kernel with it enabled.
>
> These not so much; why won't a generic: "Unknown PMU, check arch/kernel" do?

Yeah, I mean why not make the user's job harder if we can? We really don't want to
solve this problem technically and we _really_ want tooling to be fundamentally
unhelpful, right? ;-)

I realize that the 'Error: there was a bug, aborting' style of sado-masochistic
error messages are the current Linux tooling status quo, which opaque error
feedback comes from an early technological mistake of Unix system calls screwing
up error handling, and I also see that after decades of abuse people are showing
signs of the Stockholm Syndrome related to this problem, but it _really_ does not
have to be so ...

Whenever we can we should change such bad patterns.

> The thing is, I hate that hard-coded list, its pain I don't need.

Absolutely! I pointed this out during review as well.

It does not impact the core concept though: we should have a single numeric error,
and free form error strings provided by the place that first triggers some
problem. That should be both programmatically easy to handle and maximally
informative to the users.

At least half of a tool's usability comes not from how it behaves when it works,
but how it behaves when it does not. (SystemD, I'm looking at you.)

Thanks,

Ingo

2015-07-10 19:00:00

by Stephane Eranian

[permalink] [raw]
Subject: Re: [RFC PATCH] perf: Provide status of known PMUs

Hi,

On Fri, Jul 10, 2015 at 1:35 AM, Ingo Molnar <[email protected]> wrote:
>
> * Peter Zijlstra <[email protected]> wrote:
>
>> On Thu, Jul 09, 2015 at 02:32:05PM +0200, Ingo Molnar wrote:
>> >
>> > perf record error: The 'bts' PMU is not available, because the CPU does not support it
>>
>> This one makes sense.
>>
>> > perf record error: The 'bts' PMU is not available, because this architecture does not support it
>> > perf record error: The 'bts' PMU is not available, because its driver is not built into the kernel
>> >
>> > Because if it's the wrong architecture or CPU, I look for a box with the right
>> > one, if it's simply the kernel not having the necessary PMU driver then I'll boot
>> > a kernel with it enabled.
>>
>> These not so much; why won't a generic: "Unknown PMU, check arch/kernel" do?
>
> Yeah, I mean why not make the user's job harder if we can? We really don't want to
> solve this problem technically and we _really_ want tooling to be fundamentally
> unhelpful, right? ;-)
>
> I realize that the 'Error: there was a bug, aborting' style of sado-masochistic
> error messages are the current Linux tooling status quo, which opaque error
> feedback comes from an early technological mistake of Unix system calls screwing
> up error handling, and I also see that after decades of abuse people are showing
> signs of the Stockholm Syndrome related to this problem, but it _really_ does not
> have to be so ...
>
> Whenever we can we should change such bad patterns.
>
>> The thing is, I hate that hard-coded list, its pain I don't need.
>
> Absolutely! I pointed this out during review as well.
>
> It does not impact the core concept though: we should have a single numeric error,
> and free form error strings provided by the place that first triggers some
> problem. That should be both programmatically easy to handle and maximally
> informative to the users.
>
> At least half of a tool's usability comes not from how it behaves when it works,
> but how it behaves when it does not. (SystemD, I'm looking at you.)
>
This patch looks useful but it does not address a related issue. Here
you are reporting
on the status of specific PMU support, i.e., PMU is not supported by hardware.
But there is another problem which I ran into on ARM very often (like
on Tegra) and it
really annoys me. The PMU hardware is present, but the instance of
the PMU on a CPU
is not present, simply because the CPU is hotpluggable and its offline
at the time the
tool (perf) starts. I am not talking about explicit hotplugging by the
user but instead be
the kernel. Then during the run, the CPU is plugged back in by the
kernel to handle the
load. Perf misses monitoring that CPU completely, thus it does not
measure what's going
on in reality.

I understand that reporting that a PMU instance is supported but
offline does not
solve the entire problem. There needs to be some other kernel support.
But I think
it would be good to have the tool at least issue a warning saying:
"some CPUs are
offline, not monitoring all CPUs, results may be partial".