2019-04-17 20:31:38

by Bandan Das

[permalink] [raw]
Subject: [PATCH v2] perf/x86: descriptive failure messages for PMU init


There's a default warning message that gets printed, however,
there are various failure conditions:
- a msr read can fail
- a msr write can fail
- a msr has an unexpected value
- all msrs have unexpected values (disable PMU)

Lastly, use %llx to silence checkpatch

Signed-off-by: Bandan Das <[email protected]>
---
v2:
Remove virt specific pr_debugs
Change the default warning message

arch/x86/events/core.c | 53 +++++++++++++++++++++++++++++++-----------
1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 81911e11a15d..52b0893da78b 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -192,9 +192,16 @@ static void release_pmc_hardware(void) {}

static bool check_hw_exists(void)
{
- u64 val, val_fail = -1, val_new= ~0;
- int i, reg, reg_fail = -1, ret = 0;
- int bios_fail = 0;
+ u64 val = -1, val_fail = -1, val_new = ~0;
+ int i, reg = -1, reg_fail = -1, ret = 0;
+
+ enum {
+ READ_FAIL = 1,
+ WRITE_FAIL = 2,
+ PMU_FAIL = 3,
+ BIOS_FAIL = 4,
+ };
+ int status = 0;
int reg_safe = -1;

/*
@@ -204,10 +211,13 @@ static bool check_hw_exists(void)
for (i = 0; i < x86_pmu.num_counters; i++) {
reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
- if (ret)
+ if (ret) {
+ status = READ_FAIL;
goto msr_fail;
+ }
+
if (val & ARCH_PERFMON_EVENTSEL_ENABLE) {
- bios_fail = 1;
+ status = BIOS_FAIL;
val_fail = val;
reg_fail = reg;
} else {
@@ -218,11 +228,13 @@ static bool check_hw_exists(void)
if (x86_pmu.num_counters_fixed) {
reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
ret = rdmsrl_safe(reg, &val);
- if (ret)
+ if (ret) {
+ status = READ_FAIL;
goto msr_fail;
+ }
for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
if (val & (0x03 << i*4)) {
- bios_fail = 1;
+ status = BIOS_FAIL;
val_fail = val;
reg_fail = reg;
}
@@ -236,7 +248,7 @@ static bool check_hw_exists(void)
*/

if (reg_safe == -1) {
- reg = reg_safe;
+ status = PMU_FAIL;
goto msr_fail;
}

@@ -246,18 +258,22 @@ static bool check_hw_exists(void)
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
reg = x86_pmu_event_addr(reg_safe);
- if (rdmsrl_safe(reg, &val))
+ if (rdmsrl_safe(reg, &val)) {
+ status = READ_FAIL;
goto msr_fail;
+ }
val ^= 0xffffUL;
ret = wrmsrl_safe(reg, val);
ret |= rdmsrl_safe(reg, &val_new);
- if (ret || val != val_new)
+ if (ret || val != val_new) {
+ status = WRITE_FAIL;
goto msr_fail;
+ }

/*
* We still allow the PMU driver to operate:
*/
- if (bios_fail) {
+ if (status == BIOS_FAIL) {
pr_cont("Broken BIOS detected, complain to your hardware vendor.\n");
pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n",
reg_fail, val_fail);
@@ -270,8 +286,19 @@ static bool check_hw_exists(void)
pr_cont("PMU not available due to virtualization, using software events only.\n");
} else {
pr_cont("Broken PMU hardware detected, using software events only.\n");
- pr_err("Failed to access perfctr msr (MSR %x is %Lx)\n",
- reg, val_new);
+ }
+ switch (status) {
+ case READ_FAIL:
+ pr_err("Failed to read perfctr msr (MSR %x)\n", reg);
+ break;
+ case WRITE_FAIL:
+ pr_err("Failed to write perfctr msr (MSR %x, wrote: %llx, read: %llx)\n",
+ reg, val, val_new);
+ break;
+ case PMU_FAIL:
+ /* fall through for default message */
+ default:
+ pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources.\n");
}

return false;
--
2.19.2