If AMD Performance Monitoring Version 2 (PerfMonV2) is
supported, use a new scheme to process Core PMC overflows
in the NMI handler using the new global control and status
registers. On unsupported hardware (x86_pmu.version < 2), the new
scheme is bypassed and the legacy per-counter checks continue to be
used.
In x86_pmu_handle_irq(), overflows are detected by testing
the contents of the PERF_CTR register for each active PMC in
a loop. The new scheme instead inspects the overflow bits of
the global status register.
The Performance Counter Global Status (PerfCntrGlobalStatus)
register has overflow (PerfCntrOvfl) bits for each PMC. This
is, however, a read-only MSR. To acknowledge that overflows
have been processed, the NMI handler must clear the bits by
writing to the PerfCntrGlobalStatusClr register.
In x86_pmu_handle_irq(), PMCs counting the same event that are
started and stopped at the same time record slightly different counts
due to delays between successive reads of the PERF_CTR registers.
This is fixed by stopping and starting the PMCs at the same time,
with a single write to the Performance Counter Global Control
(PerfCntrGlobalCtl) register upon entering and before exiting the
NMI handler.
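As an illustrative sketch only (the actual handler is in the diff
below), the MSR sequence described above is roughly:

    u64 status;

    /* Stop all PMCs at once with a single global control write */
    wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);

    /* Read pending overflows from the read-only global status MSR */
    rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);

    /* ... process each set PerfCntrOvfl bit ... */

    /* Acknowledge only the counter overflow bits via the clear MSR */
    status &= amd_pmu_global_cntr_mask;
    wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);

    /* Restart all PMCs, again with a single global control write */
    wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, amd_pmu_global_cntr_mask);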
Signed-off-by: Sandipan Das <[email protected]>
---
arch/x86/events/amd/core.c | 125 +++++++++++++++++++++++++++++++++++--
1 file changed, 121 insertions(+), 4 deletions(-)
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 532e9bd76bf1..fbbba981d0bd 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -7,6 +7,7 @@
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <asm/apicdef.h>
+#include <asm/apic.h>
#include <asm/nmi.h>
#include "../perf_event.h"
@@ -601,6 +602,45 @@ static inline void amd_pmu_set_global_ctl(u64 ctl)
wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl);
}
+static inline u64 amd_pmu_get_global_overflow(void)
+{
+ u64 status;
+
+ /* PerfCntrGlobalStatus is read-only */
+ rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
+
+ return status & amd_pmu_global_cntr_mask;
+}
+
+static inline void amd_pmu_ack_global_overflow(u64 status)
+{
+ /*
+ * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
+ * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
+ * clears the same bit in PerfCntrGlobalStatus
+ */
+
+ /* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
+ status &= amd_pmu_global_cntr_mask;
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
+}
+
+static bool amd_pmu_legacy_has_overflow(int idx)
+{
+ u64 counter;
+
+ rdmsrl(x86_pmu_event_addr(idx), counter);
+
+ return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
+}
+
+static bool amd_pmu_global_has_overflow(int idx)
+{
+ return amd_pmu_get_global_overflow() & BIT_ULL(idx);
+}
+
+DEFINE_STATIC_CALL(amd_pmu_has_overflow, amd_pmu_legacy_has_overflow);
+
/*
* When a PMC counter overflows, an NMI is used to process the event and
* reset the counter. NMI latency can result in the counter being updated
@@ -613,7 +653,6 @@ static inline void amd_pmu_set_global_ctl(u64 ctl)
static void amd_pmu_wait_on_overflow(int idx)
{
unsigned int i;
- u64 counter;
/*
* Wait for the counter to be reset if it has overflowed. This loop
@@ -621,8 +660,7 @@ static void amd_pmu_wait_on_overflow(int idx)
* forever...
*/
for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
- rdmsrl(x86_pmu_event_addr(idx), counter);
- if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+ if (!static_call(amd_pmu_has_overflow)(idx))
break;
/* Might be in IRQ context, so can't sleep */
@@ -718,6 +756,83 @@ static void amd_pmu_enable_event(struct perf_event *event)
static_call(amd_pmu_enable_event)(event);
}
+static int amd_pmu_global_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ struct hw_perf_event *hwc;
+ struct perf_event *event;
+ u64 val, status, mask;
+ int handled = 0, idx;
+
+ status = amd_pmu_get_global_overflow();
+
+ /* Check if any overflows are pending */
+ if (!status)
+ return 0;
+
+ /* Stop counting */
+ amd_pmu_global_disable_all();
+
+ cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * Some chipsets need to unmask the LVTPC in a particular spot
+ * inside the nmi handler. As a result, the unmasking was
+ * pushed into all the nmi handlers.
+ *
+ * This generic handler doesn't seem to have any issues where
+ * the unmasking occurs so it was left at the top.
+ *
+ * N.B. Taken from x86_pmu_handle_irq()
+ */
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+
+ for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ if (!test_bit(idx, cpuc->active_mask))
+ continue;
+
+ event = cpuc->events[idx];
+ hwc = &event->hw;
+ val = x86_perf_event_update(event);
+ mask = BIT_ULL(idx);
+
+ if (!(status & mask))
+ continue;
+
+ /* Event overflow */
+ handled++;
+ perf_sample_data_init(&data, 0, hwc->last_period);
+
+ if (!x86_perf_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+
+ status &= ~mask;
+ }
+
+ /*
+ * It should never be the case that some overflows are not handled as
+ * the corresponding PMCs are expected to be inactive according to the
+ * active_mask
+ */
+ WARN_ON(status > 0);
+
+ /* Clear overflow bits */
+ amd_pmu_ack_global_overflow(~status);
+
+ inc_irq_stat(apic_perf_irqs);
+
+ /* Resume counting */
+ amd_pmu_global_enable_all(0);
+
+ return handled;
+}
+
+DEFINE_STATIC_CALL(amd_pmu_handle_irq, x86_pmu_handle_irq);
+
/*
* Because of NMI latency, if multiple PMC counters are active or other sources
* of NMIs are received, the perf NMI handler can handle one or more overflowed
@@ -741,7 +856,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
int handled;
/* Process any counter overflows */
- handled = x86_pmu_handle_irq(regs);
+ handled = static_call(amd_pmu_handle_irq)(regs);
/*
* If a counter was handled, record a timestamp such that un-handled
@@ -1041,6 +1156,8 @@ static int __init amd_core_pmu_init(void)
static_call_update(amd_pmu_enable_all, amd_pmu_global_enable_all);
static_call_update(amd_pmu_disable_all, amd_pmu_global_disable_all);
static_call_update(amd_pmu_enable_event, amd_pmu_global_enable_event);
+ static_call_update(amd_pmu_has_overflow, amd_pmu_global_has_overflow);
+ static_call_update(amd_pmu_handle_irq, amd_pmu_global_handle_irq);
}
/*
--
2.32.0
On Thu, Mar 17, 2022 at 11:58:35AM +0530, Sandipan Das wrote:
> +static inline u64 amd_pmu_get_global_overflow(void)
> +{
> + u64 status;
> +
> + /* PerfCntrGlobalStatus is read-only */
> + rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
> +
> + return status & amd_pmu_global_cntr_mask;
> +}
> +
> +static inline void amd_pmu_ack_global_overflow(u64 status)
> +{
> + /*
> + * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
> + * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
> + * clears the same bit in PerfCntrGlobalStatus
> + */
> +
> + /* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
> + status &= amd_pmu_global_cntr_mask;
> + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
> +}
> +
> +static bool amd_pmu_legacy_has_overflow(int idx)
> +{
> + u64 counter;
> +
> + rdmsrl(x86_pmu_event_addr(idx), counter);
> +
> + return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
> +}
> +
> +static bool amd_pmu_global_has_overflow(int idx)
> +{
> + return amd_pmu_get_global_overflow() & BIT_ULL(idx);
> +}
> +
> +DEFINE_STATIC_CALL(amd_pmu_has_overflow, amd_pmu_legacy_has_overflow);
> +
> /*
> * When a PMC counter overflows, an NMI is used to process the event and
> * reset the counter. NMI latency can result in the counter being updated
> @@ -613,7 +653,6 @@ static inline void amd_pmu_set_global_ctl(u64 ctl)
> static void amd_pmu_wait_on_overflow(int idx)
> {
> unsigned int i;
> - u64 counter;
>
> /*
> * Wait for the counter to be reset if it has overflowed. This loop
> @@ -621,8 +660,7 @@ static void amd_pmu_wait_on_overflow(int idx)
> * forever...
> */
> for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
> - rdmsrl(x86_pmu_event_addr(idx), counter);
> - if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
> + if (!static_call(amd_pmu_has_overflow)(idx))
> break;
>
> /* Might be in IRQ context, so can't sleep */
This scares me... please tell me you fixed that mess.
> @@ -718,6 +756,83 @@ static void amd_pmu_enable_event(struct perf_event *event)
> static_call(amd_pmu_enable_event)(event);
> }
>
> +static int amd_pmu_global_handle_irq(struct pt_regs *regs)
> +{
> + struct perf_sample_data data;
> + struct cpu_hw_events *cpuc;
> + struct hw_perf_event *hwc;
> + struct perf_event *event;
> + u64 val, status, mask;
> + int handled = 0, idx;
> +
> + status = amd_pmu_get_global_overflow();
> +
> + /* Check if any overflows are pending */
> + if (!status)
> + return 0;
> +
> + /* Stop counting */
> + amd_pmu_global_disable_all();
This seems weird to me, I'd first disable it, then read status. MSR
access is expensive, you want to shut down the PMU asap.
Also, this is written like PMI would not be the primary NMI source,
which seems somewhat unlikely.
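For illustration, a minimal sketch of the suggested ordering, using the
helper names from the patch above (this ignores the perf_pmu_{dis,en}able()
interplay raised further down):

    /* Stop the PMU before the status read - the PMI is the primary
     * NMI source, so there is almost always work to do */
    amd_pmu_global_disable_all();

    status = amd_pmu_get_global_overflow();
    if (!status) {
            /* Nothing pending after all; resume counting and bail */
            amd_pmu_global_enable_all(0);
            return 0;
    }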
> +
> + cpuc = this_cpu_ptr(&cpu_hw_events);
> +
> + /*
> + * Some chipsets need to unmask the LVTPC in a particular spot
> + * inside the nmi handler. As a result, the unmasking was
> + * pushed into all the nmi handlers.
> + *
> + * This generic handler doesn't seem to have any issues where
> + * the unmasking occurs so it was left at the top.
> + *
> + * N.B. Taken from x86_pmu_handle_irq()
> + */
Please write an AMD specific comment here. Note how 'recent' Intel chips
ended up pushing this to the end of the handler. Verify with your
hardware team where they want this and write as much of the rationale as
you're allowed to share in the comment.
> + apic_write(APIC_LVTPC, APIC_DM_NMI);
> +
> + for (idx = 0; idx < x86_pmu.num_counters; idx++) {
> + if (!test_bit(idx, cpuc->active_mask))
> + continue;
> +
> + event = cpuc->events[idx];
> + hwc = &event->hw;
> + val = x86_perf_event_update(event);
> + mask = BIT_ULL(idx);
> +
> + if (!(status & mask))
> + continue;
> +
> + /* Event overflow */
> + handled++;
> + perf_sample_data_init(&data, 0, hwc->last_period);
> +
> + if (!x86_perf_event_set_period(event))
> + continue;
> +
> + if (perf_event_overflow(event, &data, regs))
> + x86_pmu_stop(event, 0);
> +
> + status &= ~mask;
> + }
> +
> + /*
> + * It should never be the case that some overflows are not handled as
> + * the corresponding PMCs are expected to be inactive according to the
> + * active_mask
> + */
> + WARN_ON(status > 0);
> +
> + /* Clear overflow bits */
> + amd_pmu_ack_global_overflow(~status);
> +
> + inc_irq_stat(apic_perf_irqs);
> +
> + /* Resume counting */
> + amd_pmu_global_enable_all(0);
I think this is broken vs perf_pmu_{dis,en}able(), note how
intel_pmu_handle_irq() saves/restores the enable state.
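A minimal sketch of that pattern, transplanted onto the helper names used
in this patch (the final fix may of course look different):

    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
    int pmu_enabled = cpuc->enabled;

    /* The PMU may already be disabled by perf_pmu_disable() */
    cpuc->enabled = 0;
    amd_pmu_global_disable_all();

    /* ... handle overflows ... */

    /* Restore the enable state the NMI interrupted instead of
     * unconditionally turning the PMU back on */
    cpuc->enabled = pmu_enabled;
    if (pmu_enabled)
            amd_pmu_global_enable_all(0);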
> +
> + return handled;
> +}
> +
> +DEFINE_STATIC_CALL(amd_pmu_handle_irq, x86_pmu_handle_irq);
> +
> /*
> * Because of NMI latency, if multiple PMC counters are active or other sources
> * of NMIs are received, the perf NMI handler can handle one or more overflowed
> @@ -741,7 +856,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
> int handled;
>
> /* Process any counter overflows */
> - handled = x86_pmu_handle_irq(regs);
> + handled = static_call(amd_pmu_handle_irq)(regs);
>
> /*
> * If a counter was handled, record a timestamp such that un-handled
> @@ -1041,6 +1156,8 @@ static int __init amd_core_pmu_init(void)
> static_call_update(amd_pmu_enable_all, amd_pmu_global_enable_all);
> static_call_update(amd_pmu_disable_all, amd_pmu_global_disable_all);
> static_call_update(amd_pmu_enable_event, amd_pmu_global_enable_event);
> + static_call_update(amd_pmu_has_overflow, amd_pmu_global_has_overflow);
> + static_call_update(amd_pmu_handle_irq, amd_pmu_global_handle_irq);
> }
Same, all this static_call() stuff is misguided.
Also, if you feel like it, you can create amd_pmu_v2.
On Thu, Mar 17, 2022 at 5:02 AM Peter Zijlstra <[email protected]> wrote:
>
> On Thu, Mar 17, 2022 at 11:58:35AM +0530, Sandipan Das wrote:
>
> > +static inline u64 amd_pmu_get_global_overflow(void)
> > +{
> > + u64 status;
> > +
> > + /* PerfCntrGlobalStatus is read-only */
> > + rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
> > +
> > + return status & amd_pmu_global_cntr_mask;
> > +}
> > +
> > +static inline void amd_pmu_ack_global_overflow(u64 status)
> > +{
> > + /*
> > + * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
> > + * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
> > + * clears the same bit in PerfCntrGlobalStatus
> > + */
> > +
> > + /* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
> > + status &= amd_pmu_global_cntr_mask;
> > + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
> > +}
> > +
> > +static bool amd_pmu_legacy_has_overflow(int idx)
> > +{
> > + u64 counter;
> > +
> > + rdmsrl(x86_pmu_event_addr(idx), counter);
> > +
> > + return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
> > +}
> > +
> > +static bool amd_pmu_global_has_overflow(int idx)
> > +{
> > + return amd_pmu_get_global_overflow() & BIT_ULL(idx);
> > +}
> > +
> > +DEFINE_STATIC_CALL(amd_pmu_has_overflow, amd_pmu_legacy_has_overflow);
> > +
> > /*
> > * When a PMC counter overflows, an NMI is used to process the event and
> > * reset the counter. NMI latency can result in the counter being updated
> > @@ -613,7 +653,6 @@ static inline void amd_pmu_set_global_ctl(u64 ctl)
> > static void amd_pmu_wait_on_overflow(int idx)
> > {
> > unsigned int i;
> > - u64 counter;
> >
> > /*
> > * Wait for the counter to be reset if it has overflowed. This loop
> > @@ -621,8 +660,7 @@ static void amd_pmu_wait_on_overflow(int idx)
> > * forever...
> > */
> > for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
> > - rdmsrl(x86_pmu_event_addr(idx), counter);
> > - if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
> > + if (!static_call(amd_pmu_has_overflow)(idx))
> > break;
> >
> > /* Might be in IRQ context, so can't sleep */
>
> This scares me... please tell me you fixed that mess.
>
> > @@ -718,6 +756,83 @@ static void amd_pmu_enable_event(struct perf_event *event)
> > static_call(amd_pmu_enable_event)(event);
> > }
> >
> > +static int amd_pmu_global_handle_irq(struct pt_regs *regs)
> > +{
> > + struct perf_sample_data data;
> > + struct cpu_hw_events *cpuc;
> > + struct hw_perf_event *hwc;
> > + struct perf_event *event;
> > + u64 val, status, mask;
> > + int handled = 0, idx;
> > +
> > + status = amd_pmu_get_global_overflow();
> > +
> > + /* Check if any overflows are pending */
> > + if (!status)
> > + return 0;
> > +
> > + /* Stop counting */
> > + amd_pmu_global_disable_all();
>
>
> This seems weird to me, I'd first disable it, then read status. MSR
> access is expensive, you want to shut down the PMU asap.
>
> Also, this is written like PMI would not be the primary NMI source,
> which seems somewhat unlikely.
>
> > +
> > + cpuc = this_cpu_ptr(&cpu_hw_events);
> > +
> > + /*
> > + * Some chipsets need to unmask the LVTPC in a particular spot
> > + * inside the nmi handler. As a result, the unmasking was
> > + * pushed into all the nmi handlers.
> > + *
> > + * This generic handler doesn't seem to have any issues where
> > + * the unmasking occurs so it was left at the top.
> > + *
> > + * N.B. Taken from x86_pmu_handle_irq()
> > + */
>
> Please write an AMD specific comment here. Note how 'recent' Intel chips
> ended up pushing this to the end of the handler. Verify with your
> hardware team where they want this and write as much of the rationale as
> you're allowed to share in the comment.
>
> > + apic_write(APIC_LVTPC, APIC_DM_NMI);
> > +
> > + for (idx = 0; idx < x86_pmu.num_counters; idx++) {
> > + if (!test_bit(idx, cpuc->active_mask))
> > + continue;
> > +
> > + event = cpuc->events[idx];
> > + hwc = &event->hw;
> > + val = x86_perf_event_update(event);
> > + mask = BIT_ULL(idx);
> > +
> > + if (!(status & mask))
> > + continue;
> > +
> > + /* Event overflow */
> > + handled++;
> > + perf_sample_data_init(&data, 0, hwc->last_period);
> > +
> > + if (!x86_perf_event_set_period(event))
> > + continue;
> > +
> > + if (perf_event_overflow(event, &data, regs))
> > + x86_pmu_stop(event, 0);
> > +
> > + status &= ~mask;
> > + }
> > +
> > + /*
> > + * It should never be the case that some overflows are not handled as
> > + * the corresponding PMCs are expected to be inactive according to the
> > + * active_mask
> > + */
> > + WARN_ON(status > 0);
> > +
> > + /* Clear overflow bits */
> > + amd_pmu_ack_global_overflow(~status);
> > +
> > + inc_irq_stat(apic_perf_irqs);
> > +
> > + /* Resume counting */
> > + amd_pmu_global_enable_all(0);
>
> I think this is broken vs perf_pmu_{dis,en}able(), note how
> intel_pmu_handle_irq() saves/restores the enable state.
>
> > +
> > + return handled;
> > +}
> > +
> > +DEFINE_STATIC_CALL(amd_pmu_handle_irq, x86_pmu_handle_irq);
> > +
> > /*
> > * Because of NMI latency, if multiple PMC counters are active or other sources
> > * of NMIs are received, the perf NMI handler can handle one or more overflowed
> > @@ -741,7 +856,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
> > int handled;
> >
> > /* Process any counter overflows */
> > - handled = x86_pmu_handle_irq(regs);
> > + handled = static_call(amd_pmu_handle_irq)(regs);
> >
> > /*
> > * If a counter was handled, record a timestamp such that un-handled
> > @@ -1041,6 +1156,8 @@ static int __init amd_core_pmu_init(void)
> > static_call_update(amd_pmu_enable_all, amd_pmu_global_enable_all);
> > static_call_update(amd_pmu_disable_all, amd_pmu_global_disable_all);
> > static_call_update(amd_pmu_enable_event, amd_pmu_global_enable_event);
> > + static_call_update(amd_pmu_has_overflow, amd_pmu_global_has_overflow);
> > + static_call_update(amd_pmu_handle_irq, amd_pmu_global_handle_irq);
> > }
>
> Same, all this static_call() stuff is misguided.
>
> Also, if you feel like it, you can create amd_pmu_v2.
Given the number of overrides, that would also make more sense to me.
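For illustration, an amd_pmu_v2-style split could override the x86_pmu
callback at init time instead of adding AMD-local static calls. Here
amd_pmu_v2_handle_irq is a hypothetical rename of amd_pmu_global_handle_irq,
and the feature check assumes the PerfMonV2 CPUID bit added earlier in
this series:

    if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
            /* Hypothetical: pick the PerfMonV2 overflow handler directly */
            x86_pmu.handle_irq = amd_pmu_v2_handle_irq;
    }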
On 3/17/2022 5:31 PM, Peter Zijlstra wrote:
> On Thu, Mar 17, 2022 at 11:58:35AM +0530, Sandipan Das wrote:
>
>> +static inline u64 amd_pmu_get_global_overflow(void)
>> +{
>> + u64 status;
>> +
>> + /* PerfCntrGlobalStatus is read-only */
>> + rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
>> +
>> + return status & amd_pmu_global_cntr_mask;
>> +}
>> +
>> +static inline void amd_pmu_ack_global_overflow(u64 status)
>> +{
>> + /*
>> + * PerfCntrGlobalStatus is read-only but an overflow acknowledgment
>> + * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr
>> + * clears the same bit in PerfCntrGlobalStatus
>> + */
>> +
>> + /* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
>> + status &= amd_pmu_global_cntr_mask;
>> + wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
>> +}
>> +
>> +static bool amd_pmu_legacy_has_overflow(int idx)
>> +{
>> + u64 counter;
>> +
>> + rdmsrl(x86_pmu_event_addr(idx), counter);
>> +
>> + return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1));
>> +}
>> +
>> +static bool amd_pmu_global_has_overflow(int idx)
>> +{
>> + return amd_pmu_get_global_overflow() & BIT_ULL(idx);
>> +}
>> +
>> +DEFINE_STATIC_CALL(amd_pmu_has_overflow, amd_pmu_legacy_has_overflow);
>> +
>> /*
>> * When a PMC counter overflows, an NMI is used to process the event and
>> * reset the counter. NMI latency can result in the counter being updated
>> @@ -613,7 +653,6 @@ static inline void amd_pmu_set_global_ctl(u64 ctl)
>> static void amd_pmu_wait_on_overflow(int idx)
>> {
>> unsigned int i;
>> - u64 counter;
>>
>> /*
>> * Wait for the counter to be reset if it has overflowed. This loop
>> @@ -621,8 +660,7 @@ static void amd_pmu_wait_on_overflow(int idx)
>> * forever...
>> */
>> for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
>> - rdmsrl(x86_pmu_event_addr(idx), counter);
>> - if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
>> + if (!static_call(amd_pmu_has_overflow)(idx))
>> break;
>>
>> /* Might be in IRQ context, so can't sleep */
>
> This scares me... please tell me you fixed that mess.
>
>> @@ -718,6 +756,83 @@ static void amd_pmu_enable_event(struct perf_event *event)
>> static_call(amd_pmu_enable_event)(event);
>> }
>>
>> +static int amd_pmu_global_handle_irq(struct pt_regs *regs)
>> +{
>> + struct perf_sample_data data;
>> + struct cpu_hw_events *cpuc;
>> + struct hw_perf_event *hwc;
>> + struct perf_event *event;
>> + u64 val, status, mask;
>> + int handled = 0, idx;
>> +
>> + status = amd_pmu_get_global_overflow();
>> +
>> + /* Check if any overflows are pending */
>> + if (!status)
>> + return 0;
>> +
>> + /* Stop counting */
>> + amd_pmu_global_disable_all();
>
>
> This seems weird to me, I'd first disable it, then read status. MSR
> access is expensive, you want to shut down the PMU asap.
>
> Also, this is written like PMI would not be the primary NMI source,
> which seems somewhat unlikely.
>
Yes, PMI is the primary NMI source. Will fix this.
>> +
>> + cpuc = this_cpu_ptr(&cpu_hw_events);
>> +
>> + /*
>> + * Some chipsets need to unmask the LVTPC in a particular spot
>> + * inside the nmi handler. As a result, the unmasking was
>> + * pushed into all the nmi handlers.
>> + *
>> + * This generic handler doesn't seem to have any issues where
>> + * the unmasking occurs so it was left at the top.
>> + *
>> + * N.B. Taken from x86_pmu_handle_irq()
>> + */
>
> Please write an AMD specific comment here. Note how 'recent' Intel chips
> ended up pushing this to the end of the handler. Verify with your
> hardware team where they want this and write as much of the rationale as
> you're allowed to share in the comment.
>
Sure. I'll follow-up on this.
>> + apic_write(APIC_LVTPC, APIC_DM_NMI);
>> +
>> + for (idx = 0; idx < x86_pmu.num_counters; idx++) {
>> + if (!test_bit(idx, cpuc->active_mask))
>> + continue;
>> +
>> + event = cpuc->events[idx];
>> + hwc = &event->hw;
>> + val = x86_perf_event_update(event);
>> + mask = BIT_ULL(idx);
>> +
>> + if (!(status & mask))
>> + continue;
>> +
>> + /* Event overflow */
>> + handled++;
>> + perf_sample_data_init(&data, 0, hwc->last_period);
>> +
>> + if (!x86_perf_event_set_period(event))
>> + continue;
>> +
>> + if (perf_event_overflow(event, &data, regs))
>> + x86_pmu_stop(event, 0);
>> +
>> + status &= ~mask;
>> + }
>> +
>> + /*
>> + * It should never be the case that some overflows are not handled as
>> + * the corresponding PMCs are expected to be inactive according to the
>> + * active_mask
>> + */
>> + WARN_ON(status > 0);
>> +
>> + /* Clear overflow bits */
>> + amd_pmu_ack_global_overflow(~status);
>> +
>> + inc_irq_stat(apic_perf_irqs);
>> +
>> + /* Resume counting */
>> + amd_pmu_global_enable_all(0);
>
> I think this is broken vs perf_pmu_{dis,en}able(), note how
> intel_pmu_handle_irq() saves/restores the enable state.
>
Yes, it is. Will fix it.
>> +
>> + return handled;
>> +}
>> +
>> +DEFINE_STATIC_CALL(amd_pmu_handle_irq, x86_pmu_handle_irq);
>> +
>> /*
>> * Because of NMI latency, if multiple PMC counters are active or other sources
>> * of NMIs are received, the perf NMI handler can handle one or more overflowed
>> @@ -741,7 +856,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
>> int handled;
>>
>> /* Process any counter overflows */
>> - handled = x86_pmu_handle_irq(regs);
>> + handled = static_call(amd_pmu_handle_irq)(regs);
>>
>> /*
>> * If a counter was handled, record a timestamp such that un-handled
>> @@ -1041,6 +1156,8 @@ static int __init amd_core_pmu_init(void)
>> static_call_update(amd_pmu_enable_all, amd_pmu_global_enable_all);
>> static_call_update(amd_pmu_disable_all, amd_pmu_global_disable_all);
>> static_call_update(amd_pmu_enable_event, amd_pmu_global_enable_event);
>> + static_call_update(amd_pmu_has_overflow, amd_pmu_global_has_overflow);
>> + static_call_update(amd_pmu_handle_irq, amd_pmu_global_handle_irq);
>> }
>
> Same, all this static_call() stuff is misguided.
>
> Also, if you feel like it, you can create amd_pmu_v2.
Sure.
- Sandipan
On 17/3/2022 2:28 pm, Sandipan Das wrote:
> + val = x86_perf_event_update(event);
The variable 'val' is set but not used.
> + mask = BIT_ULL(idx);
> +
> + if (!(status & mask))
Needed here?
> + continue;
On 3/22/2022 12:36 PM, Like Xu wrote:
> On 17/3/2022 2:28 pm, Sandipan Das wrote:
>> + val = x86_perf_event_update(event);
>
> The variable 'val' is set but not used.
>
>> + mask = BIT_ULL(idx);
>> +
>> + if (!(status & mask))
>
> Needed here?
>
If you are referring to this previous usage of 'val' within
x86_pmu_handle_irq():
if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
then, no. Instead of looking at bit 47 of the raw counter value,
one can look at the overflow bits of the global status register
to determine if a counter overflow has occurred.
Will remove 'val' as you suggested since it is no longer needed
for any decision making.
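Side by side, restating the two helpers from the patch for comparison
('overflowed' is just an illustrative local here):

    /* Legacy: infer overflow from the top bit (bit 47) of the raw count */
    rdmsrl(x86_pmu_event_addr(idx), val);
    overflowed = !(val & BIT_ULL(x86_pmu.cntval_bits - 1));

    /* PerfMonV2: read the per-PMC overflow bit from the global status MSR */
    rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
    overflowed = !!(status & BIT_ULL(idx));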