2022-08-03 16:19:16

by Stephane Eranian

[permalink] [raw]
Subject: [PATCH] perf/x86/intel/uncore: fix broken read_counter() for SNB IMC PMU

Existing code was generating bogus counts for the SNB IMC bandwidth counters:

$ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
1.000327813 1,024.03 MiB uncore_imc/data_reads/
1.000327813 20.73 MiB uncore_imc/data_writes/
2.000580153 261,120.00 MiB uncore_imc/data_reads/
2.000580153 23.28 MiB uncore_imc/data_writes/

The problem was introduced by commit:
07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC")

Where the read_counter callback was replace to point to the generic
uncore_mmio_read_counter() function.

The SNB IMC counters are freerunnig 32-bit counters laid out contiguously in
MMIO. But uncore_mmio_read_counter() is using a readq() call to read from
MMIO therefore reading 64-bit from MMIO. Although this is okay for the
uncore_perf_event_update() function because it is shifting the value based
on the actual counter width to compute a delta, it is not okay for the
uncore_pmu_event_start() which is simply reading the counter and therefore
priming the event->prev_count with a bogus value which is responsible for
causing bogus deltas in the perf stat command above.

The fix is to reintroduce the custom callback for read_counter for the SNB
IMC PMU and use readl() instead of readq(). With the change the output of
perf stat is back to normal:
$ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
1.000120987 296.94 MiB uncore_imc/data_reads/
1.000120987 138.42 MiB uncore_imc/data_writes/
2.000403144 175.91 MiB uncore_imc/data_reads/
2.000403144 68.50 MiB uncore_imc/data_writes/

Signed-off-by: Stephane Eranian <[email protected]>
---
arch/x86/events/intel/uncore_snb.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index ce440011cc4e..1ef4f7861e2e 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -841,6 +841,22 @@ int snb_pci2phy_map_init(int devid)
return 0;
}

+static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * SNB IMC counters are 32-bit and are laid out back to back
+ * in MMIO space. Therefore we must use a 32-bit accessor function
+ * using readq() from uncore_mmio_read_counter() causes problems
+ * because it is reading 64-bit at a time. This is okay for the
+ * uncore_perf_event_update() function because it drops the upper
+ * 32-bits but not okay for plain uncore_read_counter() as invoked
+ * in uncore_pmu_event_start().
+ */
+ return (u64)readl(box->io_addr + hwc->event_base);
+}
+
static struct pmu snb_uncore_imc_pmu = {
.task_ctx_nr = perf_invalid_context,
.event_init = snb_uncore_imc_event_init,
@@ -860,7 +876,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = {
.disable_event = snb_uncore_imc_disable_event,
.enable_event = snb_uncore_imc_enable_event,
.hw_config = snb_uncore_imc_hw_config,
- .read_counter = uncore_mmio_read_counter,
+ .read_counter = snb_uncore_imc_read_counter,
};

static struct intel_uncore_type snb_uncore_imc = {
--
2.37.1.455.g008518b4e5-goog



2022-08-04 13:39:52

by Liang, Kan

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel/uncore: fix broken read_counter() for SNB IMC PMU



On 2022-08-03 12:00 p.m., Stephane Eranian wrote:
> Existing code was generating bogus counts for the SNB IMC bandwidth counters:
>
> $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
> 1.000327813 1,024.03 MiB uncore_imc/data_reads/
> 1.000327813 20.73 MiB uncore_imc/data_writes/
> 2.000580153 261,120.00 MiB uncore_imc/data_reads/
> 2.000580153 23.28 MiB uncore_imc/data_writes/
>
> The problem was introduced by commit:
> 07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC")
>
> Where the read_counter callback was replace to point to the generic
> uncore_mmio_read_counter() function.
>
> The SNB IMC counters are freerunnig 32-bit counters laid out contiguously in
> MMIO. But uncore_mmio_read_counter() is using a readq() call to read from
> MMIO therefore reading 64-bit from MMIO. Although this is okay for the
> uncore_perf_event_update() function because it is shifting the value based
> on the actual counter width to compute a delta, it is not okay for the
> uncore_pmu_event_start() which is simply reading the counter and therefore
> priming the event->prev_count with a bogus value which is responsible for
> causing bogus deltas in the perf stat command above.
>
> The fix is to reintroduce the custom callback for read_counter for the SNB
> IMC PMU and use readl() instead of readq(). With the change the output of
> perf stat is back to normal:
> $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
> 1.000120987 296.94 MiB uncore_imc/data_reads/
> 1.000120987 138.42 MiB uncore_imc/data_writes/
> 2.000403144 175.91 MiB uncore_imc/data_reads/
> 2.000403144 68.50 MiB uncore_imc/data_writes/
>
> Signed-off-by: Stephane Eranian <[email protected]>

Reviewed-by: Kan Liang <[email protected]>

Thanks,
Kan

> ---
> arch/x86/events/intel/uncore_snb.c | 18 +++++++++++++++++-
> 1 file changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
> index ce440011cc4e..1ef4f7861e2e 100644
> --- a/arch/x86/events/intel/uncore_snb.c
> +++ b/arch/x86/events/intel/uncore_snb.c
> @@ -841,6 +841,22 @@ int snb_pci2phy_map_init(int devid)
> return 0;
> }
>
> +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> +
> + /*
> + * SNB IMC counters are 32-bit and are laid out back to back
> + * in MMIO space. Therefore we must use a 32-bit accessor function
> + * using readq() from uncore_mmio_read_counter() causes problems
> + * because it is reading 64-bit at a time. This is okay for the
> + * uncore_perf_event_update() function because it drops the upper
> + * 32-bits but not okay for plain uncore_read_counter() as invoked
> + * in uncore_pmu_event_start().
> + */
> + return (u64)readl(box->io_addr + hwc->event_base);
> +}
> +
> static struct pmu snb_uncore_imc_pmu = {
> .task_ctx_nr = perf_invalid_context,
> .event_init = snb_uncore_imc_event_init,
> @@ -860,7 +876,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = {
> .disable_event = snb_uncore_imc_disable_event,
> .enable_event = snb_uncore_imc_enable_event,
> .hw_config = snb_uncore_imc_hw_config,
> - .read_counter = uncore_mmio_read_counter,
> + .read_counter = snb_uncore_imc_read_counter,
> };
>
> static struct intel_uncore_type snb_uncore_imc = {

2022-08-16 05:43:03

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel/uncore: fix broken read_counter() for SNB IMC PMU

On Thu, Aug 4, 2022 at 6:09 AM Liang, Kan <[email protected]> wrote:
>
>
>
> On 2022-08-03 12:00 p.m., Stephane Eranian wrote:
> > Existing code was generating bogus counts for the SNB IMC bandwidth counters:
> >
> > $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
> > 1.000327813 1,024.03 MiB uncore_imc/data_reads/
> > 1.000327813 20.73 MiB uncore_imc/data_writes/
> > 2.000580153 261,120.00 MiB uncore_imc/data_reads/
> > 2.000580153 23.28 MiB uncore_imc/data_writes/
> >
> > The problem was introduced by commit:
> > 07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC")
> >
> > Where the read_counter callback was replace to point to the generic
> > uncore_mmio_read_counter() function.
> >
> > The SNB IMC counters are freerunnig 32-bit counters laid out contiguously in
> > MMIO. But uncore_mmio_read_counter() is using a readq() call to read from
> > MMIO therefore reading 64-bit from MMIO. Although this is okay for the
> > uncore_perf_event_update() function because it is shifting the value based
> > on the actual counter width to compute a delta, it is not okay for the
> > uncore_pmu_event_start() which is simply reading the counter and therefore
> > priming the event->prev_count with a bogus value which is responsible for
> > causing bogus deltas in the perf stat command above.
> >
> > The fix is to reintroduce the custom callback for read_counter for the SNB
> > IMC PMU and use readl() instead of readq(). With the change the output of
> > perf stat is back to normal:
> > $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
> > 1.000120987 296.94 MiB uncore_imc/data_reads/
> > 1.000120987 138.42 MiB uncore_imc/data_writes/
> > 2.000403144 175.91 MiB uncore_imc/data_reads/
> > 2.000403144 68.50 MiB uncore_imc/data_writes/
> >
> > Signed-off-by: Stephane Eranian <[email protected]>
>
> Reviewed-by: Kan Liang <[email protected]>
>
Any further comments?
Thanks.

2022-08-25 09:50:17

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel/uncore: fix broken read_counter() for SNB IMC PMU

On Mon, Aug 15, 2022 at 03:28:36PM -0700, Stephane Eranian wrote:
> On Thu, Aug 4, 2022 at 6:09 AM Liang, Kan <[email protected]> wrote:
> >
> >
> >
> > On 2022-08-03 12:00 p.m., Stephane Eranian wrote:
> > > Existing code was generating bogus counts for the SNB IMC bandwidth counters:
> > >
> > > $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
> > > 1.000327813 1,024.03 MiB uncore_imc/data_reads/
> > > 1.000327813 20.73 MiB uncore_imc/data_writes/
> > > 2.000580153 261,120.00 MiB uncore_imc/data_reads/
> > > 2.000580153 23.28 MiB uncore_imc/data_writes/
> > >
> > > The problem was introduced by commit:
> > > 07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC")
> > >
> > > Where the read_counter callback was replace to point to the generic
> > > uncore_mmio_read_counter() function.
> > >
> > > The SNB IMC counters are freerunnig 32-bit counters laid out contiguously in
> > > MMIO. But uncore_mmio_read_counter() is using a readq() call to read from
> > > MMIO therefore reading 64-bit from MMIO. Although this is okay for the
> > > uncore_perf_event_update() function because it is shifting the value based
> > > on the actual counter width to compute a delta, it is not okay for the
> > > uncore_pmu_event_start() which is simply reading the counter and therefore
> > > priming the event->prev_count with a bogus value which is responsible for
> > > causing bogus deltas in the perf stat command above.
> > >
> > > The fix is to reintroduce the custom callback for read_counter for the SNB
> > > IMC PMU and use readl() instead of readq(). With the change the output of
> > > perf stat is back to normal:
> > > $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
> > > 1.000120987 296.94 MiB uncore_imc/data_reads/
> > > 1.000120987 138.42 MiB uncore_imc/data_writes/
> > > 2.000403144 175.91 MiB uncore_imc/data_reads/
> > > 2.000403144 68.50 MiB uncore_imc/data_writes/
> > >
> > > Signed-off-by: Stephane Eranian <[email protected]>
> >
> > Reviewed-by: Kan Liang <[email protected]>
> >
> Any further comments?

Got lost in the holiday pile-up, applied!

2022-08-25 19:29:27

by Stephane Eranian

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel/uncore: fix broken read_counter() for SNB IMC PMU

On Thu, Aug 25, 2022 at 2:26 AM Peter Zijlstra <[email protected]> wrote:
>
> On Mon, Aug 15, 2022 at 03:28:36PM -0700, Stephane Eranian wrote:
> > On Thu, Aug 4, 2022 at 6:09 AM Liang, Kan <[email protected]> wrote:
> > >
> > >
> > >
> > > On 2022-08-03 12:00 p.m., Stephane Eranian wrote:
> > > > Existing code was generating bogus counts for the SNB IMC bandwidth counters:
> > > >
> > > > $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
> > > > 1.000327813 1,024.03 MiB uncore_imc/data_reads/
> > > > 1.000327813 20.73 MiB uncore_imc/data_writes/
> > > > 2.000580153 261,120.00 MiB uncore_imc/data_reads/
> > > > 2.000580153 23.28 MiB uncore_imc/data_writes/
> > > >
> > > > The problem was introduced by commit:
> > > > 07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC")
> > > >
> > > > Where the read_counter callback was replace to point to the generic
> > > > uncore_mmio_read_counter() function.
> > > >
> > > > The SNB IMC counters are freerunnig 32-bit counters laid out contiguously in
> > > > MMIO. But uncore_mmio_read_counter() is using a readq() call to read from
> > > > MMIO therefore reading 64-bit from MMIO. Although this is okay for the
> > > > uncore_perf_event_update() function because it is shifting the value based
> > > > on the actual counter width to compute a delta, it is not okay for the
> > > > uncore_pmu_event_start() which is simply reading the counter and therefore
> > > > priming the event->prev_count with a bogus value which is responsible for
> > > > causing bogus deltas in the perf stat command above.
> > > >
> > > > The fix is to reintroduce the custom callback for read_counter for the SNB
> > > > IMC PMU and use readl() instead of readq(). With the change the output of
> > > > perf stat is back to normal:
> > > > $ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
> > > > 1.000120987 296.94 MiB uncore_imc/data_reads/
> > > > 1.000120987 138.42 MiB uncore_imc/data_writes/
> > > > 2.000403144 175.91 MiB uncore_imc/data_reads/
> > > > 2.000403144 68.50 MiB uncore_imc/data_writes/
> > > >
> > > > Signed-off-by: Stephane Eranian <[email protected]>
> > >
> > > Reviewed-by: Kan Liang <[email protected]>
> > >
> > Any further comments?
>
> Got lost in the holiday pile-up, applied!

Thanks.

Subject: [tip: perf/urgent] perf/x86/intel/uncore: Fix broken read_counter() for SNB IMC PMU

The following commit has been merged into the perf/urgent branch of tip:

Commit-ID: 11745ecfe8fea4b4a4c322967a7605d2ecbd5080
Gitweb: https://git.kernel.org/tip/11745ecfe8fea4b4a4c322967a7605d2ecbd5080
Author: Stephane Eranian <[email protected]>
AuthorDate: Wed, 03 Aug 2022 09:00:31 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Sat, 27 Aug 2022 00:05:38 +02:00

perf/x86/intel/uncore: Fix broken read_counter() for SNB IMC PMU

Existing code was generating bogus counts for the SNB IMC bandwidth counters:

$ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
1.000327813 1,024.03 MiB uncore_imc/data_reads/
1.000327813 20.73 MiB uncore_imc/data_writes/
2.000580153 261,120.00 MiB uncore_imc/data_reads/
2.000580153 23.28 MiB uncore_imc/data_writes/

The problem was introduced by commit:
07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC")

Where the read_counter callback was replace to point to the generic
uncore_mmio_read_counter() function.

The SNB IMC counters are freerunnig 32-bit counters laid out contiguously in
MMIO. But uncore_mmio_read_counter() is using a readq() call to read from
MMIO therefore reading 64-bit from MMIO. Although this is okay for the
uncore_perf_event_update() function because it is shifting the value based
on the actual counter width to compute a delta, it is not okay for the
uncore_pmu_event_start() which is simply reading the counter and therefore
priming the event->prev_count with a bogus value which is responsible for
causing bogus deltas in the perf stat command above.

The fix is to reintroduce the custom callback for read_counter for the SNB
IMC PMU and use readl() instead of readq(). With the change the output of
perf stat is back to normal:
$ perf stat -a -I 1000 -e uncore_imc/data_reads/,uncore_imc/data_writes/
1.000120987 296.94 MiB uncore_imc/data_reads/
1.000120987 138.42 MiB uncore_imc/data_writes/
2.000403144 175.91 MiB uncore_imc/data_reads/
2.000403144 68.50 MiB uncore_imc/data_writes/

Fixes: 07ce734dd8ad ("perf/x86/intel/uncore: Clean up client IMC")
Signed-off-by: Stephane Eranian <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Kan Liang <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/events/intel/uncore_snb.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index ce44001..1ef4f78 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -841,6 +841,22 @@ int snb_pci2phy_map_init(int devid)
return 0;
}

+static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * SNB IMC counters are 32-bit and are laid out back to back
+ * in MMIO space. Therefore we must use a 32-bit accessor function
+ * using readq() from uncore_mmio_read_counter() causes problems
+ * because it is reading 64-bit at a time. This is okay for the
+ * uncore_perf_event_update() function because it drops the upper
+ * 32-bits but not okay for plain uncore_read_counter() as invoked
+ * in uncore_pmu_event_start().
+ */
+ return (u64)readl(box->io_addr + hwc->event_base);
+}
+
static struct pmu snb_uncore_imc_pmu = {
.task_ctx_nr = perf_invalid_context,
.event_init = snb_uncore_imc_event_init,
@@ -860,7 +876,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = {
.disable_event = snb_uncore_imc_disable_event,
.enable_event = snb_uncore_imc_enable_event,
.hw_config = snb_uncore_imc_hw_config,
- .read_counter = uncore_mmio_read_counter,
+ .read_counter = snb_uncore_imc_read_counter,
};

static struct intel_uncore_type snb_uncore_imc = {