When use 1ms interval, very large number of counter happens
once in a while as below:
25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
Root cause is the race between irq handler
and pmu.read callback. Use spin lock to protect the sw&hw
counters.
Signed-off-by: Jiucheng Xu <[email protected]>
---
drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
index 0b24dee1ed3c..9b2e5d5c0626 100644
--- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -14,6 +14,7 @@
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
+#include <linux/spinlock.h>
#include <linux/sysfs.h>
#include <linux/types.h>
@@ -23,6 +24,7 @@ struct ddr_pmu {
struct pmu pmu;
struct dmc_info info;
struct dmc_counter counters; /* save counters from hw */
+ spinlock_t lock; /* protect hw/sw counter */
bool pmu_enabled;
struct device *dev;
char *name;
@@ -92,10 +94,12 @@ static void meson_ddr_perf_event_update(struct perf_event *event)
int idx;
int chann_nr = pmu->info.hw_info->chann_nr;
+ spin_lock(&pmu->lock);
/* get the remain counters in register. */
pmu->info.hw_info->get_counters(&pmu->info, &dc);
ddr_cnt_addition(&sum_dc, &pmu->counters, &dc, chann_nr);
+ spin_unlock(&pmu->lock);
switch (event->attr.config) {
case ALL_CHAN_COUNTER_ID:
@@ -355,6 +359,7 @@ static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
pmu = dmc_info_to_pmu(info);
+ spin_lock(&pmu->lock);
if (info->hw_info->irq_handler(info, &counters) != 0)
goto out;
@@ -372,6 +377,8 @@ static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
* it in ISR to support continue mode.
*/
info->hw_info->enable(info);
+out:
+ spin_unlock(&pmu->lock);
dev_dbg(pmu->dev, "counts: %llu %llu %llu, %llu, %llu, %llu\t\t"
"sum: %llu %llu %llu, %llu, %llu, %llu\n",
@@ -388,7 +395,7 @@ static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
pmu->counters.channel_cnt[1],
pmu->counters.channel_cnt[2],
pmu->counters.channel_cnt[3]);
-out:
+
return IRQ_HANDLED;
}
@@ -539,6 +546,7 @@ int meson_ddr_pmu_create(struct platform_device *pdev)
pmu->name = name;
pmu->dev = &pdev->dev;
pmu->pmu_enabled = false;
+ spin_lock_init(&pmu->lock);
platform_set_drvdata(pdev, pmu);
--
2.25.1
On Thu, Feb 09, 2023 at 07:54:02PM +0800, Jiucheng Xu wrote:
> When use 1ms interval, very large number of counter happens
> once in a while as below:
>
> 25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> 26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> 26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
>
> Root cause is the race between irq handler
> and pmu.read callback. Use spin lock to protect the sw&hw
> counters.
>
> Signed-off-by: Jiucheng Xu <[email protected]>
> ---
> drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
> 1 file changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> index 0b24dee1ed3c..9b2e5d5c0626 100644
> --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
> +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> @@ -14,6 +14,7 @@
> #include <linux/perf_event.h>
> #include <linux/platform_device.h>
> #include <linux/printk.h>
> +#include <linux/spinlock.h>
> #include <linux/sysfs.h>
> #include <linux/types.h>
>
> @@ -23,6 +24,7 @@ struct ddr_pmu {
> struct pmu pmu;
> struct dmc_info info;
> struct dmc_counter counters; /* save counters from hw */
> + spinlock_t lock; /* protect hw/sw counter */
> bool pmu_enabled;
> struct device *dev;
> char *name;
> @@ -92,10 +94,12 @@ static void meson_ddr_perf_event_update(struct perf_event *event)
> int idx;
> int chann_nr = pmu->info.hw_info->chann_nr;
>
> + spin_lock(&pmu->lock);
Why doesn't this need the _irqsave() variant if we're racing with the irq
handler?
Will
My Amlogic email box has some issues. Use my personal email
<[email protected]> to reply.
On 2023/3/27 22:10, Will Deacon wrote:
> [ EXTERNAL EMAIL ]
>
> On Thu, Feb 09, 2023 at 07:54:02PM +0800, Jiucheng Xu wrote:
>> When use 1ms interval, very large number of counter happens
>> once in a while as below:
>>
>> 25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
>> 26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
>> 26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
>>
>> Root cause is the race between irq handler
>> and pmu.read callback. Use spin lock to protect the sw&hw
>> counters.
>>
>> Signed-off-by: Jiucheng Xu <[email protected]>
>> ---
>> drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
>> 1 file changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
>> index 0b24dee1ed3c..9b2e5d5c0626 100644
>> --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
>> +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
>> @@ -14,6 +14,7 @@
>> #include <linux/perf_event.h>
>> #include <linux/platform_device.h>
>> #include <linux/printk.h>
>> +#include <linux/spinlock.h>
>> #include <linux/sysfs.h>
>> #include <linux/types.h>
>>
>> @@ -23,6 +24,7 @@ struct ddr_pmu {
>> struct pmu pmu;
>> struct dmc_info info;
>> struct dmc_counter counters; /* save counters from hw */
>> + spinlock_t lock; /* protect hw/sw counter */
>> bool pmu_enabled;
>> struct device *dev;
>> char *name;
>> @@ -92,10 +94,12 @@ static void meson_ddr_perf_event_update(struct perf_event *event)
>> int idx;
>> int chann_nr = pmu->info.hw_info->chann_nr;
>>
>> + spin_lock(&pmu->lock);
> Why doesn't this need the _irqsave() variant if we're racing with the irq
> handler?
>
> Will
I think meson_ddr_perf_event_update function is called with hard irq off.
So update function couldn't be interrupted by irq handler. Right?
Thanks,
Jiucheng
>
On Tue, Mar 28, 2023 at 10:29:04AM +0800, Jiucheng Xu wrote:
>
> My Amlogic email box has some issues. Use my personal email
> <[email protected]> to reply.
>
> On 2023/3/27 22:10, Will Deacon wrote:
> > [ EXTERNAL EMAIL ]
> >
> > On Thu, Feb 09, 2023 at 07:54:02PM +0800, Jiucheng Xu wrote:
> > > When use 1ms interval, very large number of counter happens
> > > once in a while as below:
> > >
> > > 25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> > > 26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> > > 26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> > >
> > > Root cause is the race between irq handler
> > > and pmu.read callback. Use spin lock to protect the sw&hw
> > > counters.
> > >
> > > Signed-off-by: Jiucheng Xu <[email protected]>
> > > ---
> > > drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
> > > 1 file changed, 9 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> > > index 0b24dee1ed3c..9b2e5d5c0626 100644
> > > --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
> > > +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> > > @@ -14,6 +14,7 @@
> > > #include <linux/perf_event.h>
> > > #include <linux/platform_device.h>
> > > #include <linux/printk.h>
> > > +#include <linux/spinlock.h>
> > > #include <linux/sysfs.h>
> > > #include <linux/types.h>
> > > @@ -23,6 +24,7 @@ struct ddr_pmu {
> > > struct pmu pmu;
> > > struct dmc_info info;
> > > struct dmc_counter counters; /* save counters from hw */
> > > + spinlock_t lock; /* protect hw/sw counter */
> > > bool pmu_enabled;
> > > struct device *dev;
> > > char *name;
> > > @@ -92,10 +94,12 @@ static void meson_ddr_perf_event_update(struct perf_event *event)
> > > int idx;
> > > int chann_nr = pmu->info.hw_info->chann_nr;
> > > + spin_lock(&pmu->lock);
> > Why doesn't this need the _irqsave() variant if we're racing with the irq
> > handler?
> >
> > Will
> I think meson_ddr_perf_event_update function is called with hard irq off.
> So update function couldn't be interrupted by irq handler. Right?
I'm just confused about the race, then. The commit message says you have a
race between an irq handler and a callback, which you fix with a spinlock
that isn't irq safe. So either the race is real and the lock needs to be
irqsafe, or the race is something else entirely, no?
Will