Switch subsystem to use atomic find_bit() or atomic iterators as
appropriate.
Signed-off-by: Yury Norov <[email protected]>
---
drivers/perf/arm-cci.c | 23 +++++------------------
drivers/perf/arm-ccn.c | 10 ++--------
drivers/perf/arm_dmc620_pmu.c | 9 ++-------
drivers/perf/arm_pmuv3.c | 8 ++------
4 files changed, 11 insertions(+), 39 deletions(-)
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 61de861eaf91..70fbf9d09d37 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
return CCI400_PMU_CYCLE_CNTR_IDX;
}
- for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
- if (!test_and_set_bit(idx, hw->used_mask))
- return idx;
-
- /* No counters available */
- return -EAGAIN;
+ idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
+ return idx < CCI_PMU_CNTR_LAST(cci_pmu) + 1 ? idx : -EAGAIN;
}
static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event)
@@ -802,13 +798,8 @@ static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *ev
if (cci_pmu->model->get_event_idx)
return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event);
- /* Generic code to find an unused idx from the mask */
- for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
- if (!test_and_set_bit(idx, hw->used_mask))
- return idx;
-
- /* No counters available */
- return -EAGAIN;
+ idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
+ return idx < CCI_PMU_CNTR_LAST(cci_pmu) + 1 ? idx : -EAGAIN;
}
static int pmu_map_event(struct perf_event *event)
@@ -861,12 +852,8 @@ static void pmu_free_irq(struct cci_pmu *cci_pmu)
{
int i;
- for (i = 0; i < cci_pmu->nr_irqs; i++) {
- if (!test_and_clear_bit(i, &cci_pmu->active_irqs))
- continue;
-
+ for_each_test_and_clear_bit(i, &cci_pmu->active_irqs, cci_pmu->nr_irqs)
free_irq(cci_pmu->irqs[i], cci_pmu);
- }
}
static u32 pmu_read_counter(struct perf_event *event)
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 728d13d8e98a..d657701b1f23 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -589,15 +589,9 @@ static const struct attribute_group *arm_ccn_pmu_attr_groups[] = {
static int arm_ccn_pmu_alloc_bit(unsigned long *bitmap, unsigned long size)
{
- int bit;
-
- do {
- bit = find_first_zero_bit(bitmap, size);
- if (bit >= size)
- return -EAGAIN;
- } while (test_and_set_bit(bit, bitmap));
+ int bit = find_and_set_bit(bitmap, size);
- return bit;
+ return bit < size ? bit : -EAGAIN;
}
/* All RN-I and RN-D nodes have identical PMUs */
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 30cea6859574..e41c84dabc3e 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
end_idx = DMC620_PMU_MAX_COUNTERS;
}
- for (idx = start_idx; idx < end_idx; ++idx) {
- if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
- return idx;
- }
-
- /* The counters are all in use. */
- return -EAGAIN;
+ idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
+ return idx < end_idx ? idx : -EAGAIN;
}
static inline
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 18b91b56af1d..784b0383e9f8 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -825,13 +825,9 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
struct arm_pmu *cpu_pmu)
{
- int idx;
+ int idx = find_and_set_next_bit(cpuc->used_mask, cpu_pmu->num_events, ARMV8_IDX_COUNTER0);
- for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
- if (!test_and_set_bit(idx, cpuc->used_mask))
- return idx;
- }
- return -EAGAIN;
+ return idx < cpu_pmu->num_events ? idx : -EAGAIN;
}
static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
--
2.39.2
On Sat, Nov 18, 2023 at 07:50:38AM -0800, Yury Norov wrote:
> Switch subsystem to use atomic find_bit() or atomic iterators as
> appropriate.
>
> Signed-off-by: Yury Norov <[email protected]>
> ---
> drivers/perf/arm-cci.c | 23 +++++------------------
> drivers/perf/arm-ccn.c | 10 ++--------
> drivers/perf/arm_dmc620_pmu.c | 9 ++-------
> drivers/perf/arm_pmuv3.c | 8 ++------
> 4 files changed, 11 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
> index 61de861eaf91..70fbf9d09d37 100644
> --- a/drivers/perf/arm-cci.c
> +++ b/drivers/perf/arm-cci.c
> @@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
> return CCI400_PMU_CYCLE_CNTR_IDX;
> }
>
> - for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
> - if (!test_and_set_bit(idx, hw->used_mask))
> - return idx;
> -
> - /* No counters available */
> - return -EAGAIN;
> + idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
CCI400_PMU_CNTR0_IDX is defined as 1, so isn't this wrong?
[...]
> diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> index 30cea6859574..e41c84dabc3e 100644
> --- a/drivers/perf/arm_dmc620_pmu.c
> +++ b/drivers/perf/arm_dmc620_pmu.c
> @@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
> end_idx = DMC620_PMU_MAX_COUNTERS;
> }
>
> - for (idx = start_idx; idx < end_idx; ++idx) {
> - if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
> - return idx;
> - }
> -
> - /* The counters are all in use. */
> - return -EAGAIN;
> + idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
It might just be me, but I'd find this a tonne easier to read if you swapped
the last two arguments around so that the offset came before the limit in
the new function.
Will
On Tue, Nov 21, 2023 at 08:16:13AM -0800, Yury Norov wrote:
> On Tue, Nov 21, 2023 at 03:53:44PM +0000, Will Deacon wrote:
> > On Sat, Nov 18, 2023 at 07:50:38AM -0800, Yury Norov wrote:
> > > Switch subsystem to use atomic find_bit() or atomic iterators as
> > > appropriate.
> > >
> > > Signed-off-by: Yury Norov <[email protected]>
> > > ---
> > > drivers/perf/arm-cci.c | 23 +++++------------------
> > > drivers/perf/arm-ccn.c | 10 ++--------
> > > drivers/perf/arm_dmc620_pmu.c | 9 ++-------
> > > drivers/perf/arm_pmuv3.c | 8 ++------
> > > 4 files changed, 11 insertions(+), 39 deletions(-)
> > >
> > > diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
> > > index 61de861eaf91..70fbf9d09d37 100644
> > > --- a/drivers/perf/arm-cci.c
> > > +++ b/drivers/perf/arm-cci.c
> > > @@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
> > > return CCI400_PMU_CYCLE_CNTR_IDX;
> > > }
> > >
> > > - for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
> > > - if (!test_and_set_bit(idx, hw->used_mask))
> > > - return idx;
> > > -
> > > - /* No counters available */
> > > - return -EAGAIN;
> > > + idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
> >
> > CCI400_PMU_CNTR0_IDX is defined as 1, so isn't this wrong?
>
> You're right. Will fix in v2
>
> > [...]
> >
> > > diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> > > index 30cea6859574..e41c84dabc3e 100644
> > > --- a/drivers/perf/arm_dmc620_pmu.c
> > > +++ b/drivers/perf/arm_dmc620_pmu.c
> > > @@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
> > > end_idx = DMC620_PMU_MAX_COUNTERS;
> > > }
> > >
> > > - for (idx = start_idx; idx < end_idx; ++idx) {
> > > - if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
> > > - return idx;
> > > - }
> > > -
> > > - /* The counters are all in use. */
> > > - return -EAGAIN;
> > > + idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
> >
> > It might just be me, but I'd find this a tonne easier to read if you swapped
> > the last two arguments around so that the offset came before the limit in
> > the new function.
>
> I personally agree, but we already have find_next_*_bit(addr, nbits, offset)
> functions, and having atomic versions of the same with different order
> of arguments will make it even more messy...
Urgh, and there's loads of them too :(
Will
On Tue, Nov 21, 2023 at 03:53:44PM +0000, Will Deacon wrote:
> On Sat, Nov 18, 2023 at 07:50:38AM -0800, Yury Norov wrote:
> > Switch subsystem to use atomic find_bit() or atomic iterators as
> > appropriate.
> >
> > Signed-off-by: Yury Norov <[email protected]>
> > ---
> > drivers/perf/arm-cci.c | 23 +++++------------------
> > drivers/perf/arm-ccn.c | 10 ++--------
> > drivers/perf/arm_dmc620_pmu.c | 9 ++-------
> > drivers/perf/arm_pmuv3.c | 8 ++------
> > 4 files changed, 11 insertions(+), 39 deletions(-)
> >
> > diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
> > index 61de861eaf91..70fbf9d09d37 100644
> > --- a/drivers/perf/arm-cci.c
> > +++ b/drivers/perf/arm-cci.c
> > @@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
> > return CCI400_PMU_CYCLE_CNTR_IDX;
> > }
> >
> > - for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
> > - if (!test_and_set_bit(idx, hw->used_mask))
> > - return idx;
> > -
> > - /* No counters available */
> > - return -EAGAIN;
> > + idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
>
> CCI400_PMU_CNTR0_IDX is defined as 1, so isn't this wrong?
You're right. Will fix in v2
> [...]
>
> > diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> > index 30cea6859574..e41c84dabc3e 100644
> > --- a/drivers/perf/arm_dmc620_pmu.c
> > +++ b/drivers/perf/arm_dmc620_pmu.c
> > @@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
> > end_idx = DMC620_PMU_MAX_COUNTERS;
> > }
> >
> > - for (idx = start_idx; idx < end_idx; ++idx) {
> > - if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
> > - return idx;
> > - }
> > -
> > - /* The counters are all in use. */
> > - return -EAGAIN;
> > + idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
>
> It might just be me, but I'd find this a tonne easier to read if you swapped
> the last two arguments around so that the offset came before the limit in
> the new function.
I personally agree, but we already have find_next_*_bit(addr, nbits, offset)
functions, and having atomic versions of the same with different order
of arguments will make it even more messy...
Thanks,
Yury