2023-03-06 16:12:38

by Vernon Yang

[permalink] [raw]
Subject: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

After commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask
optimizations"), the cpumask size is divided into three different case,
so fix comment of cpumask_xxx correctly.

Signed-off-by: Vernon Yang <[email protected]>
---
include/linux/cpumask.h | 46 ++++++++++++++++++++---------------------
1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 8fbe76607965..248bdb1c50dc 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -155,7 +155,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
* cpumask_first - get the first cpu in a cpumask
* @srcp: the cpumask pointer
*
- * Returns >= nr_cpu_ids if no cpus set.
+ * Returns >= small_cpumask_bits if no cpus set.
*/
static inline unsigned int cpumask_first(const struct cpumask *srcp)
{
@@ -166,7 +166,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
* cpumask_first_zero - get the first unset cpu in a cpumask
* @srcp: the cpumask pointer
*
- * Returns >= nr_cpu_ids if all cpus are set.
+ * Returns >= small_cpumask_bits if all cpus are set.
*/
static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
{
@@ -178,7 +178,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
* @src1p: the first input
* @src2p: the second input
*
- * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
+ * Returns >= small_cpumask_bits if no cpus set in both. See also cpumask_next_and().
*/
static inline
unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
@@ -190,7 +190,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask
* cpumask_last - get the last CPU in a cpumask
* @srcp: - the cpumask pointer
*
- * Returns >= nr_cpumask_bits if no CPUs set.
+ * Returns >= small_cpumask_bits if no CPUs set.
*/
static inline unsigned int cpumask_last(const struct cpumask *srcp)
{
@@ -202,7 +202,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp)
* @n: the cpu prior to the place to search (ie. return will be > @n)
* @srcp: the cpumask pointer
*
- * Returns >= nr_cpu_ids if no further cpus set.
+ * Returns >= small_cpumask_bits if no further cpus set.
*/
static inline
unsigned int cpumask_next(int n, const struct cpumask *srcp)
@@ -218,7 +218,7 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp)
* @n: the cpu prior to the place to search (ie. return will be > @n)
* @srcp: the cpumask pointer
*
- * Returns >= nr_cpu_ids if no further cpus unset.
+ * Returns >= small_cpumask_bits if no further cpus unset.
*/
static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
{
@@ -258,7 +258,7 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp);
* @src1p: the first cpumask pointer
* @src2p: the second cpumask pointer
*
- * Returns >= nr_cpu_ids if no further cpus set in both.
+ * Returns >= small_cpumask_bits if no further cpus set in both.
*/
static inline
unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
@@ -276,7 +276,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
* @cpu: the (optionally unsigned) integer iterator
* @mask: the cpumask pointer
*
- * After the loop, cpu is >= nr_cpu_ids.
+ * After the loop, cpu is >= small_cpumask_bits.
*/
#define for_each_cpu(cpu, mask) \
for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
@@ -310,7 +310,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
*
* The implementation does not assume any bit in @mask is set (including @start).
*
- * After the loop, cpu is >= nr_cpu_ids.
+ * After the loop, cpu is >= small_cpumask_bits.
*/
#define for_each_cpu_wrap(cpu, mask, start) \
for_each_set_bit_wrap(cpu, cpumask_bits(mask), small_cpumask_bits, start)
@@ -327,7 +327,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
* for_each_cpu(cpu, &tmp)
* ...
*
- * After the loop, cpu is >= nr_cpu_ids.
+ * After the loop, cpu is >= small_cpumask_bits.
*/
#define for_each_cpu_and(cpu, mask1, mask2) \
for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
@@ -345,7 +345,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
* for_each_cpu(cpu, &tmp)
* ...
*
- * After the loop, cpu is >= nr_cpu_ids.
+ * After the loop, cpu is >= small_cpumask_bits.
*/
#define for_each_cpu_andnot(cpu, mask1, mask2) \
for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
@@ -375,7 +375,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
* @srcp: the cpumask pointer
* @cpu: the N'th cpu to find, starting from 0
*
- * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ * Returns >= small_cpumask_bits if such cpu doesn't exist.
*/
static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
{
@@ -388,7 +388,7 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s
* @srcp2: the cpumask pointer
* @cpu: the N'th cpu to find, starting from 0
*
- * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ * Returns >= small_cpumask_bits if such cpu doesn't exist.
*/
static inline
unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
@@ -404,7 +404,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
* @srcp2: the cpumask pointer
* @cpu: the N'th cpu to find, starting from 0
*
- * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ * Returns >= small_cpumask_bits if such cpu doesn't exist.
*/
static inline
unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
@@ -421,7 +421,7 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
* @srcp3: the cpumask pointer
* @cpu: the N'th cpu to find, starting from 0
*
- * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ * Returns >= small_cpumask_bits if such cpu doesn't exist.
*/
static __always_inline
unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1,
@@ -529,7 +529,7 @@ static inline void cpumask_setall(struct cpumask *dstp)
}

/**
- * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask
+ * cpumask_clear - clear all cpus (< large_cpumask_bits) in a cpumask
* @dstp: the cpumask pointer
*/
static inline void cpumask_clear(struct cpumask *dstp)
@@ -650,7 +650,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p,

/**
* cpumask_empty - *srcp == 0
- * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear.
+ * @srcp: the cpumask to that all cpus < small_cpumask_bits are clear.
*/
static inline bool cpumask_empty(const struct cpumask *srcp)
{
@@ -659,7 +659,7 @@ static inline bool cpumask_empty(const struct cpumask *srcp)

/**
* cpumask_full - *srcp == 0xFFFFFFFF...
- * @srcp: the cpumask to that all cpus < nr_cpu_ids are set.
+ * @srcp: the cpumask to that all cpus < nr_cpumask_bits are set.
*/
static inline bool cpumask_full(const struct cpumask *srcp)
{
@@ -668,7 +668,7 @@ static inline bool cpumask_full(const struct cpumask *srcp)

/**
* cpumask_weight - Count of bits in *srcp
- * @srcp: the cpumask to count bits (< nr_cpu_ids) in.
+ * @srcp: the cpumask to count bits (< small_cpumask_bits) in.
*/
static inline unsigned int cpumask_weight(const struct cpumask *srcp)
{
@@ -677,8 +677,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp)

/**
* cpumask_weight_and - Count of bits in (*srcp1 & *srcp2)
- * @srcp1: the cpumask to count bits (< nr_cpu_ids) in.
- * @srcp2: the cpumask to count bits (< nr_cpu_ids) in.
+ * @srcp1: the cpumask to count bits (< small_cpumask_bits) in.
+ * @srcp2: the cpumask to count bits (< small_cpumask_bits) in.
*/
static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
const struct cpumask *srcp2)
@@ -727,7 +727,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
* cpumask_any - pick a "random" cpu from *srcp
* @srcp: the input cpumask
*
- * Returns >= nr_cpu_ids if no cpus set.
+ * Returns >= small_cpumask_bits if no cpus set.
*/
#define cpumask_any(srcp) cpumask_first(srcp)

@@ -736,7 +736,7 @@ static inline void cpumask_copy(struct cpumask *dstp,
* @mask1: the first input cpumask
* @mask2: the second input cpumask
*
- * Returns >= nr_cpu_ids if no cpus set.
+ * Returns >= small_cpumask_bits if no cpus set.
*/
#define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2))

--
2.34.1



2023-03-06 16:50:10

by Yury Norov

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Tue, Mar 07, 2023 at 12:06:51AM +0800, Vernon Yang wrote:
> After commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask
> optimizations"), the cpumask size is divided into three different case,
> so fix comment of cpumask_xxx correctly.
>
> Signed-off-by: Vernon Yang <[email protected]>
> ---
> include/linux/cpumask.h | 46 ++++++++++++++++++++---------------------
> 1 file changed, 23 insertions(+), 23 deletions(-)
>
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index 8fbe76607965..248bdb1c50dc 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -155,7 +155,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
> * cpumask_first - get the first cpu in a cpumask
> * @srcp: the cpumask pointer
> *
> - * Returns >= nr_cpu_ids if no cpus set.
> + * Returns >= small_cpumask_bits if no cpus set.

There's no such thing like small_cpumask_bits. Here and everywhere,
nr_cpu_ids must be used.

Actually, before 596ff4a09b89 nr_cpumask_bits was deprecated, and it
must be like that for all users even now.

nr_cpumask_bits must be considered as internal cpumask parameter and
never referenced outside of cpumask code.

Thansk,
Yury

2023-03-06 16:52:08

by Jason A. Donenfeld

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Mon, Mar 6, 2023 at 5:39 PM Yury Norov <[email protected]> wrote:
>
> On Tue, Mar 07, 2023 at 12:06:51AM +0800, Vernon Yang wrote:
> > After commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask
> > optimizations"), the cpumask size is divided into three different case,
> > so fix comment of cpumask_xxx correctly.
> >
> > Signed-off-by: Vernon Yang <[email protected]>
> > ---
> > include/linux/cpumask.h | 46 ++++++++++++++++++++---------------------
> > 1 file changed, 23 insertions(+), 23 deletions(-)
> >
> > diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> > index 8fbe76607965..248bdb1c50dc 100644
> > --- a/include/linux/cpumask.h
> > +++ b/include/linux/cpumask.h
> > @@ -155,7 +155,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
> > * cpumask_first - get the first cpu in a cpumask
> > * @srcp: the cpumask pointer
> > *
> > - * Returns >= nr_cpu_ids if no cpus set.
> > + * Returns >= small_cpumask_bits if no cpus set.
>
> There's no such thing like small_cpumask_bits. Here and everywhere,
> nr_cpu_ids must be used.
>
> Actually, before 596ff4a09b89 nr_cpumask_bits was deprecated, and it
> must be like that for all users even now.
>
> nr_cpumask_bits must be considered as internal cpumask parameter and
> never referenced outside of cpumask code.

What's the right thing I should do, then, for wireguard's usage and
for random.c's usage? It sounds like you object to this patchset, but
if the problem is real, it sounds like I should at least fix the two
cases I maintain. What's the right check?

Jason

2023-03-06 16:55:46

by Yury Norov

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Mon, Mar 06, 2023 at 05:44:41PM +0100, Jason A. Donenfeld wrote:
> On Mon, Mar 6, 2023 at 5:39 PM Yury Norov <[email protected]> wrote:
> >
> > On Tue, Mar 07, 2023 at 12:06:51AM +0800, Vernon Yang wrote:
> > > After commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask
> > > optimizations"), the cpumask size is divided into three different case,
> > > so fix comment of cpumask_xxx correctly.
> > >
> > > Signed-off-by: Vernon Yang <[email protected]>
> > > ---
> > > include/linux/cpumask.h | 46 ++++++++++++++++++++---------------------
> > > 1 file changed, 23 insertions(+), 23 deletions(-)
> > >
> > > diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> > > index 8fbe76607965..248bdb1c50dc 100644
> > > --- a/include/linux/cpumask.h
> > > +++ b/include/linux/cpumask.h
> > > @@ -155,7 +155,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
> > > * cpumask_first - get the first cpu in a cpumask
> > > * @srcp: the cpumask pointer
> > > *
> > > - * Returns >= nr_cpu_ids if no cpus set.
> > > + * Returns >= small_cpumask_bits if no cpus set.
> >
> > There's no such thing like small_cpumask_bits. Here and everywhere,
> > nr_cpu_ids must be used.
> >
> > Actually, before 596ff4a09b89 nr_cpumask_bits was deprecated, and it
> > must be like that for all users even now.
> >
> > nr_cpumask_bits must be considered as internal cpumask parameter and
> > never referenced outside of cpumask code.
>
> What's the right thing I should do, then, for wireguard's usage and
> for random.c's usage? It sounds like you object to this patchset, but
> if the problem is real, it sounds like I should at least fix the two
> cases I maintain. What's the right check?

Everywhere outside of cpumasks internals use (cpu < nr_cpu_ids) to
check if the cpu is in a valid range, like:

cpu = cpumask_first(cpus);
if (cpu >= nr_cpu_ids)
pr_err("There's no cpus");



2023-03-06 17:05:51

by Jason A. Donenfeld

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Mon, Mar 6, 2023 at 5:54 PM Yury Norov <[email protected]> wrote:
>
> On Mon, Mar 06, 2023 at 05:44:41PM +0100, Jason A. Donenfeld wrote:
> > On Mon, Mar 6, 2023 at 5:39 PM Yury Norov <[email protected]> wrote:
> > >
> > > On Tue, Mar 07, 2023 at 12:06:51AM +0800, Vernon Yang wrote:
> > > > After commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask
> > > > optimizations"), the cpumask size is divided into three different case,
> > > > so fix comment of cpumask_xxx correctly.
> > > >
> > > > Signed-off-by: Vernon Yang <[email protected]>
> > > > ---
> > > > include/linux/cpumask.h | 46 ++++++++++++++++++++---------------------
> > > > 1 file changed, 23 insertions(+), 23 deletions(-)
> > > >
> > > > diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> > > > index 8fbe76607965..248bdb1c50dc 100644
> > > > --- a/include/linux/cpumask.h
> > > > +++ b/include/linux/cpumask.h
> > > > @@ -155,7 +155,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
> > > > * cpumask_first - get the first cpu in a cpumask
> > > > * @srcp: the cpumask pointer
> > > > *
> > > > - * Returns >= nr_cpu_ids if no cpus set.
> > > > + * Returns >= small_cpumask_bits if no cpus set.
> > >
> > > There's no such thing like small_cpumask_bits. Here and everywhere,
> > > nr_cpu_ids must be used.
> > >
> > > Actually, before 596ff4a09b89 nr_cpumask_bits was deprecated, and it
> > > must be like that for all users even now.
> > >
> > > nr_cpumask_bits must be considered as internal cpumask parameter and
> > > never referenced outside of cpumask code.
> >
> > What's the right thing I should do, then, for wireguard's usage and
> > for random.c's usage? It sounds like you object to this patchset, but
> > if the problem is real, it sounds like I should at least fix the two
> > cases I maintain. What's the right check?
>
> Everywhere outside of cpumasks internals use (cpu < nr_cpu_ids) to
> check if the cpu is in a valid range, like:
>
> cpu = cpumask_first(cpus);
> if (cpu >= nr_cpu_ids)
> pr_err("There's no cpus");

Oh, okay, so the ones for wireguard and random.c in this series are
correct then? If so, could you give a Reviewed-by:, and then I'll
queue those up in my respective trees.

Jason

2023-03-06 17:30:36

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Mon, Mar 6, 2023 at 8:07 AM Vernon Yang <[email protected]> wrote:
>
> After commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask
> optimizations"), the cpumask size is divided into three different case,
> so fix comment of cpumask_xxx correctly.

No no.

Those three cases are meant to be entirely internal optimizations.
They are literally just "preferred sizes".

The correct thing to do is always that

* Returns >= nr_cpu_ids if no cpus set.

because nr_cpu_ids is always the *smallest* of the access sizes.

That's exactly why it's a ">=". The CPU mask stuff has always
historically potentially used a different size than the actual
nr_cpu_ids, in that it could do word-sized scans even when the machine
might only have a smaller set of CPUs.

So the whole "small" vs "large" should be seen entirely internal to
cpumask.h. We should not expose it outside (sadly, that already
happened with "nr_cpumask_size", which also was that kind of thing.

So no, this patch is wrong. If anything, the comments should be strengthened.

Of course, right now Guenter seems to be reporting a problem with that
optimization, so unless I figure out what is going on I'll just need
to revert it anyway.

Linus

Linus

2023-03-06 17:46:11

by Vernon Yang

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Mon, Mar 06, 2023 at 08:39:03AM -0800, Yury Norov wrote:
> On Tue, Mar 07, 2023 at 12:06:51AM +0800, Vernon Yang wrote:
> > After commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask
> > optimizations"), the cpumask size is divided into three different case,
> > so fix comment of cpumask_xxx correctly.
> >
> > Signed-off-by: Vernon Yang <[email protected]>
> > ---
> > include/linux/cpumask.h | 46 ++++++++++++++++++++---------------------
> > 1 file changed, 23 insertions(+), 23 deletions(-)
> >
> > diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> > index 8fbe76607965..248bdb1c50dc 100644
> > --- a/include/linux/cpumask.h
> > +++ b/include/linux/cpumask.h
> > @@ -155,7 +155,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
> > * cpumask_first - get the first cpu in a cpumask
> > * @srcp: the cpumask pointer
> > *
> > - * Returns >= nr_cpu_ids if no cpus set.
> > + * Returns >= small_cpumask_bits if no cpus set.
>
> There's no such thing like small_cpumask_bits. Here and everywhere,
> nr_cpu_ids must be used.
>
> Actually, before 596ff4a09b89 nr_cpumask_bits was deprecated, and it
> must be like that for all users even now.
>
> nr_cpumask_bits must be considered as internal cpumask parameter and
> never referenced outside of cpumask code.

OK, I remove this path for next version.

>
> Thansk,
> Yury

2023-03-06 17:49:14

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Mon, Mar 6, 2023 at 9:29 AM Linus Torvalds
<[email protected]> wrote:
>
> The correct thing to do is always that
>
> * Returns >= nr_cpu_ids if no cpus set.
>
> because nr_cpu_ids is always the *smallest* of the access sizes.
>
> Of course, right now Guenter seems to be reporting a problem with that
> optimization, so unless I figure out what is going on I'll just need
> to revert it anyway.

Ahh. And the reason is exactly that people do *not* follow that
"Returns >= nr_cpu_ids" rule.

The drivers/char/random.c code is very wrong, and does

if (cpu == nr_cpumask_bits)
cpu = cpumask_first(&timer_cpus);

which fails miserably exactly because it doesn't use ">=".

Oh well.

I'll have to look for more of this pattern, but basically all those
"xyz_cpumask_bits" things were supposed to always be just internal to
that header file implementation, which is *exactly* why you have to
check the result for ">= nr_cpu_ids".

Linus

2023-03-06 18:03:26

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Mon, Mar 6, 2023 at 9:47 AM Linus Torvalds
<[email protected]> wrote:
>
> The drivers/char/random.c code is very wrong, and does
>
> if (cpu == nr_cpumask_bits)
> cpu = cpumask_first(&timer_cpus);
>
> which fails miserably exactly because it doesn't use ">=".

Turns out this "cpu == nr_cpumask_bits" pattern exists in a couple of
other places too.

It was always wrong, but it always just happened to work. The lpfc
SCSI driver in particular seems to *love* this pattern:

start_cpu = cpumask_next(new_cpu, cpu_present_mask);
if (start_cpu == nr_cpumask_bits)
start_cpu = first_cpu;

and has repeated it multiple times, all incorrect.

We do have "cpumask_next_wrap()", and that *seems* to be what the lpcf
driver actually wants to do.

.. and then we have kernel/sched/fair.c, which is actually not buggy,
just odd. It uses nr_cpumask_bits too, but it uses it purely for its
own internal nefarious reasons - it's not actually related to the
cpumask functions at all, its just used as a "not valid CPU number".

I think that scheduler use is still very *wrong*, but it doesn't look
actively buggy.

The other cases all look very buggy indeed, but yes, they happened to
work, and now they don't. So commit 596ff4a09b89 ("cpumask:
re-introduce constant-sized cpumask optimizations") did break them.

I'd rather fix these bad users than revert, but there does seem to be
an alarming number of these things, which worries me:

git grep '== nr_cpumask_bits'

and that's just checking for this *exact* thing.

Linus

2023-03-06 18:14:28

by Vernon Yang

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Mon, Mar 06, 2023 at 09:29:10AM -0800, Linus Torvalds wrote:
> On Mon, Mar 6, 2023 at 8:07 AM Vernon Yang <[email protected]> wrote:
> >
> > After commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask
> > optimizations"), the cpumask size is divided into three different case,
> > so fix comment of cpumask_xxx correctly.
>
> No no.
>
> Those three cases are meant to be entirely internal optimizations.
> They are literally just "preferred sizes".
>
> The correct thing to do is always that
>
> * Returns >= nr_cpu_ids if no cpus set.
>
> because nr_cpu_ids is always the *smallest* of the access sizes.
>
> That's exactly why it's a ">=". The CPU mask stuff has always
> historically potentially used a different size than the actual
> nr_cpu_ids, in that it could do word-sized scans even when the machine
> might only have a smaller set of CPUs.
>
> So the whole "small" vs "large" should be seen entirely internal to
> cpumask.h. We should not expose it outside (sadly, that already
> happened with "nr_cpumask_size", which also was that kind of thing.

I also just see nr_cpumask_size exposed to outside, so... Sorry.

>
> So no, this patch is wrong. If anything, the comments should be strengthened.
>
> Of course, right now Guenter seems to be reporting a problem with that
> optimization, so unless I figure out what is going on I'll just need
> to revert it anyway.

Yes, cause is the cpumask_next() calls find_next_bit(..., size, ...), and
find_next_bit(..., size, ...) if no bits are set, returns @size.

@size was a nr_cpumask_bits variable before, now it is small_cpumask_bits, and
when NR_CPUS < = BITS_PER_LONG, small_cpumask_bits is a macro, which is
replaced with NR_CPUS at compile, so only the NR_CPUS is returned when it no
further cpus set.

But before nr_cpumask_bits variable, it was read while running, and it was
mutable.

The random.c try_to_generate_entropy() to get first cpu by
`if (cpu == nr_cpumask_bits)`, but cpumask_next() alway return NR_CPUS,
nr_cpumask_bits is nr_cpu_ids, so pass NR_CPUS to add_timer_on(),

>
> Linus
>
> Linus

2023-03-06 18:41:30

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Mon, Mar 6, 2023 at 10:13 AM Vernon Yang <[email protected]> wrote:
>
> I also just see nr_cpumask_size exposed to outside, so...

Yeah, it's not great.

nr_cpumask_bits came out of the exact same "this is an internal value
that we use for optimized cpumask accesses", and existed exactly
because it *might* be the same as 'nr_cpu_ids', but it might also be a
simpler "small constant that is big enough" case.

It just depended on the exact kernel config which one was used.

But clearly that internal value then spread outside, and that then
caused problems when the internal implementation changed.

Linus

2023-03-06 21:28:21

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

On Mon, Mar 6, 2023 at 12:43 PM Linus Torvalds
<[email protected]> wrote:
>
> And when I say "all", I might be missing some others that don't match
> that exact pattern, of course.

Indeed.

I'm looking at wg_cpumask_next_online(), and this:

while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;

seems very dodgy indeed. I'm not convinced it might not cause an endless loop.

The code does seem to expect that the modulus ends up being zero, and
thus starting from the beginning. But that's not necessarily at all
true.

For example, maybe you have CONFIG_NR_CPUS set to some odd value like
6, because who knows, you decided that you are on some mobile platform
that has a BIG.little setup with 4 little cores and 2 big ones.

But your're then running that on a machine that only has 4 cores.

And to make matters worse, you have decided to offline all but one of
those cores, for whatever reason. So only bit #0 is actually set in
'cpu_online_mask'.

End result:

- nr_cpumask_bits is 4, because that's how many CPU cores you have.

- cpumask_next(cpu, cpu_online_mask) will return 6 if 'cpu' >=0,
because it's looking for the next online CPU, doesn't find it, and it
uses that optimized version that uses the compile-time constant of
maximum CPU's rather than bothering with some run-time constant.

- cpumask_test_cpu(2, cpu_online_mask) returns false, because CPU#2
is not online. It *could* be, but it's not.

now that loop is:

while (unlikely(cpu2 is offline))
cpu = 6 % 4;

and it will just loop forever on trying to get the "next" cpu, but
that will always remain that offline CPU #2.

I *think* what you want is just that same "find next wrapping CPU",
but I find it very odd that you use a potentially horrendously slow
modulus operation for it, and a while loop.

IOW, the more obvious code would have been just

cpu = cpumask_next(cpu-1, cpu_online_mask);
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(cpu_online_mask);

which seems much more straigthforward, and avoids both a pointless
while loop and a potentially expensive modulus.

And yes, I think that "cpumask_next -> cpumask_first" wrapping search
pattern should probably have a better helper, but even without the
helper the above code seems simpler and more logical than the odd code
that it has now.

Now, I suspect you have to hit a few fairly odd situations to actually
get the above endless loop, but I don't think they have to necessarily
be quite as made-up as my example above.

And I suspect you wrote it that odd way because you expect that the
first "cpumask_test_cpu()" always succeeds, so the whole while-loop
was then a complete afterthought, and any oddity there was just
"nobody cares". Except for the fact that it does seem to be possibly
an endless loop.

[ Adding back lkml to the participants, just because I think this was
an interesting case of cpumask operation mis-use. Even if it probably
doesn't cause issues in practice, it's the kind of code that *can*
cause problems ]

Linus

2023-03-07 17:58:19

by Jason A. Donenfeld

[permalink] [raw]
Subject: Re: [PATCH 5/5] cpumask: fix comment of cpumask_xxx

Hi Linus,

On Mon, Mar 6, 2023 at 10:28 PM Linus Torvalds
<[email protected]> wrote:
>
> I'm looking at wg_cpumask_next_online(), and this:
>
> while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
> cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
>
> seems very dodgy indeed. I'm not convinced it might not cause an endless loop.

Indeed this code is crap and wrong in multiple ways. I can probably
simplify to something like

static inline int wg_cpumask_next_online(int *last_cpu)
{
int cpu = cpumask_next(*last_cpu, cpu_online_mask);
if (cpu >= nr_cpu_ids)
cpumask_first(cpu_online_mask);

as you suggested, which is indeed a lot more straightforward.

I'll get this all cleaned up.

Jason