On uniprocessor builds, it is currently assumed that any cpumask will
contain the single CPU: cpu0. This assumption is used to provide
optimised implementations.
This assumption also turns out to be wrong: it ignores the fact that users
can provide empty cpumasks, which can result in bugs as explained in [1].
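As a minimal sketch of the failure mode (do_something() here is a hypothetical
callback, not an existing kernel function), the current NR_CPUS == 1 macros
still iterate once over an empty mask:

	cpumask_var_t mask;
	int cpu;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))	/* mask starts out empty */
		return -ENOMEM;
	for_each_cpu(cpu, mask)
		do_something(cpu);	/* UP build: still executed once, for cpu0 */
	free_cpumask_var(mask);

With the generic SMP implementation the loop body is never entered, which is
the behaviour this series makes uniform.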
This series introduces some basic tests, and updates the optimisations
for uniprocessor builds.
[1] https://lore.kernel.org/all/[email protected]/
Changes since v2:
Link: https://lore.kernel.org/all/[email protected]/
- Put new tests after patch fixes
- Update for_each_* macros
Changes since v1:
Link: https://lore.kernel.org/all/[email protected]/
- Place tests in lib/test_cpumask.c
- Drop the modified UP code in favor of the generic SMP implementation
- Update declaration of cpumask_next_wrap()
Sander Vanheule (4):
cpumask: Fix invalid uniprocessor mask assumption
lib/test: Introduce cpumask KUnit test suite
cpumask: Add UP optimised for_each_*_cpu versions
cpumask: Update cpumask_next_wrap() signature
include/linux/cpumask.h | 89 +++------------------------
lib/Kconfig.debug | 9 +++
lib/Makefile | 4 +-
lib/test_cpumask.c | 132 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 151 insertions(+), 83 deletions(-)
create mode 100644 lib/test_cpumask.c
--
2.36.1
The extern specifier is not needed for this declaration, so drop it. The
function also depends only on the input parameters, and has no side
effects, so it can be marked __pure like other functions in cpumask.h.
Signed-off-by: Sander Vanheule <[email protected]>
---
include/linux/cpumask.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 7ccddbc27ac3..f37ce00741a3 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -210,7 +210,7 @@ int cpumask_any_distribute(const struct cpumask *srcp);
(cpu) = cpumask_next_zero((cpu), (mask)), \
(cpu) < nr_cpu_ids;)
-extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
+int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
/**
* for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
--
2.36.1
On uniprocessor builds, any CPU mask is assumed to contain exactly one
CPU (cpu0). This assumption ignores the existence of empty masks,
resulting in incorrect behaviour.
cpumask_first_zero(), cpumask_next_zero(), and for_each_cpu_not() don't
provide behaviour matching the assumption that a UP mask is always "1",
and instead provide behaviour matching the empty mask.
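For example, with NR_CPUS == 1 and an empty mask, the current stubs give
mutually inconsistent answers (illustrative only):

	cpumask_clear(&mask);
	cpumask_first(&mask);			/* 0: pretends cpu0 is set */
	cpumask_first_zero(&mask);		/* 0: treats the mask as empty */
	cpumask_first_zero(cpu_possible_mask);	/* 0: wrong, should be >= nr_cpu_ids */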
Drop the incorrectly optimised code and use the generic implementations
in all cases.
Signed-off-by: Sander Vanheule <[email protected]>
---
Changes since v1:
- Drop UP implementations instead of trying to fix them
---
include/linux/cpumask.h | 80 -----------------------------------------
lib/Makefile | 3 +-
2 files changed, 1 insertion(+), 82 deletions(-)
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index fe29ac7cc469..d6add0e29ef4 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -116,85 +116,6 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
return cpu;
}
-#if NR_CPUS == 1
-/* Uniprocessor. Assume all masks are "1". */
-static inline unsigned int cpumask_first(const struct cpumask *srcp)
-{
- return 0;
-}
-
-static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
-{
- return 0;
-}
-
-static inline unsigned int cpumask_first_and(const struct cpumask *srcp1,
- const struct cpumask *srcp2)
-{
- return 0;
-}
-
-static inline unsigned int cpumask_last(const struct cpumask *srcp)
-{
- return 0;
-}
-
-/* Valid inputs for n are -1 and 0. */
-static inline unsigned int cpumask_next(int n, const struct cpumask *srcp)
-{
- return n+1;
-}
-
-static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
-{
- return n+1;
-}
-
-static inline unsigned int cpumask_next_and(int n,
- const struct cpumask *srcp,
- const struct cpumask *andp)
-{
- return n+1;
-}
-
-static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask,
- int start, bool wrap)
-{
- /* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */
- return (wrap && n == 0);
-}
-
-/* cpu must be a valid cpu, ie 0, so there's no other choice. */
-static inline unsigned int cpumask_any_but(const struct cpumask *mask,
- unsigned int cpu)
-{
- return 1;
-}
-
-static inline unsigned int cpumask_local_spread(unsigned int i, int node)
-{
- return 0;
-}
-
-static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
- const struct cpumask *src2p) {
- return cpumask_first_and(src1p, src2p);
-}
-
-static inline int cpumask_any_distribute(const struct cpumask *srcp)
-{
- return cpumask_first(srcp);
-}
-
-#define for_each_cpu(cpu, mask) \
- for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
-#define for_each_cpu_not(cpu, mask) \
- for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
-#define for_each_cpu_wrap(cpu, mask, start) \
- for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
-#define for_each_cpu_and(cpu, mask1, mask2) \
- for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2)
-#else
/**
* cpumask_first - get the first cpu in a cpumask
* @srcp: the cpumask pointer
@@ -324,7 +245,6 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
for ((cpu) = -1; \
(cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \
(cpu) < nr_cpu_ids;)
-#endif /* SMP */
#define CPU_BITS_NONE \
{ \
diff --git a/lib/Makefile b/lib/Makefile
index 89fcae891361..6f26a429115b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -34,10 +34,9 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o \
- buildid.o
+ buildid.o cpumask.o
lib-$(CONFIG_PRINTK) += dump_stack.o
-lib-$(CONFIG_SMP) += cpumask.o
lib-y += kobject.o klist.o
obj-y += lockref.o
--
2.36.1
Add a basic suite of tests for cpumask, providing some tests for empty
and completely filled cpumasks.
Signed-off-by: Sander Vanheule <[email protected]>
---
Changes since v2:
- Rework for_each_* test macros, as suggested by Yury
---
lib/Kconfig.debug | 9 ++++
lib/Makefile | 1 +
lib/test_cpumask.c | 132 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 142 insertions(+)
create mode 100644 lib/test_cpumask.c
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b8cc65d22169..85f2eb5c0b07 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2100,6 +2100,15 @@ config LKDTM
Documentation on how to use the module can be found in
Documentation/fault-injection/provoke-crashes.rst
+config TEST_CPUMASK
+ tristate "cpumask tests" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Enable to turn on cpumask tests, running at boot or module load time.
+
+ If unsure, say N.
+
config TEST_LIST_SORT
tristate "Linked list sorting test" if !KUNIT_ALL_TESTS
depends on KUNIT
diff --git a/lib/Makefile b/lib/Makefile
index 6f26a429115b..5abd7b2064f1 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o
obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o
+obj-$(CONFIG_TEST_CPUMASK) += test_cpumask.o
CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE)
obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o
#
diff --git a/lib/test_cpumask.c b/lib/test_cpumask.c
new file mode 100644
index 000000000000..3f43b9a6548c
--- /dev/null
+++ b/lib/test_cpumask.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit tests for cpumask.
+ *
+ * Author: Sander Vanheule <[email protected]>
+ */
+
+#include <kunit/test.h>
+#include <linux/cpumask.h>
+
+#define EXPECT_FOR_EACH_CPU_EQ(test, mask) \
+ do { \
+ const cpumask_t *m = (mask); \
+ int mask_weight = cpumask_weight(m); \
+ int cpu, iter = 0; \
+ for_each_cpu(cpu, m) \
+ iter++; \
+ KUNIT_EXPECT_EQ((test), mask_weight, iter); \
+ } while (0)
+
+#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask) \
+ do { \
+ const cpumask_t *m = (mask); \
+ int mask_weight = cpumask_weight(m); \
+ int cpu, iter = 0; \
+ for_each_cpu_not(cpu, m) \
+ iter++; \
+ KUNIT_EXPECT_EQ((test), nr_cpu_ids - mask_weight, iter); \
+ } while (0)
+
+#define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask) \
+ do { \
+ const cpumask_t *m = (mask); \
+ int mask_weight = cpumask_weight(m); \
+ int cpu, iter = 0; \
+ for_each_cpu_wrap(cpu, m, nr_cpu_ids / 2) \
+ iter++; \
+ KUNIT_EXPECT_EQ((test), mask_weight, iter); \
+ } while (0)
+
+#define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name) \
+ do { \
+ const cpumask_t *m = cpu_##name##_mask; \
+ int mask_weight = cpumask_weight(m); \
+ int cpu, iter = 0; \
+ for_each_##name##_cpu(cpu) \
+ iter++; \
+ KUNIT_EXPECT_EQ((test), mask_weight, iter); \
+ } while (0)
+
+static cpumask_t mask_empty;
+static cpumask_t mask_all;
+
+static void test_cpumask_weight(struct kunit *test)
+{
+ KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty));
+ KUNIT_EXPECT_TRUE(test, cpumask_full(cpu_possible_mask));
+ KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all));
+
+ KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty));
+ KUNIT_EXPECT_EQ(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask));
+ KUNIT_EXPECT_EQ(test, nr_cpumask_bits, cpumask_weight(&mask_all));
+}
+
+static void test_cpumask_first(struct kunit *test)
+{
+ KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first(&mask_empty));
+ KUNIT_EXPECT_EQ(test, 0, cpumask_first(cpu_possible_mask));
+
+ KUNIT_EXPECT_EQ(test, 0, cpumask_first_zero(&mask_empty));
+ KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask));
+}
+
+static void test_cpumask_last(struct kunit *test)
+{
+ KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty));
+ KUNIT_EXPECT_EQ(test, nr_cpumask_bits - 1, cpumask_last(cpu_possible_mask));
+}
+
+static void test_cpumask_next(struct kunit *test)
+{
+ KUNIT_EXPECT_EQ(test, 0, cpumask_next_zero(-1, &mask_empty));
+ KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask));
+
+ KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next(-1, &mask_empty));
+ KUNIT_EXPECT_EQ(test, 0, cpumask_next(-1, cpu_possible_mask));
+}
+
+static void test_cpumask_iterators(struct kunit *test)
+{
+ EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty);
+ EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty);
+ EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty);
+
+ EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty);
+ EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty);
+ EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty);
+}
+
+static void test_cpumask_iterators_builtin(struct kunit *test)
+{
+ EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, possible);
+ EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, online);
+ EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, present);
+}
+
+static int test_cpumask_init(struct kunit *test)
+{
+ cpumask_clear(&mask_empty);
+ cpumask_setall(&mask_all);
+
+ return 0;
+}
+
+static struct kunit_case test_cpumask_cases[] = {
+ KUNIT_CASE(test_cpumask_weight),
+ KUNIT_CASE(test_cpumask_first),
+ KUNIT_CASE(test_cpumask_last),
+ KUNIT_CASE(test_cpumask_next),
+ KUNIT_CASE(test_cpumask_iterators),
+ KUNIT_CASE(test_cpumask_iterators_builtin),
+ {}
+};
+
+static struct kunit_suite test_cpumask_suite = {
+ .name = "cpumask",
+ .init = test_cpumask_init,
+ .test_cases = test_cpumask_cases,
+};
+kunit_test_suite(test_cpumask_suite);
+
+MODULE_LICENSE("GPL");
--
2.36.1
On Sun, 2022-06-05 at 08:22 +0200, Sander Vanheule wrote:
> Add a basic suite of tests for cpumask, providing some tests for empty
> and completely filled cpumasks.
>
> Signed-off-by: Sander Vanheule <[email protected]>
> ---
> +static void test_cpumask_iterators(struct kunit *test)
> +{
> + EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty);
> + EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty);
> + EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty);
> +
> + EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty);
> + EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty);
> + EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty);
This should be one block of &mask_empty and one block of cpu_possible_mask. I'll fix this in v4
together with the other comments, or send an update tomorrow.
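For reference, the corrected test case would look roughly like this (untested):

	static void test_cpumask_iterators(struct kunit *test)
	{
		EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty);
		EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty);
		EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty);

		EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask);
		EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask);
		EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask);
	}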
Best,
Sander
Hi Sander,
I love your patch! Yet something to improve:
[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v5.18 next-20220603]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/intel-lab-lkp/linux/commits/Sander-Vanheule/cpumask-Fix-invalid-uniprocessor-assumptions/20220606-004959
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: i386-randconfig-a009 (https://download.01.org/0day-ci/archive/20220606/[email protected]/config)
compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/37b3f10c4604ea299b454f39ac5ba3cad903ae16
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Sander-Vanheule/cpumask-Fix-invalid-uniprocessor-assumptions/20220606-004959
git checkout 37b3f10c4604ea299b454f39ac5ba3cad903ae16
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>
All errors (new ones prefixed by >>):
ld: arch/x86/kernel/cpu/cacheinfo.o: in function `__cache_amd_cpumap_setup':
arch/x86/kernel/cpu/cacheinfo.c:890: undefined reference to `cpu_llc_shared_map'
>> ld: arch/x86/kernel/cpu/cacheinfo.c:895: undefined reference to `cpu_llc_shared_map'
--
0-DAY CI Kernel Test Service
https://01.org/lkp
On uniprocessor builds, the following loops will always run over a mask
that contains one enabled CPU (cpu0):
- for_each_possible_cpu
- for_each_online_cpu
- for_each_present_cpu
Provide uniprocessor-specific macros for these loops that always run
exactly once.
Signed-off-by: Sander Vanheule <[email protected]>
---
include/linux/cpumask.h | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d6add0e29ef4..7ccddbc27ac3 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -731,9 +731,16 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
/* First bits of cpu_bit_bitmap are in fact unset. */
#define cpu_none_mask to_cpumask(cpu_bit_bitmap[0])
+#if NR_CPUS == 1
+/* Uniprocessor: the possible/online/present masks are always "1" */
+#define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++)
+#define for_each_online_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++)
+#define for_each_present_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++)
+#else
#define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
#define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask)
#define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask)
+#endif
/* Wrappers for arch boot code to manipulate normally-constant masks */
void init_cpu_present(const struct cpumask *src);
--
2.36.1
On Sun, Jun 05, 2022 at 08:22:39AM +0200, Sander Vanheule wrote:
> Add a basic suite of tests for cpumask, providing some tests for empty
> and completely filled cpumasks.
Always in favour of a new test!
Reviewed-by: Andy Shevchenko <[email protected]>
> Signed-off-by: Sander Vanheule <[email protected]>
> ---
> Changes since v2:
> - Rework for_each_* test macros, as suggested by Yury
> ---
> lib/Kconfig.debug | 9 ++++
> lib/Makefile | 1 +
> lib/test_cpumask.c | 132 +++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 142 insertions(+)
> create mode 100644 lib/test_cpumask.c
>
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index b8cc65d22169..85f2eb5c0b07 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -2100,6 +2100,15 @@ config LKDTM
> Documentation on how to use the module can be found in
> Documentation/fault-injection/provoke-crashes.rst
>
> +config TEST_CPUMASK
> + tristate "cpumask tests" if !KUNIT_ALL_TESTS
> + depends on KUNIT
> + default KUNIT_ALL_TESTS
> + help
> + Enable to turn on cpumask tests, running at boot or module load time.
> +
> + If unsure, say N.
> +
> config TEST_LIST_SORT
> tristate "Linked list sorting test" if !KUNIT_ALL_TESTS
> depends on KUNIT
> diff --git a/lib/Makefile b/lib/Makefile
> index 6f26a429115b..5abd7b2064f1 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -99,6 +99,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o
> obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o
> obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
> obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o
> +obj-$(CONFIG_TEST_CPUMASK) += test_cpumask.o
> CFLAGS_test_fprobe.o += $(CC_FLAGS_FTRACE)
> obj-$(CONFIG_FPROBE_SANITY_TEST) += test_fprobe.o
> #
> diff --git a/lib/test_cpumask.c b/lib/test_cpumask.c
> new file mode 100644
> index 000000000000..3f43b9a6548c
> --- /dev/null
> +++ b/lib/test_cpumask.c
> @@ -0,0 +1,132 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * KUnit tests for cpumask.
> + *
> + * Author: Sander Vanheule <[email protected]>
> + */
> +
> +#include <kunit/test.h>
> +#include <linux/cpumask.h>
> +
> +#define EXPECT_FOR_EACH_CPU_EQ(test, mask) \
> + do { \
> + const cpumask_t *m = (mask); \
> + int mask_weight = cpumask_weight(m); \
> + int cpu, iter = 0; \
> + for_each_cpu(cpu, m) \
> + iter++; \
> + KUNIT_EXPECT_EQ((test), mask_weight, iter); \
> + } while (0)
> +
> +#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask) \
> + do { \
> + const cpumask_t *m = (mask); \
> + int mask_weight = cpumask_weight(m); \
> + int cpu, iter = 0; \
> + for_each_cpu_not(cpu, m) \
> + iter++; \
> + KUNIT_EXPECT_EQ((test), nr_cpu_ids - mask_weight, iter); \
> + } while (0)
> +
> +#define EXPECT_FOR_EACH_CPU_WRAP_EQ(test, mask) \
> + do { \
> + const cpumask_t *m = (mask); \
> + int mask_weight = cpumask_weight(m); \
> + int cpu, iter = 0; \
> + for_each_cpu_wrap(cpu, m, nr_cpu_ids / 2) \
> + iter++; \
> + KUNIT_EXPECT_EQ((test), mask_weight, iter); \
> + } while (0)
> +
> +#define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name) \
> + do { \
> + const cpumask_t *m = cpu_##name##_mask; \
> + int mask_weight = cpumask_weight(m); \
> + int cpu, iter = 0; \
> + for_each_##name##_cpu(cpu) \
> + iter++; \
> + KUNIT_EXPECT_EQ((test), mask_weight, iter); \
> + } while (0)
> +
> +static cpumask_t mask_empty;
> +static cpumask_t mask_all;
> +
> +static void test_cpumask_weight(struct kunit *test)
> +{
> + KUNIT_EXPECT_TRUE(test, cpumask_empty(&mask_empty));
> + KUNIT_EXPECT_TRUE(test, cpumask_full(cpu_possible_mask));
> + KUNIT_EXPECT_TRUE(test, cpumask_full(&mask_all));
> +
> + KUNIT_EXPECT_EQ(test, 0, cpumask_weight(&mask_empty));
> + KUNIT_EXPECT_EQ(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask));
> + KUNIT_EXPECT_EQ(test, nr_cpumask_bits, cpumask_weight(&mask_all));
> +}
> +
> +static void test_cpumask_first(struct kunit *test)
> +{
> + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first(&mask_empty));
> + KUNIT_EXPECT_EQ(test, 0, cpumask_first(cpu_possible_mask));
> +
> + KUNIT_EXPECT_EQ(test, 0, cpumask_first_zero(&mask_empty));
> + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_first_zero(cpu_possible_mask));
> +}
> +
> +static void test_cpumask_last(struct kunit *test)
> +{
> + KUNIT_EXPECT_LE(test, nr_cpumask_bits, cpumask_last(&mask_empty));
> + KUNIT_EXPECT_EQ(test, nr_cpumask_bits - 1, cpumask_last(cpu_possible_mask));
> +}
> +
> +static void test_cpumask_next(struct kunit *test)
> +{
> + KUNIT_EXPECT_EQ(test, 0, cpumask_next_zero(-1, &mask_empty));
> + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next_zero(-1, cpu_possible_mask));
> +
> + KUNIT_EXPECT_LE(test, nr_cpu_ids, cpumask_next(-1, &mask_empty));
> + KUNIT_EXPECT_EQ(test, 0, cpumask_next(-1, cpu_possible_mask));
> +}
> +
> +static void test_cpumask_iterators(struct kunit *test)
> +{
> + EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty);
> + EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty);
> + EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty);
> +
> + EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty);
> + EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty);
> + EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty);
> +}
> +
> +static void test_cpumask_iterators_builtin(struct kunit *test)
> +{
> + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, possible);
> + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, online);
> + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, present);
> +}
> +
> +static int test_cpumask_init(struct kunit *test)
> +{
> + cpumask_clear(&mask_empty);
> + cpumask_setall(&mask_all);
> +
> + return 0;
> +}
> +
> +static struct kunit_case test_cpumask_cases[] = {
> + KUNIT_CASE(test_cpumask_weight),
> + KUNIT_CASE(test_cpumask_first),
> + KUNIT_CASE(test_cpumask_last),
> + KUNIT_CASE(test_cpumask_next),
> + KUNIT_CASE(test_cpumask_iterators),
> + KUNIT_CASE(test_cpumask_iterators_builtin),
> + {}
> +};
> +
> +static struct kunit_suite test_cpumask_suite = {
> + .name = "cpumask",
> + .init = test_cpumask_init,
> + .test_cases = test_cpumask_cases,
> +};
> +kunit_test_suite(test_cpumask_suite);
> +
> +MODULE_LICENSE("GPL");
> --
> 2.36.1
>
--
With Best Regards,
Andy Shevchenko
On Sun, Jun 05, 2022 at 08:22:41AM +0200, Sander Vanheule wrote:
> The extern specifier is not needed for this declaration, so drop it. The
> function also depends only on the input parameters, and has no side
> effects, so it can be marked __pure like other functions in cpumask.h
Missed period.
Reviewed-by: Andy Shevchenko <[email protected]>
> Signed-off-by: Sander Vanheule <[email protected]>
> ---
> include/linux/cpumask.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index 7ccddbc27ac3..f37ce00741a3 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -210,7 +210,7 @@ int cpumask_any_distribute(const struct cpumask *srcp);
> (cpu) = cpumask_next_zero((cpu), (mask)), \
> (cpu) < nr_cpu_ids;)
>
> -extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
> +int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
>
> /**
> * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
> --
> 2.36.1
>
--
With Best Regards,
Andy Shevchenko
On Mon, Jun 06, 2022 at 08:48:05AM +0800, kernel test robot wrote:
> Hi Sander,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on akpm-mm/mm-everything]
> [also build test ERROR on linus/master v5.18 next-20220603]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Sander-Vanheule/cpumask-Fix-invalid-uniprocessor-assumptions/20220606-004959
> base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> config: i386-randconfig-a009 (https://download.01.org/0day-ci/archive/20220606/[email protected]/config)
> compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
> reproduce (this is a W=1 build):
> # https://github.com/intel-lab-lkp/linux/commit/37b3f10c4604ea299b454f39ac5ba3cad903ae16
> git remote add linux-review https://github.com/intel-lab-lkp/linux
> git fetch --no-tags linux-review Sander-Vanheule/cpumask-Fix-invalid-uniprocessor-assumptions/20220606-004959
> git checkout 37b3f10c4604ea299b454f39ac5ba3cad903ae16
> # save the config file
> mkdir build_dir && cp config build_dir/.config
> make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash
>
> If you fix the issue, kindly add following tag where applicable
> Reported-by: kernel test robot <[email protected]>
>
> All errors (new ones prefixed by >>):
>
> ld: arch/x86/kernel/cpu/cacheinfo.o: in function `__cache_amd_cpumap_setup':
> arch/x86/kernel/cpu/cacheinfo.c:890: undefined reference to `cpu_llc_shared_map'
> >> ld: arch/x86/kernel/cpu/cacheinfo.c:895: undefined reference to `cpu_llc_shared_map'
Seems like somewhere we need stubs for UP builds for those cache related functions.
--
With Best Regards,
Andy Shevchenko
Hi Andy,
On Mon, 2022-06-06 at 13:36 +0300, Andy Shevchenko wrote:
> On Sun, Jun 05, 2022 at 08:22:39AM +0200, Sander Vanheule wrote:
> > Add a basic suite of tests for cpumask, providing some tests for empty
> > and completely filled cpumasks.
>
> Always in favour of a new test!
> Reviewed-by: Andy Shevchenko <[email protected]>
Thanks!
>
> > Signed-off-by: Sander Vanheule <[email protected]>
> > ---
> >
> > +#define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name) \
> > + do { \
> > + const cpumask_t *m = cpu_##name##_mask; \
> > + int mask_weight = cpumask_weight(m); \
Given the documentation for num_online_cpus(), I've replaced this with
int mask_weight = num_##name##_cpus();
and added guards around the test for the dynamic builtin masks.
> > + int cpu, iter = 0; \
> > + for_each_##name##_cpu(cpu) \
> > + iter++; \
> > + KUNIT_EXPECT_EQ((test), mask_weight, iter); \
> > + } while (0)
> >
> > +static void test_cpumask_iterators_builtin(struct kunit *test)
> > +{
> > + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, possible);
cpu_hotplug_disable();
> > + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, online);
> > + EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, present);
cpu_hotplug_enable();
> > +}
This should ensure the tests will not randomly fail if they happen to run while a CPU is going
online or offline.
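Put together, I expect the v4 version to look roughly like this (untested
sketch; lib/test_cpumask.c would also need to include <linux/cpu.h> for the
hotplug helpers):

	#define EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, name)		\
		do {							\
			int mask_weight = num_##name##_cpus();		\
			int cpu, iter = 0;				\
			for_each_##name##_cpu(cpu)			\
				iter++;					\
			KUNIT_EXPECT_EQ((test), mask_weight, iter);	\
		} while (0)

	static void test_cpumask_iterators_builtin(struct kunit *test)
	{
		EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, possible);

		/* Ensure the dynamic masks can't change while being counted */
		cpu_hotplug_disable();
		EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, online);
		EXPECT_FOR_EACH_CPU_BUILTIN_EQ(test, present);
		cpu_hotplug_enable();
	}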
Best,
Sander
On Sun, Jun 05, 2022 at 08:22:38AM +0200, Sander Vanheule wrote:
> On uniprocessor builds, any CPU mask is assumed to contain exactly one
> CPU (cpu0). This assumption ignores the existence of empty masks,
> resulting in incorrect behaviour.
> cpumask_first_zero(), cpumask_next_zero(), and for_each_cpu_not() don't
> provide behaviour matching the assumption that a UP mask is always "1",
> and instead provide behaviour matching the empty mask.
>
> Drop the incorrectly optimised code and use the generic implementations
> in all cases.
>
> Signed-off-by: Sander Vanheule <[email protected]>
> ---
> Changes since v1:
> - Drop UP implementations instead of trying to fix them
> ---
> include/linux/cpumask.h | 80 -----------------------------------------
> lib/Makefile | 3 +-
> 2 files changed, 1 insertion(+), 82 deletions(-)
>
> diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
> index fe29ac7cc469..d6add0e29ef4 100644
> --- a/include/linux/cpumask.h
> +++ b/include/linux/cpumask.h
> @@ -116,85 +116,6 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
> return cpu;
> }
>
> -#if NR_CPUS == 1
> -/* Uniprocessor. Assume all masks are "1". */
> -static inline unsigned int cpumask_first(const struct cpumask *srcp)
> -{
> - return 0;
> -}
> -
> -static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
> -{
> - return 0;
> -}
> -
> -static inline unsigned int cpumask_first_and(const struct cpumask *srcp1,
> - const struct cpumask *srcp2)
> -{
> - return 0;
> -}
> -
> -static inline unsigned int cpumask_last(const struct cpumask *srcp)
> -{
> - return 0;
> -}
> -
> -/* Valid inputs for n are -1 and 0. */
> -static inline unsigned int cpumask_next(int n, const struct cpumask *srcp)
> -{
> - return n+1;
> -}
> -
> -static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
> -{
> - return n+1;
> -}
> -
> -static inline unsigned int cpumask_next_and(int n,
> - const struct cpumask *srcp,
> - const struct cpumask *andp)
> -{
> - return n+1;
> -}
> -
> -static inline unsigned int cpumask_next_wrap(int n, const struct cpumask *mask,
> - int start, bool wrap)
> -{
> - /* cpu0 unless stop condition, wrap and at cpu0, then nr_cpumask_bits */
> - return (wrap && n == 0);
> -}
> -
> -/* cpu must be a valid cpu, ie 0, so there's no other choice. */
> -static inline unsigned int cpumask_any_but(const struct cpumask *mask,
> - unsigned int cpu)
> -{
> - return 1;
> -}
> -
> -static inline unsigned int cpumask_local_spread(unsigned int i, int node)
> -{
> - return 0;
> -}
> -
> -static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
> - const struct cpumask *src2p) {
> - return cpumask_first_and(src1p, src2p);
> -}
> -
> -static inline int cpumask_any_distribute(const struct cpumask *srcp)
> -{
> - return cpumask_first(srcp);
> -}
It looks like cpumask_local_spread, cpumask_any_and_distribute and
cpumask_any_distribute were correct and better optimized in the UP case;
cpumask_local_spread for sure. I think it's worth keeping them optimized.
Thanks,
Yury
> -#define for_each_cpu(cpu, mask) \
> - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
> -#define for_each_cpu_not(cpu, mask) \
> - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
> -#define for_each_cpu_wrap(cpu, mask, start) \
> - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
> -#define for_each_cpu_and(cpu, mask1, mask2) \
> - for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2)
> -#else
> /**
> * cpumask_first - get the first cpu in a cpumask
> * @srcp: the cpumask pointer
> @@ -324,7 +245,6 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
> for ((cpu) = -1; \
> (cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \
> (cpu) < nr_cpu_ids;)
> -#endif /* SMP */
>
> #define CPU_BITS_NONE \
> { \
> diff --git a/lib/Makefile b/lib/Makefile
> index 89fcae891361..6f26a429115b 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -34,10 +34,9 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
> is_single_threaded.o plist.o decompress.o kobject_uevent.o \
> earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
> nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o \
> - buildid.o
> + buildid.o cpumask.o
>
> lib-$(CONFIG_PRINTK) += dump_stack.o
> -lib-$(CONFIG_SMP) += cpumask.o
>
> lib-y += kobject.o klist.o
> obj-y += lockref.o
> --
> 2.36.1
On Mon, 2022-06-06 at 12:14 -0700, Yury Norov wrote:
> On Sun, Jun 05, 2022 at 08:22:38AM +0200, Sander Vanheule wrote:
> > On uniprocessor builds, any CPU mask is assumed to contain exactly one
> > CPU (cpu0). This assumption ignores the existence of empty masks,
> > resulting in incorrect behaviour.
> > cpumask_first_zero(), cpumask_next_zero(), and for_each_cpu_not() don't
> > provide behaviour matching the assumption that a UP mask is always "1",
> > and instead provide behaviour matching the empty mask.
> >
> > Drop the incorrectly optimised code and use the generic implementations
> > in all cases.
> >
> > Signed-off-by: Sander Vanheule <[email protected]>
> > ---
[...]
> > -static inline unsigned int cpumask_local_spread(unsigned int i, int node)
> > -{
> > - return 0;
> > -}
> > -
> > -static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
> > - const struct cpumask *src2p) {
> > - return cpumask_first_and(src1p, src2p);
> > -}
> > -
> > -static inline int cpumask_any_distribute(const struct cpumask *srcp)
> > -{
> > - return cpumask_first(srcp);
> > -}
>
> It looks like cpumask_local_spread, cpumask_any_and_distribute and
> cpumask_any_distribute were correct and better optimized in UP case.
> cpumask_local_spread - for sure. I think it's worth keeping them
> optimized.
Yes, these were correct and we can keep them. I will have to add an #ifdef CONFIG_SMP (or #if
NR_CPUS > 1) guard in lib/cpumask.c around these functions, so they don't collide with the inlined
UP versions.
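Concretely, something like the following (untested sketch): keep the three UP
inlines in include/linux/cpumask.h,

	#if NR_CPUS == 1
	static inline unsigned int cpumask_local_spread(unsigned int i, int node)
	{
		return 0;
	}

	static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
						     const struct cpumask *src2p)
	{
		return cpumask_first_and(src1p, src2p);
	}

	static inline int cpumask_any_distribute(const struct cpumask *srcp)
	{
		return cpumask_first(srcp);
	}
	#endif

and wrap the out-of-line definitions of these three functions in lib/cpumask.c
in a matching #ifdef CONFIG_SMP / #endif, so only one set of definitions is
visible at a time.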
Best,
Sander
On Mon, 2022-06-06 at 13:40 +0300, Andy Shevchenko wrote:
> On Mon, Jun 06, 2022 at 08:48:05AM +0800, kernel test robot wrote:
> > Hi Sander,
> >
> > I love your patch! Yet something to improve:
> >
> > [auto build test ERROR on akpm-mm/mm-everything]
> > [also build test ERROR on linus/master v5.18 next-20220603]
> > [If your patch is applied to the wrong git tree, kindly drop us a note.
> > And when submitting patch, we suggest to use '--base' as documented in
> > https://git-scm.com/docs/git-format-patch]
> >
> > url:
> > https://github.com/intel-lab-lkp/linux/commits/Sander-Vanheule/cpumask-Fix-invalid-uniprocessor-assumptions/20220606-004959
> > base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> > config: i386-randconfig-a009
> > (https://download.01.org/0day-ci/archive/20220606/[email protected]/config)
> > compiler: gcc-11 (Debian 11.3.0-1) 11.3.0
> > reproduce (this is a W=1 build):
> > # https://github.com/intel-lab-lkp/linux/commit/37b3f10c4604ea299b454f39ac5ba3cad903ae16
> > git remote add linux-review https://github.com/intel-lab-lkp/linux
> > git fetch --no-tags linux-review Sander-Vanheule/cpumask-Fix-invalid-uniprocessor-
> > assumptions/20220606-004959
> > git checkout 37b3f10c4604ea299b454f39ac5ba3cad903ae16
> > # save the config file
> > mkdir build_dir && cp config build_dir/.config
> > make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash
> >
> > If you fix the issue, kindly add following tag where applicable
> > Reported-by: kernel test robot <[email protected]>
> >
> > All errors (new ones prefixed by >>):
> >
> > ld: arch/x86/kernel/cpu/cacheinfo.o: in function `__cache_amd_cpumap_setup':
> > arch/x86/kernel/cpu/cacheinfo.c:890: undefined reference to `cpu_llc_shared_map'
> > > > ld: arch/x86/kernel/cpu/cacheinfo.c:895: undefined reference to `cpu_llc_shared_map'
>
> Seems like somewhere we need stubs for UP builds for those cache related functions.
>
I think I finally figured out what's going on here.
cpu_llc_shared_map is always declared with DECLARE_PER_CPU_READ_MOSTLY, but it is only defined in
arch/x86/kernel/smpboot.c, which is built only when CONFIG_SMP=y.
cpu_llc_shared_map is accessed in a for_each_cpu loop:
for_each_cpu(i, cpu_llc_shared_mask(cpu))
The old (wrong) UP implementation would ignore the mask, so cpu_llc_shared_map access was optimised
out and the linker would never see that symbol.
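To illustrate, the relevant difference between the two macros is roughly:

	/* Old NR_CPUS == 1 macro: the mask is only evaluated as a discarded
	 * (void) expression, so the address computed by
	 * cpu_llc_shared_mask(cpu) is optimised away and no reference to
	 * cpu_llc_shared_map is ever emitted. */
	#define for_each_cpu(cpu, mask)			\
		for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)

	/* Generic macro: cpumask_next() really reads the mask, so the
	 * undefined cpu_llc_shared_map now surfaces at link time. */
	#define for_each_cpu(cpu, mask)				\
		for ((cpu) = -1;				\
			(cpu) = cpumask_next((cpu), (mask)),	\
			(cpu) < nr_cpu_ids;)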
Adding a stub for the two inline functions in arch/x86/include/smp.h won't be sufficient, I'm afraid.
But I think we can safely make the following assumptions (nobody complained before):
* anything using cpumask_first_zero(), cpumask_next_zero(), and for_each_cpu_not() was expecting an
empty mask on UP builds,
* anything else would have expected a filled mask on UP builds.
I'll think about how to identify all the possible cases, but I may not be able to spend a lot of time
on this in the coming two weeks. Any suggestions or alternative solutions are of course welcome.
Best,
Sander