Bitmap functions are often part of hot paths, and we can't put argument
sanity checks inside them. Sometimes a wrong combination of parameters
causes bug reports that are pretty hard to investigate:
https://lore.kernel.org/linux-mm/YsbpTNmDaam8pl+f@xsang-OptiPlex-9020/
And sometimes we can optimize caller code. For example, to avoid copying
of bitmap if source and destination are the same.
It's quite tricky to detect such places unless we've covered all bitmap
API calls with the parameters checker.
This series:
- introduces bitmap_check_params() with a couple of commonly-used wrappers;
- clears all bitmap warnings found for x86_64, arm64 and powerpc64 in
boot test.
Yury Norov (16):
lib/bitmap: add bitmap_check_params()
lib/bitmap: don't call bitmap_set() with len == 0
lib/test_bitmap: don't test bitmap_set if nbits == 0
lib/test_bitmap: test test_bitmap_arr{32,64} starting from nbits == 1
lib/test_bitmap: disable compile-time test if DEBUG_BITMAP() is
enabled
lib/test_bitmap: delete meaningless test for bitmap_cut
smp: optimize smp_call_function_many_cond()
smp: optimize smp_call_function_many_cond() for more
irq: don't copy cpu affinity mask if source is equal to destination
sched: optimize __set_cpus_allowed_ptr_locked()
time: optimize tick_check_preferred()
time: optimize tick_check_percpu()
time: optimize tick_setup_device()
mm/percpu: optimize pcpu_alloc_area()
sched/topology: optimize topology_span_sane()
lib: create CONFIG_DEBUG_BITMAP parameter
include/linux/bitmap.h | 95 +++++++++++++++++++++++++++
kernel/irq/manage.c | 3 +-
kernel/sched/core.c | 3 +-
kernel/sched/topology.c | 10 ++-
kernel/smp.c | 35 ++++++++--
kernel/time/tick-common.c | 18 ++++--
lib/Kconfig.debug | 7 ++
lib/bitmap.c | 132 ++++++++++++++++++++++++++++++++++----
lib/test_bitmap.c | 12 ++--
mm/percpu.c | 3 +-
10 files changed, 281 insertions(+), 37 deletions(-)
--
2.34.1
bitmap_check_params() takes all arguments passed into bitmap functions
and runs sanity checks. bitmap_check(), bitmap_check_op() and
bitmap_check_move() are convenient wrappers for frequent cases.
The following patches of this series clear all warnings found with
bitmap_check_params() for x86_64, arm64 and powerpc64.
The last patch introduces CONFIG_DEBUG_BITMAP option to let user enable
bitmap_check_params().
No functional changes for existing kernel users; for the following
functions, inline parameter checks have been removed:
- bitmap_pos_to_ord;
- bitmap_remap;
- bitmap_onto;
- bitmap_fold.
Signed-off-by: Yury Norov <[email protected]>
---
include/linux/bitmap.h | 95 +++++++++++++++++++++++++++++++
lib/bitmap.c | 123 ++++++++++++++++++++++++++++++++++++++---
2 files changed, 209 insertions(+), 9 deletions(-)
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 035d4ac66641..6a0d9170c4f0 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -8,9 +8,46 @@
#include <linux/bitops.h>
#include <linux/find.h>
#include <linux/limits.h>
+#include <linux/printk.h>
#include <linux/string.h>
#include <linux/types.h>
+#define CHECK_B2 BIT(0)
+#define CHECK_B3 BIT(1)
+#define CHECK_START BIT(2)
+#define CHECK_OFF BIT(3)
+#define CHECK_OVERLAP12 BIT(4)
+#define CHECK_OVERLAP13 BIT(5)
+#define CHECK_OVERLAP23 BIT(6)
+#define CHECK_OFF_EQ_0 BIT(7)
+#define CHECK_START_LE_OFF BIT(8)
+
+#define NBITS_MAX (INT_MAX-1)
+
+#ifdef CONFIG_DEBUG_BITMAP
+#define bitmap_check_params(b1, b2, b3, nbits, start, off, flags) \
+ do { \
+ if (__bitmap_check_params((b1), (b2), (b3), (nbits), \
+ (start), (off), (flags))) { \
+ pr_warn("Bitmap: parameters check failed\n"); \
+ pr_warn("%s [%d]: %s\n", __FILE__, __LINE__, __func__); \
+ } \
+ } while (0)
+
+bool __bitmap_check_params(const unsigned long *b1, const unsigned long *b2,
+ const unsigned long *b3, const unsigned long nbits,
+ const unsigned long start, const unsigned long off,
+ const unsigned long flags);
+#else
+#define bitmap_check_params(b1, b2, b3, nbits, start, off, flags)
+#endif
+
+#define bitmap_check(buf, nbits) bitmap_check_params(buf, NULL, NULL, nbits, 0, 0, 0)
+#define bitmap_check_op(dst, src1, src2, nbits) \
+ bitmap_check_params(dst, src1, src2, nbits, 0, 0, CHECK_B2 | CHECK_B3 | CHECK_OVERLAP23)
+#define bitmap_check_move(dst, src, nbits) \
+ bitmap_check_params(dst, src, NULL, nbits, 0, 0, CHECK_B2 | CHECK_OVERLAP12)
+
struct device;
/*
@@ -239,6 +276,8 @@ static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ bitmap_check(dst, nbits);
+
if (small_const_nbits(nbits))
*dst = 0;
else
@@ -249,6 +288,8 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
{
unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ bitmap_check(dst, nbits);
+
if (small_const_nbits(nbits))
*dst = ~0UL;
else
@@ -260,6 +301,8 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
{
unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ bitmap_check_move(dst, src, nbits);
+
if (small_const_nbits(nbits))
*dst = *src;
else
@@ -318,6 +361,8 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits);
static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
+ bitmap_check_op(dst, src1, src2, nbits);
+
if (small_const_nbits(nbits))
return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
return __bitmap_and(dst, src1, src2, nbits);
@@ -326,6 +371,8 @@ static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1,
static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
+ bitmap_check_op(dst, src1, src2, nbits);
+
if (small_const_nbits(nbits))
*dst = *src1 | *src2;
else
@@ -335,6 +382,8 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
+ bitmap_check_op(dst, src1, src2, nbits);
+
if (small_const_nbits(nbits))
*dst = *src1 ^ *src2;
else
@@ -344,6 +393,8 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
+ bitmap_check_op(dst, src1, src2, nbits);
+
if (small_const_nbits(nbits))
return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
return __bitmap_andnot(dst, src1, src2, nbits);
@@ -352,6 +403,8 @@ static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1,
static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
unsigned int nbits)
{
+ bitmap_check_move(dst, src, nbits);
+
if (small_const_nbits(nbits))
*dst = ~(*src);
else
@@ -368,6 +421,8 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr
static inline bool bitmap_equal(const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
+ bitmap_check_move(src1, src2, nbits);
+
if (small_const_nbits(nbits))
return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
if (__builtin_constant_p(nbits & BITMAP_MEM_MASK) &&
@@ -390,6 +445,9 @@ static inline bool bitmap_or_equal(const unsigned long *src1,
const unsigned long *src3,
unsigned int nbits)
{
+ bitmap_check_params(src1, src2, src3, nbits, 0, 0, CHECK_B2 | CHECK_B3 |
+ CHECK_OVERLAP12 | CHECK_OVERLAP13 | CHECK_OVERLAP23);
+
if (!small_const_nbits(nbits))
return __bitmap_or_equal(src1, src2, src3, nbits);
@@ -400,6 +458,8 @@ static inline bool bitmap_intersects(const unsigned long *src1,
const unsigned long *src2,
unsigned int nbits)
{
+ bitmap_check_move(src1, src2, nbits);
+
if (small_const_nbits(nbits))
return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
else
@@ -409,6 +469,8 @@ static inline bool bitmap_intersects(const unsigned long *src1,
static inline bool bitmap_subset(const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
+ bitmap_check_move(src1, src2, nbits);
+
if (small_const_nbits(nbits))
return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
else
@@ -417,6 +479,8 @@ static inline bool bitmap_subset(const unsigned long *src1,
static inline bool bitmap_empty(const unsigned long *src, unsigned nbits)
{
+ bitmap_check(src, nbits);
+
if (small_const_nbits(nbits))
return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -425,6 +489,8 @@ static inline bool bitmap_empty(const unsigned long *src, unsigned nbits)
static inline bool bitmap_full(const unsigned long *src, unsigned int nbits)
{
+ bitmap_check(src, nbits);
+
if (small_const_nbits(nbits))
return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -434,6 +500,8 @@ static inline bool bitmap_full(const unsigned long *src, unsigned int nbits)
static __always_inline
unsigned long bitmap_weight(const unsigned long *src, unsigned int nbits)
{
+ bitmap_check(src, nbits);
+
if (small_const_nbits(nbits))
return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
return __bitmap_weight(src, nbits);
@@ -442,6 +510,9 @@ unsigned long bitmap_weight(const unsigned long *src, unsigned int nbits)
static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
unsigned int nbits)
{
+ bitmap_check_params(map, NULL, NULL, start + nbits, start, nbits,
+ CHECK_START | CHECK_OFF | CHECK_OFF_EQ_0);
+
if (__builtin_constant_p(nbits) && nbits == 1)
__set_bit(start, map);
else if (small_const_nbits(start + nbits))
@@ -458,6 +529,9 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
unsigned int nbits)
{
+ bitmap_check_params(map, NULL, NULL, start + nbits, start, nbits,
+ CHECK_START | CHECK_OFF | CHECK_OFF_EQ_0);
+
if (__builtin_constant_p(nbits) && nbits == 1)
__clear_bit(start, map);
else if (small_const_nbits(start + nbits))
@@ -474,6 +548,8 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
unsigned int shift, unsigned int nbits)
{
+ bitmap_check_params(dst, src, NULL, nbits, shift, 0, CHECK_START);
+
if (small_const_nbits(nbits))
*dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift;
else
@@ -483,6 +559,8 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s
static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src,
unsigned int shift, unsigned int nbits)
{
+ bitmap_check_params(dst, src, NULL, nbits, shift, 0, CHECK_START);
+
if (small_const_nbits(nbits))
*dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits);
else
@@ -495,6 +573,10 @@ static inline void bitmap_replace(unsigned long *dst,
const unsigned long *mask,
unsigned int nbits)
{
+ bitmap_check_op(dst, old, mask, nbits);
+ bitmap_check_op(dst, new, mask, nbits);
+ bitmap_check_op(dst, old, new, nbits);
+
if (small_const_nbits(nbits))
*dst = (*old & ~(*mask)) | (*new & *mask);
else
@@ -505,6 +587,8 @@ static inline void bitmap_next_set_region(unsigned long *bitmap,
unsigned int *rs, unsigned int *re,
unsigned int end)
{
+ bitmap_check(bitmap, end);
+
*rs = find_next_bit(bitmap, end, *rs);
*re = find_next_zero_bit(bitmap, end, *rs + 1);
}
@@ -571,6 +655,11 @@ static inline unsigned long bitmap_get_value8(const unsigned long *map,
const size_t index = BIT_WORD(start);
const unsigned long offset = start % BITS_PER_LONG;
+#ifdef CONFIG_DEBUG_BITMAP
+ bitmap_check_params(map, NULL, NULL, start + 8, start, 0, CHECK_START);
+ WARN_ON(start % 8);
+#endif
+
return (map[index] >> offset) & 0xFF;
}
@@ -586,6 +675,12 @@ static inline void bitmap_set_value8(unsigned long *map, unsigned long value,
const size_t index = BIT_WORD(start);
const unsigned long offset = start % BITS_PER_LONG;
+#ifdef CONFIG_DEBUG_BITMAP
+ bitmap_check_params(map, NULL, NULL, start + 8, start, 0, CHECK_START);
+ WARN_ON(start % 8);
+ WARN_ON(value > 0xFFUL);
+#endif
+
map[index] &= ~(0xFFUL << offset);
map[index] |= value << offset;
}
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 2b67cd657692..cd4dd848ea6a 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -22,6 +22,71 @@
#include "kstrtox.h"
+#ifdef CONFIG_DEBUG_BITMAP
+static inline bool check_overlap(const unsigned long *b1, const unsigned long *b2,
+ unsigned long nbits)
+{
+ return min(b1, b2) + DIV_ROUND_UP(nbits, BITS_PER_LONG) > max(b1, b2);
+}
+
+bool __bitmap_check_params(const unsigned long *b1, const unsigned long *b2,
+ const unsigned long *b3, const unsigned long nbits,
+ const unsigned long start, const unsigned long off,
+ const unsigned long flags)
+{
+ bool warn = false;
+
+ warn |= WARN_ON(b1 == NULL);
+ warn |= WARN_ON(nbits == 0);
+ warn |= WARN_ON(nbits > NBITS_MAX);
+
+ if (flags & CHECK_B2) {
+ warn |= WARN_ON(b2 == NULL);
+ warn |= WARN_ON(flags & CHECK_OVERLAP12 &&
+ check_overlap(b1, b2, nbits));
+ }
+
+ if (flags & CHECK_B3) {
+ warn |= WARN_ON(b3 == NULL);
+ warn |= WARN_ON(flags & CHECK_OVERLAP13 &&
+ check_overlap(b1, b3, nbits));
+ }
+
+ if (flags & CHECK_OVERLAP23)
+ warn |= WARN_ON(check_overlap(b2, b3, nbits));
+
+ if (flags & CHECK_START)
+ warn |= WARN_ON(start >= nbits);
+
+ if (flags & CHECK_OFF)
+ warn |= WARN_ON(off > nbits);
+
+ if (flags & CHECK_OFF_EQ_0)
+ warn |= WARN_ON(off == 0);
+
+ if (flags & CHECK_START_LE_OFF)
+ warn |= WARN_ON(start > off);
+
+ if (flags & CHECK_B2 && flags & CHECK_B3)
+ warn |= WARN_ON(b2 == b3);
+
+ if (warn) {
+ /*
+ * Convert kernel addresses to unsigned long because
+ * %pK hides actual values with the lack of randomization.
+ */
+ pr_warn("b1:\t\t%lx\n", (unsigned long)b1);
+ pr_warn("b2:\t\t%lx\n", (unsigned long)b2);
+ pr_warn("b3:\t\t%lx\n", (unsigned long)b3);
+ pr_warn("nbits:\t%lu\n", nbits);
+ pr_warn("start:\t%lu\n", start);
+ pr_warn("off:\t%lu\n", off);
+ }
+ return warn;
+}
+EXPORT_SYMBOL(__bitmap_check_params);
+#endif
+
/**
* DOC: bitmap introduction
*
@@ -214,6 +279,9 @@ void bitmap_cut(unsigned long *dst, const unsigned long *src,
unsigned long keep = 0, carry;
int i;
+ bitmap_check_params(dst, src, NULL, nbits, first, first + cut,
+ CHECK_B2 | CHECK_START | CHECK_OFF);
+
if (first % BITS_PER_LONG) {
keep = src[first / BITS_PER_LONG] &
(~0UL >> (BITS_PER_LONG - first % BITS_PER_LONG));
@@ -410,6 +478,10 @@ unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
unsigned long align_offset)
{
unsigned long index, end, i;
+
+ bitmap_check_params(map, NULL, NULL, size, start, start + nr,
+ CHECK_START | CHECK_OFF | CHECK_START_LE_OFF);
+
again:
index = find_next_zero_bit(map, size, start);
@@ -797,6 +869,8 @@ int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits)
struct region r;
long ret;
+ bitmap_check(maskp, nmaskbits);
+
r.nbits = nmaskbits;
bitmap_zero(maskp, r.nbits);
@@ -900,6 +974,8 @@ int bitmap_parse(const char *start, unsigned int buflen,
int unset_bit;
int chunk;
+ bitmap_check(maskp, nmaskbits);
+
for (chunk = 0; ; chunk++) {
end = bitmap_find_region_reverse(start, end);
if (start > end)
@@ -950,7 +1026,9 @@ EXPORT_SYMBOL(bitmap_parse);
*/
static int bitmap_pos_to_ord(const unsigned long *buf, unsigned int pos, unsigned int nbits)
{
- if (pos >= nbits || !test_bit(pos, buf))
+ bitmap_check_params(buf, NULL, NULL, nbits, pos, 0, CHECK_START);
+
+ if (!test_bit(pos, buf))
return -1;
return __bitmap_weight(buf, pos);
@@ -1024,8 +1102,13 @@ void bitmap_remap(unsigned long *dst, const unsigned long *src,
{
unsigned int oldbit, w;
- if (dst == src) /* following doesn't handle inplace remaps */
- return;
+ bitmap_check_params(dst, src, old, nbits, 0, 0,
+ CHECK_B2 | CHECK_B3 | CHECK_OVERLAP12 |
+ CHECK_OVERLAP13 | CHECK_OVERLAP23);
+ bitmap_check_params(dst, src, new, nbits, 0, 0,
+ CHECK_B2 | CHECK_B3 | CHECK_OVERLAP12 |
+ CHECK_OVERLAP13 | CHECK_OVERLAP23);
+
bitmap_zero(dst, nbits);
w = bitmap_weight(new, nbits);
@@ -1069,8 +1152,13 @@ EXPORT_SYMBOL(bitmap_remap);
int bitmap_bitremap(int oldbit, const unsigned long *old,
const unsigned long *new, int bits)
{
- int w = bitmap_weight(new, bits);
- int n = bitmap_pos_to_ord(old, oldbit, bits);
+ int w, n;
+
+ bitmap_check_params(old, new, NULL, bits, oldbit, 0, CHECK_B2 |
+ CHECK_START | CHECK_OVERLAP12);
+
+ w = bitmap_weight(new, bits);
+ n = bitmap_pos_to_ord(old, oldbit, bits);
if (n < 0 || w == 0)
return oldbit;
else
@@ -1190,8 +1278,9 @@ void bitmap_onto(unsigned long *dst, const unsigned long *orig,
{
unsigned int n, m; /* same meaning as in above comment */
- if (dst == orig) /* following doesn't handle inplace mappings */
- return;
+ bitmap_check_params(dst, orig, relmap, bits, 0, 0, CHECK_B2 | CHECK_B3 |
+ CHECK_OVERLAP12 | CHECK_OVERLAP13 | CHECK_OVERLAP23);
+
bitmap_zero(dst, bits);
/*
@@ -1229,8 +1318,8 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig,
{
unsigned int oldbit;
- if (dst == orig) /* following doesn't handle inplace mappings */
- return;
+ bitmap_check_move(dst, orig, sz);
+
bitmap_zero(dst, nbits);
for_each_set_bit(oldbit, orig, nbits)
@@ -1332,6 +1421,8 @@ int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
{
unsigned int pos, end; /* scans bitmap by regions of size order */
+ bitmap_check_params(bitmap, NULL, NULL, bits, 0, (1 << order), CHECK_OFF);
+
for (pos = 0 ; (end = pos + (1U << order)) <= bits; pos = end) {
if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
continue;
@@ -1355,6 +1446,8 @@ EXPORT_SYMBOL(bitmap_find_free_region);
*/
void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
{
+ bitmap_check_params(bitmap, NULL, NULL, pos + (1 << order), pos, pos + (1 << order),
+ CHECK_START | CHECK_OFF);
__reg_op(bitmap, pos, order, REG_OP_RELEASE);
}
EXPORT_SYMBOL(bitmap_release_region);
@@ -1372,6 +1465,8 @@ EXPORT_SYMBOL(bitmap_release_region);
*/
int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
{
+ bitmap_check_params(bitmap, NULL, NULL, pos + (1 << order), pos, pos + (1 << order),
+ CHECK_START | CHECK_OFF);
if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
return -EBUSY;
return __reg_op(bitmap, pos, order, REG_OP_ALLOC);
@@ -1391,6 +1486,8 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n
{
unsigned int i;
+ bitmap_check_move(dst, src, nbits);
+
for (i = 0; i < nbits/BITS_PER_LONG; i++) {
if (BITS_PER_LONG == 64)
dst[i] = cpu_to_le64(src[i]);
@@ -1476,6 +1573,8 @@ void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits
{
unsigned int i, halfwords;
+ bitmap_check_move(bitmap, (unsigned long *)buf, nbits);
+
halfwords = DIV_ROUND_UP(nbits, 32);
for (i = 0; i < halfwords; i++) {
bitmap[i/2] = (unsigned long) buf[i];
@@ -1499,6 +1598,8 @@ void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits)
{
unsigned int i, halfwords;
+ bitmap_check_move(bitmap, (unsigned long *)buf, nbits);
+
halfwords = DIV_ROUND_UP(nbits, 32);
for (i = 0; i < halfwords; i++) {
buf[i] = (u32) (bitmap[i/2] & UINT_MAX);
@@ -1524,6 +1625,8 @@ void bitmap_from_arr64(unsigned long *bitmap, const u64 *buf, unsigned int nbits
{
int n;
+ bitmap_check_move(bitmap, (unsigned long *)buf, nbits);
+
for (n = nbits; n > 0; n -= 64) {
u64 val = *buf++;
@@ -1554,6 +1657,8 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits)
{
const unsigned long *end = bitmap + BITS_TO_LONGS(nbits);
+ bitmap_check_move(bitmap, (unsigned long *)buf, nbits);
+
while (bitmap < end) {
*buf = *bitmap++;
if (bitmap < end)
--
2.34.1
smp_call_function_many_cond() is often passed with cpu_online_mask.
If this is the case, we can use num_online_cpus(), which is O(1)
instead of cpumask_{first,next}(), which is O(N).
It can be optimized further: if cpu_online_mask has 0 or a single bit
set (depending on cpu_online(this_cpu)), we can return the result
without AND'ing with the user's mask.
Caught with CONFIG_DEBUG_BITMAP:
[ 7.830337] Call trace:
[ 7.830397] __bitmap_check_params+0x1d8/0x260
[ 7.830499] smp_call_function_many_cond+0x1e8/0x45c
[ 7.830607] kick_all_cpus_sync+0x44/0x80
[ 7.830698] bpf_int_jit_compile+0x34c/0x5cc
[ 7.830796] bpf_prog_select_runtime+0x118/0x190
[ 7.830900] bpf_prepare_filter+0x3dc/0x51c
[ 7.830995] __get_filter+0xd4/0x170
[ 7.831145] sk_attach_filter+0x18/0xb0
[ 7.831236] sock_setsockopt+0x5b0/0x1214
[ 7.831330] __sys_setsockopt+0x144/0x170
[ 7.831431] __arm64_sys_setsockopt+0x2c/0x40
[ 7.831541] invoke_syscall+0x48/0x114
[ 7.831634] el0_svc_common.constprop.0+0x44/0xfc
[ 7.831745] do_el0_svc+0x30/0xc0
[ 7.831825] el0_svc+0x2c/0x84
[ 7.831899] el0t_64_sync_handler+0xbc/0x140
[ 7.831999] el0t_64_sync+0x18c/0x190
[ 7.832086] ---[ end trace 0000000000000000 ]---
[ 7.832375] b1: ffff24d1ffd98a48
[ 7.832385] b2: ffffa65533a29a38
[ 7.832393] b3: ffffa65533a29a38
[ 7.832400] nbits: 256
[ 7.832407] start: 0
[ 7.832412] off: 0
[ 7.832418] smp: Bitmap: parameters check failed
[ 7.832432] smp: include/linux/bitmap.h [363]: bitmap_and
Signed-off-by: Yury Norov <[email protected]>
---
kernel/smp.c | 29 +++++++++++++++++++++++------
1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/kernel/smp.c b/kernel/smp.c
index dd215f439426..7ed2b9b12f74 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -880,6 +880,28 @@ EXPORT_SYMBOL_GPL(smp_call_function_any);
#define SCF_WAIT (1U << 0)
#define SCF_RUN_LOCAL (1U << 1)
+/* Check if we need remote execution, i.e., any CPU excluding this one. */
+static inline bool __need_remote_exec(const struct cpumask *mask, unsigned int this_cpu)
+{
+ unsigned int cpu;
+
+ switch (num_online_cpus()) {
+ case 0:
+ return false;
+ case 1:
+ return cpu_online(this_cpu) ? false : true;
+ default:
+ if (mask == cpu_online_mask)
+ return true;
+ }
+
+ cpu = cpumask_first_and(mask, cpu_online_mask);
+ if (cpu == this_cpu)
+ cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+
+ return cpu < nr_cpu_ids;
+}
+
static void smp_call_function_many_cond(const struct cpumask *mask,
smp_call_func_t func, void *info,
unsigned int scf_flags,
@@ -916,12 +938,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
run_local = true;
- /* Check if we need remote execution, i.e., any CPU excluding this one. */
- cpu = cpumask_first_and(mask, cpu_online_mask);
- if (cpu == this_cpu)
- cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
- if (cpu < nr_cpu_ids)
- run_remote = true;
+ run_remote = __need_remote_exec(mask, this_cpu);
if (run_remote) {
cfd = this_cpu_ptr(&cfd_data);
--
2.34.1
Create the CONFIG_DEBUG_BITMAP parameter to let people enable the new
bitmap parameter checks. Default is N.
Signed-off-by: Yury Norov <[email protected]>
---
lib/Kconfig.debug | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2e24db4bff19..cde1b5b7bb9d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -533,6 +533,13 @@ config DEBUG_FORCE_WEAK_PER_CPU
endmenu # "Compiler options"
+config DEBUG_BITMAP
+ bool "Debug bitmaps"
+ help
+ Say Y here if you want to check bitmap functions parameters at
+ the runtime. Enable CONFIG_DEBUG_BITMAP only for debugging because
+ it may affect performance.
+
menu "Generic Kernel Debugging Instruments"
config MAGIC_SYSRQ
--
2.34.1
Don't call bitmap_clear() to clear 0 bits.
bitmap_clear() can handle 0-length requests properly, but it's not covered
by static optimizations, and falls through to __bitmap_clear(). So we are
paying a function call + prologue work cost just for nothing.
Caught with CONFIG_DEBUG_BITMAP:
[ 45.571799] <TASK>
[ 45.571801] pcpu_alloc_area+0x194/0x340
[ 45.571806] pcpu_alloc+0x2fb/0x8b0
[ 45.571811] ? kmem_cache_alloc_trace+0x177/0x2a0
[ 45.571815] __percpu_counter_init+0x22/0xa0
[ 45.571819] fprop_local_init_percpu+0x14/0x30
[ 45.571823] wb_get_create+0x15d/0x5f0
[ 45.571828] cleanup_offline_cgwb+0x73/0x210
[ 45.571831] cleanup_offline_cgwbs_workfn+0xcf/0x200
[ 45.571835] process_one_work+0x1e5/0x3b0
[ 45.571839] worker_thread+0x50/0x3a0
[ 45.571843] ? rescuer_thread+0x390/0x390
[ 45.571846] kthread+0xe8/0x110
[ 45.571849] ? kthread_complete_and_exit+0x20/0x20
[ 45.571853] ret_from_fork+0x22/0x30
[ 45.571858] </TASK>
[ 45.571859] ---[ end trace 0000000000000000 ]---
[ 45.571860] b1: ffffa8d5002e1000
[ 45.571861] b2: 0
[ 45.571861] b3: 0
[ 45.571862] nbits: 44638
[ 45.571863] start: 44638
[ 45.571864] off: 0
[ 45.571864] percpu: Bitmap: parameters check failed
[ 45.571865] percpu: include/linux/bitmap.h [538]: bitmap_clear
Signed-off-by: Yury Norov <[email protected]>
---
mm/percpu.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mm/percpu.c b/mm/percpu.c
index 3633eeefaa0d..f720f7c36b91 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1239,7 +1239,8 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits,
/* update boundary map */
set_bit(bit_off, chunk->bound_map);
- bitmap_clear(chunk->bound_map, bit_off + 1, alloc_bits - 1);
+ if (alloc_bits > 1)
+ bitmap_clear(chunk->bound_map, bit_off + 1, alloc_bits - 1);
set_bit(bit_off + alloc_bits, chunk->bound_map);
chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE;
--
2.34.1
topology_span_sane() checks if cpu == i before calling
cpumask_equal(tl->mask(cpu), tl->mask(i)).
However, tl->mask(cpu) and tl->mask(i) may point to the same cpumask
even if i != cpu. Fix the check accordingly.
While here, move the tl->mask(cpu) lookup out of the loop, and make the
in-loop code compute tl->mask(i) only once.
Caught with CONFIG_DEBUG_BITMAP:
[ 0.867917] Call Trace:
[ 0.868209] <TASK>
[ 0.868471] build_sched_domains+0x36f/0x1a40
[ 0.868576] sched_init_smp+0x44/0xba
[ 0.869012] ? mtrr_aps_init+0x84/0xa0
[ 0.869465] kernel_init_freeable+0x12e/0x26e
[ 0.869982] ? rest_init+0xd0/0xd0
[ 0.870406] kernel_init+0x16/0x120
[ 0.870821] ret_from_fork+0x22/0x30
[ 0.871244] </TASK>
[ 0.871502] ---[ end trace 0000000000000000 ]---
[ 0.872040] b1: ffffffffb1fd3480
[ 0.872041] b2: ffffffffb1fd3480
[ 0.872041] b3: 0
[ 0.872042] nbits: 256
[ 0.872042] start: 0
[ 0.872042] off: 0
[ 0.872043] Bitmap: parameters check failed
[ 0.872043] include/linux/bitmap.h [427]: bitmap_equal
Signed-off-by: Yury Norov <[email protected]>
---
kernel/sched/topology.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 05b6c2ad90b9..ad32d0a43424 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2211,6 +2211,8 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
static bool topology_span_sane(struct sched_domain_topology_level *tl,
const struct cpumask *cpu_map, int cpu)
{
+ const struct cpumask *mc = tl->mask(cpu);
+ const struct cpumask *mi;
int i;
/* NUMA levels are allowed to overlap */
@@ -2226,14 +2228,18 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
for_each_cpu(i, cpu_map) {
if (i == cpu)
continue;
+
+ mi = tl->mask(i);
+ if (mi == mc)
+ continue;
+
/*
* We should 'and' all those masks with 'cpu_map' to exactly
* match the topology we're about to build, but that can only
* remove CPUs, which only lessens our ability to detect
* overlaps
*/
- if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
- cpumask_intersects(tl->mask(cpu), tl->mask(i)))
+ if (!cpumask_equal(mc, mi) && cpumask_intersects(mc, mi))
return false;
}
--
2.34.1
irq_do_set_affinity() may be called with
mask == irq_data_to_desc()->irq_common_data.affinity
Copying in that case is useless.
Caught with CONFIG_DEBUG_BITMAP:
[ 1.089177] __bitmap_check_params+0x144/0x250
[ 1.089238] irq_do_set_affinity+0x120/0x470
[ 1.089298] irq_startup+0x140/0x16c
[ 1.089350] __setup_irq+0x668/0x760
[ 1.089402] request_threaded_irq+0xe8/0x1b0
[ 1.089474] vp_find_vqs_msix+0x270/0x410
[ 1.089532] vp_find_vqs+0x48/0x1b4
[ 1.089584] vp_modern_find_vqs+0x1c/0x70
[ 1.089641] init_vq+0x2dc/0x34c
[ 1.089690] virtblk_probe+0xdc/0x710
[ 1.089745] virtio_dev_probe+0x19c/0x270
[ 1.089802] really_probe.part.0+0x9c/0x2ac
[ 1.089863] __driver_probe_device+0x98/0x144
[ 1.089923] driver_probe_device+0xac/0x140
[ 1.089985] __driver_attach+0xf8/0x1a0
[ 1.090047] bus_for_each_dev+0x70/0xd0
[ 1.090101] driver_attach+0x24/0x30
[ 1.090153] bus_add_driver+0x150/0x200
[ 1.090208] driver_register+0x78/0x130
[ 1.090266] register_virtio_driver+0x28/0x40
[ 1.090329] virtio_blk_init+0x68/0xa4
[ 1.090400] do_one_initcall+0x50/0x1c0
[ 1.090471] kernel_init_freeable+0x208/0x28c
[ 1.090538] kernel_init+0x28/0x13c
[ 1.090590] ret_from_fork+0x10/0x20
[ 1.090642] ---[ end trace 0000000000000000 ]---
[ 1.090705] b1: ffff2ec742b85e18
[ 1.090710] b2: ffff2ec742b85e18
[ 1.090715] b3: 0
[ 1.090719] nbits: 256
[ 1.090723] start: 0
[ 1.090727] off: 0
Signed-off-by: Yury Norov <[email protected]>
---
kernel/irq/manage.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 8c396319d5ac..f9c1b21584ec 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -284,7 +284,8 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
switch (ret) {
case IRQ_SET_MASK_OK:
case IRQ_SET_MASK_OK_DONE:
- cpumask_copy(desc->irq_common_data.affinity, mask);
+ if (desc->irq_common_data.affinity != mask)
+ cpumask_copy(desc->irq_common_data.affinity, mask);
fallthrough;
case IRQ_SET_MASK_OK_NOCOPY:
irq_validate_effective_affinity(data);
--
2.34.1
tick_check_percpu() calls cpumask_equal() even if
curdev->cpumask == cpumask_of(cpu). Fix it.
Caught with CONFIG_DEBUG_BITMAP:
[ 0.077622] Call trace:
[ 0.077637] __bitmap_check_params+0x144/0x250
[ 0.077675] tick_check_replacement+0xac/0x320
[ 0.077716] tick_check_new_device+0x50/0x110
[ 0.077747] clockevents_register_device+0x74/0x1c0
[ 0.077779] dummy_timer_starting_cpu+0x6c/0x80
[ 0.077817] cpuhp_invoke_callback+0x104/0x20c
[ 0.077856] cpuhp_invoke_callback_range+0x70/0xf0
[ 0.077890] notify_cpu_starting+0xac/0xcc
[ 0.077921] secondary_start_kernel+0xe4/0x154
[ 0.077951] __secondary_switched+0xa0/0xa4
[ 0.077992] ---[ end trace 0000000000000000 ]---
[ 0.078021] b1: ffffbfec4703b890
[ 0.078031] b2: ffffbfec4703b890
[ 0.078043] b3: 0
[ 0.078052] nbits: 256
[ 0.078065] start: 0
[ 0.078075] off: 0
[ 0.078086] Bitmap: parameters check failed
[ 0.078095] include/linux/bitmap.h [419]: bitmap_equal
Signed-off-by: Yury Norov <[email protected]>
---
kernel/time/tick-common.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index fdd5ae1a074b..7205f76f8d10 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -290,13 +290,15 @@ static bool tick_check_percpu(struct clock_event_device *curdev,
{
if (!cpumask_test_cpu(cpu, newdev->cpumask))
return false;
- if (cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
+ if (newdev->cpumask == cpumask_of(cpu) ||
+ cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
return true;
/* Check if irq affinity can be set */
if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq))
return false;
/* Prefer an existing cpu local device */
- if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
+ if (curdev && (curdev->cpumask == cpumask_of(cpu) ||
+ cpumask_equal(curdev->cpumask, cpumask_of(cpu))))
return false;
return true;
}
--
2.34.1
One of bitmap_cut() tests passed it with:
nbits = BITS_PER_LONG;
first = BITS_PER_LONG;
cut = BITS_PER_LONG;
This test is useless because the range to cut is not inside the
bitmap. This should normally raise an error, but bitmap_cut() is
void and returns nothing.
To check if the test is passed, it just tests that the memory is
not touched by bitmap_cut(), which is probably not correct, because
if a function is passed with wrong parameters, it's too optimistic
to expect a correct, or even sane behavior.
Now that we have bitmap_check_params(), there's a tool to detect such
things in real code, and we can drop the test.
Signed-off-by: Yury Norov <[email protected]>
---
lib/test_bitmap.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 8bd279a7633f..c1ea449aae2d 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -707,10 +707,6 @@ static struct test_bitmap_cut test_cut[] = {
{ 15, 16, 32, { 0xa5a5a5a5UL, }, { 0x0000a5a5UL, }, },
{ 16, 15, 32, { 0xa5a5a5a5UL, }, { 0x0001a5a5UL, }, },
- { BITS_PER_LONG, BITS_PER_LONG, BITS_PER_LONG,
- { 0xa5a5a5a5UL, 0xa5a5a5a5UL, },
- { 0xa5a5a5a5UL, 0xa5a5a5a5UL, },
- },
{ 1, BITS_PER_LONG - 1, BITS_PER_LONG,
{ 0xa5a5a5a5UL, 0xa5a5a5a5UL, },
{ 0x00000001UL, 0x00000001UL, },
--
2.34.1
CONFIG_DEBUG_BITMAP, when enabled, injects __bitmap_check_params()
into bitmap functions. It prevents the compiler from performing
compile-time optimizations, which makes the corresponding test fail.
Signed-off-by: Yury Norov <[email protected]>
---
lib/test_bitmap.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index bc48d992d10d..8bd279a7633f 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -877,6 +877,7 @@ static void __init test_bitmap_print_buf(void)
static void __init test_bitmap_const_eval(void)
{
+#ifndef CONFIG_DEBUG_BITMAP
DECLARE_BITMAP(bitmap, BITS_PER_LONG);
unsigned long initvar = BIT(2);
unsigned long bitopvar = 0;
@@ -934,6 +935,7 @@ static void __init test_bitmap_const_eval(void)
/* ~BIT(25) */
BUILD_BUG_ON(!__builtin_constant_p(~var));
BUILD_BUG_ON(~var != ~BIT(25));
+#endif
}
static void __init selftest(void)
--
2.34.1
Don't call cpumask_subset(new_mask, cpu_allowed_mask) if new_mask
is cpu_allowed_mask.
Caught with CONFIG_DEBUG_BITMAP:
[ 0.132174] Call trace:
[ 0.132189] __bitmap_check_params+0x144/0x250
[ 0.132216] __set_cpus_allowed_ptr_locked+0x8c/0x2c0
[ 0.132241] sched_init_smp+0x80/0xd8
[ 0.132273] kernel_init_freeable+0x12c/0x28c
[ 0.132299] kernel_init+0x28/0x13c
[ 0.132325] ret_from_fork+0x10/0x20
[ 0.132354] ---[ end trace 0000000000000000 ]---
[ 0.132378] b1: ffffcd0c07819a58
[ 0.132388] b2: ffffcd0c07819a58
[ 0.132397] b3: 0
[ 0.132405] nbits: 256
[ 0.132414] start: 0
[ 0.132422] off: 0
[ 0.132444] Bitmap: parameters check failed
[ 0.132467] include/linux/bitmap.h [468]: bitmap_subset
Signed-off-by: Yury Norov <[email protected]>
---
kernel/sched/core.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index da0bf6fe9ecd..d6424336ef2d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2874,7 +2874,8 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
cpu_valid_mask = cpu_online_mask;
}
- if (!kthread && !cpumask_subset(new_mask, cpu_allowed_mask)) {
+ if (!kthread && new_mask != cpu_allowed_mask &&
+ !cpumask_subset(new_mask, cpu_allowed_mask)) {
ret = -EINVAL;
goto out;
}
--
2.34.1
tick_setup_device() calls cpumask_equal(newdev->cpumask, cpumask)
even if newdev->cpumask == cpumask. Fix it.
Caught with CONFIG_DEBUG_BITMAP:
[ 0.070960] Call trace:
[ 0.070974] __bitmap_check_params+0x144/0x250
[ 0.071008] tick_setup_device+0x70/0x1a0
[ 0.071040] tick_check_new_device+0xc0/0x110
[ 0.071066] clockevents_register_device+0x74/0x1c0
[ 0.071090] clockevents_config_and_register+0x2c/0x3c
[ 0.071114] arch_timer_starting_cpu+0x170/0x470
[ 0.071147] cpuhp_invoke_callback+0x104/0x20c
[ 0.071180] cpuhp_invoke_callback_range+0x70/0xf0
[ 0.071205] notify_cpu_starting+0xac/0xcc
[ 0.071229] secondary_start_kernel+0xe4/0x154
[ 0.071259] __secondary_switched+0xa0/0xa4
[ 0.071297] ---[ end trace 0000000000000000 ]---
[ 0.071328] b1: ffffa1f27323b890
[ 0.071339] b2: ffffa1f27323b890
[ 0.071348] b3: 0
[ 0.071356] nbits: 256
[ 0.071366] start: 0
[ 0.071374] off: 0
[ 0.071383] Bitmap: parameters check failed
[ 0.071390] include/linux/bitmap.h [419]: bitmap_equal
Signed-off-by: Yury Norov <[email protected]>
---
kernel/time/tick-common.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 7205f76f8d10..7b2da8ef09ef 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -255,7 +255,7 @@ static void tick_setup_device(struct tick_device *td,
* When the device is not per cpu, pin the interrupt to the
* current cpu:
*/
- if (!cpumask_equal(newdev->cpumask, cpumask))
+ if (newdev->cpumask != cpumask && !cpumask_equal(newdev->cpumask, cpumask))
irq_set_affinity(newdev->irq, cpumask);
/*
--
2.34.1
smp_call_function_many_cond() is often passed with cpu_online_mask.
If it's the case, we can use cpumask_copy instead of cpumask_and, which
is faster.
Caught with CONFIG_DEBUG_BITMAP:
[ 7.830337] Call trace:
[ 7.830397] __bitmap_check_params+0x1d8/0x260
[ 7.830499] smp_call_function_many_cond+0x1e8/0x45c
[ 7.830607] kick_all_cpus_sync+0x44/0x80
[ 7.830698] bpf_int_jit_compile+0x34c/0x5cc
[ 7.830796] bpf_prog_select_runtime+0x118/0x190
[ 7.830900] bpf_prepare_filter+0x3dc/0x51c
[ 7.830995] __get_filter+0xd4/0x170
[ 7.831145] sk_attach_filter+0x18/0xb0
[ 7.831236] sock_setsockopt+0x5b0/0x1214
[ 7.831330] __sys_setsockopt+0x144/0x170
[ 7.831431] __arm64_sys_setsockopt+0x2c/0x40
[ 7.831541] invoke_syscall+0x48/0x114
[ 7.831634] el0_svc_common.constprop.0+0x44/0xfc
[ 7.831745] do_el0_svc+0x30/0xc0
[ 7.831825] el0_svc+0x2c/0x84
[ 7.831899] el0t_64_sync_handler+0xbc/0x140
[ 7.831999] el0t_64_sync+0x18c/0x190
[ 7.832086] ---[ end trace 0000000000000000 ]---
[ 7.832375] b1: ffff24d1ffd98a48
[ 7.832385] b2: ffffa65533a29a38
[ 7.832393] b3: ffffa65533a29a38
[ 7.832400] nbits: 256
[ 7.832407] start: 0
[ 7.832412] off: 0
[ 7.832418] smp: Bitmap: parameters check failed
[ 7.832432] smp: include/linux/bitmap.h [363]: bitmap_and
Signed-off-by: Yury Norov <[email protected]>
---
kernel/smp.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/kernel/smp.c b/kernel/smp.c
index 7ed2b9b12f74..f96fdf944b4a 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -942,7 +942,11 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
if (run_remote) {
cfd = this_cpu_ptr(&cfd_data);
- cpumask_and(cfd->cpumask, mask, cpu_online_mask);
+ if (mask == cpu_online_mask)
+ cpumask_copy(cfd->cpumask, cpu_online_mask);
+ else
+ cpumask_and(cfd->cpumask, mask, cpu_online_mask);
+
__cpumask_clear_cpu(this_cpu, cfd->cpumask);
cpumask_clear(cfd->cpumask_ipi);
--
2.34.1
tick_check_preferred() calls cpumask_equal() even if
curdev->cpumask == newdev->cpumask. Fix it.
Caught with CONFIG_DEBUG_BITMAP:
[ 0.079109] Call trace:
[ 0.079124] __bitmap_check_params+0x144/0x250
[ 0.079161] tick_check_replacement+0x1a4/0x320
[ 0.079203] tick_check_new_device+0x50/0x110
[ 0.079237] clockevents_register_device+0x74/0x1c0
[ 0.079268] dummy_timer_starting_cpu+0x6c/0x80
[ 0.079310] cpuhp_invoke_callback+0x104/0x20c
[ 0.079353] cpuhp_invoke_callback_range+0x70/0xf0
[ 0.079401] notify_cpu_starting+0xac/0xcc
[ 0.079434] secondary_start_kernel+0xe4/0x154
[ 0.079471] __secondary_switched+0xa0/0xa4
[ 0.079516] ---[ end trace 0000000000000000 ]---
[ 0.079542] b1: ffffbfec4703b890
[ 0.079553] b2: ffffbfec4703b890
[ 0.079566] b3: 0
[ 0.079576] nbits: 256
[ 0.079588] start: 0
[ 0.079598] off: 0
[ 0.079609] Bitmap: parameters check failed
[ 0.079619] include/linux/bitmap.h [419]: bitmap_equal
Signed-off-by: Yury Norov <[email protected]>
---
kernel/time/tick-common.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 46789356f856..fdd5ae1a074b 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -316,9 +316,13 @@ static bool tick_check_preferred(struct clock_event_device *curdev,
* Use the higher rated one, but prefer a CPU local device with a lower
* rating than a non-CPU local device
*/
- return !curdev ||
- newdev->rating > curdev->rating ||
- !cpumask_equal(curdev->cpumask, newdev->cpumask);
+ if (!curdev || newdev->rating > curdev->rating)
+ return true;
+
+ if (newdev->cpumask == curdev->cpumask)
+ return false;
+
+ return !cpumask_equal(curdev->cpumask, newdev->cpumask);
}
/*
--
2.34.1
On Mon, Jul 18, 2022 at 12:28:29PM -0700, Yury Norov wrote:
> bitmap_check_params() takes all arguments passed into bitmap functions
> and runs sanity checks. bitmap_check(), bitmap_check_op() and
> bitmap_check_move() are convenient wrappers for frequent cases.
>
> The following patches of this series clear all warnings found with
> bitmap_check_params() for x86_64, arm64 and powerpc64.
>
> The last patch introduces CONFIG_DEBUG_BITMAP option to let user enable
> bitmap_check_params().
>
> No functional changes for existing kernel users, and for the following
> functions inline parameters checks removed:
> - bitmap_pos_to_ord;
> - bitmap_remap;
> - bitmap_onto;
> - bitmap_fold.
...
> +#define bitmap_check_params(b1, b2, b3, nbits, start, off, flags) \
> + do { \
> + if (__bitmap_check_params((b1), (b2), (b3), (nbits), \
> + (start), (off), (flags))) { \
> + pr_warn("Bitmap: parameters check failed"); \
> + pr_warn("%s [%d]: %s\n", __FILE__, __LINE__, __func__); \
> + } \
> + } while (0)
Why printk() and not trace points?
Also, try to avoid WARN() / etc in the generic code, it may be easily converted
to the BUG() (by kernel command line option, no recompilation needed), and
hence make all WARN() effectively reboot machine. Can you guarantee that in all
cases the functionality is critical to continue only with correct parameters?
--
With Best Regards,
Andy Shevchenko
On Mon, Jul 18, 2022 at 12:28:33PM -0700, Yury Norov wrote:
> CONFIG_DEBUG_BITMAP, when enabled, injects __bitmap_check_params()
> into bitmap functions. It prevents compiler from compile-time
> optimizations, which makes corresponding test fail.
Does it stay the same for trace points?
--
With Best Regards,
Andy Shevchenko
On Mon, Jul 18, 2022 at 12:28:34PM -0700, Yury Norov wrote:
> One of bitmap_cut() tests passed it with:
> nbits = BITS_PER_LONG;
> first = BITS_PER_LONG;
> cut = BITS_PER_LONG;
>
> This test is useless because the range to cut is not inside the
> bitmap. This should normally raise an error, but bitmap_cut() is
> void and returns nothing.
>
> To check if the test is passed, it just tests that the memory is
> not touched by bitmap_cut(), which is probably not correct, because
> if a function is passed with wrong parameters, it's too optimistic
> to expect a correct, or even sane behavior.
>
> Now that we have bitmap_check_params(), there's a tool to detect such
> things in real code, and we can drop the test.
There are no "useless" tests. Same comments as per a couple of previous
patches.
--
With Best Regards,
Andy Shevchenko
On Mon, Jul 18, 2022 at 12:28:35PM -0700, Yury Norov wrote:
> diff --git a/kernel/smp.c b/kernel/smp.c
> index dd215f439426..7ed2b9b12f74 100644
> --- a/kernel/smp.c
> +++ b/kernel/smp.c
> @@ -880,6 +880,28 @@ EXPORT_SYMBOL_GPL(smp_call_function_any);
> #define SCF_WAIT (1U << 0)
> #define SCF_RUN_LOCAL (1U << 1)
>
> +/* Check if we need remote execution, i.e., any CPU excluding this one. */
> +static inline bool __need_remote_exec(const struct cpumask *mask, unsigned int this_cpu)
> +{
> + unsigned int cpu;
> +
> + switch (num_online_cpus()) {
> + case 0:
> + return false;
> + case 1:
> + return cpu_online(this_cpu) ? false : true;
> + default:
> + if (mask == cpu_online_mask)
> + return true;
> + }
> +
> + cpu = cpumask_first_and(mask, cpu_online_mask);
> + if (cpu == this_cpu)
> + cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
> +
> + return cpu < nr_cpu_ids;
> +}
> +
> static void smp_call_function_many_cond(const struct cpumask *mask,
> smp_call_func_t func, void *info,
> unsigned int scf_flags,
> @@ -916,12 +938,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
> if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
> run_local = true;
>
> - /* Check if we need remote execution, i.e., any CPU excluding this one. */
> - cpu = cpumask_first_and(mask, cpu_online_mask);
> - if (cpu == this_cpu)
> - cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
> - if (cpu < nr_cpu_ids)
> - run_remote = true;
> + run_remote = __need_remote_exec(mask, this_cpu);
>
> if (run_remote) {
> cfd = this_cpu_ptr(&cfd_data);
This is more complex code for very little to no gain. Why?!
On Mon, Jul 18, 2022 at 12:28:37PM -0700, Yury Norov wrote:
> kernel/irq/manage.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
> index 8c396319d5ac..f9c1b21584ec 100644
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -284,7 +284,8 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
> switch (ret) {
> case IRQ_SET_MASK_OK:
> case IRQ_SET_MASK_OK_DONE:
> - cpumask_copy(desc->irq_common_data.affinity, mask);
> + if (desc->irq_common_data.affinity != mask)
> + cpumask_copy(desc->irq_common_data.affinity, mask);
Seems like mostly pointless logic at this point. This is not a
performance-sensitive operation afaik.
> fallthrough;
> case IRQ_SET_MASK_OK_NOCOPY:
> irq_validate_effective_affinity(data);
> --
> 2.34.1
>
On Mon, Jul 18, 2022 at 12:28:41PM -0700, Yury Norov wrote:
> kernel/time/tick-common.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
> index 7205f76f8d10..7b2da8ef09ef 100644
> --- a/kernel/time/tick-common.c
> +++ b/kernel/time/tick-common.c
> @@ -255,7 +255,7 @@ static void tick_setup_device(struct tick_device *td,
> * When the device is not per cpu, pin the interrupt to the
> * current cpu:
> */
> - if (!cpumask_equal(newdev->cpumask, cpumask))
> + if (newdev->cpumask != cpumask && !cpumask_equal(newdev->cpumask, cpumask))
> irq_set_affinity(newdev->irq, cpumask);
This is again making a slow path harder to read for no benefit.
On Mon, Jul 18, 2022 at 12:28:38PM -0700, Yury Norov wrote:
> ---
> kernel/sched/core.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index da0bf6fe9ecd..d6424336ef2d 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2874,7 +2874,8 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
> cpu_valid_mask = cpu_online_mask;
> }
>
> - if (!kthread && !cpumask_subset(new_mask, cpu_allowed_mask)) {
> + if (!kthread && new_mask != cpu_allowed_mask &&
> + !cpumask_subset(new_mask, cpu_allowed_mask)) {
Optimize cpumask_subset() for src1p == src2p instead?
> ret = -EINVAL;
> goto out;
> }
> --
> 2.34.1
>
On Mon, Jul 18, 2022 at 12:28:35PM -0700, Yury Norov wrote:
> smp_call_function_many_cond() is often passed with cpu_online_mask.
> If this is the case, we can use num_online_cpus(), which is O(1)
> instead of cpumask_{first,next}(), which is O(N).
>
> It can be optimized further: if cpu_online_mask has 0 or single bit
> set (depending on cpu_online(this_cpu), we can return result without
> AND'ing with user's mask.
> Caught with CONFIG_DEBUG_BITMAP:
> [ 7.830337] Call trace:
> [ 7.830397] __bitmap_check_params+0x1d8/0x260
> [ 7.830499] smp_call_function_many_cond+0x1e8/0x45c
> [ 7.830607] kick_all_cpus_sync+0x44/0x80
> [ 7.830698] bpf_int_jit_compile+0x34c/0x5cc
> [ 7.830796] bpf_prog_select_runtime+0x118/0x190
> [ 7.830900] bpf_prepare_filter+0x3dc/0x51c
> [ 7.830995] __get_filter+0xd4/0x170
> [ 7.831145] sk_attach_filter+0x18/0xb0
> [ 7.831236] sock_setsockopt+0x5b0/0x1214
> [ 7.831330] __sys_setsockopt+0x144/0x170
> [ 7.831431] __arm64_sys_setsockopt+0x2c/0x40
> [ 7.831541] invoke_syscall+0x48/0x114
> [ 7.831634] el0_svc_common.constprop.0+0x44/0xfc
> [ 7.831745] do_el0_svc+0x30/0xc0
> [ 7.831825] el0_svc+0x2c/0x84
> [ 7.831899] el0t_64_sync_handler+0xbc/0x140
> [ 7.831999] el0t_64_sync+0x18c/0x190
> [ 7.832086] ---[ end trace 0000000000000000 ]---
> [ 7.832375] b1: ffff24d1ffd98a48
> [ 7.832385] b2: ffffa65533a29a38
> [ 7.832393] b3: ffffa65533a29a38
> [ 7.832400] nbits: 256
> [ 7.832407] start: 0
> [ 7.832412] off: 0
> [ 7.832418] smp: Bitmap: parameters check failed
> [ 7.832432] smp: include/linux/bitmap.h [363]: bitmap_and
Documentation specifically says:
https://www.kernel.org/doc/html/latest/process/submitting-patches.html#backtraces-in-commit-mesages
...
> + default:
> + if (mask == cpu_online_mask)
> + return true;
Instead, put (missed) break; here and do "default" case together below.
> + cpu = cpumask_first_and(mask, cpu_online_mask);
> + if (cpu == this_cpu)
> + cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
> +
> + return cpu < nr_cpu_ids;
...
> + run_remote = __need_remote_exec(mask, this_cpu);
>
Now you may remove this blank line.
> if (run_remote) {
--
With Best Regards,
Andy Shevchenko
On Mon, Jul 18, 2022 at 12:28:44PM -0700, Yury Norov wrote:
> Create CONFIG_DEBUG_BITMAP parameter to let people use
> new option. Default is N.
Even if separate, it should immediately follow the implementation.
--
With Best Regards,
Andy Shevchenko
On Mon, Jul 18, 2022 at 12:28:36PM -0700, Yury Norov wrote:
> ---
> kernel/smp.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/smp.c b/kernel/smp.c
> index 7ed2b9b12f74..f96fdf944b4a 100644
> --- a/kernel/smp.c
> +++ b/kernel/smp.c
> @@ -942,7 +942,11 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
>
> if (run_remote) {
> cfd = this_cpu_ptr(&cfd_data);
> - cpumask_and(cfd->cpumask, mask, cpu_online_mask);
> + if (mask == cpu_online_mask)
> + cpumask_copy(cfd->cpumask, cpu_online_mask);
> + else
> + cpumask_and(cfd->cpumask, mask, cpu_online_mask);
> +
Or... you could optimize cpumask_and() to detect the src1p == src2p case?
> __cpumask_clear_cpu(this_cpu, cfd->cpumask);
>
> cpumask_clear(cfd->cpumask_ipi);
> --
> 2.34.1
>
On Mon, Jul 18, 2022 at 12:28:36PM -0700, Yury Norov wrote:
> smp_call_function_many_cond() is often passed with cpu_online_mask.
> If it's the case, we can use cpumask_copy instead of cpumask_and, which
> is faster.
>
> Caught with CONFIG_DEBUG_BITMAP:
> [ 7.830337] Call trace:
> [ 7.830397] __bitmap_check_params+0x1d8/0x260
> [ 7.830499] smp_call_function_many_cond+0x1e8/0x45c
> [ 7.830607] kick_all_cpus_sync+0x44/0x80
> [ 7.830698] bpf_int_jit_compile+0x34c/0x5cc
> [ 7.830796] bpf_prog_select_runtime+0x118/0x190
> [ 7.830900] bpf_prepare_filter+0x3dc/0x51c
> [ 7.830995] __get_filter+0xd4/0x170
> [ 7.831145] sk_attach_filter+0x18/0xb0
> [ 7.831236] sock_setsockopt+0x5b0/0x1214
> [ 7.831330] __sys_setsockopt+0x144/0x170
> [ 7.831431] __arm64_sys_setsockopt+0x2c/0x40
> [ 7.831541] invoke_syscall+0x48/0x114
> [ 7.831634] el0_svc_common.constprop.0+0x44/0xfc
> [ 7.831745] do_el0_svc+0x30/0xc0
> [ 7.831825] el0_svc+0x2c/0x84
> [ 7.831899] el0t_64_sync_handler+0xbc/0x140
> [ 7.831999] el0t_64_sync+0x18c/0x190
> [ 7.832086] ---[ end trace 0000000000000000 ]---
> [ 7.832375] b1: ffff24d1ffd98a48
> [ 7.832385] b2: ffffa65533a29a38
> [ 7.832393] b3: ffffa65533a29a38
> [ 7.832400] nbits: 256
> [ 7.832407] start: 0
> [ 7.832412] off: 0
> [ 7.832418] smp: Bitmap: parameters check failed
> [ 7.832432] smp: include/linux/bitmap.h [363]: bitmap_and
Same for long commit message noise.
--
With Best Regards,
Andy Shevchenko
On Mon, Jul 18, 2022 at 12:28:43PM -0700, Yury Norov wrote:
> kernel/sched/topology.c | 10 ++++++++--
> 1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 05b6c2ad90b9..ad32d0a43424 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -2211,6 +2211,8 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
> static bool topology_span_sane(struct sched_domain_topology_level *tl,
> const struct cpumask *cpu_map, int cpu)
> {
> + const struct cpumask *mc = tl->mask(cpu);
> + const struct cpumask *mi;
> int i;
>
> /* NUMA levels are allowed to overlap */
> @@ -2226,14 +2228,18 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
> for_each_cpu(i, cpu_map) {
> if (i == cpu)
> continue;
> +
> + mi = tl->mask(i);
> + if (mi == mc)
> + continue;
> +
> /*
> * We should 'and' all those masks with 'cpu_map' to exactly
> * match the topology we're about to build, but that can only
> * remove CPUs, which only lessens our ability to detect
> * overlaps
> */
> - if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
> - cpumask_intersects(tl->mask(cpu), tl->mask(i)))
> + if (!cpumask_equal(mc, mi) && cpumask_intersects(mc, mi))
> return false;
> }
This is once again a super slow path; but I don't suppose you're making
the code worse in this case.
Hello,
On Mon, Jul 18, 2022 at 12:28:42PM -0700, Yury Norov wrote:
> Don't call bitmap_clear() to clear 0 bits.
>
> bitmap_clear() can handle 0-length requests properly, but it's not covered
> with static optimizations, and falls to __bitmap_set(). So we are paying a
> function call + prologue work cost just for nothing.
>
> Caught with CONFIG_DEBUG_BITMAP:
> [ 45.571799] <TASK>
> [ 45.571801] pcpu_alloc_area+0x194/0x340
> [ 45.571806] pcpu_alloc+0x2fb/0x8b0
> [ 45.571811] ? kmem_cache_alloc_trace+0x177/0x2a0
> [ 45.571815] __percpu_counter_init+0x22/0xa0
> [ 45.571819] fprop_local_init_percpu+0x14/0x30
> [ 45.571823] wb_get_create+0x15d/0x5f0
> [ 45.571828] cleanup_offline_cgwb+0x73/0x210
> [ 45.571831] cleanup_offline_cgwbs_workfn+0xcf/0x200
> [ 45.571835] process_one_work+0x1e5/0x3b0
> [ 45.571839] worker_thread+0x50/0x3a0
> [ 45.571843] ? rescuer_thread+0x390/0x390
> [ 45.571846] kthread+0xe8/0x110
> [ 45.571849] ? kthread_complete_and_exit+0x20/0x20
> [ 45.571853] ret_from_fork+0x22/0x30
> [ 45.571858] </TASK>
> [ 45.571859] ---[ end trace 0000000000000000 ]---
> [ 45.571860] b1: ffffa8d5002e1000
> [ 45.571861] b2: 0
> [ 45.571861] b3: 0
> [ 45.571862] nbits: 44638
> [ 45.571863] start: 44638
> [ 45.571864] off: 0
> [ 45.571864] percpu: Bitmap: parameters check failed
> [ 45.571865] percpu: include/linux/bitmap.h [538]: bitmap_clear
>
> Signed-off-by: Yury Norov <[email protected]>
> ---
> mm/percpu.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 3633eeefaa0d..f720f7c36b91 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1239,7 +1239,8 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int alloc_bits,
>
> /* update boundary map */
> set_bit(bit_off, chunk->bound_map);
> - bitmap_clear(chunk->bound_map, bit_off + 1, alloc_bits - 1);
> + if (alloc_bits > 1)
> + bitmap_clear(chunk->bound_map, bit_off + 1, alloc_bits - 1);
> set_bit(bit_off + alloc_bits, chunk->bound_map);
>
> chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE;
> --
> 2.34.1
>
Acked-by: Dennis Zhou <[email protected]>
Thanks,
Dennis
On Tue, Jul 19, 2022 at 12:39:02AM +0300, Andy Shevchenko wrote:
> On Mon, Jul 18, 2022 at 12:28:44PM -0700, Yury Norov wrote:
> > Create CONFIG_DEBUG_BITMAP parameter to let people use
> > new option. Default is N.
>
> Even if a separate, it should follow immediately the implementation.
If it follows the 1st patch immediately and becomes enabled, it will
generate a lot of noise for those who bisect and occasionally jump
into the middle of the series.
There are quite a lot of patchsets that create a feature in many
patches, and explicitly enable it in the very last patch.
On Mon, Jul 18, 2022 at 11:29:06PM +0200, Peter Zijlstra wrote:
> On Mon, Jul 18, 2022 at 12:28:36PM -0700, Yury Norov wrote:
>
> > ---
> > kernel/smp.c | 6 +++++-
> > 1 file changed, 5 insertions(+), 1 deletion(-)
> >
> > diff --git a/kernel/smp.c b/kernel/smp.c
> > index 7ed2b9b12f74..f96fdf944b4a 100644
> > --- a/kernel/smp.c
> > +++ b/kernel/smp.c
> > @@ -942,7 +942,11 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
> >
> > if (run_remote) {
> > cfd = this_cpu_ptr(&cfd_data);
> > - cpumask_and(cfd->cpumask, mask, cpu_online_mask);
> > + if (mask == cpu_online_mask)
> > + cpumask_copy(cfd->cpumask, cpu_online_mask);
> > + else
> > + cpumask_and(cfd->cpumask, mask, cpu_online_mask);
> > +
>
> Or... you could optimize cpumask_and() to detect the src1p == src2p case?
This is not what I would consider as optimization. For vast majority
of users this check is useless because they know for sure that
cpumasks are different.
For this case I can invent something like cpumask_and_check_eq(), so
that there'll be minimal impact on user code. (Suggestions for a better
name are very welcome.)
Thanks,
Yury
On Mon, Jul 18 2022 at 12:28, Yury Norov wrote:
> tick_check_preferred() calls cpumask_equal() even if
> curdev->cpumask == newdev->cpumask. Fix it.
What's to fix here? It's a pointless operation in a slow path and all
your "fix" is doing is to make the code larger.
Thanks,
tglx
On Sat, Aug 06 2022 at 10:30, Thomas Gleixner wrote:
> On Mon, Jul 18 2022 at 12:28, Yury Norov wrote:
>
>> tick_check_preferred() calls cpumask_equal() even if
>> curdev->cpumask == newdev->cpumask. Fix it.
>
> What's to fix here? It's a pointless operation in a slow path and all
> your "fix' is doing is to make the code larger.
In fact cpumask_equal() should have the ptr1 == ptr2 check, so you don't
have to add it all over the place.
Thanks,
tglx
On Mon, Aug 08, 2022 at 01:42:54PM +0200, Thomas Gleixner wrote:
> On Sat, Aug 06 2022 at 10:30, Thomas Gleixner wrote:
> > On Mon, Jul 18 2022 at 12:28, Yury Norov wrote:
> >
> >> tick_check_preferred() calls cpumask_equal() even if
> >> curdev->cpumask == newdev->cpumask. Fix it.
> >
> > What's to fix here? It's a pointless operation in a slow path and all
> > your "fix' is doing is to make the code larger.
Pointless operation in a slow path is still pointless.
> In fact cpumask_equal() should have the ptr1 == ptr2 check, so you don't
> have to add it all over the place.
This adds to the image size:
add/remove: 1/1 grow/shrink: 24/3 up/down: 507/-46 (461)
The more important, cpumask shouldn't check parameters because this is
an internal function. This whole series point is about adding such checks
under DEBUG_BITMAP config, and not affecting general case.
What about adding cpumask_equal__addr? (Any better name is welcome.)
Thanks,
Yury
On 08/08/2022 18.38, Yury Norov wrote:
> On Mon, Aug 08, 2022 at 01:42:54PM +0200, Thomas Gleixner wrote:
>> On Sat, Aug 06 2022 at 10:30, Thomas Gleixner wrote:
>>> On Mon, Jul 18 2022 at 12:28, Yury Norov wrote:
>>>
>>>> tick_check_preferred() calls cpumask_equal() even if
>>>> curdev->cpumask == newdev->cpumask. Fix it.
>>>
>>> What's to fix here? It's a pointless operation in a slow path and all
>>> your "fix' is doing is to make the code larger.
>
> Pointless operation in a slow path is still pointless.
>
>> In fact cpumask_equal() should have the ptr1 == ptr2 check, so you don't
>> have to add it all over the place.
>
> This adds to the image size:
> add/remove: 1/1 grow/shrink: 24/3 up/down: 507/-46 (461)
>
> The more important, cpumask shouldn't check parameters because this is
> an internal function. This whole series point is about adding such checks
> under DEBUG_BITMAP config, and not affecting general case.
Yury, calling bitmap_equal (and by extension cpumask_equal) with
something that happens in some cases to be the same pointer for both
operands is not a bug.
If you want to optimize that case, add a check in __bitmap_equal(), it
will add a few bytes (maybe 2-4 instructions) to the kernel image, much
less than what this whole series does by open-coding that check all
over, and since it's comparing two registers, it won't in any way affect
the performance of the case where the pointers are distinct.
Rasmus