2023-03-03 16:19:48

by Schspa Shi

[permalink] [raw]
Subject: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object

The is_static_object implementation relay on the initial state of the
object. If multiple places are accessed concurrently, there is a
probability that the debug object has been registered in the system, which
will invalidate the judgment of is_static_object.

The following is the scenario where the problem occurs:

T0 T1
=========================================================================
mod_timer();
debug_object_assert_init
db = get_bucket((unsigned long) addr);
raw_spin_lock_irqsave(&db->lock, flags);
obj = lookup_object(addr, db);
if (!obj) {
raw_spin_unlock_irqrestore(&db->lock, flags);
<< Context switch >>
mod_timer();
debug_object_assert_init
...
enqueue_timer();
/*
* The initial state changed a static timer object, and
* is_static_object will return false
*/

if (descr->is_static_object &&
descr->is_static_object(addr)) {
debug_object_init();
} else {
<< Hit here for a static object >>
debug_print_object(&o, "assert_init");
debug_object_fixup(descr->fixup_assert_init, addr,
ODEBUG_STATE_NOTAVAILABLE);
}
}

To fix it, we got the is_static_object called within db->lock, and save
it's state to struct debug_obj. This will ensure we won't hit the code
branch not belong to the static object.

For the same static object, debug_object_init may enter multiple times, but
there is a lock in debug_object_init to ensure that there is no problem.

Reported-by: [email protected]
Link: https://syzkaller.appspot.com/bug?id=22c8a5938eab640d1c6bcc0e3dc7be519d878462
Signed-off-by: Schspa Shi <[email protected]>
---
include/linux/debugobjects.h | 1 +
lib/debugobjects.c | 71 ++++++++++++++++++++++++++++--------
2 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 32444686b6ff4..544a6111b97f6 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -30,6 +30,7 @@ struct debug_obj {
enum debug_obj_state state;
unsigned int astate;
void *object;
+ bool is_static;
const struct debug_obj_descr *descr;
};

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index df86e649d8be0..d1be18158a1f7 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -275,6 +275,8 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
obj->descr = descr;
obj->state = ODEBUG_STATE_NONE;
obj->astate = 0;
+ obj->is_static = descr->is_static_object &&
+ descr->is_static_object(addr);
hlist_add_head(&obj->node, &b->list);
}
return obj;
@@ -581,7 +583,16 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
debug_objects_oom();
return;
}
+
check_stack = true;
+ } else {
+ /*
+ * The debug object is inited, and we should check this again
+ */
+ if (obj->is_static) {
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ return;
+ }
}

switch (obj->state) {
@@ -640,6 +651,29 @@ void debug_object_init_on_stack(void *addr, const struct debug_obj_descr *descr)
}
EXPORT_SYMBOL_GPL(debug_object_init_on_stack);

+/*
+ * Check static object.
+ */
+static bool debug_object_check_static(struct debug_bucket *db, void *addr,
+ const struct debug_obj_descr *descr)
+{
+ struct debug_obj *obj;
+
+ /*
+ * The is_static_object implementation relay on the initial state of the
+ * object. If multiple places are accessed concurrently, there is a
+ * probability that the debug object has been registered in the system,
+ * which will invalidate the judgment of is_static_object.
+ */
+ lockdep_assert_held(&db->lock);
+
+ obj = lookup_object(addr, db);
+ if (likely(obj))
+ return obj->is_static;
+
+ return descr->is_static_object && descr->is_static_object(addr);
+}
+
/**
* debug_object_activate - debug checks when an object is activated
* @addr: address of the object
@@ -656,6 +690,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
struct debug_obj o = { .object = addr,
.state = ODEBUG_STATE_NOTAVAILABLE,
.descr = descr };
+ bool is_static;

if (!debug_objects_enabled)
return 0;
@@ -696,6 +731,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
return ret;
}

+ is_static = debug_object_check_static(db, addr, descr);
raw_spin_unlock_irqrestore(&db->lock, flags);

/*
@@ -705,7 +741,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
* static object is tracked in the object tracker. If
* not, this must be a bug, so we try to fix it up.
*/
- if (descr->is_static_object && descr->is_static_object(addr)) {
+ if (is_static) {
/* track this static object */
debug_object_init(addr, descr);
debug_object_activate(addr, descr);
@@ -872,6 +908,7 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
+ bool is_static;

if (!debug_objects_enabled)
return;
@@ -886,13 +923,14 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
.state = ODEBUG_STATE_NOTAVAILABLE,
.descr = descr };

+ is_static = debug_object_check_static(db, addr, descr);
raw_spin_unlock_irqrestore(&db->lock, flags);
/*
* Maybe the object is static, and we let the type specific
* code confirm. Track this static object if true, else invoke
* fixup.
*/
- if (descr->is_static_object && descr->is_static_object(addr)) {
+ if (is_static) {
/* Track this static object */
debug_object_init(addr, descr);
} else {
@@ -1215,7 +1253,8 @@ static __initconst const struct debug_obj_descr descr_type_test = {
.fixup_free = fixup_free,
};

-static __initdata struct self_test obj = { .static_init = 0 };
+static struct self_test obj __initdata = { .static_init = 0 };
+static struct self_test sobj __initdata = { .static_init = 1 };

static void __init debug_objects_selftest(void)
{
@@ -1256,26 +1295,26 @@ static void __init debug_objects_selftest(void)
if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
goto out;

- obj.static_init = 1;
- debug_object_activate(&obj, &descr_type_test);
- if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
+ debug_object_init(&sobj, &descr_type_test);
+ debug_object_activate(&sobj, &descr_type_test);
+ if (check_results(&sobj, ODEBUG_STATE_ACTIVE, fixups, warnings))
goto out;
- debug_object_init(&obj, &descr_type_test);
- if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
+ debug_object_init(&sobj, &descr_type_test);
+ if (check_results(&sobj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
goto out;
- debug_object_free(&obj, &descr_type_test);
- if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
+ debug_object_free(&sobj, &descr_type_test);
+ if (check_results(&sobj, ODEBUG_STATE_NONE, fixups, warnings))
goto out;

#ifdef CONFIG_DEBUG_OBJECTS_FREE
- debug_object_init(&obj, &descr_type_test);
- if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings))
+ debug_object_init(&sobj, &descr_type_test);
+ if (check_results(&sobj, ODEBUG_STATE_INIT, fixups, warnings))
goto out;
- debug_object_activate(&obj, &descr_type_test);
- if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
+ debug_object_activate(&sobj, &descr_type_test);
+ if (check_results(&sobj, ODEBUG_STATE_ACTIVE, fixups, warnings))
goto out;
- __debug_check_no_obj_freed(&obj, sizeof(obj));
- if (check_results(&obj, ODEBUG_STATE_NONE, ++fixups, ++warnings))
+ __debug_check_no_obj_freed(&sobj, sizeof(sobj));
+ if (check_results(&sobj, ODEBUG_STATE_NONE, ++fixups, ++warnings))
goto out;
#endif
pr_info("selftest passed\n");
--
2.39.2



2023-03-03 16:19:57

by Schspa Shi

[permalink] [raw]
Subject: [PATCH 2/2] debugobject: add unit test for static debug object

Add test case to enusre that static debug object correctness.

Tested on little-endian arm64 qemu, result:

[ 2.385735] KTAP version 1
[ 2.385860] 1..1
[ 2.386406] KTAP version 1
[ 2.386658] # Subtest: static debugobject init
[ 2.386726] 1..1
[ 2.401777] ok 1 static_debugobject_test
[ 2.402455] ok 1 static debugobject init

Signed-off-by: Schspa Shi <[email protected]>
---
MAINTAINERS | 5 ++
lib/Kconfig.debug | 14 ++++
lib/Makefile | 2 +
lib/test_static_debug_object.c | 125 +++++++++++++++++++++++++++++++++
4 files changed, 146 insertions(+)
create mode 100644 lib/test_static_debug_object.c

diff --git a/MAINTAINERS b/MAINTAINERS
index b0db911207ba4..38187e2921691 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -23202,6 +23202,11 @@ L: [email protected]
S: Maintained
F: mm/zswap.c

+STATIC DEBUGOBJECT TEST
+M: Schspa Shi <[email protected]>
+S: Maintained
+F: lib/test_static_debug_object.c
+
THE REST
M: Linus Torvalds <[email protected]>
L: [email protected]
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c8b379e2e9adc..9d5ee631d4380 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2801,6 +2801,20 @@ config TEST_CLOCKSOURCE_WATCHDOG

If unsure, say N.

+config TEST_STATIC_DEBUGOBJECT
+ tristate "KUnit test for static debugobject"
+ depends on KUNIT
+ select KPROBES
+ select DEBUG_OBJECTS
+ select DEBUG_OBJECTS_TIMERS
+ help
+ This builds the static debugobject unit test, which runs on boot.
+ Tests the static debug object correctness.
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
+
endif # RUNTIME_TESTING_MENU

config ARCH_USE_MEMTEST
diff --git a/lib/Makefile b/lib/Makefile
index baf2821f7a00f..f663686beabd9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -427,3 +427,5 @@ $(obj)/$(TEST_FORTIFY_LOG): $(addprefix $(obj)/, $(TEST_FORTIFY_LOGS)) FORCE
ifeq ($(CONFIG_FORTIFY_SOURCE),y)
$(obj)/string.o: $(obj)/$(TEST_FORTIFY_LOG)
endif
+
+obj-$(CONFIG_TEST_STATIC_DEBUGOBJECT) += test_static_debug_object.o
diff --git a/lib/test_static_debug_object.c b/lib/test_static_debug_object.c
new file mode 100644
index 0000000000000..8a0d6ab5c24b5
--- /dev/null
+++ b/lib/test_static_debug_object.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * THis module tests the static debugobject via a static timer instance. This
+ * test use kretprobe to inject some delay to make the problem easier to
+ * reproduce.
+ *
+ * Copyright (c) 2023, Schspa Shi <[email protected]>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/kprobes.h>
+#include <linux/workqueue.h>
+#include <linux/cpu.h>
+#include <kunit/test.h>
+
+static void ktest_timer_func(struct timer_list *);
+
+static DEFINE_TIMER(ktest_timer, ktest_timer_func);
+static int timer_stop;
+DEFINE_SPINLOCK(tlock);
+
+static DEFINE_PER_CPU(struct work_struct, timer_debugobject_test_work);
+
+static void timer_debugobject_workfn(struct work_struct *work)
+{
+ mod_timer(&ktest_timer, jiffies + (5 * HZ));
+}
+
+/*
+ * Reaper for links from keyrings to dead keys.
+ */
+static void ktest_timer_func(struct timer_list *t)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tlock, flags);
+ if (!timer_stop)
+ mod_timer(&ktest_timer, jiffies + (1 * HZ));
+ spin_unlock_irqrestore(&tlock, flags);
+}
+
+
+static int static_object_check_handler(
+ struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+ void *address;
+
+ address = (void *)regs_get_register(regs, 0);
+
+ if (address == &ktest_timer) {
+ int this_cpu = raw_smp_processor_id();
+ /*
+ * This hook point adds an extra delay to make the problem
+ * easier to reproduce. We need different delay for
+ * differenct processor.
+ */
+ mdelay(this_cpu * 100);
+ }
+
+ return 0;
+}
+
+
+static struct kretprobe is_static_kretprobes = {
+ .entry_handler = static_object_check_handler,
+ .data_size = 0,
+ /* Probe up to 512 instances concurrently. */
+ .maxactive = 512,
+ .kp = {
+ .symbol_name = "timer_is_static_object",
+ }
+};
+
+
+static void static_debugobject_test(struct kunit *test)
+{
+ unsigned long flags;
+ int cpu;
+ int ret;
+
+ ret = register_kretprobe(&is_static_kretprobes);
+ KUNIT_ASSERT_EQ(test, ret, 0);
+
+ /* Do test */
+ cpus_read_lock();
+ for_each_online_cpu(cpu) {
+ struct work_struct *work =
+ &per_cpu(timer_debugobject_test_work, cpu);
+ INIT_WORK(work, timer_debugobject_workfn);
+ schedule_work_on(cpu, work);
+ }
+
+ for_each_online_cpu(cpu) {
+ struct work_struct *work =
+ &per_cpu(timer_debugobject_test_work, cpu);
+ flush_work(work);
+ }
+ cpus_read_unlock();
+
+ spin_lock_irqsave(&tlock, flags);
+ timer_stop = 0;
+ spin_unlock_irqrestore(&tlock, flags);
+
+ del_timer_sync(&ktest_timer);
+
+ unregister_kretprobe(&is_static_kretprobes);
+}
+
+static struct kunit_case static_debugobject_init_cases[] = {
+ KUNIT_CASE(static_debugobject_test),
+ {}
+};
+
+static struct kunit_suite static_debugobject_suite = {
+ .name = "static debugobject init",
+ .test_cases = static_debugobject_init_cases,
+};
+
+kunit_test_suite(static_debugobject_suite);
+MODULE_AUTHOR("Schspa <[email protected]>");
+MODULE_LICENSE("GPL");
--
2.39.2


2023-03-03 17:01:00

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object


On 3/3/23 11:19, Schspa Shi wrote:
> The is_static_object implementation relay on the initial state of the
> object. If multiple places are accessed concurrently, there is a
> probability that the debug object has been registered in the system, which
> will invalidate the judgment of is_static_object.
>
> The following is the scenario where the problem occurs:
>
> T0 T1
> =========================================================================
> mod_timer();
> debug_object_assert_init
> db = get_bucket((unsigned long) addr);
> raw_spin_lock_irqsave(&db->lock, flags);
> obj = lookup_object(addr, db);
> if (!obj) {
> raw_spin_unlock_irqrestore(&db->lock, flags);
> << Context switch >>
> mod_timer();
> debug_object_assert_init
> ...
> enqueue_timer();
> /*
> * The initial state changed a static timer object, and
> * is_static_object will return false
> */
>
> if (descr->is_static_object &&
> descr->is_static_object(addr)) {
> debug_object_init();
> } else {
> << Hit here for a static object >>
> debug_print_object(&o, "assert_init");
> debug_object_fixup(descr->fixup_assert_init, addr,
> ODEBUG_STATE_NOTAVAILABLE);
> }
> }
>
> To fix it, we got the is_static_object called within db->lock, and save
> it's state to struct debug_obj. This will ensure we won't hit the code
> branch not belong to the static object.
>
> For the same static object, debug_object_init may enter multiple times, but
> there is a lock in debug_object_init to ensure that there is no problem.
>
> Reported-by: [email protected]
> Link: https://syzkaller.appspot.com/bug?id=22c8a5938eab640d1c6bcc0e3dc7be519d878462
> Signed-off-by: Schspa Shi <[email protected]>
> ---
> include/linux/debugobjects.h | 1 +
> lib/debugobjects.c | 71 ++++++++++++++++++++++++++++--------
> 2 files changed, 56 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
> index 32444686b6ff4..544a6111b97f6 100644
> --- a/include/linux/debugobjects.h
> +++ b/include/linux/debugobjects.h
> @@ -30,6 +30,7 @@ struct debug_obj {
> enum debug_obj_state state;
> unsigned int astate;
> void *object;
> + bool is_static;
> const struct debug_obj_descr *descr;
> };

The patch looks reasonable. My main concern is the increase in size of
the debug_obj structure. It is an additional 8 bytes on 64-bit arches.
How much will we save performance-wise by caching it in the debug_obj.
Alternatively, you may pack it within the current size by, maybe,
reducing the size of state.

Cheers,
Longman

>
> diff --git a/lib/debugobjects.c b/lib/debugobjects.c
> index df86e649d8be0..d1be18158a1f7 100644
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -275,6 +275,8 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
> obj->descr = descr;
> obj->state = ODEBUG_STATE_NONE;
> obj->astate = 0;
> + obj->is_static = descr->is_static_object &&
> + descr->is_static_object(addr);
> hlist_add_head(&obj->node, &b->list);
> }
> return obj;
> @@ -581,7 +583,16 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
> debug_objects_oom();
> return;
> }
> +
> check_stack = true;
> + } else {
> + /*
> + * The debug object is inited, and we should check this again
> + */
> + if (obj->is_static) {
> + raw_spin_unlock_irqrestore(&db->lock, flags);
> + return;
> + }
> }
>
> switch (obj->state) {
> @@ -640,6 +651,29 @@ void debug_object_init_on_stack(void *addr, const struct debug_obj_descr *descr)
> }
> EXPORT_SYMBOL_GPL(debug_object_init_on_stack);
>
> +/*
> + * Check static object.
> + */
> +static bool debug_object_check_static(struct debug_bucket *db, void *addr,
> + const struct debug_obj_descr *descr)
> +{
> + struct debug_obj *obj;
> +
> + /*
> + * The is_static_object implementation relay on the initial state of the
> + * object. If multiple places are accessed concurrently, there is a
> + * probability that the debug object has been registered in the system,
> + * which will invalidate the judgment of is_static_object.
> + */
> + lockdep_assert_held(&db->lock);
> +
> + obj = lookup_object(addr, db);
> + if (likely(obj))
> + return obj->is_static;
> +
> + return descr->is_static_object && descr->is_static_object(addr);
> +}
> +
> /**
> * debug_object_activate - debug checks when an object is activated
> * @addr: address of the object
> @@ -656,6 +690,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
> struct debug_obj o = { .object = addr,
> .state = ODEBUG_STATE_NOTAVAILABLE,
> .descr = descr };
> + bool is_static;
>
> if (!debug_objects_enabled)
> return 0;
> @@ -696,6 +731,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
> return ret;
> }
>
> + is_static = debug_object_check_static(db, addr, descr);
> raw_spin_unlock_irqrestore(&db->lock, flags);
>
> /*
> @@ -705,7 +741,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
> * static object is tracked in the object tracker. If
> * not, this must be a bug, so we try to fix it up.
> */
> - if (descr->is_static_object && descr->is_static_object(addr)) {
> + if (is_static) {
> /* track this static object */
> debug_object_init(addr, descr);
> debug_object_activate(addr, descr);
> @@ -872,6 +908,7 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
> struct debug_bucket *db;
> struct debug_obj *obj;
> unsigned long flags;
> + bool is_static;
>
> if (!debug_objects_enabled)
> return;
> @@ -886,13 +923,14 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
> .state = ODEBUG_STATE_NOTAVAILABLE,
> .descr = descr };
>
> + is_static = debug_object_check_static(db, addr, descr);
> raw_spin_unlock_irqrestore(&db->lock, flags);
> /*
> * Maybe the object is static, and we let the type specific
> * code confirm. Track this static object if true, else invoke
> * fixup.
> */
> - if (descr->is_static_object && descr->is_static_object(addr)) {
> + if (is_static) {
> /* Track this static object */
> debug_object_init(addr, descr);
> } else {
> @@ -1215,7 +1253,8 @@ static __initconst const struct debug_obj_descr descr_type_test = {
> .fixup_free = fixup_free,
> };
>
> -static __initdata struct self_test obj = { .static_init = 0 };
> +static struct self_test obj __initdata = { .static_init = 0 };
> +static struct self_test sobj __initdata = { .static_init = 1 };
>
> static void __init debug_objects_selftest(void)
> {
> @@ -1256,26 +1295,26 @@ static void __init debug_objects_selftest(void)
> if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
> goto out;
>
> - obj.static_init = 1;
> - debug_object_activate(&obj, &descr_type_test);
> - if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
> + debug_object_init(&sobj, &descr_type_test);
> + debug_object_activate(&sobj, &descr_type_test);
> + if (check_results(&sobj, ODEBUG_STATE_ACTIVE, fixups, warnings))
> goto out;
> - debug_object_init(&obj, &descr_type_test);
> - if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
> + debug_object_init(&sobj, &descr_type_test);
> + if (check_results(&sobj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
> goto out;
> - debug_object_free(&obj, &descr_type_test);
> - if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
> + debug_object_free(&sobj, &descr_type_test);
> + if (check_results(&sobj, ODEBUG_STATE_NONE, fixups, warnings))
> goto out;
>
> #ifdef CONFIG_DEBUG_OBJECTS_FREE
> - debug_object_init(&obj, &descr_type_test);
> - if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings))
> + debug_object_init(&sobj, &descr_type_test);
> + if (check_results(&sobj, ODEBUG_STATE_INIT, fixups, warnings))
> goto out;
> - debug_object_activate(&obj, &descr_type_test);
> - if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
> + debug_object_activate(&sobj, &descr_type_test);
> + if (check_results(&sobj, ODEBUG_STATE_ACTIVE, fixups, warnings))
> goto out;
> - __debug_check_no_obj_freed(&obj, sizeof(obj));
> - if (check_results(&obj, ODEBUG_STATE_NONE, ++fixups, ++warnings))
> + __debug_check_no_obj_freed(&sobj, sizeof(sobj));
> + if (check_results(&sobj, ODEBUG_STATE_NONE, ++fixups, ++warnings))
> goto out;
> #endif
> pr_info("selftest passed\n");


2023-03-03 17:56:35

by Schspa Shi

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object


Waiman Long <[email protected]> writes:

> On 3/3/23 11:19, Schspa Shi wrote:
>> The is_static_object implementation relay on the initial state of the
>> object. If multiple places are accessed concurrently, there is a
>> probability that the debug object has been registered in the system, which
>> will invalidate the judgment of is_static_object.
>>
>> The following is the scenario where the problem occurs:
>>
>> T0 T1
>> =========================================================================
>> mod_timer();
>> debug_object_assert_init
>> db = get_bucket((unsigned long) addr);
>> raw_spin_lock_irqsave(&db->lock, flags);
>> obj = lookup_object(addr, db);
>> if (!obj) {
>> raw_spin_unlock_irqrestore(&db->lock, flags);
>> << Context switch >>
>> mod_timer();
>> debug_object_assert_init
>> ...
>> enqueue_timer();
>> /*
>> * The initial state changed a static timer object, and
>> * is_static_object will return false
>> */
>>
>> if (descr->is_static_object &&
>> descr->is_static_object(addr)) {
>> debug_object_init();
>> } else {
>> << Hit here for a static object >>
>> debug_print_object(&o, "assert_init");
>> debug_object_fixup(descr->fixup_assert_init, addr,
>> ODEBUG_STATE_NOTAVAILABLE);
>> }
>> }
>>
>> To fix it, we got the is_static_object called within db->lock, and save
>> it's state to struct debug_obj. This will ensure we won't hit the code
>> branch not belong to the static object.
>>
>> For the same static object, debug_object_init may enter multiple times, but
>> there is a lock in debug_object_init to ensure that there is no problem.
>>
>> Reported-by: [email protected]
>> Link: https://syzkaller.appspot.com/bug?id=22c8a5938eab640d1c6bcc0e3dc7be519d878462
>> Signed-off-by: Schspa Shi <[email protected]>
>> ---
>> include/linux/debugobjects.h | 1 +
>> lib/debugobjects.c | 71 ++++++++++++++++++++++++++++--------
>> 2 files changed, 56 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
>> index 32444686b6ff4..544a6111b97f6 100644
>> --- a/include/linux/debugobjects.h
>> +++ b/include/linux/debugobjects.h
>> @@ -30,6 +30,7 @@ struct debug_obj {
>> enum debug_obj_state state;
>> unsigned int astate;
>> void *object;
>> + bool is_static;
>> const struct debug_obj_descr *descr;
>> };
>
> The patch looks reasonable. My main concern is the increase in size of the
> debug_obj structure. It is an additional 8 bytes on 64-bit arches. How much will
> we save performance-wise by caching it in the debug_obj. Alternatively, you may
> pack it within the current size by, maybe, reducing the size of state.
>

Yes, we can change this to:

struct debug_obj {
struct hlist_node node;
struct {
enum debug_obj_state state : 31;
bool is_static : 1;
};
unsigned int astate;
void *object;
const struct debug_obj_descr *descr;
};

Which won't increase the object size.

(gdb) ptype /o struct debug_obj
/* offset | size */ type = struct debug_obj {
/* 0 | 0 */ struct hlist_node {
<incomplete type>

/* total size (bytes): 0 */
} node;
/* 16 | 4 */ struct {
/* 16: 0 | 4 */ enum debug_obj_state state : 31;
/* 19: 7 | 1 */ bool is_static : 1;

/* total size (bytes): 4 */
};
/* 20 | 4 */ unsigned int astate;
/* 24 | 8 */ void *object;
/* 32 | 8 */ const struct debug_obj_descr *descr;

/* total size (bytes): 40 */
}


> Cheers,
> Longman
>
>> diff --git a/lib/debugobjects.c b/lib/debugobjects.c
>> index df86e649d8be0..d1be18158a1f7 100644
>> --- a/lib/debugobjects.c
>> +++ b/lib/debugobjects.c
>> @@ -275,6 +275,8 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
>> obj->descr = descr;
>> obj->state = ODEBUG_STATE_NONE;
>> obj->astate = 0;
>> + obj->is_static = descr->is_static_object &&
>> + descr->is_static_object(addr);
>> hlist_add_head(&obj->node, &b->list);
>> }
>> return obj;
>> @@ -581,7 +583,16 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
>> debug_objects_oom();
>> return;
>> }
>> +
>> check_stack = true;
>> + } else {
>> + /*
>> + * The debug object is inited, and we should check this again
>> + */
>> + if (obj->is_static) {
>> + raw_spin_unlock_irqrestore(&db->lock, flags);
>> + return;
>> + }
>> }
>> switch (obj->state) {
>> @@ -640,6 +651,29 @@ void debug_object_init_on_stack(void *addr, const struct debug_obj_descr *descr)
>> }
>> EXPORT_SYMBOL_GPL(debug_object_init_on_stack);
>> +/*
>> + * Check static object.
>> + */
>> +static bool debug_object_check_static(struct debug_bucket *db, void *addr,
>> + const struct debug_obj_descr *descr)
>> +{
>> + struct debug_obj *obj;
>> +
>> + /*
>> + * The is_static_object implementation relay on the initial state of the
>> + * object. If multiple places are accessed concurrently, there is a
>> + * probability that the debug object has been registered in the system,
>> + * which will invalidate the judgment of is_static_object.
>> + */
>> + lockdep_assert_held(&db->lock);
>> +
>> + obj = lookup_object(addr, db);
>> + if (likely(obj))
>> + return obj->is_static;
>> +
>> + return descr->is_static_object && descr->is_static_object(addr);
>> +}
>> +
>> /**
>> * debug_object_activate - debug checks when an object is activated
>> * @addr: address of the object
>> @@ -656,6 +690,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
>> struct debug_obj o = { .object = addr,
>> .state = ODEBUG_STATE_NOTAVAILABLE,
>> .descr = descr };
>> + bool is_static;
>> if (!debug_objects_enabled)
>> return 0;
>> @@ -696,6 +731,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
>> return ret;
>> }
>> + is_static = debug_object_check_static(db, addr, descr);
>> raw_spin_unlock_irqrestore(&db->lock, flags);
>> /*
>> @@ -705,7 +741,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
>> * static object is tracked in the object tracker. If
>> * not, this must be a bug, so we try to fix it up.
>> */
>> - if (descr->is_static_object && descr->is_static_object(addr)) {
>> + if (is_static) {
>> /* track this static object */
>> debug_object_init(addr, descr);
>> debug_object_activate(addr, descr);
>> @@ -872,6 +908,7 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
>> struct debug_bucket *db;
>> struct debug_obj *obj;
>> unsigned long flags;
>> + bool is_static;
>> if (!debug_objects_enabled)
>> return;
>> @@ -886,13 +923,14 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
>> .state = ODEBUG_STATE_NOTAVAILABLE,
>> .descr = descr };
>> + is_static = debug_object_check_static(db, addr, descr);
>> raw_spin_unlock_irqrestore(&db->lock, flags);
>> /*
>> * Maybe the object is static, and we let the type specific
>> * code confirm. Track this static object if true, else invoke
>> * fixup.
>> */
>> - if (descr->is_static_object && descr->is_static_object(addr)) {
>> + if (is_static) {
>> /* Track this static object */
>> debug_object_init(addr, descr);
>> } else {
>> @@ -1215,7 +1253,8 @@ static __initconst const struct debug_obj_descr descr_type_test = {
>> .fixup_free = fixup_free,
>> };
>> -static __initdata struct self_test obj = { .static_init = 0 };
>> +static struct self_test obj __initdata = { .static_init = 0 };
>> +static struct self_test sobj __initdata = { .static_init = 1 };
>> static void __init debug_objects_selftest(void)
>> {
>> @@ -1256,26 +1295,26 @@ static void __init debug_objects_selftest(void)
>> if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
>> goto out;
>> - obj.static_init = 1;
>> - debug_object_activate(&obj, &descr_type_test);
>> - if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
>> + debug_object_init(&sobj, &descr_type_test);
>> + debug_object_activate(&sobj, &descr_type_test);
>> + if (check_results(&sobj, ODEBUG_STATE_ACTIVE, fixups, warnings))
>> goto out;
>> - debug_object_init(&obj, &descr_type_test);
>> - if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
>> + debug_object_init(&sobj, &descr_type_test);
>> + if (check_results(&sobj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
>> goto out;
>> - debug_object_free(&obj, &descr_type_test);
>> - if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
>> + debug_object_free(&sobj, &descr_type_test);
>> + if (check_results(&sobj, ODEBUG_STATE_NONE, fixups, warnings))
>> goto out;
>> #ifdef CONFIG_DEBUG_OBJECTS_FREE
>> - debug_object_init(&obj, &descr_type_test);
>> - if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings))
>> + debug_object_init(&sobj, &descr_type_test);
>> + if (check_results(&sobj, ODEBUG_STATE_INIT, fixups, warnings))
>> goto out;
>> - debug_object_activate(&obj, &descr_type_test);
>> - if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
>> + debug_object_activate(&sobj, &descr_type_test);
>> + if (check_results(&sobj, ODEBUG_STATE_ACTIVE, fixups, warnings))
>> goto out;
>> - __debug_check_no_obj_freed(&obj, sizeof(obj));
>> - if (check_results(&obj, ODEBUG_STATE_NONE, ++fixups, ++warnings))
>> + __debug_check_no_obj_freed(&sobj, sizeof(sobj));
>> + if (check_results(&sobj, ODEBUG_STATE_NONE, ++fixups, ++warnings))
>> goto out;
>> #endif
>> pr_info("selftest passed\n");


--
BRs
Schspa Shi

2023-03-03 23:47:16

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object

Hi!

On Sat, Mar 04 2023 at 00:19, Schspa Shi wrote:

> The is_static_object implementation relay on the initial state of the
> object. If multiple places are accessed concurrently, there is a
> probability that the debug object has been registered in the system, which
> will invalidate the judgment of is_static_object.
>
> The following is the scenario where the problem occurs:
>
> T0 T1
> =========================================================================
> mod_timer();
> debug_object_assert_init
> db = get_bucket((unsigned long) addr);
> raw_spin_lock_irqsave(&db->lock, flags);
> obj = lookup_object(addr, db);
> if (!obj) {
> raw_spin_unlock_irqrestore(&db->lock, flags);
> << Context switch >>
> mod_timer();
> debug_object_assert_init
> ...
> enqueue_timer();
> /*
> * The initial state changed a static timer object, and
> * is_static_object will return false
> */
>
> if (descr->is_static_object &&
> descr->is_static_object(addr)) {
> debug_object_init();
> } else {
> << Hit here for a static object >>
> debug_print_object(&o, "assert_init");
> debug_object_fixup(descr->fixup_assert_init, addr,
> ODEBUG_STATE_NOTAVAILABLE);
> }
> }

That analysis is correct.

> diff --git a/lib/debugobjects.c b/lib/debugobjects.c
> index df86e649d8be0..d1be18158a1f7 100644
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -275,6 +275,8 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
> obj->descr = descr;
> obj->state = ODEBUG_STATE_NONE;
> obj->astate = 0;
> + obj->is_static = descr->is_static_object &&
> + descr->is_static_object(addr);
> hlist_add_head(&obj->node, &b->list);
> }
> return obj;
> @@ -581,7 +583,16 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
> debug_objects_oom();
> return;
> }
> +
> check_stack = true;
> + } else {
> + /*
> + * The debug object is inited, and we should check this again
> + */
> + if (obj->is_static) {
> + raw_spin_unlock_irqrestore(&db->lock, flags);
> + return;

This is broken. If the object is static and already hashed and in active
state then this returns and fails to detect the re-initialization of an
active object.

> +/*
> + * Check static object.
> + */
> +static bool debug_object_check_static(struct debug_bucket *db, void *addr,
> + const struct debug_obj_descr *descr)
> +{
> + struct debug_obj *obj;
> +
> + /*
> + * The is_static_object implementation relay on the initial state of the
> + * object. If multiple places are accessed concurrently, there is a
> + * probability that the debug object has been registered in the system,
> + * which will invalidate the judgment of is_static_object.
> + */
> + lockdep_assert_held(&db->lock);
> +
> + obj = lookup_object(addr, db);

What does this lookup solve? See below...

> + if (likely(obj))
> + return obj->is_static;
> +
> + return descr->is_static_object && descr->is_static_object(addr);
> +}

> @@ -696,6 +731,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
> return ret;
> }
>
> + is_static = debug_object_check_static(db, addr, descr);

It's already clear that the object is not hashed because activate does:

lock()
lookup()
if (ob) {
....
unlock();
}

At this point nothing has changed after the lookup because
the hash bucket is still locked.

> raw_spin_unlock_irqrestore(&db->lock, flags);

> /*
> @@ -705,7 +741,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
> * static object is tracked in the object tracker. If
> * not, this must be a bug, so we try to fix it up.
> */
> - if (descr->is_static_object && descr->is_static_object(addr)) {
> + if (is_static) {
> /* track this static object */
> debug_object_init(addr, descr);

This cannot work as the change in debug_object_init() is not correct and
there is no way to make it correct.

> -static __initdata struct self_test obj = { .static_init = 0 };
> +static struct self_test obj __initdata = { .static_init = 0 };
> +static struct self_test sobj __initdata = { .static_init = 1 };

...

> - obj.static_init = 1;

Plus the s/obj/sobj/ which should be equivalent, unless I'm missing
something here.

But that also changes the actual test:

- obj.static_init = 1;
- debug_object_activate(&obj, &descr_type_test);
- if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
+ debug_object_init(&sobj, &descr_type_test);
+ debug_object_activate(&sobj, &descr_type_test);
+ if (check_results(&sobj, ODEBUG_STATE_ACTIVE, fixups, warnings))
goto out;

That's _not_ equivalent.

The original code checks exactly for the case where the object
initialization does not happen before debug_object_activate() is invoked
which is perfectly correct for statically initialized
objects. Initializing the statically allocated object breaks that
test. The changelog is utterly silent about that.

The proper fix for this is obvious from the analysis. The problem
happens because the failed lookup drops the hash bucket lock which opens
the window for the concurrent caller. So why dropping the hash bucket
lock at all?

Uncompiled and untested fix below.

Thanks,

tglx
---
lib/debugobjects.c | 127 +++++++++++++++++++++++++++--------------------------
1 file changed, 67 insertions(+), 60 deletions(-)

--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -216,10 +216,6 @@ static struct debug_obj *__alloc_object(
return obj;
}

-/*
- * Allocate a new object. If the pool is empty, switch off the debugger.
- * Must be called with interrupts disabled.
- */
static struct debug_obj *
alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
{
@@ -273,7 +269,7 @@ alloc_object(void *addr, struct debug_bu
if (obj) {
obj->object = addr;
obj->descr = descr;
- obj->state = ODEBUG_STATE_NONE;
+ obj->state = ODEBUG_STATE_INIT;
obj->astate = 0;
hlist_add_head(&obj->node, &b->list);
}
@@ -552,11 +548,49 @@ static void debug_object_is_on_stack(voi
WARN_ON(1);
}

+static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket *b,
+ const struct debug_obj_descr *descr,
+ bool onstack, bool alloc_ifstatic)
+{
+ struct debug_obj *obj = lookup_object(addr, b);
+
+ if (likely(obj))
+ return obj;
+
+ /*
+ * debug_object_init() unconditionally allocates untracked
+ * objects. It does not matter whether it is a static object or
+ * not.
+ *
+ * debug_object_assert_init() and debug_object_activate() allow
+ * allocation only if the descriptor callback confirms that the
+ * object is static and considered initialized. For non-static
+ * objects the allocation needs to be done from the fixup callback.
+ */
+ if (unlikely(alloc_ifstatic)) {
+ if (!descr->is_static_object || !descr->is_static_object(addr))
+ return ERR_PTR(-ENOENT);
+ }
+
+ /*
+ * On success the returned object is in INIT state as that's the
+ * correct state for all callers.
+ */
+ obj = alloc_object(addr, b, descr);
+ if (likely(obj)) {
+ debug_object_is_on_stack(addr, onstack);
+ return obj;
+ }
+
+ /* Out of memory. Do the cleanup outside of the locked region */
+ debug_objects_enabled = 0;
+ return NULL;
+}
+
static void
__debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
{
enum debug_obj_state state;
- bool check_stack = false;
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
@@ -572,16 +606,11 @@ static void

raw_spin_lock_irqsave(&db->lock, flags);

- obj = lookup_object(addr, db);
- if (!obj) {
- obj = alloc_object(addr, db, descr);
- if (!obj) {
- debug_objects_enabled = 0;
- raw_spin_unlock_irqrestore(&db->lock, flags);
- debug_objects_oom();
- return;
- }
- check_stack = true;
+ obj = lookup_object_or_alloc(addr, db, descr, onstack, false);
+ if (unlikely(!obj)) {
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_objects_oom();
+ return;
}

switch (obj->state) {
@@ -607,8 +636,6 @@ static void
}

raw_spin_unlock_irqrestore(&db->lock, flags);
- if (check_stack)
- debug_object_is_on_stack(addr, onstack);
}

/**
@@ -648,14 +675,12 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_s
*/
int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
{
+ struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
enum debug_obj_state state;
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
int ret;
- struct debug_obj o = { .object = addr,
- .state = ODEBUG_STATE_NOTAVAILABLE,
- .descr = descr };

if (!debug_objects_enabled)
return 0;
@@ -664,8 +689,8 @@ int debug_object_activate(void *addr, co

raw_spin_lock_irqsave(&db->lock, flags);

- obj = lookup_object(addr, db);
- if (obj) {
+ obj = lookup_object_or_alloc(addr, db, descr, false, true);
+ if (likely(!IS_ERR_OR_NULL(obj))) {
bool print_object = false;

switch (obj->state) {
@@ -698,24 +723,16 @@ int debug_object_activate(void *addr, co

raw_spin_unlock_irqrestore(&db->lock, flags);

- /*
- * We are here when a static object is activated. We
- * let the type specific code confirm whether this is
- * true or not. if true, we just make sure that the
- * static object is tracked in the object tracker. If
- * not, this must be a bug, so we try to fix it up.
- */
- if (descr->is_static_object && descr->is_static_object(addr)) {
- /* track this static object */
- debug_object_init(addr, descr);
- debug_object_activate(addr, descr);
- } else {
- debug_print_object(&o, "activate");
- ret = debug_object_fixup(descr->fixup_activate, addr,
- ODEBUG_STATE_NOTAVAILABLE);
- return ret ? 0 : -EINVAL;
+ /* If NULL the allocaction has hit OOM */
+ if (!obj) {
+ debug_objects_oom();
+ return 0;
}
- return 0;
+
+ /* Object is neither static nor tracked. It's not initialized */
+ debug_print_object(&o, "activate");
+ ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE);
+ return ret ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(debug_object_activate);

@@ -869,6 +886,7 @@ EXPORT_SYMBOL_GPL(debug_object_free);
*/
void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
{
+ struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
@@ -879,31 +897,20 @@ void debug_object_assert_init(void *addr
db = get_bucket((unsigned long) addr);

raw_spin_lock_irqsave(&db->lock, flags);
+ obj = lookup_object_or_alloc(addr, db, descr, false, true);
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (likely(!IS_ERR_OR_NULL(obj)))
+ return;

- obj = lookup_object(addr, db);
+ /* If NULL the allocaction has hit OOM */
if (!obj) {
- struct debug_obj o = { .object = addr,
- .state = ODEBUG_STATE_NOTAVAILABLE,
- .descr = descr };
-
- raw_spin_unlock_irqrestore(&db->lock, flags);
- /*
- * Maybe the object is static, and we let the type specific
- * code confirm. Track this static object if true, else invoke
- * fixup.
- */
- if (descr->is_static_object && descr->is_static_object(addr)) {
- /* Track this static object */
- debug_object_init(addr, descr);
- } else {
- debug_print_object(&o, "assert_init");
- debug_object_fixup(descr->fixup_assert_init, addr,
- ODEBUG_STATE_NOTAVAILABLE);
- }
+ debug_objects_oom();
return;
}

- raw_spin_unlock_irqrestore(&db->lock, flags);
+ /* Object is neither tracked nor static. It's not initialized. */
+ debug_print_object(&o, "assert_init");
+ debug_object_fixup(descr->fixup_assert_init, addr, ODEBUG_STATE_NOTAVAILABLE);
}
EXPORT_SYMBOL_GPL(debug_object_assert_init);




2023-03-04 00:14:27

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object

On Sat, Mar 04 2023 at 01:53, Schspa Shi wrote:
> Waiman Long <[email protected]> writes:
>>> diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
>>> index 32444686b6ff4..544a6111b97f6 100644
>>> --- a/include/linux/debugobjects.h
>>> +++ b/include/linux/debugobjects.h
>>> @@ -30,6 +30,7 @@ struct debug_obj {
>>> enum debug_obj_state state;
>>> unsigned int astate;
>>> void *object;
>>> + bool is_static;
>>> const struct debug_obj_descr *descr;
>>> };
>>
>> The patch looks reasonable. My main concern is the increase in size of the
>> debug_obj structure. It is an additional 8 bytes on 64-bit arches. How much will
>> we save performance-wise by caching it in the debug_obj. Alternatively, you may
>> pack it within the current size by, maybe, reducing the size of state.
>>
>
> Yes, we can change this to:
>
> struct debug_obj {
> struct hlist_node node;
> struct {
> enum debug_obj_state state : 31;
> bool is_static : 1;
> };

and thereby making debugobjects even more slower than it is right
now. Please check the resulting assembly code before proposing quick
"solutions".

Thanks,

tglx

2023-03-22 17:04:50

by Schspa Shi

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object


Thomas Gleixner <[email protected]> writes:

> Hi!
>
> On Sat, Mar 04 2023 at 00:19, Schspa Shi wrote:
>
>> The is_static_object implementation relay on the initial state of the
>> object. If multiple places are accessed concurrently, there is a
>> probability that the debug object has been registered in the system, which
>> will invalidate the judgment of is_static_object.
>>
>> The following is the scenario where the problem occurs:
>>
>> T0 T1
>> =========================================================================
>> mod_timer();
>> debug_object_assert_init
>> db = get_bucket((unsigned long) addr);
>> raw_spin_lock_irqsave(&db->lock, flags);
>> obj = lookup_object(addr, db);
>> if (!obj) {
>> raw_spin_unlock_irqrestore(&db->lock, flags);
>> << Context switch >>
>> mod_timer();
>> debug_object_assert_init
>> ...
>> enqueue_timer();
>> /*
>> * The initial state changed a static timer object, and
>> * is_static_object will return false
>> */
>>
>> if (descr->is_static_object &&
>> descr->is_static_object(addr)) {
>> debug_object_init();
>> } else {
>> << Hit here for a static object >>
>> debug_print_object(&o, "assert_init");
>> debug_object_fixup(descr->fixup_assert_init, addr,
>> ODEBUG_STATE_NOTAVAILABLE);
>> }
>> }
>
> That analysis is correct.
>
>> diff --git a/lib/debugobjects.c b/lib/debugobjects.c
>> index df86e649d8be0..d1be18158a1f7 100644
>> --- a/lib/debugobjects.c
>> +++ b/lib/debugobjects.c
>> @@ -275,6 +275,8 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
>> obj->descr = descr;
>> obj->state = ODEBUG_STATE_NONE;
>> obj->astate = 0;
>> + obj->is_static = descr->is_static_object &&
>> + descr->is_static_object(addr);
>> hlist_add_head(&obj->node, &b->list);
>> }
>> return obj;
>> @@ -581,7 +583,16 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
>> debug_objects_oom();
>> return;
>> }
>> +
>> check_stack = true;
>> + } else {
>> + /*
>> + * The debug object is inited, and we should check this again
>> + */
>> + if (obj->is_static) {
>> + raw_spin_unlock_irqrestore(&db->lock, flags);
>> + return;
>
> This is broken. If the object is static and already hashed and in active
> state then this returns and fails to detect the re-initialization of an
> active object.
>

Yes, it's right, this can be fixed by pass a skip_ifstatic parameters
from debug_object_activate. then re-initialization of an active object
can be detected.

>> +/*
>> + * Check static object.
>> + */
>> +static bool debug_object_check_static(struct debug_bucket *db, void *addr,
>> + const struct debug_obj_descr *descr)
>> +{
>> + struct debug_obj *obj;
>> +
>> + /*
>> + * The is_static_object implementation relay on the initial state of the
>> + * object. If multiple places are accessed concurrently, there is a
>> + * probability that the debug object has been registered in the system,
>> + * which will invalidate the judgment of is_static_object.
>> + */
>> + lockdep_assert_held(&db->lock);
>> +
>> + obj = lookup_object(addr, db);
>
> What does this lookup solve? See below...
>

This lookup can be droped by add a extra obj parameter.

>> + if (likely(obj))
>> + return obj->is_static;
>> +
>> + return descr->is_static_object && descr->is_static_object(addr);
>> +}
>
>> @@ -696,6 +731,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
>> return ret;
>> }
>>
>> + is_static = debug_object_check_static(db, addr, descr);
>
> It's already clear that the object is not hashed because activate does:
>
> lock()
> lookup()
> if (ob) {
> ....
> unlock();
> }
>
> At this point nothing has changed after the lookup because
> the hash bucket is still locked.
>

Yes, the obj variable should be passed to debug_object_check_static.

>> raw_spin_unlock_irqrestore(&db->lock, flags);
>
>> /*
>> @@ -705,7 +741,7 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
>> * static object is tracked in the object tracker. If
>> * not, this must be a bug, so we try to fix it up.
>> */
>> - if (descr->is_static_object && descr->is_static_object(addr)) {
>> + if (is_static) {
>> /* track this static object */
>> debug_object_init(addr, descr);
>
> This cannot work as the change in debug_object_init() is not correct and
> there is no way to make it correct.
>

Function debug_object_init() can be fixed as explained above.

>> -static __initdata struct self_test obj = { .static_init = 0 };
>> +static struct self_test obj __initdata = { .static_init = 0 };
>> +static struct self_test sobj __initdata = { .static_init = 1 };
>
> ...
>
>> - obj.static_init = 1;
>
> Plus the s/obj/sobj/ which should be equivalent, unless I'm missing
> something here.
>

We have saved the is_static state when it is used at the first time, so
the is_static_object function won't be called in this environment.

> But that also changes the actual test:
>
> - obj.static_init = 1;
> - debug_object_activate(&obj, &descr_type_test);
> - if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
> + debug_object_init(&sobj, &descr_type_test);
> + debug_object_activate(&sobj, &descr_type_test);
> + if (check_results(&sobj, ODEBUG_STATE_ACTIVE, fixups, warnings))
> goto out;
>
> That's _not_ equivalent.
>
> The original code checks exactly for the case where the object
> initialization does not happen before debug_object_activate() is invoked
> which is perfectly correct for statically initialized
> objects. Initializing the statically allocated object breaks that
> test. The changelog is utterly silent about that.
>

This debug_object_init(&sobj, &descr_type_test) can be droped. I'm
sorry to add this extra change.

> The proper fix for this is obvious from the analysis. The problem
> happens because the failed lookup drops the hash bucket lock which opens
> the window for the concurrent caller. So why dropping the hash bucket
> lock at all?
>
> Uncompiled and untested fix below.
>
> Thanks,
>
> tglx
> ---
> lib/debugobjects.c | 127 +++++++++++++++++++++++++++--------------------------
> 1 file changed, 67 insertions(+), 60 deletions(-)
>
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -216,10 +216,6 @@ static struct debug_obj *__alloc_object(
> return obj;
> }
>
> -/*
> - * Allocate a new object. If the pool is empty, switch off the debugger.
> - * Must be called with interrupts disabled.
> - */
> static struct debug_obj *
> alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
> {
> @@ -273,7 +269,7 @@ alloc_object(void *addr, struct debug_bu
> if (obj) {
> obj->object = addr;
> obj->descr = descr;
> - obj->state = ODEBUG_STATE_NONE;
> + obj->state = ODEBUG_STATE_INIT;

This actually droped the ODEBUG_STATE_NONE state. If we active a
uninitialized object, there will be no error report.

This should be

if (descr->is_static_object && descr->is_static_object(addr))
obj->state = ODEBUG_STATE_INIT;
else
obj->state = ODEBUG_STATE_NONE;

But this can't resolve the initial state requirement from the
is_static_object() call.

I think we can report an error when calling debug_object_free() from a
static object. If don't do so, there is no way to determine it's a
static object. When its initialization state changes, the
is_static_object() call will return the wrong value.

Please see the fellowing test case:

obj.static_init = 1;
debug_object_activate(&obj, &descr_type_test);
if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
goto out;
debug_object_init(&obj, &descr_type_test);
if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
goto out;
debug_object_free(&obj, &descr_type_test);
if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
goto out;

/*
* for the static debugobject, it's initial value will be changed
* once used.
*/
obj.static_init = 2;
debug_object_activate(&obj, &descr_type_test);
if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
goto out;

/* This test will fail */

> obj->astate = 0;
> hlist_add_head(&obj->node, &b->list);
> }
> @@ -552,11 +548,49 @@ static void debug_object_is_on_stack(voi
> WARN_ON(1);
> }
>
> +static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket *b,
> + const struct debug_obj_descr *descr,
> + bool onstack, bool alloc_ifstatic)
> +{
> + struct debug_obj *obj = lookup_object(addr, b);
> +
> + if (likely(obj))
> + return obj;
> +
> + /*
> + * debug_object_init() unconditionally allocates untracked
> + * objects. It does not matter whether it is a static object or
> + * not.
> + *
> + * debug_object_assert_init() and debug_object_activate() allow
> + * allocation only if the descriptor callback confirms that the
> + * object is static and considered initialized. For non-static
> + * objects the allocation needs to be done from the fixup callback.
> + */
> + if (unlikely(alloc_ifstatic)) {
> + if (!descr->is_static_object || !descr->is_static_object(addr))
> + return ERR_PTR(-ENOENT);
> + }
> +
> + /*
> + * On success the returned object is in INIT state as that's the
> + * correct state for all callers.
> + */
> + obj = alloc_object(addr, b, descr);
> + if (likely(obj)) {
> + debug_object_is_on_stack(addr, onstack);
> + return obj;
> + }
> +
> + /* Out of memory. Do the cleanup outside of the locked region */
> + debug_objects_enabled = 0;
> + return NULL;
> +}
> +
> static void
> __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
> {
> enum debug_obj_state state;
> - bool check_stack = false;
> struct debug_bucket *db;
> struct debug_obj *obj;
> unsigned long flags;
> @@ -572,16 +606,11 @@ static void
>
> raw_spin_lock_irqsave(&db->lock, flags);
>
> - obj = lookup_object(addr, db);
> - if (!obj) {
> - obj = alloc_object(addr, db, descr);
> - if (!obj) {
> - debug_objects_enabled = 0;
> - raw_spin_unlock_irqrestore(&db->lock, flags);
> - debug_objects_oom();
> - return;
> - }
> - check_stack = true;
> + obj = lookup_object_or_alloc(addr, db, descr, onstack, false);
> + if (unlikely(!obj)) {
> + raw_spin_unlock_irqrestore(&db->lock, flags);
> + debug_objects_oom();
> + return;
> }
>
> switch (obj->state) {
> @@ -607,8 +636,6 @@ static void
> }
>
> raw_spin_unlock_irqrestore(&db->lock, flags);
> - if (check_stack)
> - debug_object_is_on_stack(addr, onstack);
> }
>
> /**
> @@ -648,14 +675,12 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_s
> */
> int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
> {
> + struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
> enum debug_obj_state state;
> struct debug_bucket *db;
> struct debug_obj *obj;
> unsigned long flags;
> int ret;
> - struct debug_obj o = { .object = addr,
> - .state = ODEBUG_STATE_NOTAVAILABLE,
> - .descr = descr };
>
> if (!debug_objects_enabled)
> return 0;
> @@ -664,8 +689,8 @@ int debug_object_activate(void *addr, co
>
> raw_spin_lock_irqsave(&db->lock, flags);
>
> - obj = lookup_object(addr, db);
> - if (obj) {
> + obj = lookup_object_or_alloc(addr, db, descr, false, true);
> + if (likely(!IS_ERR_OR_NULL(obj))) {
> bool print_object = false;
>
> switch (obj->state) {
> @@ -698,24 +723,16 @@ int debug_object_activate(void *addr, co
>
> raw_spin_unlock_irqrestore(&db->lock, flags);
>
> - /*
> - * We are here when a static object is activated. We
> - * let the type specific code confirm whether this is
> - * true or not. if true, we just make sure that the
> - * static object is tracked in the object tracker. If
> - * not, this must be a bug, so we try to fix it up.
> - */
> - if (descr->is_static_object && descr->is_static_object(addr)) {
> - /* track this static object */
> - debug_object_init(addr, descr);
> - debug_object_activate(addr, descr);
> - } else {
> - debug_print_object(&o, "activate");
> - ret = debug_object_fixup(descr->fixup_activate, addr,
> - ODEBUG_STATE_NOTAVAILABLE);
> - return ret ? 0 : -EINVAL;
> + /* If NULL the allocaction has hit OOM */
> + if (!obj) {
> + debug_objects_oom();
> + return 0;
> }
> - return 0;
> +
> + /* Object is neither static nor tracked. It's not initialized */
> + debug_print_object(&o, "activate");
> + ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE);
> + return ret ? 0 : -EINVAL;
> }
> EXPORT_SYMBOL_GPL(debug_object_activate);
>
> @@ -869,6 +886,7 @@ EXPORT_SYMBOL_GPL(debug_object_free);
> */
> void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
> {
> + struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
> struct debug_bucket *db;
> struct debug_obj *obj;
> unsigned long flags;
> @@ -879,31 +897,20 @@ void debug_object_assert_init(void *addr
> db = get_bucket((unsigned long) addr);
>
> raw_spin_lock_irqsave(&db->lock, flags);
> + obj = lookup_object_or_alloc(addr, db, descr, false, true);
> + raw_spin_unlock_irqrestore(&db->lock, flags);
> + if (likely(!IS_ERR_OR_NULL(obj)))
> + return;
>
> - obj = lookup_object(addr, db);
> + /* If NULL the allocaction has hit OOM */
> if (!obj) {
> - struct debug_obj o = { .object = addr,
> - .state = ODEBUG_STATE_NOTAVAILABLE,
> - .descr = descr };
> -
> - raw_spin_unlock_irqrestore(&db->lock, flags);
> - /*
> - * Maybe the object is static, and we let the type specific
> - * code confirm. Track this static object if true, else invoke
> - * fixup.
> - */
> - if (descr->is_static_object && descr->is_static_object(addr)) {
> - /* Track this static object */
> - debug_object_init(addr, descr);
> - } else {
> - debug_print_object(&o, "assert_init");
> - debug_object_fixup(descr->fixup_assert_init, addr,
> - ODEBUG_STATE_NOTAVAILABLE);
> - }
> + debug_objects_oom();
> return;
> }
>
> - raw_spin_unlock_irqrestore(&db->lock, flags);
> + /* Object is neither tracked nor static. It's not initialized. */
> + debug_print_object(&o, "assert_init");
> + debug_object_fixup(descr->fixup_assert_init, addr, ODEBUG_STATE_NOTAVAILABLE);
> }
> EXPORT_SYMBOL_GPL(debug_object_assert_init);
>

I test this patch, with my above change, and it seems to work well, but
we still need to add extra flags to store its static state. And
debug_object_free() should report an error for the static object.

I think we should introduce lookup_object_or_alloc and is_static at the
same time.


--
BRs
Schspa Shi

2023-03-22 17:59:21

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object

On Wed, Mar 22 2023 at 23:40, Schspa Shi wrote:
> Thomas Gleixner <[email protected]> writes:
>>> + } else {
>>> + /*
>>> + * The debug object is inited, and we should check this again
>>> + */
>>> + if (obj->is_static) {
>>> + raw_spin_unlock_irqrestore(&db->lock, flags);
>>> + return;
>>
>> This is broken. If the object is static and already hashed and in active
>> state then this returns and fails to detect the re-initialization of an
>> active object.
>>
>
> Yes, it's right, this can be fixed by pass a skip_ifstatic parameters
> from debug_object_activate. then re-initialization of an active object
> can be detected.

>>> -static __initdata struct self_test obj = { .static_init = 0 };
>>> +static struct self_test obj __initdata = { .static_init = 0 };
>>> +static struct self_test sobj __initdata = { .static_init = 1 };
>>
>> ...
>>
>>> - obj.static_init = 1;
>>
>> Plus the s/obj/sobj/ which should be equivalent, unless I'm missing
>> something here.
>>
>
> We have saved the is_static state when it is used at the first time, so
> the is_static_object function won't be called in this environment.

There is zero requirement for saving that state.

>> lib/debugobjects.c | 127 +++++++++++++++++++++++++++--------------------------
>> 1 file changed, 67 insertions(+), 60 deletions(-)
>>
>> --- a/lib/debugobjects.c
>> +++ b/lib/debugobjects.c
>> @@ -216,10 +216,6 @@ static struct debug_obj *__alloc_object(
>> return obj;
>> }
>>
>> -/*
>> - * Allocate a new object. If the pool is empty, switch off the debugger.
>> - * Must be called with interrupts disabled.
>> - */
>> static struct debug_obj *
>> alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
>> {
>> @@ -273,7 +269,7 @@ alloc_object(void *addr, struct debug_bu
>> if (obj) {
>> obj->object = addr;
>> obj->descr = descr;
>> - obj->state = ODEBUG_STATE_NONE;
>> + obj->state = ODEBUG_STATE_INIT;
>
> This actually droped the ODEBUG_STATE_NONE state. If we active a
> uninitialized object, there will be no error report.

Indeed.

> This should be
>
> if (descr->is_static_object && descr->is_static_object(addr))
> obj->state = ODEBUG_STATE_INIT;
> else
> obj->state = ODEBUG_STATE_NONE;

Kinda.

> But this can't resolve the initial state requirement from the
> is_static_object() call.

Which requirement? The is_static_object() call takes the address of the
actual object and has nothing to do with the tracking object at all.

> I think we can report an error when calling debug_object_free() from a
> static object. If don't do so, there is no way to determine it's a
> static object.

The memory allocator will tell you loudly when you try to free a static
object. So no point in having another check.

> When its initialization state changes, the is_static_object() call
> will return the wrong value.

That call is only relevant on the first invocation when there is no
tracking object yet. So what's the problem you are trying to solve?

> Please see the fellowing test case:
>
> obj.static_init = 1;

This is pointless, really. Once the object is tracked it does not matter
at all whether it was statically or dynamically allocated.

>
> I test this patch, with my above change, and it seems to work well, but
> we still need to add extra flags to store its static state. And
> debug_object_free() should report an error for the static object.

No, we don't.

> I think we should introduce lookup_object_or_alloc and is_static at the
> same time.

What for?

Thanks,

tglx

2023-03-22 18:08:27

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object

On Wed, Mar 22 2023 at 18:46, Thomas Gleixner wrote:
> On Wed, Mar 22 2023 at 23:40, Schspa Shi wrote:
>> I think we should introduce lookup_object_or_alloc and is_static at the
>> same time.
>
> What for?

The below has the NONE/INIT issue addressed and plugs that race
completely, so no further action required.

Thanks,

tglx
---
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -216,10 +216,6 @@ static struct debug_obj *__alloc_object(
return obj;
}

-/*
- * Allocate a new object. If the pool is empty, switch off the debugger.
- * Must be called with interrupts disabled.
- */
static struct debug_obj *
alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
{
@@ -552,11 +548,49 @@ static void debug_object_is_on_stack(voi
WARN_ON(1);
}

+static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket *b,
+ const struct debug_obj_descr *descr,
+ bool onstack, bool alloc_ifstatic)
+{
+ struct debug_obj *obj = lookup_object(addr, b);
+ enum debug_obj_state state = ODEBUG_STATE_NONE;
+
+ if (likely(obj))
+ return obj;
+
+ /*
+ * debug_object_init() unconditionally allocates untracked
+ * objects. It does not matter whether it is a static object or
+ * not.
+ *
+ * debug_object_assert_init() and debug_object_activate() allow
+ * allocation only if the descriptor callback confirms that the
+ * object is static and considered initialized. For non-static
+ * objects the allocation needs to be done from the fixup callback.
+ */
+ if (unlikely(alloc_ifstatic)) {
+ if (!descr->is_static_object || !descr->is_static_object(addr))
+ return ERR_PTR(-ENOENT);
+ /* Statically allocated objects are considered initialized */
+ state = ODEBUG_STATE_INIT;
+ }
+
+ obj = alloc_object(addr, b, descr);
+ if (likely(obj)) {
+ obj->state = state;
+ debug_object_is_on_stack(addr, onstack);
+ return obj;
+ }
+
+ /* Out of memory. Do the cleanup outside of the locked region */
+ debug_objects_enabled = 0;
+ return NULL;
+}
+
static void
__debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
{
enum debug_obj_state state;
- bool check_stack = false;
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
@@ -572,16 +606,11 @@ static void

raw_spin_lock_irqsave(&db->lock, flags);

- obj = lookup_object(addr, db);
- if (!obj) {
- obj = alloc_object(addr, db, descr);
- if (!obj) {
- debug_objects_enabled = 0;
- raw_spin_unlock_irqrestore(&db->lock, flags);
- debug_objects_oom();
- return;
- }
- check_stack = true;
+ obj = lookup_object_or_alloc(addr, db, descr, onstack, false);
+ if (unlikely(!obj)) {
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_objects_oom();
+ return;
}

switch (obj->state) {
@@ -607,8 +636,6 @@ static void
}

raw_spin_unlock_irqrestore(&db->lock, flags);
- if (check_stack)
- debug_object_is_on_stack(addr, onstack);
}

/**
@@ -648,14 +675,12 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_s
*/
int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
{
+ struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
enum debug_obj_state state;
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
int ret;
- struct debug_obj o = { .object = addr,
- .state = ODEBUG_STATE_NOTAVAILABLE,
- .descr = descr };

if (!debug_objects_enabled)
return 0;
@@ -664,8 +689,8 @@ int debug_object_activate(void *addr, co

raw_spin_lock_irqsave(&db->lock, flags);

- obj = lookup_object(addr, db);
- if (obj) {
+ obj = lookup_object_or_alloc(addr, db, descr, false, true);
+ if (likely(!IS_ERR_OR_NULL(obj))) {
bool print_object = false;

switch (obj->state) {
@@ -698,24 +723,16 @@ int debug_object_activate(void *addr, co

raw_spin_unlock_irqrestore(&db->lock, flags);

- /*
- * We are here when a static object is activated. We
- * let the type specific code confirm whether this is
- * true or not. if true, we just make sure that the
- * static object is tracked in the object tracker. If
- * not, this must be a bug, so we try to fix it up.
- */
- if (descr->is_static_object && descr->is_static_object(addr)) {
- /* track this static object */
- debug_object_init(addr, descr);
- debug_object_activate(addr, descr);
- } else {
- debug_print_object(&o, "activate");
- ret = debug_object_fixup(descr->fixup_activate, addr,
- ODEBUG_STATE_NOTAVAILABLE);
- return ret ? 0 : -EINVAL;
+ /* If NULL the allocaction has hit OOM */
+ if (!obj) {
+ debug_objects_oom();
+ return 0;
}
- return 0;
+
+ /* Object is neither static nor tracked. It's not initialized */
+ debug_print_object(&o, "activate");
+ ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE);
+ return ret ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(debug_object_activate);

@@ -869,6 +886,7 @@ EXPORT_SYMBOL_GPL(debug_object_free);
*/
void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
{
+ struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
@@ -879,31 +897,20 @@ void debug_object_assert_init(void *addr
db = get_bucket((unsigned long) addr);

raw_spin_lock_irqsave(&db->lock, flags);
+ obj = lookup_object_or_alloc(addr, db, descr, false, true);
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (likely(!IS_ERR_OR_NULL(obj)))
+ return;

- obj = lookup_object(addr, db);
+ /* If NULL the allocaction has hit OOM */
if (!obj) {
- struct debug_obj o = { .object = addr,
- .state = ODEBUG_STATE_NOTAVAILABLE,
- .descr = descr };
-
- raw_spin_unlock_irqrestore(&db->lock, flags);
- /*
- * Maybe the object is static, and we let the type specific
- * code confirm. Track this static object if true, else invoke
- * fixup.
- */
- if (descr->is_static_object && descr->is_static_object(addr)) {
- /* Track this static object */
- debug_object_init(addr, descr);
- } else {
- debug_print_object(&o, "assert_init");
- debug_object_fixup(descr->fixup_assert_init, addr,
- ODEBUG_STATE_NOTAVAILABLE);
- }
+ debug_objects_oom();
return;
}

- raw_spin_unlock_irqrestore(&db->lock, flags);
+ /* Object is neither tracked nor static. It's not initialized. */
+ debug_print_object(&o, "assert_init");
+ debug_object_fixup(descr->fixup_assert_init, addr, ODEBUG_STATE_NOTAVAILABLE);
}
EXPORT_SYMBOL_GPL(debug_object_assert_init);

2023-03-22 18:10:42

by Schspa Shi

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object


Thomas Gleixner <[email protected]> writes:

> On Wed, Mar 22 2023 at 23:40, Schspa Shi wrote:
>> Thomas Gleixner <[email protected]> writes:
>>>> + } else {
>>>> + /*
>>>> + * The debug object is inited, and we should check this again
>>>> + */
>>>> + if (obj->is_static) {
>>>> + raw_spin_unlock_irqrestore(&db->lock, flags);
>>>> + return;
>>>
>>> This is broken. If the object is static and already hashed and in active
>>> state then this returns and fails to detect the re-initialization of an
>>> active object.
>>>
>>
>> Yes, it's right, this can be fixed by pass a skip_ifstatic parameters
>> from debug_object_activate. then re-initialization of an active object
>> can be detected.
>
>>>> -static __initdata struct self_test obj = { .static_init = 0 };
>>>> +static struct self_test obj __initdata = { .static_init = 0 };
>>>> +static struct self_test sobj __initdata = { .static_init = 1 };
>>>
>>> ...
>>>
>>>> - obj.static_init = 1;
>>>
>>> Plus the s/obj/sobj/ which should be equivalent, unless I'm missing
>>> something here.
>>>
>>
>> We have saved the is_static state when it is used at the first time, so
>> the is_static_object function won't be called in this environment.
>
> There is zero requirement for saving that state.
>
>>> lib/debugobjects.c | 127 +++++++++++++++++++++++++++--------------------------
>>> 1 file changed, 67 insertions(+), 60 deletions(-)
>>>
>>> --- a/lib/debugobjects.c
>>> +++ b/lib/debugobjects.c
>>> @@ -216,10 +216,6 @@ static struct debug_obj *__alloc_object(
>>> return obj;
>>> }
>>>
>>> -/*
>>> - * Allocate a new object. If the pool is empty, switch off the debugger.
>>> - * Must be called with interrupts disabled.
>>> - */
>>> static struct debug_obj *
>>> alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
>>> {
>>> @@ -273,7 +269,7 @@ alloc_object(void *addr, struct debug_bu
>>> if (obj) {
>>> obj->object = addr;
>>> obj->descr = descr;
>>> - obj->state = ODEBUG_STATE_NONE;
>>> + obj->state = ODEBUG_STATE_INIT;
>>
>> This actually droped the ODEBUG_STATE_NONE state. If we active a
>> uninitialized object, there will be no error report.
>
> Indeed.
>
>> This should be
>>
>> if (descr->is_static_object && descr->is_static_object(addr))
>> obj->state = ODEBUG_STATE_INIT;
>> else
>> obj->state = ODEBUG_STATE_NONE;
>
> Kinda.
>
>> But this can't resolve the initial state requirement from the
>> is_static_object() call.
>
> Which requirement? The is_static_object() call takes the address of the
> actual object and has nothing to do with the tracking object at all.
>

This is for the fellowing test case, actually we calls
debug_object_free() from a static object in our selftest, if we don't
report any thing when call debug_object_free from a static object, we
there is no such issues.

obj.static_init = 1;
debug_object_activate(&obj, &descr_type_test);
if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
goto out;
debug_object_init(&obj, &descr_type_test);
if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
goto out;


/*
* We need to remove the debug_object_free here, because it's not
* a legal operation.
*/
- debug_object_free(&obj, &descr_type_test);
- if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
- goto out;

#if 0
/*
* for the static debugobject, it's initial value will be changed
* once used.
*/
obj.static_init = 2;
debug_object_activate(&obj, &descr_type_test);
if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
goto out;

/* This test will fail */
#endif

>> I think we can report an error when calling debug_object_free() from a
>> static object. If don't do so, there is no way to determine it's a
>> static object.
>
> The memory allocator will tell you loudly when you try to free a static
> object. So no point in having another check.
>
>> When its initialization state changes, the is_static_object() call
>> will return the wrong value.
>
> That call is only relevant on the first invocation when there is no
> tracking object yet. So what's the problem you are trying to solve?
>
>> Please see the fellowing test case:
>>
>> obj.static_init = 1;
>
> This is pointless, really. Once the object is tracked it does not matter
> at all whether it was statically or dynamically allocated.
>
>>
>> I test this patch, with my above change, and it seems to work well, but
>> we still need to add extra flags to store its static state. And
>> debug_object_free() should report an error for the static object.
>
> No, we don't.
>

OK, we don't need to store the state if don't take care the
debug_object_free() call on static object at all. If so, we should
delete the debug_object_free() call on static object at
debug_objects_selftest().

>> I think we should introduce lookup_object_or_alloc and is_static at the
>> same time.
>
> What for?
>

To report an error when someone calls debug_object_free on a static
object.

> Thanks,
>
> tglx


--
BRs
Schspa Shi

2023-03-22 18:25:44

by Schspa Shi

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object


Thomas Gleixner <[email protected]> writes:

> On Wed, Mar 22 2023 at 18:46, Thomas Gleixner wrote:
>> On Wed, Mar 22 2023 at 23:40, Schspa Shi wrote:
>>> I think we should introduce lookup_object_or_alloc and is_static at the
>>> same time.
>>
>> What for?
>
> The below has the NONE/INIT issue addressed and plugs that race
> completely, so no further action required.
>
> Thanks,
>
> tglx
> ---
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -216,10 +216,6 @@ static struct debug_obj *__alloc_object(
> return obj;
> }
>
> -/*
> - * Allocate a new object. If the pool is empty, switch off the debugger.
> - * Must be called with interrupts disabled.
> - */
> static struct debug_obj *
> alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
> {
> @@ -552,11 +548,49 @@ static void debug_object_is_on_stack(voi
> WARN_ON(1);
> }
>
> +static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket *b,
> + const struct debug_obj_descr *descr,
> + bool onstack, bool alloc_ifstatic)
> +{
> + struct debug_obj *obj = lookup_object(addr, b);
> + enum debug_obj_state state = ODEBUG_STATE_NONE;
> +
> + if (likely(obj))
> + return obj;
> +
> + /*
> + * debug_object_init() unconditionally allocates untracked
> + * objects. It does not matter whether it is a static object or
> + * not.
> + *
> + * debug_object_assert_init() and debug_object_activate() allow
> + * allocation only if the descriptor callback confirms that the
> + * object is static and considered initialized. For non-static
> + * objects the allocation needs to be done from the fixup callback.
> + */
> + if (unlikely(alloc_ifstatic)) {
> + if (!descr->is_static_object || !descr->is_static_object(addr))
> + return ERR_PTR(-ENOENT);
> + /* Statically allocated objects are considered initialized */
> + state = ODEBUG_STATE_INIT;
> + }
> +
> + obj = alloc_object(addr, b, descr);
> + if (likely(obj)) {
> + obj->state = state;
> + debug_object_is_on_stack(addr, onstack);
> + return obj;
> + }
> +
> + /* Out of memory. Do the cleanup outside of the locked region */
> + debug_objects_enabled = 0;
> + return NULL;
> +}
> +
> static void
> __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
> {
> enum debug_obj_state state;
> - bool check_stack = false;
> struct debug_bucket *db;
> struct debug_obj *obj;
> unsigned long flags;
> @@ -572,16 +606,11 @@ static void
>
> raw_spin_lock_irqsave(&db->lock, flags);
>
> - obj = lookup_object(addr, db);
> - if (!obj) {
> - obj = alloc_object(addr, db, descr);
> - if (!obj) {
> - debug_objects_enabled = 0;
> - raw_spin_unlock_irqrestore(&db->lock, flags);
> - debug_objects_oom();
> - return;
> - }
> - check_stack = true;
> + obj = lookup_object_or_alloc(addr, db, descr, onstack, false);
> + if (unlikely(!obj)) {
> + raw_spin_unlock_irqrestore(&db->lock, flags);
> + debug_objects_oom();
> + return;
> }
>
> switch (obj->state) {
> @@ -607,8 +636,6 @@ static void
> }
>
> raw_spin_unlock_irqrestore(&db->lock, flags);
> - if (check_stack)
> - debug_object_is_on_stack(addr, onstack);
> }
>
> /**
> @@ -648,14 +675,12 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_s
> */
> int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
> {
> + struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
> enum debug_obj_state state;
> struct debug_bucket *db;
> struct debug_obj *obj;
> unsigned long flags;
> int ret;
> - struct debug_obj o = { .object = addr,
> - .state = ODEBUG_STATE_NOTAVAILABLE,
> - .descr = descr };
>
> if (!debug_objects_enabled)
> return 0;
> @@ -664,8 +689,8 @@ int debug_object_activate(void *addr, co
>
> raw_spin_lock_irqsave(&db->lock, flags);
>
> - obj = lookup_object(addr, db);
> - if (obj) {
> + obj = lookup_object_or_alloc(addr, db, descr, false, true);
> + if (likely(!IS_ERR_OR_NULL(obj))) {
> bool print_object = false;
>
> switch (obj->state) {
> @@ -698,24 +723,16 @@ int debug_object_activate(void *addr, co
>
> raw_spin_unlock_irqrestore(&db->lock, flags);
>
> - /*
> - * We are here when a static object is activated. We
> - * let the type specific code confirm whether this is
> - * true or not. if true, we just make sure that the
> - * static object is tracked in the object tracker. If
> - * not, this must be a bug, so we try to fix it up.
> - */
> - if (descr->is_static_object && descr->is_static_object(addr)) {
> - /* track this static object */
> - debug_object_init(addr, descr);
> - debug_object_activate(addr, descr);
> - } else {
> - debug_print_object(&o, "activate");
> - ret = debug_object_fixup(descr->fixup_activate, addr,
> - ODEBUG_STATE_NOTAVAILABLE);
> - return ret ? 0 : -EINVAL;
> + /* If NULL the allocaction has hit OOM */
> + if (!obj) {
> + debug_objects_oom();
> + return 0;
> }
> - return 0;
> +
> + /* Object is neither static nor tracked. It's not initialized */
> + debug_print_object(&o, "activate");
> + ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE);
> + return ret ? 0 : -EINVAL;
> }
> EXPORT_SYMBOL_GPL(debug_object_activate);
>
> @@ -869,6 +886,7 @@ EXPORT_SYMBOL_GPL(debug_object_free);
> */
> void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
> {
> + struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
> struct debug_bucket *db;
> struct debug_obj *obj;
> unsigned long flags;
> @@ -879,31 +897,20 @@ void debug_object_assert_init(void *addr
> db = get_bucket((unsigned long) addr);
>
> raw_spin_lock_irqsave(&db->lock, flags);
> + obj = lookup_object_or_alloc(addr, db, descr, false, true);
> + raw_spin_unlock_irqrestore(&db->lock, flags);
> + if (likely(!IS_ERR_OR_NULL(obj)))
> + return;
>
> - obj = lookup_object(addr, db);
> + /* If NULL the allocaction has hit OOM */
> if (!obj) {
> - struct debug_obj o = { .object = addr,
> - .state = ODEBUG_STATE_NOTAVAILABLE,
> - .descr = descr };
> -
> - raw_spin_unlock_irqrestore(&db->lock, flags);
> - /*
> - * Maybe the object is static, and we let the type specific
> - * code confirm. Track this static object if true, else invoke
> - * fixup.
> - */
> - if (descr->is_static_object && descr->is_static_object(addr)) {
> - /* Track this static object */
> - debug_object_init(addr, descr);
> - } else {
> - debug_print_object(&o, "assert_init");
> - debug_object_fixup(descr->fixup_assert_init, addr,
> - ODEBUG_STATE_NOTAVAILABLE);
> - }
> + debug_objects_oom();
> return;
> }
>
> - raw_spin_unlock_irqrestore(&db->lock, flags);
> + /* Object is neither tracked nor static. It's not initialized. */
> + debug_print_object(&o, "assert_init");
> + debug_object_fixup(descr->fixup_assert_init, addr, ODEBUG_STATE_NOTAVAILABLE);
> }
> EXPORT_SYMBOL_GPL(debug_object_assert_init);
>

It's OK if you don't think debug_object_free() should report a error when
the object is static. But I still think we should report an error and
modify the autotest case in debug_objects_selftest() to remove
debug_object_free() call on the static object.

--
BRs
Schspa Shi

2023-03-22 21:20:47

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object

On Thu, Mar 23 2023 at 01:55, Schspa Shi wrote:
> Thomas Gleixner <[email protected]> writes:
>> Which requirement? The is_static_object() call takes the address of the
>> actual object and has nothing to do with the tracking object at all.
>>
>
> This is for the fellowing test case, actually we calls
> debug_object_free() from a static object in our selftest, if we don't
> report any thing when call debug_object_free from a static object, we
> there is no such issues.

That's an artifical and completely pointless test case. As I told you
before the memory subsystem will warn when it's tried to free a static
object. debug_objects_free() is invoked from the memory subsystem *free*
functions.

What is the value of another warning?

Nothing at all.

So why would we add extra code just to keep track of something
completely redundant?

Thanks,

tglx

2023-03-23 03:28:09

by Schspa Shi

[permalink] [raw]
Subject: Re: [PATCH 1/2] debugobject: fix concurrency issues with is_static_object


Thomas Gleixner <[email protected]> writes:

> On Thu, Mar 23 2023 at 01:55, Schspa Shi wrote:
>> Thomas Gleixner <[email protected]> writes:
>>> Which requirement? The is_static_object() call takes the address of the
>>> actual object and has nothing to do with the tracking object at all.
>>>
>>
>> This is for the fellowing test case, actually we calls
>> debug_object_free() from a static object in our selftest, if we don't
>> report any thing when call debug_object_free from a static object, we
>> there is no such issues.
>
> That's an artifical and completely pointless test case. As I told you
> before the memory subsystem will warn when it's tried to free a static
> object. debug_objects_free() is invoked from the memory subsystem *free*
> functions.
>
> What is the value of another warning?
>
> Nothing at all.
>
> So why would we add extra code just to keep track of something
> completely redundant?
>

OK, there is really no need to do this extra check. And should I
submit a new patch version with your change ?????

> Thanks,
>
> tglx


--
BRs
Schspa Shi

2023-03-23 03:29:47

by Schspa Shi

[permalink] [raw]
Subject: Re: [PATCH 2/2] debugobject: add unit test for static debug object


Schspa Shi <[email protected]> writes:

> Add test case to enusre that static debug object correctness.
>
> Tested on little-endian arm64 qemu, result:
>
> [ 2.385735] KTAP version 1
> [ 2.385860] 1..1
> [ 2.386406] KTAP version 1
> [ 2.386658] # Subtest: static debugobject init
> [ 2.386726] 1..1
> [ 2.401777] ok 1 static_debugobject_test
> [ 2.402455] ok 1 static debugobject init
>
> Signed-off-by: Schspa Shi <[email protected]>
> ---
> MAINTAINERS | 5 ++
> lib/Kconfig.debug | 14 ++++
> lib/Makefile | 2 +
> lib/test_static_debug_object.c | 125 +++++++++++++++++++++++++++++++++
> 4 files changed, 146 insertions(+)
> create mode 100644 lib/test_static_debug_object.c

Hi tglx:

What do you think about this test case? Should we need it ? There are
some platform compatibility issues here that need a little optimization.

--
BRs
Schspa Shi

2023-03-23 08:12:53

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 2/2] debugobject: add unit test for static debug object

On Thu, Mar 23 2023 at 11:16, Schspa Shi wrote:
> Schspa Shi <[email protected]> writes:
>> MAINTAINERS | 5 ++
>> lib/Kconfig.debug | 14 ++++
>> lib/Makefile | 2 +
>> lib/test_static_debug_object.c | 125 +++++++++++++++++++++++++++++++++
>> 4 files changed, 146 insertions(+)
>> create mode 100644 lib/test_static_debug_object.c
>
> What do you think about this test case? Should we need it ? There are
> some platform compatibility issues here that need a little optimization.

What does it buy over the existing self test. Nothing AFACIT aside of
extra code.

Thanks,

tglx

2023-03-23 08:54:29

by Schspa Shi

[permalink] [raw]
Subject: Re: [PATCH 2/2] debugobject: add unit test for static debug object


Thomas Gleixner <[email protected]> writes:

> On Thu, Mar 23 2023 at 11:16, Schspa Shi wrote:
>> Schspa Shi <[email protected]> writes:
>>> MAINTAINERS | 5 ++
>>> lib/Kconfig.debug | 14 ++++
>>> lib/Makefile | 2 +
>>> lib/test_static_debug_object.c | 125 +++++++++++++++++++++++++++++++++
>>> 4 files changed, 146 insertions(+)
>>> create mode 100644 lib/test_static_debug_object.c
>>
>> What do you think about this test case? Should we need it ? There are
>> some platform compatibility issues here that need a little optimization.
>
> What does it buy over the existing self test. Nothing AFACIT aside of
> extra code.
>

It checks the race of the is_static_object() call in the previous
BUG. This test can used to make sure the new fix patch works. The
existing self test have no ability to check this.

> Thanks,
>
> tglx


--
BRs
Schspa Shi

2023-04-12 08:04:22

by Thomas Gleixner

[permalink] [raw]
Subject: [PATCH] debugobject: Prevent init race with static objects

Statically initialized objects are usually not initialized via the init()
function of the subsystem. They are special cased and the subsystem
provides a function to validate whether an object which is not yet tracked
by debugobjects is statically initialized. This means the object is started
to be tracked on first use, e.g. activation.

This works perfectly fine, unless there are two concurrent operations on
that object. Schspa decoded the problem:

T0 T1

debug_object_assert_init(addr)
lock_hash_bucket()
obj = lookup_object(addr);
if (!obj) {
unlock_hash_bucket();
- > preemption
lock_subsytem_object(addr);
activate_object(addr)
lock_hash_bucket();
obj = lookup_object(addr);
if (!obj) {
unlock_hash_bucket();
if (is_static_object(addr))
init_and_track(addr);
lock_hash_bucket();
obj = lookup_object(addr);
obj->state = ACTIVATED;
unlock_hash_bucket();

subsys function modifies content of addr,
so static object detection does
not longer work.

unlock_subsytem_object(addr);

if (is_static_object(addr)) <- Fails

debugobject emits a warning and invokes the fixup function which
reinitializes the already active object in the worst case.

This race exists forever, but was never observed until mod_timer() got a
debug_object_assert_init() added which is outside of the timer base lock
held section right at the beginning of the function to cover the lockless
early exit points too.

Rework the code so that the lookup, the static object check and the
tracking object association happens atomically under the hash bucket
lock. This prevents the issue completely as all callers are serialized on
the hash bucket lock and therefore cannot observe inconsistent state.

Fixes: 3ac7fe5a4aab ("infrastructure to debug (dynamic) objects")
Reported-by: [email protected]
Debugged-by: Schspa Shi <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://syzkaller.appspot.com/bug?id=22c8a5938eab640d1c6bcc0e3dc7be519d878462
Link: https://lore.kernel.org/lkml/[email protected]
---
lib/debugobjects.c | 125 +++++++++++++++++++++++++++--------------------------
1 file changed, 66 insertions(+), 59 deletions(-)

--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -216,10 +216,6 @@ static struct debug_obj *__alloc_object(
return obj;
}

-/*
- * Allocate a new object. If the pool is empty, switch off the debugger.
- * Must be called with interrupts disabled.
- */
static struct debug_obj *
alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
{
@@ -552,11 +548,49 @@ static void debug_object_is_on_stack(voi
WARN_ON(1);
}

+static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket *b,
+ const struct debug_obj_descr *descr,
+ bool onstack, bool alloc_ifstatic)
+{
+ struct debug_obj *obj = lookup_object(addr, b);
+ enum debug_obj_state state = ODEBUG_STATE_NONE;
+
+ if (likely(obj))
+ return obj;
+
+ /*
+ * debug_object_init() unconditionally allocates untracked
+ * objects. It does not matter whether it is a static object or
+ * not.
+ *
+ * debug_object_assert_init() and debug_object_activate() allow
+ * allocation only if the descriptor callback confirms that the
+ * object is static and considered initialized. For non-static
+ * objects the allocation needs to be done from the fixup callback.
+ */
+ if (unlikely(alloc_ifstatic)) {
+ if (!descr->is_static_object || !descr->is_static_object(addr))
+ return ERR_PTR(-ENOENT);
+ /* Statically allocated objects are considered initialized */
+ state = ODEBUG_STATE_INIT;
+ }
+
+ obj = alloc_object(addr, b, descr);
+ if (likely(obj)) {
+ obj->state = state;
+ debug_object_is_on_stack(addr, onstack);
+ return obj;
+ }
+
+ /* Out of memory. Do the cleanup outside of the locked region */
+ debug_objects_enabled = 0;
+ return NULL;
+}
+
static void
__debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
{
enum debug_obj_state state;
- bool check_stack = false;
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
@@ -572,16 +606,11 @@ static void

raw_spin_lock_irqsave(&db->lock, flags);

- obj = lookup_object(addr, db);
- if (!obj) {
- obj = alloc_object(addr, db, descr);
- if (!obj) {
- debug_objects_enabled = 0;
- raw_spin_unlock_irqrestore(&db->lock, flags);
- debug_objects_oom();
- return;
- }
- check_stack = true;
+ obj = lookup_object_or_alloc(addr, db, descr, onstack, false);
+ if (unlikely(!obj)) {
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_objects_oom();
+ return;
}

switch (obj->state) {
@@ -607,8 +636,6 @@ static void
}

raw_spin_unlock_irqrestore(&db->lock, flags);
- if (check_stack)
- debug_object_is_on_stack(addr, onstack);
}

/**
@@ -648,14 +675,12 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_s
*/
int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
{
+ struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
enum debug_obj_state state;
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
int ret;
- struct debug_obj o = { .object = addr,
- .state = ODEBUG_STATE_NOTAVAILABLE,
- .descr = descr };

if (!debug_objects_enabled)
return 0;
@@ -664,8 +689,8 @@ int debug_object_activate(void *addr, co

raw_spin_lock_irqsave(&db->lock, flags);

- obj = lookup_object(addr, db);
- if (obj) {
+ obj = lookup_object_or_alloc(addr, db, descr, false, true);
+ if (likely(!IS_ERR_OR_NULL(obj))) {
bool print_object = false;

switch (obj->state) {
@@ -698,24 +723,16 @@ int debug_object_activate(void *addr, co

raw_spin_unlock_irqrestore(&db->lock, flags);

- /*
- * We are here when a static object is activated. We
- * let the type specific code confirm whether this is
- * true or not. if true, we just make sure that the
- * static object is tracked in the object tracker. If
- * not, this must be a bug, so we try to fix it up.
- */
- if (descr->is_static_object && descr->is_static_object(addr)) {
- /* track this static object */
- debug_object_init(addr, descr);
- debug_object_activate(addr, descr);
- } else {
- debug_print_object(&o, "activate");
- ret = debug_object_fixup(descr->fixup_activate, addr,
- ODEBUG_STATE_NOTAVAILABLE);
- return ret ? 0 : -EINVAL;
+ /* If NULL the allocaction has hit OOM */
+ if (!obj) {
+ debug_objects_oom();
+ return 0;
}
- return 0;
+
+ /* Object is neither static nor tracked. It's not initialized */
+ debug_print_object(&o, "activate");
+ ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE);
+ return ret ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(debug_object_activate);

@@ -869,6 +886,7 @@ EXPORT_SYMBOL_GPL(debug_object_free);
*/
void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
{
+ struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
@@ -879,31 +897,20 @@ void debug_object_assert_init(void *addr
db = get_bucket((unsigned long) addr);

raw_spin_lock_irqsave(&db->lock, flags);
+ obj = lookup_object_or_alloc(addr, db, descr, false, true);
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (likely(!IS_ERR_OR_NULL(obj)))
+ return;

- obj = lookup_object(addr, db);
+ /* If NULL the allocaction has hit OOM */
if (!obj) {
- struct debug_obj o = { .object = addr,
- .state = ODEBUG_STATE_NOTAVAILABLE,
- .descr = descr };
-
- raw_spin_unlock_irqrestore(&db->lock, flags);
- /*
- * Maybe the object is static, and we let the type specific
- * code confirm. Track this static object if true, else invoke
- * fixup.
- */
- if (descr->is_static_object && descr->is_static_object(addr)) {
- /* Track this static object */
- debug_object_init(addr, descr);
- } else {
- debug_print_object(&o, "assert_init");
- debug_object_fixup(descr->fixup_assert_init, addr,
- ODEBUG_STATE_NOTAVAILABLE);
- }
+ debug_objects_oom();
return;
}

- raw_spin_unlock_irqrestore(&db->lock, flags);
+ /* Object is neither tracked nor static. It's not initialized. */
+ debug_print_object(&o, "assert_init");
+ debug_object_fixup(descr->fixup_assert_init, addr, ODEBUG_STATE_NOTAVAILABLE);
}
EXPORT_SYMBOL_GPL(debug_object_assert_init);

2023-04-13 00:23:44

by Stephen Boyd

[permalink] [raw]
Subject: Re: [PATCH] debugobject: Prevent init race with static objects

Quoting Thomas Gleixner (2023-04-12 00:54:39)
> Statically initialized objects are usually not initialized via the init()
> function of the subsystem. They are special cased and the subsystem
[...]
>
> Rework the code so that the lookup, the static object check and the
> tracking object association happens atomically under the hash bucket
> lock. This prevents the issue completely as all callers are serialized on
> the hash bucket lock and therefore cannot observe inconsistent state.
>
> Fixes: 3ac7fe5a4aab ("infrastructure to debug (dynamic) objects")
> Reported-by: [email protected]
> Debugged-by: Schspa Shi <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Link: https://syzkaller.appspot.com/bug?id=22c8a5938eab640d1c6bcc0e3dc7be519d878462
> Link: https://lore.kernel.org/lkml/[email protected]
> ---

Reviewed-by: Stephen Boyd <[email protected]>

Tiny nitpick below

>
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -698,24 +723,16 @@ int debug_object_activate(void *addr, co
>
> raw_spin_unlock_irqrestore(&db->lock, flags);
>
> - /*
> - * We are here when a static object is activated. We
> - * let the type specific code confirm whether this is
> - * true or not. if true, we just make sure that the
> - * static object is tracked in the object tracker. If
> - * not, this must be a bug, so we try to fix it up.
> - */
> - if (descr->is_static_object && descr->is_static_object(addr)) {
> - /* track this static object */
> - debug_object_init(addr, descr);
> - debug_object_activate(addr, descr);
> - } else {
> - debug_print_object(&o, "activate");
> - ret = debug_object_fixup(descr->fixup_activate, addr,
> - ODEBUG_STATE_NOTAVAILABLE);
> - return ret ? 0 : -EINVAL;
> + /* If NULL the allocaction has hit OOM */

s/allocaction/allocation/

Or is this "alloc action"? Or should it be "lookup_object_or_alloc() has
hit OOM"?

> + if (!obj) {
> + debug_objects_oom();
> + return 0;
> }
> - return 0;
> +
> + /* Object is neither static nor tracked. It's not initialized */
> + debug_print_object(&o, "activate");
> + ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE);
> + return ret ? 0 : -EINVAL;
> }
> EXPORT_SYMBOL_GPL(debug_object_activate);
>
> @@ -879,31 +897,20 @@ void debug_object_assert_init(void *addr
> db = get_bucket((unsigned long) addr);
>
> raw_spin_lock_irqsave(&db->lock, flags);
> + obj = lookup_object_or_alloc(addr, db, descr, false, true);
> + raw_spin_unlock_irqrestore(&db->lock, flags);
> + if (likely(!IS_ERR_OR_NULL(obj)))
> + return;
>
> - obj = lookup_object(addr, db);
> + /* If NULL the allocaction has hit OOM */

Same comment.

2023-04-13 12:18:52

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] debugobject: Prevent init race with static objects

On Wed, Apr 12 2023 at 17:17, Stephen Boyd wrote:
> Quoting Thomas Gleixner (2023-04-12 00:54:39)
>> + /* If NULL the allocaction has hit OOM */
>
> s/allocaction/allocation/

That's what I probably wanted to type.

>> - obj = lookup_object(addr, db);
>> + /* If NULL the allocaction has hit OOM */
>
> Same comment.

The wonders of copy & pasta!

Thanks for the review!

tglx

2023-04-13 22:42:10

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 2/2] debugobject: add unit test for static debug object

On Thu, Mar 23 2023 at 16:44, Schspa Shi wrote:
> Thomas Gleixner <[email protected]> writes:
>> On Thu, Mar 23 2023 at 11:16, Schspa Shi wrote:
>>> What do you think about this test case? Should we need it ? There are
>>> some platform compatibility issues here that need a little optimization.
>>
>> What does it buy over the existing self test. Nothing AFACIT aside of
>> extra code.
>>
>
> It checks the race of the is_static_object() call in the previous
> BUG. This test can used to make sure the new fix patch works. The
> existing self test have no ability to check this.

Sure it can somehow make sure that it works.

Don't misunderstand me. I'm all for self tests, but this one really
falls into the category of testing the obvious. There are tons of more
interesting places in the kernel which lack self tests than this
particular oddity which is well understood and more than unlikely to
come back.

That said, it would be valuable if you could add your Tested-by to the
final patch, which will be queued up soonish.

Thanks,

tglx

Subject: [tip: core/debugobjects] debugobject: Prevent init race with static objects

The following commit has been merged into the core/debugobjects branch of tip:

Commit-ID: 63a759694eed61025713b3e14dd827c8548daadc
Gitweb: https://git.kernel.org/tip/63a759694eed61025713b3e14dd827c8548daadc
Author: Thomas Gleixner <[email protected]>
AuthorDate: Wed, 12 Apr 2023 09:54:39 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Sat, 15 Apr 2023 23:13:36 +02:00

debugobject: Prevent init race with static objects

Statically initialized objects are usually not initialized via the init()
function of the subsystem. They are special cased and the subsystem
provides a function to validate whether an object which is not yet tracked
by debugobjects is statically initialized. This means the object is started
to be tracked on first use, e.g. activation.

This works perfectly fine, unless there are two concurrent operations on
that object. Schspa decoded the problem:

T0 T1

debug_object_assert_init(addr)
lock_hash_bucket()
obj = lookup_object(addr);
if (!obj) {
unlock_hash_bucket();
- > preemption
lock_subsytem_object(addr);
activate_object(addr)
lock_hash_bucket();
obj = lookup_object(addr);
if (!obj) {
unlock_hash_bucket();
if (is_static_object(addr))
init_and_track(addr);
lock_hash_bucket();
obj = lookup_object(addr);
obj->state = ACTIVATED;
unlock_hash_bucket();

subsys function modifies content of addr,
so static object detection does
not longer work.

unlock_subsytem_object(addr);

if (is_static_object(addr)) <- Fails

debugobject emits a warning and invokes the fixup function which
reinitializes the already active object in the worst case.

This race exists forever, but was never observed until mod_timer() got a
debug_object_assert_init() added which is outside of the timer base lock
held section right at the beginning of the function to cover the lockless
early exit points too.

Rework the code so that the lookup, the static object check and the
tracking object association happens atomically under the hash bucket
lock. This prevents the issue completely as all callers are serialized on
the hash bucket lock and therefore cannot observe inconsistent state.

Fixes: 3ac7fe5a4aab ("infrastructure to debug (dynamic) objects")
Reported-by: [email protected]
Debugged-by: Schspa Shi <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Stephen Boyd <[email protected]>
Link: https://syzkaller.appspot.com/bug?id=22c8a5938eab640d1c6bcc0e3dc7be519d878462
Link: https://lore.kernel.org/lkml/[email protected]
Link: https://lore.kernel.org/r/87zg7dzgao.ffs@tglx

---
lib/debugobjects.c | 125 +++++++++++++++++++++++---------------------
1 file changed, 66 insertions(+), 59 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index df86e64..b796799 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -216,10 +216,6 @@ static struct debug_obj *__alloc_object(struct hlist_head *list)
return obj;
}

-/*
- * Allocate a new object. If the pool is empty, switch off the debugger.
- * Must be called with interrupts disabled.
- */
static struct debug_obj *
alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
{
@@ -552,11 +548,49 @@ static void debug_object_is_on_stack(void *addr, int onstack)
WARN_ON(1);
}

+static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket *b,
+ const struct debug_obj_descr *descr,
+ bool onstack, bool alloc_ifstatic)
+{
+ struct debug_obj *obj = lookup_object(addr, b);
+ enum debug_obj_state state = ODEBUG_STATE_NONE;
+
+ if (likely(obj))
+ return obj;
+
+ /*
+ * debug_object_init() unconditionally allocates untracked
+ * objects. It does not matter whether it is a static object or
+ * not.
+ *
+ * debug_object_assert_init() and debug_object_activate() allow
+ * allocation only if the descriptor callback confirms that the
+ * object is static and considered initialized. For non-static
+ * objects the allocation needs to be done from the fixup callback.
+ */
+ if (unlikely(alloc_ifstatic)) {
+ if (!descr->is_static_object || !descr->is_static_object(addr))
+ return ERR_PTR(-ENOENT);
+ /* Statically allocated objects are considered initialized */
+ state = ODEBUG_STATE_INIT;
+ }
+
+ obj = alloc_object(addr, b, descr);
+ if (likely(obj)) {
+ obj->state = state;
+ debug_object_is_on_stack(addr, onstack);
+ return obj;
+ }
+
+ /* Out of memory. Do the cleanup outside of the locked region */
+ debug_objects_enabled = 0;
+ return NULL;
+}
+
static void
__debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
{
enum debug_obj_state state;
- bool check_stack = false;
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
@@ -572,16 +606,11 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack

raw_spin_lock_irqsave(&db->lock, flags);

- obj = lookup_object(addr, db);
- if (!obj) {
- obj = alloc_object(addr, db, descr);
- if (!obj) {
- debug_objects_enabled = 0;
- raw_spin_unlock_irqrestore(&db->lock, flags);
- debug_objects_oom();
- return;
- }
- check_stack = true;
+ obj = lookup_object_or_alloc(addr, db, descr, onstack, false);
+ if (unlikely(!obj)) {
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_objects_oom();
+ return;
}

switch (obj->state) {
@@ -607,8 +636,6 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
}

raw_spin_unlock_irqrestore(&db->lock, flags);
- if (check_stack)
- debug_object_is_on_stack(addr, onstack);
}

/**
@@ -648,14 +675,12 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_stack);
*/
int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
{
+ struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
enum debug_obj_state state;
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
int ret;
- struct debug_obj o = { .object = addr,
- .state = ODEBUG_STATE_NOTAVAILABLE,
- .descr = descr };

if (!debug_objects_enabled)
return 0;
@@ -664,8 +689,8 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)

raw_spin_lock_irqsave(&db->lock, flags);

- obj = lookup_object(addr, db);
- if (obj) {
+ obj = lookup_object_or_alloc(addr, db, descr, false, true);
+ if (likely(!IS_ERR_OR_NULL(obj))) {
bool print_object = false;

switch (obj->state) {
@@ -698,24 +723,16 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)

raw_spin_unlock_irqrestore(&db->lock, flags);

- /*
- * We are here when a static object is activated. We
- * let the type specific code confirm whether this is
- * true or not. if true, we just make sure that the
- * static object is tracked in the object tracker. If
- * not, this must be a bug, so we try to fix it up.
- */
- if (descr->is_static_object && descr->is_static_object(addr)) {
- /* track this static object */
- debug_object_init(addr, descr);
- debug_object_activate(addr, descr);
- } else {
- debug_print_object(&o, "activate");
- ret = debug_object_fixup(descr->fixup_activate, addr,
- ODEBUG_STATE_NOTAVAILABLE);
- return ret ? 0 : -EINVAL;
+ /* If NULL the allocation has hit OOM */
+ if (!obj) {
+ debug_objects_oom();
+ return 0;
}
- return 0;
+
+ /* Object is neither static nor tracked. It's not initialized */
+ debug_print_object(&o, "activate");
+ ret = debug_object_fixup(descr->fixup_activate, addr, ODEBUG_STATE_NOTAVAILABLE);
+ return ret ? 0 : -EINVAL;
}
EXPORT_SYMBOL_GPL(debug_object_activate);

@@ -869,6 +886,7 @@ EXPORT_SYMBOL_GPL(debug_object_free);
*/
void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
{
+ struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr };
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
@@ -879,31 +897,20 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
db = get_bucket((unsigned long) addr);

raw_spin_lock_irqsave(&db->lock, flags);
+ obj = lookup_object_or_alloc(addr, db, descr, false, true);
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (likely(!IS_ERR_OR_NULL(obj)))
+ return;

- obj = lookup_object(addr, db);
+ /* If NULL the allocation has hit OOM */
if (!obj) {
- struct debug_obj o = { .object = addr,
- .state = ODEBUG_STATE_NOTAVAILABLE,
- .descr = descr };
-
- raw_spin_unlock_irqrestore(&db->lock, flags);
- /*
- * Maybe the object is static, and we let the type specific
- * code confirm. Track this static object if true, else invoke
- * fixup.
- */
- if (descr->is_static_object && descr->is_static_object(addr)) {
- /* Track this static object */
- debug_object_init(addr, descr);
- } else {
- debug_print_object(&o, "assert_init");
- debug_object_fixup(descr->fixup_assert_init, addr,
- ODEBUG_STATE_NOTAVAILABLE);
- }
+ debug_objects_oom();
return;
}

- raw_spin_unlock_irqrestore(&db->lock, flags);
+ /* Object is neither tracked nor static. It's not initialized. */
+ debug_print_object(&o, "assert_init");
+ debug_object_fixup(descr->fixup_assert_init, addr, ODEBUG_STATE_NOTAVAILABLE);
}
EXPORT_SYMBOL_GPL(debug_object_assert_init);

2023-05-01 13:51:38

by Ido Schimmel

[permalink] [raw]
Subject: Re: [PATCH] debugobject: Prevent init race with static objects

On Wed, Apr 12, 2023 at 09:54:39AM +0200, Thomas Gleixner wrote:
> Statically initialized objects are usually not initialized via the init()
> function of the subsystem. They are special cased and the subsystem
> provides a function to validate whether an object which is not yet tracked
> by debugobjects is statically initialized. This means the object is started
> to be tracked on first use, e.g. activation.
>
> This works perfectly fine, unless there are two concurrent operations on
> that object. Schspa decoded the problem:

[...]

> This race exists forever, but was never observed until mod_timer() got a
> debug_object_assert_init() added which is outside of the timer base lock
> held section right at the beginning of the function to cover the lockless
> early exit points too.
>
> Rework the code so that the lookup, the static object check and the
> tracking object association happens atomically under the hash bucket
> lock. This prevents the issue completely as all callers are serialized on
> the hash bucket lock and therefore cannot observe inconsistent state.
>
> Fixes: 3ac7fe5a4aab ("infrastructure to debug (dynamic) objects")
> Reported-by: [email protected]
> Debugged-by: Schspa Shi <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Link: https://syzkaller.appspot.com/bug?id=22c8a5938eab640d1c6bcc0e3dc7be519d878462
> Link: https://lore.kernel.org/lkml/[email protected]

Thomas,

With this patch we see the following warning in the kernel log during
boot:

"ODEBUG: Out of memory. ODEBUG disabled"

In which case, the stats are:

# cat /sys/kernel/debug/debug_objects/stats
max_chain :24
max_checked :37
warnings :0
fixups :0
pool_free :4297
pool_pcp_free :84
pool_min_free :0
pool_used :0
pool_max_used :6615
on_free_list :0
objs_allocated:15616
objs_freed :11319


If I revert this patch, the warning disappears and I see the following
stats:

# cat /sys/kernel/debug/debug_objects/stats
max_chain :25
max_checked :40
warnings :0
fixups :0
pool_free :1219
pool_pcp_free :209
pool_min_free :289
pool_used :1578
pool_max_used :8026
on_free_list :0
objs_allocated:32304
objs_freed :29507

The following diff seems to solve the problem for me:

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index b796799fadb2..af4bd66c571c 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -21,7 +21,7 @@
#define ODEBUG_HASH_BITS 14
#define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS)

-#define ODEBUG_POOL_SIZE 1024
+#define ODEBUG_POOL_SIZE (16 * 1024)
#define ODEBUG_POOL_MIN_LEVEL 256
#define ODEBUG_POOL_PERCPU_SIZE 64
#define ODEBUG_BATCH_SIZE 16

In which case, the stats are:

# cat /sys/kernel/debug/debug_objects/stats
max_chain :28
max_checked :64
warnings :0
fixups :0
pool_free :14789
pool_pcp_free :192
pool_min_free :8120
pool_used :1595
pool_max_used :8264
on_free_list :0
objs_allocated:16384
objs_freed :0

I'm not familiar with the debugobjects code, but maybe it makes sense to
make "ODEBUG_POOL_SIZE" configurable via Kconfig in a similar fashion to
"CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE"?

Please let me know if more information is required or if you want me to
test a patch.

Thanks

2023-05-01 15:43:53

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] debugobject: Prevent init race with static objects

Ido!

On Mon, May 01 2023 at 16:40, Ido Schimmel wrote:
> On Wed, Apr 12, 2023 at 09:54:39AM +0200, Thomas Gleixner wrote:
> With this patch we see the following warning in the kernel log during
> boot:
>
> "ODEBUG: Out of memory. ODEBUG disabled"
...

> The following diff seems to solve the problem for me:
>
> diff --git a/lib/debugobjects.c b/lib/debugobjects.c
> index b796799fadb2..af4bd66c571c 100644
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -21,7 +21,7 @@
> #define ODEBUG_HASH_BITS 14
> #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS)
>
> -#define ODEBUG_POOL_SIZE 1024
> +#define ODEBUG_POOL_SIZE (16 * 1024)

That's a big hammer :)

> I'm not familiar with the debugobjects code, but maybe it makes sense to
> make "ODEBUG_POOL_SIZE" configurable via Kconfig in a similar fashion to
> "CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE"?

I don't think so.

The change in that patch is neither debug_objects_activate() nor
debug_objecs_assert_init() no longer invoke debug_object_init() which is
now the only place doing pool refills. So depending on the number of
statically allocated objects this might deplete the pool quick enough.

Does the patch below restore the old behaviour?

Thanks,

tglx
---
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index b796799fadb2..003edc5ebd67 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -587,6 +587,16 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket
return NULL;
}

+static void debug_objects_fill_pool(void)
+{
+ /*
+ * On RT enabled kernels the pool refill must happen in preemptible
+ * context:
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
+ fill_pool();
+}
+
static void
__debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
{
@@ -595,12 +605,7 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
struct debug_obj *obj;
unsigned long flags;

- /*
- * On RT enabled kernels the pool refill must happen in preemptible
- * context:
- */
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
- fill_pool();
+ debug_objects_fill_pool();

db = get_bucket((unsigned long) addr);

@@ -685,6 +690,8 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
if (!debug_objects_enabled)
return 0;

+ debug_objects_fill_pool();
+
db = get_bucket((unsigned long) addr);

raw_spin_lock_irqsave(&db->lock, flags);
@@ -894,6 +901,8 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
if (!debug_objects_enabled)
return;

+ debug_objects_fill_pool();
+
db = get_bucket((unsigned long) addr);

raw_spin_lock_irqsave(&db->lock, flags);

2023-05-02 06:00:44

by Ido Schimmel

[permalink] [raw]
Subject: Re: [PATCH] debugobject: Prevent init race with static objects

On Mon, May 01, 2023 at 05:42:06PM +0200, Thomas Gleixner wrote:
> The change in that patch is neither debug_objects_activate() nor
> debug_objecs_assert_init() no longer invoke debug_object_init() which is
> now the only place doing pool refills. So depending on the number of
> statically allocated objects this might deplete the pool quick enough.
>
> Does the patch below restore the old behaviour?

Yes. Couldn't reproduce the issue with the proposed fix. Feel free to
add:

Tested-by: Ido Schimmel <[email protected]>

Thanks for the quick fix!

Subject: [tip: core/debugobjects] debugobject: Ensure pool refill (again)

The following commit has been merged into the core/debugobjects branch of tip:

Commit-ID: 0af462f19e635ad522f28981238334620881badc
Gitweb: https://git.kernel.org/tip/0af462f19e635ad522f28981238334620881badc
Author: Thomas Gleixner <[email protected]>
AuthorDate: Mon, 01 May 2023 17:42:06 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Tue, 02 May 2023 10:07:04 +02:00

debugobject: Ensure pool refill (again)

The recent fix to ensure atomicity of lookup and allocation inadvertently
broke the pool refill mechanism.

Prior to that change debug_objects_activate() and debug_objecs_assert_init()
invoked debug_objecs_init() to set up the tracking object for statically
initialized objects. That's not longer the case and debug_objecs_init() is
now the only place which does pool refills.

Depending on the number of statically initialized objects this can be
enough to actually deplete the pool, which was observed by Ido via a
debugobjects OOM warning.

Restore the old behaviour by adding explicit refill opportunities to
debug_objects_activate() and debug_objecs_assert_init().

Fixes: 63a759694eed ("debugobject: Prevent init race with static objects")
Reported-by: Ido Schimmel <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Ido Schimmel <[email protected]>
Link: https://lore.kernel.org/r/871qk05a9d.ffs@tglx


---
lib/debugobjects.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index b796799..003edc5 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -587,6 +587,16 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket
return NULL;
}

+static void debug_objects_fill_pool(void)
+{
+ /*
+ * On RT enabled kernels the pool refill must happen in preemptible
+ * context:
+ */
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
+ fill_pool();
+}
+
static void
__debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
{
@@ -595,12 +605,7 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
struct debug_obj *obj;
unsigned long flags;

- /*
- * On RT enabled kernels the pool refill must happen in preemptible
- * context:
- */
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
- fill_pool();
+ debug_objects_fill_pool();

db = get_bucket((unsigned long) addr);

@@ -685,6 +690,8 @@ int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
if (!debug_objects_enabled)
return 0;

+ debug_objects_fill_pool();
+
db = get_bucket((unsigned long) addr);

raw_spin_lock_irqsave(&db->lock, flags);
@@ -894,6 +901,8 @@ void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
if (!debug_objects_enabled)
return;

+ debug_objects_fill_pool();
+
db = get_bucket((unsigned long) addr);

raw_spin_lock_irqsave(&db->lock, flags);