At present kernfs_notify_list is implemented as a singly linked
list of kernfs_node(s), where last element points to itself and
value of ->attr.next tells if node is present on the list or not.
Both addition and deletion to list happen under kernfs_notify_lock.
Change kernfs_notify_list to llist so that addition to list can heppen
locklessly.
Suggested by: Al Viro <[email protected]>
Signed-off-by: Imran Khan <[email protected]>
Acked-by: Tejun Heo <[email protected]>
---
fs/kernfs/file.c | 47 ++++++++++++++++++------------------------
include/linux/kernfs.h | 2 +-
2 files changed, 21 insertions(+), 28 deletions(-)
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 706aebcfb8f69..dce654ad2cea9 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -38,18 +38,16 @@ struct kernfs_open_node {
struct list_head files; /* goes through kernfs_open_file.list */
};
-/*
- * kernfs_notify() may be called from any context and bounces notifications
- * through a work item. To minimize space overhead in kernfs_node, the
- * pending queue is implemented as a singly linked list of kernfs_nodes.
- * The list is terminated with the self pointer so that whether a
- * kernfs_node is on the list or not can be determined by testing the next
- * pointer for NULL.
+/**
+ * attribute_to_node - get kernfs_node object corresponding to a kernfs attribute
+ * @ptr: &struct kernfs_elem_attr
+ * @type: struct kernfs_node
+ * @member: name of member (i.e attr)
*/
-#define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list)
+#define attribute_to_node(ptr, type, member) \
+ container_of(ptr, type, member)
-static DEFINE_SPINLOCK(kernfs_notify_lock);
-static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
+static LLIST_HEAD(kernfs_notify_list);
/**
* kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn.
@@ -903,18 +901,16 @@ static void kernfs_notify_workfn(struct work_struct *work)
struct kernfs_node *kn;
struct kernfs_super_info *info;
struct kernfs_root *root;
+ struct llist_node *free;
+ struct kernfs_elem_attr *attr;
repeat:
/* pop one off the notify_list */
- spin_lock_irq(&kernfs_notify_lock);
- kn = kernfs_notify_list;
- if (kn == KERNFS_NOTIFY_EOL) {
- spin_unlock_irq(&kernfs_notify_lock);
+ free = llist_del_first(&kernfs_notify_list);
+ if (free == NULL)
return;
- }
- kernfs_notify_list = kn->attr.notify_next;
- kn->attr.notify_next = NULL;
- spin_unlock_irq(&kernfs_notify_lock);
+ attr = llist_entry(free, struct kernfs_elem_attr, notify_next);
+ kn = attribute_to_node(attr, struct kernfs_node, attr);
root = kernfs_root(kn);
/* kick fsnotify */
down_write(&root->kernfs_rwsem);
@@ -970,12 +966,14 @@ static void kernfs_notify_workfn(struct work_struct *work)
void kernfs_notify(struct kernfs_node *kn)
{
static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
- unsigned long flags;
struct kernfs_open_node *on;
if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
return;
+ /* Because we are using llist for kernfs_notify_list */
+ WARN_ON_ONCE(in_nmi());
+
/* kick poll immediately */
rcu_read_lock();
on = rcu_dereference(kn->attr.open);
@@ -986,14 +984,9 @@ void kernfs_notify(struct kernfs_node *kn)
rcu_read_unlock();
/* schedule work to kick fsnotify */
- spin_lock_irqsave(&kernfs_notify_lock, flags);
- if (!kn->attr.notify_next) {
- kernfs_get(kn);
- kn->attr.notify_next = kernfs_notify_list;
- kernfs_notify_list = kn;
- schedule_work(&kernfs_notify_work);
- }
- spin_unlock_irqrestore(&kernfs_notify_lock, flags);
+ kernfs_get(kn);
+ llist_add(&kn->attr.notify_next, &kernfs_notify_list);
+ schedule_work(&kernfs_notify_work);
}
EXPORT_SYMBOL_GPL(kernfs_notify);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 13f54f078a52a..2dd9c8df0f4f6 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -116,7 +116,7 @@ struct kernfs_elem_attr {
const struct kernfs_ops *ops;
struct kernfs_open_node __rcu *open;
loff_t size;
- struct kernfs_node *notify_next; /* for kernfs_notify() */
+ struct llist_node notify_next; /* for kernfs_notify() */
};
/*
--
2.30.2
Hi All,
On 15.06.2022 04:10, Imran Khan wrote:
> At present kernfs_notify_list is implemented as a singly linked
> list of kernfs_node(s), where last element points to itself and
> value of ->attr.next tells if node is present on the list or not.
> Both addition and deletion to list happen under kernfs_notify_lock.
>
> Change kernfs_notify_list to llist so that addition to list can heppen
> locklessly.
>
> Suggested by: Al Viro <[email protected]>
> Signed-off-by: Imran Khan <[email protected]>
> Acked-by: Tejun Heo <[email protected]>
This patch landed in linux next-20220630 as commit b8f35fa1188b
("kernfs: Change kernfs_notify_list to llist."). Unfortunately, it
causes serious regression on my test systems. It can be easily noticed
in the logs by the following warning:
------------[ cut here ]------------
WARNING: CPU: 1 PID: 34 at fs/kernfs/dir.c:531 kernfs_put.part.0+0x1a4/0x1d8
kernfs_put: console/active: released with incorrect active_ref 0
Modules linked in:
CPU: 1 PID: 34 Comm: kworker/1:4 Not tainted
5.19.0-rc4-05465-g5732b42edfd1 #12317
Hardware name: Samsung Exynos (Flattened Device Tree)
Workqueue: events kernfs_notify_workfn
unwind_backtrace from show_stack+0x10/0x14
show_stack from dump_stack_lvl+0x40/0x4c
dump_stack_lvl from __warn+0xc8/0x13c
__warn from warn_slowpath_fmt+0x90/0xb4
warn_slowpath_fmt from kernfs_put.part.0+0x1a4/0x1d8
kernfs_put.part.0 from kernfs_notify_workfn+0x1a0/0x1d0
kernfs_notify_workfn from process_one_work+0x1ec/0x4cc
process_one_work from worker_thread+0x58/0x54c
worker_thread from kthread+0xd0/0xec
kthread from ret_from_fork+0x14/0x2c
Exception stack(0xf099dfb0 to 0xf099dff8)
...
---[ end trace 0000000000000000 ]---
then, after some standard kernel messages, booting stops with the
following log:
rcu: INFO: rcu_sched self-detected stall on CPU
rcu: 1-...!: (2099 ticks this GP) idle=403c/1/0x40000002
softirq=33/33 fqs=1
(t=2100 jiffies g=-1135 q=4 ncpus=2)
rcu: rcu_sched kthread starved for 2098 jiffies! g-1135 f0x0
RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=1
rcu: Unless rcu_sched kthread gets sufficient CPU time, OOM is now
expected behavior.
rcu: RCU grace-period kthread stack dump:
task:rcu_sched state:R running task stack: 0 pid: 10
ppid: 2 flags:0x00000000
__schedule from schedule+0x48/0xb0
schedule from schedule_timeout+0x84/0x138
schedule_timeout from rcu_gp_fqs_loop+0x124/0x478
rcu_gp_fqs_loop from rcu_gp_kthread+0x150/0x1c8
rcu_gp_kthread from kthread+0xd0/0xec
kthread from ret_from_fork+0x14/0x2c
Exception stack(0xf088dfb0 to 0xf088dff8)
...
rcu: Stack dump where RCU GP kthread last ran:
NMI backtrace for cpu 1
CPU: 1 PID: 34 Comm: kworker/1:4 Tainted: G W
5.19.0-rc4-05465-g5732b42edfd1 #12317
Hardware name: Samsung Exynos (Flattened Device Tree)
Workqueue: events kernfs_notify_workfn
unwind_backtrace from show_stack+0x10/0x14
show_stack from dump_stack_lvl+0x40/0x4c
dump_stack_lvl from nmi_cpu_backtrace+0xd0/0x104
nmi_cpu_backtrace from nmi_trigger_cpumask_backtrace+0xd8/0x120
nmi_trigger_cpumask_backtrace from
rcu_check_gp_kthread_starvation+0x138/0x154
rcu_check_gp_kthread_starvation from rcu_sched_clock_irq+0x664/0x9f4
rcu_sched_clock_irq from update_process_times+0x6c/0x94
update_process_times from tick_sched_timer+0x60/0xe8
tick_sched_timer from __hrtimer_run_queues+0x1b4/0x328
__hrtimer_run_queues from hrtimer_interrupt+0x124/0x2b0
hrtimer_interrupt from exynos4_mct_tick_isr+0x44/0x7c
exynos4_mct_tick_isr from __handle_irq_event_percpu+0x7c/0x1cc
__handle_irq_event_percpu from handle_irq_event+0x44/0x8c
handle_irq_event from handle_fasteoi_irq+0x98/0x18c
handle_fasteoi_irq from generic_handle_domain_irq+0x24/0x34
generic_handle_domain_irq from gic_handle_irq+0x88/0xa8
gic_handle_irq from generic_handle_arch_irq+0x34/0x44
generic_handle_arch_irq from call_with_stack+0x18/0x20
call_with_stack from __irq_svc+0x98/0xb0
Exception stack(0xf099de78 to 0xf099dec0)
...
__irq_svc from up_write+0x4/0x3c
up_write from 0xef7cbb00
It was not easy to find this, because bisecting points to commit
5732b42edfd1 ("Merge branch 'driver-core-next' of
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git").
This means that there are some non-trivial dependencies between both
merged branches. I've rebased 5732b42edfd1^2 onto 5732b42edfd1^1 and
then I've did the bisecting of the rebased commits. Finally I've found
this one. Then I've confirmed that reverting it on top of 5732b42edfd1
and then next-20220630 really fixes the issue.
Let me know how I can help fixing this issue. I've observed this issue
on ARM 32bit kernel compiled from multi_v7_defconfig and following
boards: Raspberry Pi 3b and 4b as well as Samsung Exynos based Odroid U3
and XU4. This issue doesn't happen on QEMU's ARM32bit 'virt' machine.
> ---
> fs/kernfs/file.c | 47 ++++++++++++++++++------------------------
> include/linux/kernfs.h | 2 +-
> 2 files changed, 21 insertions(+), 28 deletions(-)
>
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index 706aebcfb8f69..dce654ad2cea9 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -38,18 +38,16 @@ struct kernfs_open_node {
> struct list_head files; /* goes through kernfs_open_file.list */
> };
>
> -/*
> - * kernfs_notify() may be called from any context and bounces notifications
> - * through a work item. To minimize space overhead in kernfs_node, the
> - * pending queue is implemented as a singly linked list of kernfs_nodes.
> - * The list is terminated with the self pointer so that whether a
> - * kernfs_node is on the list or not can be determined by testing the next
> - * pointer for NULL.
> +/**
> + * attribute_to_node - get kernfs_node object corresponding to a kernfs attribute
> + * @ptr: &struct kernfs_elem_attr
> + * @type: struct kernfs_node
> + * @member: name of member (i.e attr)
> */
> -#define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list)
> +#define attribute_to_node(ptr, type, member) \
> + container_of(ptr, type, member)
>
> -static DEFINE_SPINLOCK(kernfs_notify_lock);
> -static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
> +static LLIST_HEAD(kernfs_notify_list);
>
> /**
> * kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn.
> @@ -903,18 +901,16 @@ static void kernfs_notify_workfn(struct work_struct *work)
> struct kernfs_node *kn;
> struct kernfs_super_info *info;
> struct kernfs_root *root;
> + struct llist_node *free;
> + struct kernfs_elem_attr *attr;
> repeat:
> /* pop one off the notify_list */
> - spin_lock_irq(&kernfs_notify_lock);
> - kn = kernfs_notify_list;
> - if (kn == KERNFS_NOTIFY_EOL) {
> - spin_unlock_irq(&kernfs_notify_lock);
> + free = llist_del_first(&kernfs_notify_list);
> + if (free == NULL)
> return;
> - }
> - kernfs_notify_list = kn->attr.notify_next;
> - kn->attr.notify_next = NULL;
> - spin_unlock_irq(&kernfs_notify_lock);
>
> + attr = llist_entry(free, struct kernfs_elem_attr, notify_next);
> + kn = attribute_to_node(attr, struct kernfs_node, attr);
> root = kernfs_root(kn);
> /* kick fsnotify */
> down_write(&root->kernfs_rwsem);
> @@ -970,12 +966,14 @@ static void kernfs_notify_workfn(struct work_struct *work)
> void kernfs_notify(struct kernfs_node *kn)
> {
> static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
> - unsigned long flags;
> struct kernfs_open_node *on;
>
> if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
> return;
>
> + /* Because we are using llist for kernfs_notify_list */
> + WARN_ON_ONCE(in_nmi());
> +
> /* kick poll immediately */
> rcu_read_lock();
> on = rcu_dereference(kn->attr.open);
> @@ -986,14 +984,9 @@ void kernfs_notify(struct kernfs_node *kn)
> rcu_read_unlock();
>
> /* schedule work to kick fsnotify */
> - spin_lock_irqsave(&kernfs_notify_lock, flags);
> - if (!kn->attr.notify_next) {
> - kernfs_get(kn);
> - kn->attr.notify_next = kernfs_notify_list;
> - kernfs_notify_list = kn;
> - schedule_work(&kernfs_notify_work);
> - }
> - spin_unlock_irqrestore(&kernfs_notify_lock, flags);
> + kernfs_get(kn);
> + llist_add(&kn->attr.notify_next, &kernfs_notify_list);
> + schedule_work(&kernfs_notify_work);
> }
> EXPORT_SYMBOL_GPL(kernfs_notify);
>
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index 13f54f078a52a..2dd9c8df0f4f6 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -116,7 +116,7 @@ struct kernfs_elem_attr {
> const struct kernfs_ops *ops;
> struct kernfs_open_node __rcu *open;
> loff_t size;
> - struct kernfs_node *notify_next; /* for kernfs_notify() */
> + struct llist_node notify_next; /* for kernfs_notify() */
> };
>
> /*
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
Hi,
On 01.07.2022 14:20, Imran Khan wrote:
> On 1/7/22 9:22 pm, Marek Szyprowski wrote:
>> On 15.06.2022 04:10, Imran Khan wrote:
>>> At present kernfs_notify_list is implemented as a singly linked
>>> list of kernfs_node(s), where last element points to itself and
>>> value of ->attr.next tells if node is present on the list or not.
>>> Both addition and deletion to list happen under kernfs_notify_lock.
>>>
>>> Change kernfs_notify_list to llist so that addition to list can heppen
>>> locklessly.
>>>
>>> Suggested by: Al Viro <[email protected]>
>>> Signed-off-by: Imran Khan <[email protected]>
>>> Acked-by: Tejun Heo <[email protected]>
>> This patch landed in linux next-20220630 as commit b8f35fa1188b
>> ("kernfs: Change kernfs_notify_list to llist."). Unfortunately, it
>> causes serious regression on my test systems. It can be easily noticed
>> in the logs by the following warning:
>>
>> ------------[ cut here ]------------
>> WARNING: CPU: 1 PID: 34 at fs/kernfs/dir.c:531 kernfs_put.part.0+0x1a4/0x1d8
>> kernfs_put: console/active: released with incorrect active_ref 0
>> Modules linked in:
>> CPU: 1 PID: 34 Comm: kworker/1:4 Not tainted
>> 5.19.0-rc4-05465-g5732b42edfd1 #12317
>> Hardware name: Samsung Exynos (Flattened Device Tree)
>> Workqueue: events kernfs_notify_workfn
>> unwind_backtrace from show_stack+0x10/0x14
>> show_stack from dump_stack_lvl+0x40/0x4c
>> dump_stack_lvl from __warn+0xc8/0x13c
>> __warn from warn_slowpath_fmt+0x90/0xb4
>> warn_slowpath_fmt from kernfs_put.part.0+0x1a4/0x1d8
>> kernfs_put.part.0 from kernfs_notify_workfn+0x1a0/0x1d0
>> kernfs_notify_workfn from process_one_work+0x1ec/0x4cc
>> process_one_work from worker_thread+0x58/0x54c
>> worker_thread from kthread+0xd0/0xec
>> kthread from ret_from_fork+0x14/0x2c
>> Exception stack(0xf099dfb0 to 0xf099dff8)
>> ...
>> ---[ end trace 0000000000000000 ]---
>>
> Thanks for reporting this issue. It has been reported earlier in [1] as well. I
> am unable to reproduce it locally. Could you please test with following patch on
> top of linux next-20220630 and let me know if it helps:
Yes, this fixes the issue. Feel free to add:
Reported-by: Marek Szyprowski <[email protected]>
Tested-by: Marek Szyprowski <[email protected]>
Maybe it is related to the fact, that I have earlycon enabled on those
machines?
> >From 6bf7f1adc4b091dc6d6c60e0dd0f16247f61f374 Mon Sep 17 00:00:00 2001
> From: Imran Khan <[email protected]>
> Date: Fri, 1 Jul 2022 14:27:52 +1000
> Subject: [PATCH] kernfs: Avoid re-adding kernfs_node into kernfs_notify_list.
>
> ---
> fs/kernfs/file.c | 9 ++++++---
> 1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index bb933221b4bae..e8ec054e11c63 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -917,6 +917,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
> if (free == NULL)
> return;
>
> + free->next = NULL;
> attr = llist_entry(free, struct kernfs_elem_attr, notify_next);
> kn = attribute_to_node(attr, struct kernfs_node, attr);
> root = kernfs_root(kn);
> @@ -992,9 +993,11 @@ void kernfs_notify(struct kernfs_node *kn)
> rcu_read_unlock();
>
> /* schedule work to kick fsnotify */
> - kernfs_get(kn);
> - llist_add(&kn->attr.notify_next, &kernfs_notify_list);
> - schedule_work(&kernfs_notify_work);
> + if (kn->attr.notify_next.next != NULL) {
> + kernfs_get(kn);
> + llist_add(&kn->attr.notify_next, &kernfs_notify_list);
> + schedule_work(&kernfs_notify_work);
> + }
> }
> EXPORT_SYMBOL_GPL(kernfs_notify);
>
>
> base-commit: 6cc11d2a1759275b856e464265823d94aabd5eaf
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
Hello Marek,
On 1/7/22 9:22 pm, Marek Szyprowski wrote:
> Hi All,
>
> On 15.06.2022 04:10, Imran Khan wrote:
>> At present kernfs_notify_list is implemented as a singly linked
>> list of kernfs_node(s), where last element points to itself and
>> value of ->attr.next tells if node is present on the list or not.
>> Both addition and deletion to list happen under kernfs_notify_lock.
>>
>> Change kernfs_notify_list to llist so that addition to list can heppen
>> locklessly.
>>
>> Suggested by: Al Viro <[email protected]>
>> Signed-off-by: Imran Khan <[email protected]>
>> Acked-by: Tejun Heo <[email protected]>
>
> This patch landed in linux next-20220630 as commit b8f35fa1188b
> ("kernfs: Change kernfs_notify_list to llist."). Unfortunately, it
> causes serious regression on my test systems. It can be easily noticed
> in the logs by the following warning:
>
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 34 at fs/kernfs/dir.c:531 kernfs_put.part.0+0x1a4/0x1d8
> kernfs_put: console/active: released with incorrect active_ref 0
> Modules linked in:
> CPU: 1 PID: 34 Comm: kworker/1:4 Not tainted
> 5.19.0-rc4-05465-g5732b42edfd1 #12317
> Hardware name: Samsung Exynos (Flattened Device Tree)
> Workqueue: events kernfs_notify_workfn
> unwind_backtrace from show_stack+0x10/0x14
> show_stack from dump_stack_lvl+0x40/0x4c
> dump_stack_lvl from __warn+0xc8/0x13c
> __warn from warn_slowpath_fmt+0x90/0xb4
> warn_slowpath_fmt from kernfs_put.part.0+0x1a4/0x1d8
> kernfs_put.part.0 from kernfs_notify_workfn+0x1a0/0x1d0
> kernfs_notify_workfn from process_one_work+0x1ec/0x4cc
> process_one_work from worker_thread+0x58/0x54c
> worker_thread from kthread+0xd0/0xec
> kthread from ret_from_fork+0x14/0x2c
> Exception stack(0xf099dfb0 to 0xf099dff8)
> ...
> ---[ end trace 0000000000000000 ]---
>
Thanks for reporting this issue. It has been reported earlier in [1] as well. I
am unable to reproduce it locally. Could you please test with following patch on
top of linux next-20220630 and let me know if it helps:
From 6bf7f1adc4b091dc6d6c60e0dd0f16247f61f374 Mon Sep 17 00:00:00 2001
From: Imran Khan <[email protected]>
Date: Fri, 1 Jul 2022 14:27:52 +1000
Subject: [PATCH] kernfs: Avoid re-adding kernfs_node into kernfs_notify_list.
---
fs/kernfs/file.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index bb933221b4bae..e8ec054e11c63 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -917,6 +917,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
if (free == NULL)
return;
+ free->next = NULL;
attr = llist_entry(free, struct kernfs_elem_attr, notify_next);
kn = attribute_to_node(attr, struct kernfs_node, attr);
root = kernfs_root(kn);
@@ -992,9 +993,11 @@ void kernfs_notify(struct kernfs_node *kn)
rcu_read_unlock();
/* schedule work to kick fsnotify */
- kernfs_get(kn);
- llist_add(&kn->attr.notify_next, &kernfs_notify_list);
- schedule_work(&kernfs_notify_work);
+ if (kn->attr.notify_next.next != NULL) {
+ kernfs_get(kn);
+ llist_add(&kn->attr.notify_next, &kernfs_notify_list);
+ schedule_work(&kernfs_notify_work);
+ }
}
EXPORT_SYMBOL_GPL(kernfs_notify);
base-commit: 6cc11d2a1759275b856e464265823d94aabd5eaf
--
2.30.2
Thanks
-- Imran
[1]: https://lore.kernel.org/lkml/[email protected]/
Hello,
On 1/7/22 10:49 pm, Marek Szyprowski wrote:
> Hi,
>
> On 01.07.2022 14:20, Imran Khan wrote:
>> On 1/7/22 9:22 pm, Marek Szyprowski wrote:
>>> On 15.06.2022 04:10, Imran Khan wrote:
>>>> At present kernfs_notify_list is implemented as a singly linked
>>>> list of kernfs_node(s), where last element points to itself and
>>>> value of ->attr.next tells if node is present on the list or not.
>>>> Both addition and deletion to list happen under kernfs_notify_lock.
>>>>
>>>> Change kernfs_notify_list to llist so that addition to list can heppen
>>>> locklessly.
>>>>
>>>> Suggested by: Al Viro <[email protected]>
>>>> Signed-off-by: Imran Khan <[email protected]>
>>>> Acked-by: Tejun Heo <[email protected]>
>>> This patch landed in linux next-20220630 as commit b8f35fa1188b
>>> ("kernfs: Change kernfs_notify_list to llist."). Unfortunately, it
>>> causes serious regression on my test systems. It can be easily noticed
>>> in the logs by the following warning:
>>>
[...]
>
> Yes, this fixes the issue. Feel free to add:
>
> Reported-by: Marek Szyprowski <[email protected]>
>
> Tested-by: Marek Szyprowski <[email protected]>
>
Thanks a lot for testing. Sure I have added these tags. I have
send the patch for review at [1].
> Maybe it is related to the fact, that I have earlycon enabled on those
> machines?
>
For sure it is occuring with some tweaking in console settings. So far both the
reported occurences have this thing in common. I will be able to confirm further
if I could reproduce this locally and I am trying that at the moment.
I will share when I have some more findings.
Thanks
-- Imran
[1]: https://lore.kernel.org/lkml/[email protected]/
Hi Marek,
On Fri, Jul 1, 2022 at 2:51 PM Marek Szyprowski
<[email protected]> wrote:
> On 01.07.2022 14:20, Imran Khan wrote:
> > On 1/7/22 9:22 pm, Marek Szyprowski wrote:
> >> On 15.06.2022 04:10, Imran Khan wrote:
> >>> At present kernfs_notify_list is implemented as a singly linked
> >>> list of kernfs_node(s), where last element points to itself and
> >>> value of ->attr.next tells if node is present on the list or not.
> >>> Both addition and deletion to list happen under kernfs_notify_lock.
> >>>
> >>> Change kernfs_notify_list to llist so that addition to list can heppen
> >>> locklessly.
> >>>
> >>> Suggested by: Al Viro <[email protected]>
> >>> Signed-off-by: Imran Khan <[email protected]>
> >>> Acked-by: Tejun Heo <[email protected]>
> >> This patch landed in linux next-20220630 as commit b8f35fa1188b
> >> ("kernfs: Change kernfs_notify_list to llist."). Unfortunately, it
> >> causes serious regression on my test systems. It can be easily noticed
> >> in the logs by the following warning:
> >>
> >> ------------[ cut here ]------------
> >> WARNING: CPU: 1 PID: 34 at fs/kernfs/dir.c:531 kernfs_put.part.0+0x1a4/0x1d8
> >> kernfs_put: console/active: released with incorrect active_ref 0
> >> Modules linked in:
> >> CPU: 1 PID: 34 Comm: kworker/1:4 Not tainted
> >> 5.19.0-rc4-05465-g5732b42edfd1 #12317
> >> Hardware name: Samsung Exynos (Flattened Device Tree)
> >> Workqueue: events kernfs_notify_workfn
> >> unwind_backtrace from show_stack+0x10/0x14
> >> show_stack from dump_stack_lvl+0x40/0x4c
> >> dump_stack_lvl from __warn+0xc8/0x13c
> >> __warn from warn_slowpath_fmt+0x90/0xb4
> >> warn_slowpath_fmt from kernfs_put.part.0+0x1a4/0x1d8
> >> kernfs_put.part.0 from kernfs_notify_workfn+0x1a0/0x1d0
> >> kernfs_notify_workfn from process_one_work+0x1ec/0x4cc
> >> process_one_work from worker_thread+0x58/0x54c
> >> worker_thread from kthread+0xd0/0xec
> >> kthread from ret_from_fork+0x14/0x2c
> >> Exception stack(0xf099dfb0 to 0xf099dff8)
> >> ...
> >> ---[ end trace 0000000000000000 ]---
> >>
> > Thanks for reporting this issue. It has been reported earlier in [1] as well. I
> > am unable to reproduce it locally. Could you please test with following patch on
> > top of linux next-20220630 and let me know if it helps:
>
> Yes, this fixes the issue. Feel free to add:
>
> Reported-by: Marek Szyprowski <[email protected]>
>
> Tested-by: Marek Szyprowski <[email protected]>
>
> Maybe it is related to the fact, that I have earlycon enabled on those
> machines?
Probably.
I see the issue on
- rbtx4927 (CONFIG_EARLY_PRINTK=y),
- SiPEED MAiXBiT (chosen/bootargs = "earlycon console=ttySIF0",
chosen/stdout-path = "serial0:115200n8")
- Litex/VexRiscV (chosen/bootargs = "console=liteuart earlycon=sbi").
It doesn't happen on the boards that just provide chosen/stdout-path
in DT.
Reverting commit b8f35fa1188b8403 ("kernfs: Change kernfs_notify_list
to llist.") fixes the issue.
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds