Fix two sleeping functions called from atomic context by doing immediate
return to the caller if !preemptible() evaluates 'true'. Remove two
in_interrupt() tests because they are not suited for being used here.
Since functions do_con_write() and con_flush_chars() might sleep in
console_lock(), it must be assured that they are never executed in
atomic contexts.
This issue is reported by Syzbot which notices that they are executed
while holding spinlocks and with interrupts disabled. Actually Syzbot
emits a first report and then, after fixing do_con_write(), a second
report for the same problem in con_flush_chars() because these functions
are called one after the other by con_write().
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: [email protected]
Suggested-by: Marco Elver <[email protected]>
Signed-off-by: Fabio M. De Francesco <[email protected]>
---
drivers/tty/vt/vt.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 7359c3e80d63..508f8a56d361 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
struct vt_notifier_param param;
bool rescan;
- if (in_interrupt())
+ if (!preemptible())
return count;
console_lock();
@@ -3358,7 +3358,7 @@ static void con_flush_chars(struct tty_struct *tty)
{
struct vc_data *vc;
- if (in_interrupt()) /* from flush_to_ldisc */
+ if (!preemptible()) /* from flush_to_ldisc */
return;
/* if we race with con_close(), vt may be null */
--
2.33.1
On Tue, Nov 16, 2021 at 03:49:37PM +0100, Fabio M. De Francesco wrote:
> Fix two sleeping functions called from atomic context by doing immediate
> return to the caller if !preemptible() evaluates 'true'. Remove two
> in_interrupt() tests because they are not suited for being used here.
>
> Since functions do_con_write() and con_flush_chars() might sleep in
> console_lock(), it must be assured that they are never executed in
> atomic contexts.
>
> This issue is reported by Syzbot which notices that they are executed
> while holding spinlocks and with interrupts disabled. Actually Syzbot
> emits a first report and then, after fixing do_con_write(), a second
> report for the same problem in con_flush_chars() because these functions
> are called one after the other by con_write().
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Reported-by: [email protected]
> Suggested-by: Marco Elver <[email protected]>
> Signed-off-by: Fabio M. De Francesco <[email protected]>
> ---
> drivers/tty/vt/vt.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
> index 7359c3e80d63..508f8a56d361 100644
> --- a/drivers/tty/vt/vt.c
> +++ b/drivers/tty/vt/vt.c
> @@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
> struct vt_notifier_param param;
> bool rescan;
>
> - if (in_interrupt())
> + if (!preemptible())
> return count;
Very odd, what code is calling these functions to trigger this check?
Shouldn't the caller be fixed instead?
What changed to suddenly cause this to show up?
Given that this check has been here for a _very_ long time, changing it
now without finding out the root cause is probably not a good idea.
thanks,
greg k-h
On Tuesday, November 16, 2021 3:58:44 PM CET Greg Kroah-Hartman wrote:
> On Tue, Nov 16, 2021 at 03:49:37PM +0100, Fabio M. De Francesco wrote:
> > Fix two sleeping functions called from atomic context by doing immediate
> > return to the caller if !preemptible() evaluates 'true'. Remove two
> > in_interrupt() tests because they are not suited for being used here.
> >
> > Since functions do_con_write() and con_flush_chars() might sleep in
> > console_lock(), it must be assured that they are never executed in
> > atomic contexts.
> >
> > This issue is reported by Syzbot which notices that they are executed
> > while holding spinlocks and with interrupts disabled. Actually Syzbot
> > emits a first report and then, after fixing do_con_write(), a second
> > report for the same problem in con_flush_chars() because these functions
> > are called one after the other by con_write().
> >
> > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > Reported-by: [email protected]
> > Suggested-by: Marco Elver <[email protected]>
> > Signed-off-by: Fabio M. De Francesco <[email protected]>
> > ---
> > drivers/tty/vt/vt.c | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
> > index 7359c3e80d63..508f8a56d361 100644
> > --- a/drivers/tty/vt/vt.c
> > +++ b/drivers/tty/vt/vt.c
> > @@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty,
const unsigned char *buf, int co
> > struct vt_notifier_param param;
> > bool rescan;
> >
> > - if (in_interrupt())
> > + if (!preemptible())
> > return count;
>
> Very odd, what code is calling these functions to trigger this check?
This is the call trace reported by Syzbot (https://syzkaller.appspot.com/bug?
id=fe5a4d5a2482bd73064db5de5d28e024f1e2a387):
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
__might_resched.cold+0x222/0x26b kernel/sched/core.c:9539
console_lock+0x17/0x80 kernel/printk/printk.c:2522
do_con_write+0x10f/0x1e40 drivers/tty/vt/vt.c:2908
con_write+0x21/0x40 drivers/tty/vt/vt.c:3295
n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
__start_tty drivers/tty/tty_io.c:806 [inline]
__start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
n_tty_ioctl_helper+0x299/0x2d0 drivers/tty/tty_ioctl.c:880
^^^^^^^^^^
n_tty_ioctl_helper() disabled interrupts via spin_lock_irq(&tty->flow.lock).
n_hdlc_tty_ioctl+0xd2/0x340 drivers/tty/n_hdlc.c:633
tty_ioctl+0xc69/0x1670 drivers/tty/tty_io.c:2814
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:874 [inline]
__se_sys_ioctl fs/ioctl.c:860 [inline]
__x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
> Shouldn't the caller be fixed instead?
Maybe that the caller has no need to disable IRQs, but I cannot yet answer to
this particular question.
> What changed to suddenly cause this to show up?
Commit c545b66c6922 ("tty: Serialize tcflow() with other tty flow control
changes") introduced a call to spin_lock_irq() for command "TCOON", just
before calling __start_tty().
Thanks,
Fabio M. De Francesco
> Given that this check has been here for a _very_ long time, changing it
> now without finding out the root cause is probably not a good idea.
>
> thanks,
>
> greg k-h
>
On Tue, Nov 16, 2021 at 04:35:07PM +0100, Fabio M. De Francesco wrote:
> On Tuesday, November 16, 2021 3:58:44 PM CET Greg Kroah-Hartman wrote:
> > On Tue, Nov 16, 2021 at 03:49:37PM +0100, Fabio M. De Francesco wrote:
> > > Fix two sleeping functions called from atomic context by doing immediate
> > > return to the caller if !preemptible() evaluates 'true'. Remove two
> > > in_interrupt() tests because they are not suited for being used here.
> > >
> > > Since functions do_con_write() and con_flush_chars() might sleep in
> > > console_lock(), it must be assured that they are never executed in
> > > atomic contexts.
> > >
> > > This issue is reported by Syzbot which notices that they are executed
> > > while holding spinlocks and with interrupts disabled. Actually Syzbot
> > > emits a first report and then, after fixing do_con_write(), a second
> > > report for the same problem in con_flush_chars() because these functions
> > > are called one after the other by con_write().
> > >
> > > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > > Reported-by: [email protected]
> > > Suggested-by: Marco Elver <[email protected]>
> > > Signed-off-by: Fabio M. De Francesco <[email protected]>
> > > ---
> > > drivers/tty/vt/vt.c | 4 ++--
> > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
> > > index 7359c3e80d63..508f8a56d361 100644
> > > --- a/drivers/tty/vt/vt.c
> > > +++ b/drivers/tty/vt/vt.c
> > > @@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty,
> const unsigned char *buf, int co
> > > struct vt_notifier_param param;
> > > bool rescan;
> > >
> > > - if (in_interrupt())
> > > + if (!preemptible())
> > > return count;
> >
> > Very odd, what code is calling these functions to trigger this check?
>
> This is the call trace reported by Syzbot (https://syzkaller.appspot.com/bug?
> id=fe5a4d5a2482bd73064db5de5d28e024f1e2a387):
>
> Call Trace:
> <TASK>
> __dump_stack lib/dump_stack.c:88 [inline]
> dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
> __might_resched.cold+0x222/0x26b kernel/sched/core.c:9539
> console_lock+0x17/0x80 kernel/printk/printk.c:2522
> do_con_write+0x10f/0x1e40 drivers/tty/vt/vt.c:2908
> con_write+0x21/0x40 drivers/tty/vt/vt.c:3295
> n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
> tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
> __start_tty drivers/tty/tty_io.c:806 [inline]
> __start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
> n_tty_ioctl_helper+0x299/0x2d0 drivers/tty/tty_ioctl.c:880
>
> ^^^^^^^^^^
> n_tty_ioctl_helper() disabled interrupts via spin_lock_irq(&tty->flow.lock).
>
> n_hdlc_tty_ioctl+0xd2/0x340 drivers/tty/n_hdlc.c:633
> tty_ioctl+0xc69/0x1670 drivers/tty/tty_io.c:2814
> vfs_ioctl fs/ioctl.c:51 [inline]
> __do_sys_ioctl fs/ioctl.c:874 [inline]
> __se_sys_ioctl fs/ioctl.c:860 [inline]
> __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
> do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
> entry_SYSCALL_64_after_hwframe+0x44/0xae
>
> > Shouldn't the caller be fixed instead?
>
> Maybe that the caller has no need to disable IRQs, but I cannot yet answer to
> this particular question.
>
> > What changed to suddenly cause this to show up?
>
> Commit c545b66c6922 ("tty: Serialize tcflow() with other tty flow control
> changes") introduced a call to spin_lock_irq() for command "TCOON", just
> before calling __start_tty().
That commit happened in 2014. Why is this suddenly an issue now that no
one ever saw before?
I am worried you are not actually fixing the real issue here by just
making syzbot be quiet. Can you work to figure out what the real issue
is please?
thanks,
greg k-h
On Tuesday, November 16, 2021 5:59:24 PM CET Greg Kroah-Hartman wrote:
> On Tue, Nov 16, 2021 at 04:35:07PM +0100, Fabio M. De Francesco wrote:
> > On Tuesday, November 16, 2021 3:58:44 PM CET Greg Kroah-Hartman wrote:
> > > On Tue, Nov 16, 2021 at 03:49:37PM +0100, Fabio M. De Francesco wrote:
> > > > Fix two sleeping functions called from atomic context by doing
immediate
> > > > return to the caller if !preemptible() evaluates 'true'. Remove two
> > > > in_interrupt() tests because they are not suited for being used here.
> > > >
> > > > Since functions do_con_write() and con_flush_chars() might sleep in
> > > > console_lock(), it must be assured that they are never executed in
> > > > atomic contexts.
> > > >
> > > > This issue is reported by Syzbot which notices that they are executed
> > > > while holding spinlocks and with interrupts disabled. Actually Syzbot
> > > > emits a first report and then, after fixing do_con_write(), a second
> > > > report for the same problem in con_flush_chars() because these
functions
> > > > are called one after the other by con_write().
> > > >
> > > > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > > > Reported-by: [email protected]
> > > > Suggested-by: Marco Elver <[email protected]>
> > > > Signed-off-by: Fabio M. De Francesco <[email protected]>
> > > > ---
> > > > drivers/tty/vt/vt.c | 4 ++--
> > > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
> > > > index 7359c3e80d63..508f8a56d361 100644
> > > > --- a/drivers/tty/vt/vt.c
> > > > +++ b/drivers/tty/vt/vt.c
> > > > @@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty,
> > const unsigned char *buf, int co
> > > > struct vt_notifier_param param;
> > > > bool rescan;
> > > >
> > > > - if (in_interrupt())
> > > > + if (!preemptible())
> > > > return count;
> > >
> > > Very odd, what code is calling these functions to trigger this check?
> >
> > This is the call trace reported by Syzbot (https://syzkaller.appspot.com/
bug?
> > id=fe5a4d5a2482bd73064db5de5d28e024f1e2a387):
> >
> > Call Trace:
> > <TASK>
> > __dump_stack lib/dump_stack.c:88 [inline]
> > dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
> > __might_resched.cold+0x222/0x26b kernel/sched/core.c:9539
> > console_lock+0x17/0x80 kernel/printk/printk.c:2522
> > do_con_write+0x10f/0x1e40 drivers/tty/vt/vt.c:2908
> > con_write+0x21/0x40 drivers/tty/vt/vt.c:3295
> > n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
> > tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
> > __start_tty drivers/tty/tty_io.c:806 [inline]
> > __start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
> > n_tty_ioctl_helper+0x299/0x2d0 drivers/tty/tty_ioctl.c:880
> >
> > ^^^^^^^^^^
> > n_tty_ioctl_helper() disabled interrupts via spin_lock_irq(&tty-
>flow.lock).
> >
> > n_hdlc_tty_ioctl+0xd2/0x340 drivers/tty/n_hdlc.c:633
> > tty_ioctl+0xc69/0x1670 drivers/tty/tty_io.c:2814
> > vfs_ioctl fs/ioctl.c:51 [inline]
> > __do_sys_ioctl fs/ioctl.c:874 [inline]
> > __se_sys_ioctl fs/ioctl.c:860 [inline]
> > __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
> > do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
> > entry_SYSCALL_64_after_hwframe+0x44/0xae
> >
> > > Shouldn't the caller be fixed instead?
> >
> > Maybe that the caller has no need to disable IRQs, but I cannot yet
answer to
> > this particular question.
> >
> > > What changed to suddenly cause this to show up?
> >
> > Commit c545b66c6922 ("tty: Serialize tcflow() with other tty flow control
> > changes") introduced a call to spin_lock_irq() for command "TCOON", just
> > before calling __start_tty().
>
> That commit happened in 2014. Why is this suddenly an issue now that no
> one ever saw before?
>
> I am worried you are not actually fixing the real issue here by just
> making syzbot be quiet. Can you work to figure out what the real issue
> is please?
Yes, Greg. For sure I'll try my best to figure out what the real issue is.
No doubts that this code has been around for very long time without no one
that had noticed anything wrong, so this report looks weird. There must be
something that I still can't see. Therefore, I agree with you that, with an
high degree of probability, the real issue is somewhere else. (if there is a
real one).
It looks interesting to investigate, although for doing it properly I'll need
some time because I know very little of this code.
Thanks for your reply,
Fabio M. De Francesco
>
> thanks,
>
> greg k-h
>
On 2021/11/16 23:49, Fabio M. De Francesco wrote:
> - if (in_interrupt())
> + if (!preemptible())
> return count;
preemptible() is "an unconditional 0" if CONFIG_PREEMPT_COUNT=n .
Is preemptible() really what we want? ;-)
On Wednesday, November 17, 2021 2:55:44 AM CET Tetsuo Handa wrote:
> On 2021/11/16 23:49, Fabio M. De Francesco wrote:
> > - if (in_interrupt())
> > + if (!preemptible())
> > return count;
>
> preemptible() is "an unconditional 0" if CONFIG_PREEMPT_COUNT=n .
> Is preemptible() really what we want? ;-)
>
Greg K-H made me notice that Linux has had this code for many years and no
one has ever found problems with it, therefore, whatever triggered Syzbot
must be recent changes up in the calls chain.
This reported bug must be better investigated because there is a high
probability that the "real" issue is somewhere else.
Unfortunately, as I replied to Greg, I know very little of tty/vt so I'll
take time to understand the relevant documentation and the related code.
For instance, I don't know why we need to disable IRQs with spin_lock_irq()
up in the call chain in n_tty_ioctl_helper(). Would a "normal" spin_lock() be
enough? Otherwise, what about spin_lock_bh()?
Do we really need to disable IRQs?
Aside from the reasoning reported above, for a moment let's speculate on the
remote possibility that this bugs are in do_con_write() and
con_flush_chars()...
When I asked Syzbot to run a test on my diff, the code was different than
what you see in this patch.
Marco Elver had proposed this "if (!preemptible()) return <something>;". I
didn't even know of that macro, so I proposed to open code the equivalent
implementation that Linux uses if CONFIG_PREEMPT_COUNT=y.
The end result, applying De Morgan's law on his proposal, was "if
((preempt_count() || irqs_disabled()) return <something>;".
Now, out of curiosity, if Greg's argument (with which I agree in full) was
not brought to my attention, what would happen by using that test if we have
kernels compiled with CONFIG_PREEMPT_COUNT=n (as it is in your example)?
I'd appreciate if you have time and want to answer my last question and
(possibly) also the one about using spin_lock() or spin_lock_bh() in
n_tty_ioctl_helper().
@Greg, the same two questions are for you, if you have time and want to
kindly help :)
Thanks to you all,
Fabio M. De Francesco
On Tuesday, November 16, 2021 5:59:24 PM CET Greg Kroah-Hartman wrote:
> On Tue, Nov 16, 2021 at 04:35:07PM +0100, Fabio M. De Francesco wrote:
> > On Tuesday, November 16, 2021 3:58:44 PM CET Greg Kroah-Hartman wrote:
> > > On Tue, Nov 16, 2021 at 03:49:37PM +0100, Fabio M. De Francesco wrote:
> > > > Fix two sleeping functions called from atomic context by doing
immediate
> > > > return to the caller if !preemptible() evaluates 'true'. Remove two
> > > > in_interrupt() tests because they are not suited for being used here.
> > > >
> > > > Since functions do_con_write() and con_flush_chars() might sleep in
> > > > console_lock(), it must be assured that they are never executed in
> > > > atomic contexts.
> > > >
> > > > This issue is reported by Syzbot which notices that they are executed
> > > > while holding spinlocks and with interrupts disabled. Actually Syzbot
> > > > emits a first report and then, after fixing do_con_write(), a second
> > > > report for the same problem in con_flush_chars() because these
functions
> > > > are called one after the other by con_write().
> > > >
> > > > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > > > Reported-by: [email protected]
> > > > Suggested-by: Marco Elver <[email protected]>
> > > > Signed-off-by: Fabio M. De Francesco <[email protected]>
> > > > ---
> > > > drivers/tty/vt/vt.c | 4 ++--
> > > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
> > > > index 7359c3e80d63..508f8a56d361 100644
> > > > --- a/drivers/tty/vt/vt.c
> > > > +++ b/drivers/tty/vt/vt.c
> > > > @@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty,
> > const unsigned char *buf, int co
> > > > struct vt_notifier_param param;
> > > > bool rescan;
> > > >
> > > > - if (in_interrupt())
> > > > + if (!preemptible())
> > > > return count;
> > >
> > > Very odd, what code is calling these functions to trigger this check?
> >
> > This is the call trace reported by Syzbot (https://syzkaller.appspot.com/
bug?
> > id=fe5a4d5a2482bd73064db5de5d28e024f1e2a387):
> >
> > Call Trace:
> > <TASK>
> > __dump_stack lib/dump_stack.c:88 [inline]
> > dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
> > __might_resched.cold+0x222/0x26b kernel/sched/core.c:9539
> > console_lock+0x17/0x80 kernel/printk/printk.c:2522
> > do_con_write+0x10f/0x1e40 drivers/tty/vt/vt.c:2908
> > con_write+0x21/0x40 drivers/tty/vt/vt.c:3295
> > n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
> > tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
> > __start_tty drivers/tty/tty_io.c:806 [inline]
> > __start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
> > n_tty_ioctl_helper+0x299/0x2d0 drivers/tty/tty_ioctl.c:880
> >
> > ^^^^^^^^^^
> > n_tty_ioctl_helper() disabled interrupts via spin_lock_irq(&tty-
>flow.lock).
> >
> > n_hdlc_tty_ioctl+0xd2/0x340 drivers/tty/n_hdlc.c:633
> > tty_ioctl+0xc69/0x1670 drivers/tty/tty_io.c:2814
> > vfs_ioctl fs/ioctl.c:51 [inline]
> > __do_sys_ioctl fs/ioctl.c:874 [inline]
> > __se_sys_ioctl fs/ioctl.c:860 [inline]
> > __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
> > do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
> > entry_SYSCALL_64_after_hwframe+0x44/0xae
> >
> > > Shouldn't the caller be fixed instead?
> >
> > Maybe that the caller has no need to disable IRQs, but I cannot yet
answer to
> > this particular question.
> >
> > > What changed to suddenly cause this to show up?
> >
> > Commit c545b66c6922 ("tty: Serialize tcflow() with other tty flow control
> > changes") introduced a call to spin_lock_irq() for command "TCOON", just
> > before calling __start_tty().
>
> That commit happened in 2014. Why is this suddenly an issue now that no
> one ever saw before?
Hi Greg,
I have just been informed by Pavel Skripkin (who at this moment cannot
directly write to LKML) that this bug has been reported at least other two
times.
We cannot say why nobody has ever addressed this issue before.
Please read the following reports.
https://syzkaller.appspot.com/bug?id=9ed44c22919e3cd2c71907b963fb048ac1115073
https://syzkaller.appspot.com/bug?id=e806305a3a65a0bb2f1be7e672819a079d558016
Thanks,
Fabio
> I am worried you are not actually fixing the real issue here by just
> making syzbot be quiet. Can you work to figure out what the real issue
> is please?
>
> thanks,
>
> greg k-h
>
On Wed, Nov 17, 2021 at 09:23:45AM +0100, Fabio M. De Francesco wrote:
> On Tuesday, November 16, 2021 5:59:24 PM CET Greg Kroah-Hartman wrote:
> > On Tue, Nov 16, 2021 at 04:35:07PM +0100, Fabio M. De Francesco wrote:
> > > On Tuesday, November 16, 2021 3:58:44 PM CET Greg Kroah-Hartman wrote:
> > > > On Tue, Nov 16, 2021 at 03:49:37PM +0100, Fabio M. De Francesco wrote:
> > > > > Fix two sleeping functions called from atomic context by doing
> immediate
> > > > > return to the caller if !preemptible() evaluates 'true'. Remove two
> > > > > in_interrupt() tests because they are not suited for being used here.
> > > > >
> > > > > Since functions do_con_write() and con_flush_chars() might sleep in
> > > > > console_lock(), it must be assured that they are never executed in
> > > > > atomic contexts.
> > > > >
> > > > > This issue is reported by Syzbot which notices that they are executed
> > > > > while holding spinlocks and with interrupts disabled. Actually Syzbot
> > > > > emits a first report and then, after fixing do_con_write(), a second
> > > > > report for the same problem in con_flush_chars() because these
> functions
> > > > > are called one after the other by con_write().
> > > > >
> > > > > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > > > > Reported-by: [email protected]
> > > > > Suggested-by: Marco Elver <[email protected]>
> > > > > Signed-off-by: Fabio M. De Francesco <[email protected]>
> > > > > ---
> > > > > drivers/tty/vt/vt.c | 4 ++--
> > > > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
> > > > > index 7359c3e80d63..508f8a56d361 100644
> > > > > --- a/drivers/tty/vt/vt.c
> > > > > +++ b/drivers/tty/vt/vt.c
> > > > > @@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty,
> > > const unsigned char *buf, int co
> > > > > struct vt_notifier_param param;
> > > > > bool rescan;
> > > > >
> > > > > - if (in_interrupt())
> > > > > + if (!preemptible())
> > > > > return count;
> > > >
> > > > Very odd, what code is calling these functions to trigger this check?
> > >
> > > This is the call trace reported by Syzbot (https://syzkaller.appspot.com/
> bug?
> > > id=fe5a4d5a2482bd73064db5de5d28e024f1e2a387):
> > >
> > > Call Trace:
> > > <TASK>
> > > __dump_stack lib/dump_stack.c:88 [inline]
> > > dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
> > > __might_resched.cold+0x222/0x26b kernel/sched/core.c:9539
> > > console_lock+0x17/0x80 kernel/printk/printk.c:2522
> > > do_con_write+0x10f/0x1e40 drivers/tty/vt/vt.c:2908
> > > con_write+0x21/0x40 drivers/tty/vt/vt.c:3295
> > > n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
> > > tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
> > > __start_tty drivers/tty/tty_io.c:806 [inline]
> > > __start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
> > > n_tty_ioctl_helper+0x299/0x2d0 drivers/tty/tty_ioctl.c:880
> > >
> > > ^^^^^^^^^^
> > > n_tty_ioctl_helper() disabled interrupts via spin_lock_irq(&tty-
> >flow.lock).
> > >
> > > n_hdlc_tty_ioctl+0xd2/0x340 drivers/tty/n_hdlc.c:633
> > > tty_ioctl+0xc69/0x1670 drivers/tty/tty_io.c:2814
> > > vfs_ioctl fs/ioctl.c:51 [inline]
> > > __do_sys_ioctl fs/ioctl.c:874 [inline]
> > > __se_sys_ioctl fs/ioctl.c:860 [inline]
> > > __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
> > > do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > > do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
> > > entry_SYSCALL_64_after_hwframe+0x44/0xae
> > >
> > > > Shouldn't the caller be fixed instead?
> > >
> > > Maybe that the caller has no need to disable IRQs, but I cannot yet
> answer to
> > > this particular question.
> > >
> > > > What changed to suddenly cause this to show up?
> > >
> > > Commit c545b66c6922 ("tty: Serialize tcflow() with other tty flow control
> > > changes") introduced a call to spin_lock_irq() for command "TCOON", just
> > > before calling __start_tty().
> >
> > That commit happened in 2014. Why is this suddenly an issue now that no
> > one ever saw before?
>
> Hi Greg,
>
> I have just been informed by Pavel Skripkin (who at this moment cannot
> directly write to LKML) that this bug has been reported at least other two
> times.
>
> We cannot say why nobody has ever addressed this issue before.
>
> Please read the following reports.
>
> https://syzkaller.appspot.com/bug?id=9ed44c22919e3cd2c71907b963fb048ac1115073
> https://syzkaller.appspot.com/bug?id=e806305a3a65a0bb2f1be7e672819a079d558016
Great, you have a reproducer, so you should be able to duplicate this
locally to figure out what is really happening here.
Good luck!
greg k-h
On 2021/11/17 17:54, Greg Kroah-Hartman wrote:
> Great, you have a reproducer, so you should be able to duplicate this
> locally to figure out what is really happening here.
Until commit ac751efa6a0d70f2 ("console: rename acquire/release_console_sem() to
console_lock/unlock()"), do_con_write() was surely designed to be able to sleep.
$ git blame ac751efa6a0d7~1 drivers/tty/vt/vt.c
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2122) /* acquires console_sem */
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2123) static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count)
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2124) {
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2125) #ifdef VT_BUF_VRAM_ONLY
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2126) #define FLUSH do { } while(0);
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2127) #else
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2128) #define FLUSH if (draw_x >= 0) { \
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2129) vc->vc_sw->con_putcs(vc, (u16 *)draw_from, (u16 *)draw_to - (u16 *)draw_from, vc->vc_y, draw_x); \
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2130) draw_x = -1; \
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2131) }
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2132) #endif
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2133)
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2134) int c, tc, ok, n = 0, draw_x = -1;
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2135) unsigned int currcons;
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2136) unsigned long draw_from = 0, draw_to = 0;
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2137) struct vc_data *vc;
2f1a2ccb9c0de drivers/char/vt.c (Egmont Koblinger 2007-05-08 00:30:37 -0700 2138) unsigned char vc_attr;
0341a4d0fdd2a drivers/char/vt.c (Karl Dahlke 2008-04-28 02:14:25 -0700 2139) struct vt_notifier_param param;
2f1a2ccb9c0de drivers/char/vt.c (Egmont Koblinger 2007-05-08 00:30:37 -0700 2140) uint8_t rescan;
2f1a2ccb9c0de drivers/char/vt.c (Egmont Koblinger 2007-05-08 00:30:37 -0700 2141) uint8_t inverse;
2f1a2ccb9c0de drivers/char/vt.c (Egmont Koblinger 2007-05-08 00:30:37 -0700 2142) uint8_t width;
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2143) u16 himask, charmask;
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2144)
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2145) if (in_interrupt())
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2146) return count;
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2147)
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2148) might_sleep();
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2149)
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2150) acquire_console_sem();
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2151) vc = tty->driver_data;
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2152) if (vc == NULL) {
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2153) printk(KERN_ERR "vt: argh, driver_data is NULL !\n");
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2154) release_console_sem();
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2155) return 0;
^1da177e4c3f4 drivers/char/vt.c (Linus Torvalds 2005-04-16 15:20:36 -0700 2156) }
Until that commit, n_hdlc_send_frames() was prepared for being interrupted by signal
while sleeping.
$ git blame ac751efa6a0d7~1 drivers/tty/n_hdlc.c
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 379) /**
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 380) * n_hdlc_send_frames - send frames on pending send buffer list
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 381) * @n_hdlc - pointer to ldisc instance data
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 382) * @tty - pointer to tty instance data
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 383) *
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 384) * Send frames on pending send buffer list until the driver does not accept a
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 385) * frame (busy) this function is called after adding a frame to the send buffer
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 386) * list and by the tty wakeup callback.
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 387) */
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 388) static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 389) {
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 390) register int actual;
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 391) unsigned long flags;
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 392) struct n_hdlc_buf *tbuf;
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 393)
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 394) if (debuglevel >= DEBUG_LEVEL_INFO)
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 395) printk("%s(%d)n_hdlc_send_frames() called\n",__FILE__,__LINE__);
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 396) check_again:
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 397)
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 398) spin_lock_irqsave(&n_hdlc->tx_buf_list.spinlock, flags);
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 399) if (n_hdlc->tbusy) {
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 400) n_hdlc->woke_up = 1;
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 401) spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags);
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 402) return;
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 403) }
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 404) n_hdlc->tbusy = 1;
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 405) n_hdlc->woke_up = 0;
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 406) spin_unlock_irqrestore(&n_hdlc->tx_buf_list.spinlock, flags);
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 407)
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 408) /* get current transmit buffer or get new transmit */
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 409) /* buffer from list of pending transmit buffers */
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 410)
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 411) tbuf = n_hdlc->tbuf;
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 412) if (!tbuf)
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 413) tbuf = n_hdlc_buf_get(&n_hdlc->tx_buf_list);
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 414)
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 415) while (tbuf) {
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 416) if (debuglevel >= DEBUG_LEVEL_INFO)
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 417) printk("%s(%d)sending frame %p, count=%d\n",
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 418) __FILE__,__LINE__,tbuf,tbuf->count);
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 419)
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 420) /* Send the next block of data to device */
^1da177e4c3f4 drivers/char/n_hdlc.c (Linus Torvalds 2005-04-16 15:20:36 -0700 421) tty->flags |= (1 << TTY_DO_WRITE_WAKEUP);
f34d7a5b7010b drivers/char/n_hdlc.c (Alan Cox 2008-04-30 00:54:13 -0700 422) actual = tty->ops->write(tty, tbuf->buf, tbuf->count);
b0fed3140f57c drivers/char/n_hdlc.c (Jiri Slaby 2007-07-15 23:40:12 -0700 423)
b0fed3140f57c drivers/char/n_hdlc.c (Jiri Slaby 2007-07-15 23:40:12 -0700 424) /* rollback was possible and has been done */
b0fed3140f57c drivers/char/n_hdlc.c (Jiri Slaby 2007-07-15 23:40:12 -0700 425) if (actual == -ERESTARTSYS) {
b0fed3140f57c drivers/char/n_hdlc.c (Jiri Slaby 2007-07-15 23:40:12 -0700 426) n_hdlc->tbuf = tbuf;
b0fed3140f57c drivers/char/n_hdlc.c (Jiri Slaby 2007-07-15 23:40:12 -0700 427) break;
b0fed3140f57c drivers/char/n_hdlc.c (Jiri Slaby 2007-07-15 23:40:12 -0700 428) }
But as of commit c545b66c6922b002 ("tty: Serialize tcflow() with other tty flow
control changes"), start_tty() was already holding spinlock.
$ git blame c545b66c6922b002~1 drivers/tty/tty_io.c
f9e053dcfc02b drivers/tty/tty_io.c (Peter Hurley 2014-09-10 15:06:31 -0400 965) void start_tty(struct tty_struct *tty)
f9e053dcfc02b drivers/tty/tty_io.c (Peter Hurley 2014-09-10 15:06:31 -0400 966) {
f9e053dcfc02b drivers/tty/tty_io.c (Peter Hurley 2014-09-10 15:06:31 -0400 967) unsigned long flags;
f9e053dcfc02b drivers/tty/tty_io.c (Peter Hurley 2014-09-10 15:06:31 -0400 968)
f9e053dcfc02b drivers/tty/tty_io.c (Peter Hurley 2014-09-10 15:06:31 -0400 969) spin_lock_irqsave(&tty->flow_lock, flags);
f9e053dcfc02b drivers/tty/tty_io.c (Peter Hurley 2014-09-10 15:06:31 -0400 970) __start_tty(tty);
f9e053dcfc02b drivers/tty/tty_io.c (Peter Hurley 2014-09-10 15:06:31 -0400 971) spin_unlock_irqrestore(&tty->flow_lock, flags);
f9e053dcfc02b drivers/tty/tty_io.c (Peter Hurley 2014-09-10 15:06:31 -0400 972) }
Actually, it is commit f9e053dcfc02b0ad ("tty: Serialize tty flow control changes
with flow_lock") that started calling tty->ops->start(tty) from atomic context.
$ git blame f9e053dcfc02b~1 drivers/tty/tty_io.c
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 959) void start_tty(struct tty_struct *tty)
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 960) {
04f378b198da2 drivers/char/tty_io.c (Alan Cox 2008-04-30 00:53:29 -0700 961) unsigned long flags;
04f378b198da2 drivers/char/tty_io.c (Alan Cox 2008-04-30 00:53:29 -0700 962) spin_lock_irqsave(&tty->ctrl_lock, flags);
04f378b198da2 drivers/char/tty_io.c (Alan Cox 2008-04-30 00:53:29 -0700 963) if (!tty->stopped || tty->flow_stopped) {
04f378b198da2 drivers/char/tty_io.c (Alan Cox 2008-04-30 00:53:29 -0700 964) spin_unlock_irqrestore(&tty->ctrl_lock, flags);
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 965) return;
04f378b198da2 drivers/char/tty_io.c (Alan Cox 2008-04-30 00:53:29 -0700 966) }
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 967) tty->stopped = 0;
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 968) if (tty->link && tty->link->packet) {
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 969) tty->ctrl_status &= ~TIOCPKT_STOP;
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 970) tty->ctrl_status |= TIOCPKT_START;
4b19449db074e drivers/char/tty_io.c (Davide Libenzi 2009-03-31 15:24:24 -0700 971) wake_up_interruptible_poll(&tty->link->read_wait, POLLIN);
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 972) }
04f378b198da2 drivers/char/tty_io.c (Alan Cox 2008-04-30 00:53:29 -0700 973) spin_unlock_irqrestore(&tty->ctrl_lock, flags);
f34d7a5b7010b drivers/char/tty_io.c (Alan Cox 2008-04-30 00:54:13 -0700 974) if (tty->ops->start)
f34d7a5b7010b drivers/char/tty_io.c (Alan Cox 2008-04-30 00:54:13 -0700 975) (tty->ops->start)(tty);
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 976) /* If we have a running line discipline it may need kicking */
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 977) tty_wakeup(tty);
^1da177e4c3f4 drivers/char/tty_io.c (Linus Torvalds 2005-04-16 15:20:36 -0700 978) }
Therefore, I think that bisection will reach f9e053dcfc02b0ad, and I guess that
this bug was not noticed simply because little people tested n_hdlc driver.
Well, how to fix? Introduce a new flag for indicating "starting" state (like drivers/block/loop.c uses Lo_* state) ?
On Wednesday, November 17, 2021 9:54:59 AM CET Greg Kroah-Hartman wrote:
> On Wed, Nov 17, 2021 at 09:23:45AM +0100, Fabio M. De Francesco wrote:
> > On Tuesday, November 16, 2021 5:59:24 PM CET Greg Kroah-Hartman wrote:
> > > On Tue, Nov 16, 2021 at 04:35:07PM +0100, Fabio M. De Francesco wrote:
> > > > On Tuesday, November 16, 2021 3:58:44 PM CET Greg Kroah-Hartman
wrote:
> > > > > On Tue, Nov 16, 2021 at 03:49:37PM +0100, Fabio M. De Francesco
wrote:
> > > > > > Fix two sleeping functions called from atomic context by doing
> > immediate
> > > > > > return to the caller if !preemptible() evaluates 'true'. Remove
two
> > > > > > in_interrupt() tests because they are not suited for being used
here.
> > > > > >
> > > > > > Since functions do_con_write() and con_flush_chars() might sleep
in
> > > > > > console_lock(), it must be assured that they are never executed
in
> > > > > > atomic contexts.
> > > > > >
> > > > > > This issue is reported by Syzbot which notices that they are
executed
> > > > > > while holding spinlocks and with interrupts disabled. Actually
Syzbot
> > > > > > emits a first report and then, after fixing do_con_write(), a
second
> > > > > > report for the same problem in con_flush_chars() because these
> > functions
> > > > > > are called one after the other by con_write().
> > > > > >
> > > > > > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > > > > > Reported-by:
[email protected]
> > > > > > Suggested-by: Marco Elver <[email protected]>
> > > > > > Signed-off-by: Fabio M. De Francesco <[email protected]>
> > > > > > ---
> > > > > > drivers/tty/vt/vt.c | 4 ++--
> > > > > > 1 file changed, 2 insertions(+), 2 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
> > > > > > index 7359c3e80d63..508f8a56d361 100644
> > > > > > --- a/drivers/tty/vt/vt.c
> > > > > > +++ b/drivers/tty/vt/vt.c
> > > > > > @@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct
*tty,
> > > > const unsigned char *buf, int co
> > > > > > struct vt_notifier_param param;
> > > > > > bool rescan;
> > > > > >
> > > > > > - if (in_interrupt())
> > > > > > + if (!preemptible())
> > > > > > return count;
> > > > >
> > > > > Very odd, what code is calling these functions to trigger this
check?
> > > >
> > > > This is the call trace reported by Syzbot (https://
syzkaller.appspot.com/
> > bug?
> > > > id=fe5a4d5a2482bd73064db5de5d28e024f1e2a387):
> > > >
> > > > Call Trace:
> > > > <TASK>
> > > > __dump_stack lib/dump_stack.c:88 [inline]
> > > > dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
> > > > __might_resched.cold+0x222/0x26b kernel/sched/core.c:9539
> > > > console_lock+0x17/0x80 kernel/printk/printk.c:2522
> > > > do_con_write+0x10f/0x1e40 drivers/tty/vt/vt.c:2908
> > > > con_write+0x21/0x40 drivers/tty/vt/vt.c:3295
> > > > n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
> > > > tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
> > > > __start_tty drivers/tty/tty_io.c:806 [inline]
> > > > __start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
> > > > n_tty_ioctl_helper+0x299/0x2d0 drivers/tty/tty_ioctl.c:880
> > > >
> > > > ^^^^^^^^^^
> > > > n_tty_ioctl_helper() disabled interrupts via spin_lock_irq(&tty-
> > >flow.lock).
> > > >
> > > > n_hdlc_tty_ioctl+0xd2/0x340 drivers/tty/n_hdlc.c:633
> > > > tty_ioctl+0xc69/0x1670 drivers/tty/tty_io.c:2814
> > > > vfs_ioctl fs/ioctl.c:51 [inline]
> > > > __do_sys_ioctl fs/ioctl.c:874 [inline]
> > > > __se_sys_ioctl fs/ioctl.c:860 [inline]
> > > > __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
> > > > do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> > > > do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
> > > > entry_SYSCALL_64_after_hwframe+0x44/0xae
> > > >
> > > > > Shouldn't the caller be fixed instead?
> > > >
> > > > Maybe that the caller has no need to disable IRQs, but I cannot yet
> > answer to
> > > > this particular question.
> > > >
> > > > > What changed to suddenly cause this to show up?
> > > >
> > > > Commit c545b66c6922 ("tty: Serialize tcflow() with other tty flow
control
> > > > changes") introduced a call to spin_lock_irq() for command "TCOON",
just
> > > > before calling __start_tty().
> > >
> > > That commit happened in 2014. Why is this suddenly an issue now that
no
> > > one ever saw before?
> >
> > Hi Greg,
> >
> > I have just been informed by Pavel Skripkin (who at this moment cannot
> > directly write to LKML) that this bug has been reported at least other
two
> > times.
> >
> > We cannot say why nobody has ever addressed this issue before.
> >
> > Please read the following reports.
> >
> > https://syzkaller.appspot.com/bug?
id=9ed44c22919e3cd2c71907b963fb048ac1115073
> > https://syzkaller.appspot.com/bug?
id=e806305a3a65a0bb2f1be7e672819a079d558016
>
> Great, you have a reproducer, so you should be able to duplicate this
> locally to figure out what is really happening here.
>
> Good luck!
>
> greg k-h
>
Hi Greg,
As you asked for, I've run the reproducer (as root) while "trace-cmd record -
g n_tty_ioctl_helper -p function_graph sleep 15" was in execution.
The call trace from n_tty_ioctl_helper() to console_lock() has been recorded
and for sure console_lock() is called while holding a spinlock and with IRQs
disabled via spin_lock_irq().
The following is the relevant part of "trace-cmd report"...
repro-27397 [004] 18814.535029: funcgraph_entry: |
n_tty_ioctl_helper() {
repro-27397 [004] 18814.535030: funcgraph_entry: 0.676 us
| tty_check_change();
repro-27397 [004] 18814.535032: funcgraph_entry: 0.761 us
| _raw_spin_lock_irq();
repro-27397 [004] 18814.535033: funcgraph_entry:
| __start_tty() {
repro-27397 [004] 18814.535034: funcgraph_entry:
| con_start() {
---- cut not relevant code ----
repro-27397 [004] 18814.535088: funcgraph_entry: |
con_write() {
repro-27397 [004] 18814.535088: funcgraph_entry:
| do_con_write() {
repro-27397 [004] 18814.535088: funcgraph_entry:
| console_lock() {
repro-27397 [004] 18814.535089: funcgraph_entry:
| __cond_resched() {
---- cut not relevant code ----
repro-27397 [004] 18814.571839: funcgraph_entry: 0.641 us |
__printk_safe_exit();
repro-27397 [004] 18814.571841: funcgraph_exit: + 18.902 us
| }
repro-27397 [004] 18814.571842: funcgraph_exit: + 31.651 us
| }
repro-27397 [004] 18814.571843: funcgraph_exit: # 36754.817
us | }
Please notice that "#36754.817 us" when finally console_lock() returned.
May very well be that I've overlooked something (as you know I'm a newbie and
all this is quite new for me :)
As I wrote (please read it again above) for now I confirm that commit
c545b66c6922 ("tty: Serialize tcflow() with other tty flow control changes")
introduced a call to spin_lock_irq() for command "TCOON".
However I have to double check it.
If it is confirmed, I will reply soon to Tetsuo's message because he says
that the relevant commit should be a different one.
However, what is relevant here is that for sure console_lock() is called in
atomic context and that the Syzbot report is not a false positive. Am I
missing something?
About the possible fixes I only see two potential solutions:
1) If we need for real spin_lock_irq() in n_tty_ioctl_helper() we should test
the context before calling console_lock() (it is annotated with
might_sleep()).
2) If we don't need to disable IRQs we should fix n_tty_ioctl_helper().
I'm not able to go deeper and understand whether or not that spin_lock_irq()
is an overkill and if it can be converted to a different kind of locking API.
I'd appreciate feedback from you and other experienced developers.
Thank you very much,
Fabio M. De Francesco
On Wednesday, November 17, 2021 11:51:13 AM CET Tetsuo Handa wrote:
> On 2021/11/17 17:54, Greg Kroah-Hartman wrote:
> > Great, you have a reproducer, so you should be able to duplicate this
> > locally to figure out what is really happening here.
>
> Until commit ac751efa6a0d70f2 ("console: rename acquire/release_console_sem() to
> console_lock/unlock()"), do_con_write() was surely designed to be able to sleep.
>
> > $ git blame ac751efa6a0d7~1 drivers/tty/vt/vt.c
>
> [...]
>
> Until that commit, n_hdlc_send_frames() was prepared for being interrupted by signal
> while sleeping.
>
> $ git blame ac751efa6a0d7~1 drivers/tty/n_hdlc.c
>
> [...]
>
> But as of commit c545b66c6922b002 ("tty: Serialize tcflow() with other tty flow
> control changes"), start_tty() was already holding spinlock.
Hi Tetsuo,
Actually, we don't care of start_tty(). It's not in the path that triggers sleeping in atomic bug.
According to Syzbot report and to my ftrace analysis it's __start_tty() that is called by
n_tty_ioctl_helper(), and it is this function that acquires a spinlock and disables interrupts.
I must admit that I've never used git-blame and I'm not sure to understand what you did here :(
Have you had a chance to read my analysis?
> $ git blame c545b66c6922b002~1 drivers/tty/tty_io.c
>
> [...]
>
> Actually, it is commit f9e053dcfc02b0ad ("tty: Serialize tty flow control changes
> with flow_lock") that started calling tty->ops->start(tty) from atomic context.
>
> $ git blame f9e053dcfc02b~1 drivers/tty/tty_io.c
>
> [...]
>
> Therefore, I think that bisection will reach f9e053dcfc02b0ad, and I guess that
> this bug was not noticed simply because little people tested n_hdlc driver.
>
> Well, how to fix? Introduce a new flag for indicating "starting" state (like drivers/block/loop.c uses Lo_* state) ?
I think this is not the correct fix, but I might very well be wrong...
Can you please reply to my last email (the one with the ftrace analysis)?
In the last lines I proposed two alternative solutions, what about them?
Thanks,
Fabio
On Thursday, November 18, 2021 9:31:06 AM CET Fabio M. De Francesco wrote:
> On Wednesday, November 17, 2021 11:51:13 AM CET Tetsuo Handa wrote:
> > On 2021/11/17 17:54, Greg Kroah-Hartman wrote:
> > > Great, you have a reproducer, so you should be able to duplicate this
> > > locally to figure out what is really happening here.
> >
> > Until commit ac751efa6a0d70f2 ("console: rename acquire/release_console_sem() to
> > console_lock/unlock()"), do_con_write() was surely designed to be able to sleep.
> >
> > > $ git blame ac751efa6a0d7~1 drivers/tty/vt/vt.c
> >
> > [...]
> >
> > Until that commit, n_hdlc_send_frames() was prepared for being interrupted by signal
> > while sleeping.
> >
> > $ git blame ac751efa6a0d7~1 drivers/tty/n_hdlc.c
> >
> > [...]
> >
> > But as of commit c545b66c6922b002 ("tty: Serialize tcflow() with other tty flow
> > control changes"), start_tty() was already holding spinlock.
>
> Hi Tetsuo,
>
> Actually, we don't care of start_tty(). It's not in the path that triggers sleeping in atomic bug.
> According to Syzbot report and to my ftrace analysis it's __start_tty() that is called by
> n_tty_ioctl_helper(), and it is this function that acquires a spinlock and disables interrupts.
>
> I must admit that I've never used git-blame and I'm not sure to understand what you did here :(
I've had a chance to look both at commit c545b66c6922 and f9e053dcfc02. They are so strictly
related (same code. same author, same date) that I'm not anymore sure of which is to blame.
However, at this moment I'm scarcely interested in figuring out which one actually is responsible
for this issue.
What I know is that this issue is _real_ and that it should be fixed some way.
> Have you had a chance to read my analysis?
>
> > $ git blame c545b66c6922b002~1 drivers/tty/tty_io.c
> >
> > [...]
> >
> > Actually, it is commit f9e053dcfc02b0ad ("tty: Serialize tty flow control changes
> > with flow_lock") that started calling tty->ops->start(tty) from atomic context.
> >
> > $ git blame f9e053dcfc02b~1 drivers/tty/tty_io.c
> >
> > [...]
> >
> > Therefore, I think that bisection will reach f9e053dcfc02b0ad, and I guess that
> > this bug was not noticed simply because little people tested n_hdlc driver.
> >
> > Well, how to fix? Introduce a new flag for indicating "starting" state (like drivers/block/loop.c uses Lo_* state) ?
>
> I think this is not the correct fix, but I might very well be wrong...
I've read (again) the code that you mentioned. I've changed my mind about it.
Now I think that a flag like that could be useful if there are no other means to
lock __start_tty() and __stop_tty() critical sections.
Thanks,
Fabio
> Can you please reply to my last email (the one with the ftrace analysis)?
> In the last lines I proposed two alternative solutions, what about them?
>
> Thanks,
>
> Fabio
>
>
>
>
>
On 2021/11/18 18:38, Fabio M. De Francesco wrote:
>> Actually, we don't care of start_tty(). It's not in the path that triggers sleeping in atomic bug.
>> According to Syzbot report and to my ftrace analysis it's __start_tty() that is called by
>> n_tty_ioctl_helper(), and it is this function that acquires a spinlock and disables interrupts.
>>
>> I must admit that I've never used git-blame and I'm not sure to understand what you did here :(
I just dumped the source code as of before these commits.
>
> I've had a chance to look both at commit c545b66c6922 and f9e053dcfc02. They are so strictly
> related (same code. same author, same date) that I'm not anymore sure of which is to blame.
>
> However, at this moment I'm scarcely interested in figuring out which one actually is responsible
> for this issue.
Commit f9e053dcfc02b0ad ("tty: Serialize tty flow control changes with flow_lock") is responsible,
for that commit says
Split out unlocked __start_tty()/__stop_tty() flavors for use by
ioctl(TCXONC) in follow-on patch.
and syzbot is hitting n_tty_ioctl_helper(tty, TCXONC, TCOON) path.
>>> Well, how to fix? Introduce a new flag for indicating "starting" state (like drivers/block/loop.c uses Lo_* state) ?
>>
>> I think this is not the correct fix, but I might very well be wrong...
>
> I've read (again) the code that you mentioned. I've changed my mind about it.
> Now I think that a flag like that could be useful if there are no other means to
> lock __start_tty() and __stop_tty() critical sections.
If ->flow.lock were held from only schedulable context, replacing this spinlock with
a mutex would be possible. But stop_tty() says "may be called from any context" which
means that we can't use a mutex...
Making do_con_write() no-op when called with IRQs disabled would be the minimal change
that can silence the syzbot. But this does not fix the regression for drivers/tty/n_hdlc.c
introduced by f9e053dcfc02b0ad.
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
struct vt_notifier_param param;
bool rescan;
- if (in_interrupt())
+ if (in_interrupt() || irqs_disabled())
return count;
console_lock();
@@ -3358,7 +3358,7 @@ static void con_flush_chars(struct tty_struct *tty)
{
struct vc_data *vc;
- if (in_interrupt()) /* from flush_to_ldisc */
+ if (in_interrupt() || irqs_disabled()) /* from flush_to_ldisc */
return;
/* if we race with con_close(), vt may be null */
According to scripts/get_maintainer.pl , Greg and Jiri are maintainers for the n_hdlc driver.
Greg and Jiri, what do you think? Is sacrificing ability to write to consoles when
n_hdlc_send_frames() is called from __start_tty() path considered tolerable? (Maybe
OK for now and stable kernels, for nobody was reporting this problem suggests that
nobody depends on this ability.)
But if we must fix the regression for drivers/tty/n_hdlc.c , we need to use something
like https://lkml.kernel.org/r/[email protected]
in order to achieve what f9e053dcfc02b0ad meant. That is, extend tty->stopped in order
to be able to represent "started state (currently indicated by tty->stopped == false)",
"stopped state (currently indicated by tty->stopped == true)" and "changing state
(currently impossible due to bool)", but this approach might need to touch many locations,
and I worry that touching many locations introduces some oversight bugs.
On Thursday, November 18, 2021 1:14:59 PM CET Tetsuo Handa wrote:
> On 2021/11/18 18:38, Fabio M. De Francesco wrote:
> If ->flow.lock were held from only schedulable context, replacing this spinlock with
> a mutex would be possible. But stop_tty() says "may be called from any context" which
> means that we can't use a mutex...
>
> Making do_con_write() no-op when called with IRQs disabled would be the minimal change
> that can silence the syzbot. But this does not fix the regression for drivers/tty/n_hdlc.c
> introduced by f9e053dcfc02b0ad.
>
> --- a/drivers/tty/vt/vt.c
> +++ b/drivers/tty/vt/vt.c
> @@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
> struct vt_notifier_param param;
> bool rescan;
>
> - if (in_interrupt())
> + if (in_interrupt() || irqs_disabled())
> return count;
>
> console_lock();
> @@ -3358,7 +3358,7 @@ static void con_flush_chars(struct tty_struct *tty)
> {
> struct vc_data *vc;
>
> - if (in_interrupt()) /* from flush_to_ldisc */
> + if (in_interrupt() || irqs_disabled()) /* from flush_to_ldisc */
> return;
>
> /* if we race with con_close(), vt may be null */
>
For what my opinion is worth, I like the solution by Tetsuo that is reported above.
The bug is real and I suppose that it must be addressed. This seems the most straightforward
and effective way to fix it.
Regards,
Fabio M. De Francesco
> According to scripts/get_maintainer.pl , Greg and Jiri are maintainers for the n_hdlc driver.
> Greg and Jiri, what do you think? Is sacrificing ability to write to consoles when
> n_hdlc_send_frames() is called from __start_tty() path considered tolerable? (Maybe
> OK for now and stable kernels, for nobody was reporting this problem suggests that
> nobody depends on this ability.)
>
> But if we must fix the regression for drivers/tty/n_hdlc.c , we need to use something
> like https://lkml.kernel.org/r/[email protected]
> in order to achieve what f9e053dcfc02b0ad meant. That is, extend tty->stopped in order
> to be able to represent "started state (currently indicated by tty->stopped == false)",
> "stopped state (currently indicated by tty->stopped == true)" and "changing state
> (currently impossible due to bool)", but this approach might need to touch many locations,
> and I worry that touching many locations introduces some oversight bugs.
>
>
syzbot is reporting that an unprivileged user who logged in from tty
console can crash the system using a reproducer shown below [1], for
commit f9e053dcfc02b0ad ("tty: Serialize tty flow control changes with
flow_lock") changed __start_tty() from schedulable context to atomic
context without realizing that drivers/tty/n_hdlc.c is calling
do_con_write() (which waits for console semaphore) from __start_tty().
----------
#include <sys/ioctl.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
const int disc = 0xd;
ioctl(1, TIOCSETD, &disc);
while (1) {
ioctl(1, TCXONC, 0);
write(1, "", 1);
ioctl(1, TCXONC, 1); /* Kernel panic - not syncing: scheduling while atomic */
}
}
----------
While we would need to bring __start_tty() back to schedulable context
in order to be able to call do_con_write() from ioctl(TCXONC, TCOON),
changes required might be complicated; an oversight will result in a
new sleep from atomic context bug or a new race window bug. Therefore,
firstly let's fix crash problem by protecting do_con_write() side.
Link: https://syzkaller.appspot.com/bug?extid=5f47a8cea6a12b77a876 [1]
Reported-by: syzbot <[email protected]>
Analyzed-by: Fabio M. De Francesco <[email protected]>
Signed-off-by: Tetsuo Handa <[email protected]>
Fixes: f9e053dcfc02b0ad ("tty: Serialize tty flow control changes with flow_lock")
---
drivers/tty/vt/vt.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 7359c3e80d63..efa16f51c978 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
struct vt_notifier_param param;
bool rescan;
- if (in_interrupt())
+ if (in_interrupt() || irqs_disabled())
return count;
console_lock();
@@ -3358,7 +3358,7 @@ static void con_flush_chars(struct tty_struct *tty)
{
struct vc_data *vc;
- if (in_interrupt()) /* from flush_to_ldisc */
+ if (in_interrupt() || irqs_disabled()) /* from flush_to_ldisc */
return;
/* if we race with con_close(), vt may be null */
--
2.32.0
What do you think? Can we apply this?
On 2021/11/19 23:55, Tetsuo Handa wrote:
> syzbot is reporting that an unprivileged user who logged in from tty
> console can crash the system using a reproducer shown below [1], for
> commit f9e053dcfc02b0ad ("tty: Serialize tty flow control changes with
> flow_lock") changed __start_tty() from schedulable context to atomic
> context without realizing that drivers/tty/n_hdlc.c is calling
> do_con_write() (which waits for console semaphore) from __start_tty().
>
> ----------
> #include <sys/ioctl.h>
> #include <unistd.h>
>
> int main(int argc, char *argv[])
> {
> const int disc = 0xd;
>
> ioctl(1, TIOCSETD, &disc);
> while (1) {
> ioctl(1, TCXONC, 0);
> write(1, "", 1);
> ioctl(1, TCXONC, 1); /* Kernel panic - not syncing: scheduling while atomic */
> }
> }
> ----------
>
> While we would need to bring __start_tty() back to schedulable context
> in order to be able to call do_con_write() from ioctl(TCXONC, TCOON),
> changes required might be complicated; an oversight will result in a
> new sleep from atomic context bug or a new race window bug. Therefore,
> firstly let's fix crash problem by protecting do_con_write() side.
>
> Link: https://syzkaller.appspot.com/bug?extid=5f47a8cea6a12b77a876 [1]
> Reported-by: syzbot <[email protected]>
> Analyzed-by: Fabio M. De Francesco <[email protected]>
> Signed-off-by: Tetsuo Handa <[email protected]>
> Fixes: f9e053dcfc02b0ad ("tty: Serialize tty flow control changes with flow_lock")
> ---
> drivers/tty/vt/vt.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
> index 7359c3e80d63..efa16f51c978 100644
> --- a/drivers/tty/vt/vt.c
> +++ b/drivers/tty/vt/vt.c
> @@ -2902,7 +2902,7 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co
> struct vt_notifier_param param;
> bool rescan;
>
> - if (in_interrupt())
> + if (in_interrupt() || irqs_disabled())
> return count;
>
> console_lock();
> @@ -3358,7 +3358,7 @@ static void con_flush_chars(struct tty_struct *tty)
> {
> struct vc_data *vc;
>
> - if (in_interrupt()) /* from flush_to_ldisc */
> + if (in_interrupt() || irqs_disabled()) /* from flush_to_ldisc */
> return;
>
> /* if we race with con_close(), vt may be null */
>
On Wed, Dec 01, 2021 at 10:40:38PM +0900, Tetsuo Handa wrote:
> What do you think? Can we apply this?
It would be great to get others to review it, but it's still in my
queue, I'll get to it "soon"...
thanks,
greg k-h
On Wed, Dec 1, 2021 at 5:41 AM Tetsuo Handa
<[email protected]> wrote:
>
> What do you think? Can we apply this?
I think this patch is only papering over the problem, and the issue goes deeper.
It may be that "papering over the issue" successfully hides it
completely, but it's still a horribly bad approach.
> > - if (in_interrupt())
> > + if (in_interrupt() || irqs_disabled())
> > return count;
This kind of stuff is broken. Pretty much always.
And in this case, it's still broken, because things like "called under
a non-irq spinlock" would still not show up.
And no, I do *not* mean that the code should try to figure that out. I
mean that the problem goes further up, and that the fact that we get
to do_con_write() in the first place when we're in an invalid context
is wrong, wrong, wrong.
How the heck do we get here from just an ioctl?
Looking at the backtrace, I see
n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
__start_tty drivers/tty/tty_io.c:806 [inline]
__start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
and apparently it's that hdlc line discipline (and
n_hdlc_send_frames() in particular) that is the problem here.
I think that's where the fix should be.
Linus
On 2021/12/02 4:05, Linus Torvalds wrote:
> On Wed, Dec 1, 2021 at 5:41 AM Tetsuo Handa
> <[email protected]> wrote:
>>
>> What do you think? Can we apply this?
>
> I think this patch is only papering over the problem, and the issue goes deeper.
I know. After this "stop bleeding" patch, I am planning to propose a patch for
fixing a regression introduced by commit f9e053dcfc02b0ad ("tty: Serialize
tty flow control changes with flow_lock"), something like shown below.
drivers/tty/tty.h | 2 --
drivers/tty/tty_io.c | 41 ++++++++++++++++++++---------------------
drivers/tty/tty_ioctl.c | 15 +++++++++------
3 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h
index b710c5ef89ab..b19460dca58b 100644
--- a/drivers/tty/tty.h
+++ b/drivers/tty/tty.h
@@ -60,8 +60,6 @@ void tty_ldisc_unlock(struct tty_struct *tty);
int __tty_check_change(struct tty_struct *tty, int sig);
int tty_check_change(struct tty_struct *tty);
-void __stop_tty(struct tty_struct *tty);
-void __start_tty(struct tty_struct *tty);
void tty_vhangup_session(struct tty_struct *tty);
void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty);
int tty_signal_session_leader(struct tty_struct *tty, int exit_session);
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 6616d4a0d41d..84f4296eefed 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -761,15 +761,6 @@ int tty_hung_up_p(struct file *filp)
}
EXPORT_SYMBOL(tty_hung_up_p);
-void __stop_tty(struct tty_struct *tty)
-{
- if (tty->flow.stopped)
- return;
- tty->flow.stopped = true;
- if (tty->ops->stop)
- tty->ops->stop(tty);
-}
-
/**
* stop_tty - propagate flow control
* @tty: tty to stop
@@ -791,21 +782,15 @@ void stop_tty(struct tty_struct *tty)
unsigned long flags;
spin_lock_irqsave(&tty->flow.lock, flags);
- __stop_tty(tty);
+ if (!tty->flow.stopped) {
+ tty->flow.stopped = true;
+ if (tty->ops->stop)
+ tty->ops->stop(tty);
+ }
spin_unlock_irqrestore(&tty->flow.lock, flags);
}
EXPORT_SYMBOL(stop_tty);
-void __start_tty(struct tty_struct *tty)
-{
- if (!tty->flow.stopped || tty->flow.tco_stopped)
- return;
- tty->flow.stopped = false;
- if (tty->ops->start)
- tty->ops->start(tty);
- tty_wakeup(tty);
-}
-
/**
* start_tty - propagate flow control
* @tty: tty to start
@@ -821,8 +806,22 @@ void start_tty(struct tty_struct *tty)
{
unsigned long flags;
+ /*
+ * do_con_write() from tty_wakeup() needs to sleep. But I'm not sure
+ * whether all callers are allowed to sleep, for stop_tty() says that
+ * callers might not be allowed to sleep...
+ */
+ might_sleep();
+
spin_lock_irqsave(&tty->flow.lock, flags);
- __start_tty(tty);
+ if (tty->flow.stopped && !tty->flow.tco_stopped) {
+ spin_unlock_irqrestore(&tty->flow.lock, flags);
+ if (tty->ops->start)
+ tty->ops->start(tty);
+ tty_wakeup(tty);
+ spin_lock_irqsave(&tty->flow.lock, flags);
+ tty->flow.stopped = false;
+ }
spin_unlock_irqrestore(&tty->flow.lock, flags);
}
EXPORT_SYMBOL(start_tty);
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index 63181925ec1a..84c0742efd34 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -857,6 +857,7 @@ EXPORT_SYMBOL_GPL(tty_perform_flush);
int n_tty_ioctl_helper(struct tty_struct *tty, unsigned int cmd,
unsigned long arg)
{
+ static DEFINE_MUTEX(tty_tco_mutex);
int retval;
switch (cmd) {
@@ -866,20 +867,22 @@ int n_tty_ioctl_helper(struct tty_struct *tty, unsigned int cmd,
return retval;
switch (arg) {
case TCOOFF:
- spin_lock_irq(&tty->flow.lock);
+ if (mutex_lock_killable(&tty_tco_mutex))
+ return -EINTR;
if (!tty->flow.tco_stopped) {
tty->flow.tco_stopped = true;
- __stop_tty(tty);
+ stop_tty(tty);
}
- spin_unlock_irq(&tty->flow.lock);
+ mutex_unlock(&tty_tco_mutex);
break;
case TCOON:
- spin_lock_irq(&tty->flow.lock);
+ if (mutex_lock_killable(&tty_tco_mutex))
+ return -EINTR;
if (tty->flow.tco_stopped) {
tty->flow.tco_stopped = false;
- __start_tty(tty);
+ start_tty(tty);
}
- spin_unlock_irq(&tty->flow.lock);
+ mutex_unlock(&tty_tco_mutex);
break;
case TCIOFF:
if (STOP_CHAR(tty) != __DISABLED_CHAR)
I think that since tty->flow.tco_stopped is updated by only ioctl(TCXONC)
which is schedulable context, we can serialize using a mutex. Then, as
long as start_tty() can be called from schedulable context, we can allow
do_con_write() to work.
>
> It may be that "papering over the issue" successfully hides it
> completely, but it's still a horribly bad approach.
>
>>> - if (in_interrupt())
>>> + if (in_interrupt() || irqs_disabled())
>>> return count;
>
> This kind of stuff is broken. Pretty much always.
>
> And in this case, it's still broken, because things like "called under
> a non-irq spinlock" would still not show up.
As far as I'm aware, the commit did not introduce "called under a non-irq
spinlock" case.
>
> And no, I do *not* mean that the code should try to figure that out. I
> mean that the problem goes further up, and that the fact that we get
> to do_con_write() in the first place when we're in an invalid context
> is wrong, wrong, wrong.
>
> How the heck do we get here from just an ioctl?
Just an ioctl, but the commit made it to get inside spin_lock_irqsave()
section when tty_wakeup() is called.
>
> Looking at the backtrace, I see
>
> n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
> tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
> __start_tty drivers/tty/tty_io.c:806 [inline]
> __start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
>
> and apparently it's that hdlc line discipline (and
> n_hdlc_send_frames() in particular) that is the problem here.
>
> I think that's where the fix should be.
Do you mean that we should change the behavior of n_hdlc_send_frames()
rather than trying to make __start_tty() schedulable again?
Then, n_hdlc_send_frames() saying "this function is called after adding
a frame to the send buffer list and by the tty wakeup callback." but
expecting tty->ops->write (which is do_con_write() which needs to sleep)
not to sleep is wrong?
Then, what we can do with n_hdlc_send_frames() ?
Make n_hdlc_send_frames() no-op when called from atomic context?
On Thu, Dec 2, 2021 at 7:41 AM Tetsuo Handa
<[email protected]> wrote:
>
> > Looking at the backtrace, I see
> >
> > n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
> > tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
> > __start_tty drivers/tty/tty_io.c:806 [inline]
> > __start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
> >
> > and apparently it's that hdlc line discipline (and
> > n_hdlc_send_frames() in particular) that is the problem here.
> >
> > I think that's where the fix should be.
>
> Do you mean that we should change the behavior of n_hdlc_send_frames()
> rather than trying to make __start_tty() schedulable again?
I wouldn't change n_hdlc_send_frames() itself. It does what it says it does.
But n_hdlc_tty_wakeup() probably shouldn't call it directly. Other tty
line disciplines don't do that kind of thing - although I only looked
at a couple. They all seem to just set bits and prepare things. Like a
wakeup function should do.
So I think n_hdlc_tty_wakeup() should perhaps only do a
"schedule_work()" or similar to get that n_hdlc_send_frames() started,
rather than doing it itself.
Example: net/nfc/nci/uart.c. It does that
schedule_work(&nu->write_work);
instead of actually trying to do a write from a wakeup routine
(similar examples in ppp - "tasklet_schedule(&ap->tsk)" etc).
I mean, it's called "wakeup", not "write". So I think the fundamental
confusion here is in hdlc, not the tty layer.
Linus
On 02. 12. 21, 19:35, Linus Torvalds wrote:
> On Thu, Dec 2, 2021 at 7:41 AM Tetsuo Handa
> <[email protected]> wrote:
>>
>>> Looking at the backtrace, I see
>>>
>>> n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
>>> tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
>>> __start_tty drivers/tty/tty_io.c:806 [inline]
>>> __start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
>>>
>>> and apparently it's that hdlc line discipline (and
>>> n_hdlc_send_frames() in particular) that is the problem here.
>>>
>>> I think that's where the fix should be.
>>
>> Do you mean that we should change the behavior of n_hdlc_send_frames()
>> rather than trying to make __start_tty() schedulable again?
>
> I wouldn't change n_hdlc_send_frames() itself. It does what it says it does.
>
> But n_hdlc_tty_wakeup() probably shouldn't call it directly. Other tty
> line disciplines don't do that kind of thing - although I only looked
> at a couple. They all seem to just set bits and prepare things. Like a
> wakeup function should do.
>
> So I think n_hdlc_tty_wakeup() should perhaps only do a
> "schedule_work()" or similar to get that n_hdlc_send_frames() started,
> rather than doing it itself.
Concurred, this is even documented:
write_wakeup
[DRV] void ()(struct tty_struct *tty)
This function is called by the low-level tty driver to signal that line
discpline should try to send more characters to the low-level driver for
transmission. If the line discpline does not have any more data to send,
it can just return. If the line discipline does have some data to send,
please arise a tasklet or workqueue to do the real data transfer. Do not
send data in this hook, it may lead to a deadlock.
thanks,
--
js
suse labs
On Thursday, December 2, 2021 7:35:16 PM CET Linus Torvalds wrote:
> On Thu, Dec 2, 2021 at 7:41 AM Tetsuo Handa
> <[email protected]> wrote:
> >
> > > Looking at the backtrace, I see
> > >
> > > n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
> > > tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
> > > __start_tty drivers/tty/tty_io.c:806 [inline]
> > > __start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
> > >
> > > and apparently it's that hdlc line discipline (and
> > > n_hdlc_send_frames() in particular) that is the problem here.
> > >
> > > I think that's where the fix should be.
> >
> > Do you mean that we should change the behavior of n_hdlc_send_frames()
> > rather than trying to make __start_tty() schedulable again?
>
> I wouldn't change n_hdlc_send_frames() itself. It does what it says it does.
>
> But n_hdlc_tty_wakeup() probably shouldn't call it directly. Other tty
> line disciplines don't do that kind of thing - although I only looked
> at a couple. They all seem to just set bits and prepare things. Like a
> wakeup function should do.
>
> So I think n_hdlc_tty_wakeup() should perhaps only do a
> "schedule_work()" or similar to get that n_hdlc_send_frames() started,
> rather than doing it itself.
>
> Example: net/nfc/nci/uart.c. It does that
>
> schedule_work(&nu->write_work);
>
> instead of actually trying to do a write from a wakeup routine
> (similar examples in ppp - "tasklet_schedule(&ap->tsk)" etc).
>
> I mean, it's called "wakeup", not "write". So I think the fundamental
> confusion here is in hdlc, not the tty layer.
>
> Linus
>
This is what I understand from the above argument: do a schedule_work() to get
that n_hdlc_send_frames() started; in this way, n_hdlc_tty_wakeup() can
return to the caller and n_hdlc_send_frames() is executed asynchronously
(i.e., no longer in an atomic context).
I hope that I'm not missing something. If the above summary is correct,
please forgive a newbie for the following questions...
Commit f9e053dcfc02 ("tty: Serialize tty flow control changes with flow_lock")
has introduced spinlocks to serialize flow control changes and avoid the
concurrent executions of __start_tty() and __stop_tty().
This is an excerpt from the above-mentioned commit:
->
Introduce tty->flow_lock spinlock to serialize tty flow control changes.
Split out unlocked __start_tty()/__stop_tty() flavors for use by
ioctl(TCXONC) in follow-on patch.
<-
This is the reason why we are dealing with this bug. Currently we have __start_tty()
called with an acquired spinlock and IRQs disabled and the calls chain leads to
console_lock() while in atomic context.
In summation, my questions are...
1) Why do we still need to protect __start_tty() and __stop_tty() with spin_lock_irq()
if the solution to the bug is to execute n_hdlc_send_frames() asynchronously?
2) If it is true that we need to avoid concurrent executions of __start_tty() and
__stop_tty(), can we just use a Mutex in the IOCTL's helper?
Thanks,
Fabio M. De Francesco
On 2021/12/03 20:00, Fabio M. De Francesco wrote:
> On Thursday, December 2, 2021 7:35:16 PM CET Linus Torvalds wrote:
>> On Thu, Dec 2, 2021 at 7:41 AM Tetsuo Handa
>> <[email protected]> wrote:
>>>
>>>> Looking at the backtrace, I see
>>>>
>>>> n_hdlc_send_frames+0x24b/0x490 drivers/tty/n_hdlc.c:290
>>>> tty_wakeup+0xe1/0x120 drivers/tty/tty_io.c:534
>>>> __start_tty drivers/tty/tty_io.c:806 [inline]
>>>> __start_tty+0xfb/0x130 drivers/tty/tty_io.c:799
>>>>
>>>> and apparently it's that hdlc line discipline (and
>>>> n_hdlc_send_frames() in particular) that is the problem here.
>>>>
>>>> I think that's where the fix should be.
>>>
>>> Do you mean that we should change the behavior of n_hdlc_send_frames()
>>> rather than trying to make __start_tty() schedulable again?
>>
>> I wouldn't change n_hdlc_send_frames() itself. It does what it says it does.
>>
>> But n_hdlc_tty_wakeup() probably shouldn't call it directly. Other tty
>> line disciplines don't do that kind of thing - although I only looked
>> at a couple. They all seem to just set bits and prepare things. Like a
>> wakeup function should do.
>>
>> So I think n_hdlc_tty_wakeup() should perhaps only do a
>> "schedule_work()" or similar to get that n_hdlc_send_frames() started,
>> rather than doing it itself.
>>
>> Example: net/nfc/nci/uart.c. It does that
>>
>> schedule_work(&nu->write_work);
>>
>> instead of actually trying to do a write from a wakeup routine
>> (similar examples in ppp - "tasklet_schedule(&ap->tsk)" etc).
>>
>> I mean, it's called "wakeup", not "write". So I think the fundamental
>> confusion here is in hdlc, not the tty layer.
>>
>> Linus
>>
OK.
> This is what I understand from the above argument: do a schedule_work() to get
> that n_hdlc_send_frames() started; in this way, n_hdlc_tty_wakeup() can
> return to the caller and n_hdlc_send_frames() is executed asynchronously
> (i.e., no longer in an atomic context).
Yes. If we copy how net/nfc/nci/uart.c does, the fix would look like:
--------------------
drivers/tty/n_hdlc.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
index 7e0884ecc74f..a71fcac60925 100644
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -140,6 +140,8 @@ struct n_hdlc {
struct n_hdlc_buf_list rx_buf_list;
struct n_hdlc_buf_list tx_free_buf_list;
struct n_hdlc_buf_list rx_free_buf_list;
+ struct work_struct write_work;
+ struct tty_struct *tty_for_write_work;
};
/*
@@ -210,6 +212,8 @@ static void n_hdlc_tty_close(struct tty_struct *tty)
wake_up_interruptible(&tty->read_wait);
wake_up_interruptible(&tty->write_wait);
+ cancel_work_sync(&n_hdlc->write_work);
+
n_hdlc_free_buf_list(&n_hdlc->rx_free_buf_list);
n_hdlc_free_buf_list(&n_hdlc->tx_free_buf_list);
n_hdlc_free_buf_list(&n_hdlc->rx_buf_list);
@@ -334,6 +338,20 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
goto check_again;
} /* end of n_hdlc_send_frames() */
+/**
+ * n_hdlc_tty_write_work - Asynchronous callback for transmit wakeup
+ * @work: pointer to work_struct
+ *
+ * Called when low level device driver can accept more send data.
+ */
+static void n_hdlc_tty_write_work(struct work_struct *work)
+{
+ struct n_hdlc *n_hdlc = container_of(work, struct n_hdlc, write_work);
+ struct tty_struct *tty = n_hdlc->tty_for_write_work;
+
+ n_hdlc_send_frames(n_hdlc, tty);
+} /* end of n_hdlc_tty_write_work() */
+
/**
* n_hdlc_tty_wakeup - Callback for transmit wakeup
* @tty: pointer to associated tty instance data
@@ -344,7 +362,8 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty)
{
struct n_hdlc *n_hdlc = tty->disc_data;
- n_hdlc_send_frames(n_hdlc, tty);
+ n_hdlc->tty_for_write_work = tty;
+ schedule_work(&n_hdlc->write_work);
} /* end of n_hdlc_tty_wakeup() */
/**
@@ -706,6 +725,7 @@ static struct n_hdlc *n_hdlc_alloc(void)
if (!n_hdlc)
return NULL;
+ INIT_WORK(&n_hdlc->write_work, n_hdlc_tty_write_work);
spin_lock_init(&n_hdlc->rx_free_buf_list.spinlock);
spin_lock_init(&n_hdlc->tx_free_buf_list.spinlock);
spin_lock_init(&n_hdlc->rx_buf_list.spinlock);
--------------------
>
> I hope that I'm not missing something. If the above summary is correct,
> please forgive a newbie for the following questions...
>
> Commit f9e053dcfc02 ("tty: Serialize tty flow control changes with flow_lock")
> has introduced spinlocks to serialize flow control changes and avoid the
> concurrent executions of __start_tty() and __stop_tty().
>
> This is an excerpt from the above-mentioned commit:
>
> ->
> Introduce tty->flow_lock spinlock to serialize tty flow control changes.
> Split out unlocked __start_tty()/__stop_tty() flavors for use by
> ioctl(TCXONC) in follow-on patch.
> <-
>
> This is the reason why we are dealing with this bug. Currently we have __start_tty()
> called with an acquired spinlock and IRQs disabled and the calls chain leads to
> console_lock() while in atomic context.
If we hit a race window described in that commit
CPU 0 | CPU 1
stop_tty() |
lock ctrl_lock |
tty->stopped = 1 |
unlock ctrl_lock |
| start_tty()
| lock ctrl_lock
| tty->stopped = 0
| unlock ctrl_lock
| driver->start()
driver->stop() |
In this case, the flow control state now indicates the tty has
been started, but the actual hardware state has actually been stopped.
, the tty->stopped flag remains 0 despite driver->stop() is called after
driver->start() finished. tty->stopped (the flow control state) says "not stopped"
but the actual hardware state is "stopped".
>
> In summation, my questions are...
>
> 1) Why do we still need to protect __start_tty() and __stop_tty() with spin_lock_irq()
> if the solution to the bug is to execute n_hdlc_send_frames() asynchronously?
Without serialization, tty->stopped flag and the actual hardware state can mismatch.
>
> 2) If it is true that we need to avoid concurrent executions of __start_tty() and
> __stop_tty(), can we just use a Mutex in the IOCTL's helper?
Yes if all __start_tty() and __stop_tty() callers were schedulable context.
But stop_tty() says that stop_tty() might be called from atomic context.
Thus, we can't use a mutex for protecting tty->stopped flag.
>
> Thanks,
>
> Fabio M. De Francesco
By the way, even with above patch, I think
CPU 0 | CPU 1 | CPU 2
stop_tty() | |
lock flow.lock | |
tty->stopped = 1 | |
driver->stop() | |
unlock flow.lock | |
| start_tty() |
| lock flow.lock |
| tty->stopped = 0 |
| driver->start() => Schedules n_hdlc_send_frames()
| unlock flow.lock |
stop_tty() | |
lock flow.lock | |
tty->stopped = 1 | |
driver->stop() | |
unlock flow.lock | |
| | Starts n_hdlc_send_frames()
(that is, the n_hdlc is writing to consoles despite tty->stopped is 1) can happen
until n_hdlc_send_frames() completes.
Then, even scheduling next n_hdlc_send_frames() while previous n_hdlc_send_frames() is
possible? In the worst case, multiple CPUs can run n_hdlc_send_frames() concurrently?
CPU 0 | CPU 1 | CPU 2 | CPU 3
stop_tty() | | |
lock flow.lock | | |
tty->stopped = 1 | | |
driver->stop() | | |
unlock flow.lock | | |
| start_tty() | |
| lock flow.lock | |
| tty->stopped = 0 | |
| driver->start() => Schedules n_hdlc_send_frames()
| unlock flow.lock | |
| | Starts n_hdlc_send_frames()
stop_tty() | | |
lock flow.lock | | |
tty->stopped = 1 | | |
driver->stop() | | |
unlock flow.lock | | |
| start_tty() | |
| lock flow.lock | |
| tty->stopped = 0 | |
| driver->start() => Schedules n_hdlc_send_frames()
| unlock flow.lock | |
| | | Starts n_hdlc_send_frames()
Ah, OK. n_hdlc->tbusy is there for serialization.
On Friday, December 3, 2021 1:32:21 PM CET Tetsuo Handa wrote:
> On 2021/12/03 20:00, Fabio M. De Francesco wrote:
> > On Thursday, December 2, 2021 7:35:16 PM CET Linus Torvalds wrote:
> >> [...]
> >> Example: net/nfc/nci/uart.c. It does that
> >>
> >> schedule_work(&nu->write_work);
> >>
> >> instead of actually trying to do a write from a wakeup routine
> >> (similar examples in ppp - "tasklet_schedule(&ap->tsk)" etc).
> >>
> >> I mean, it's called "wakeup", not "write". So I think the fundamental
> >> confusion here is in hdlc, not the tty layer.
> >>
> >> Linus
> >>
>
> OK.
>
> > This is what I understand from the above argument: do a schedule_work() to get
> > that n_hdlc_send_frames() started; in this way, n_hdlc_tty_wakeup() can
> > return to the caller and n_hdlc_send_frames() is executed asynchronously
> > (i.e., no longer in an atomic context).
>
> Yes.
Thanks for confirming :)
> [...]
>
> > [...]
> >
> > Commit f9e053dcfc02 ("tty: Serialize tty flow control changes with flow_lock")
> > has introduced spinlocks to serialize flow control changes and avoid the
> > concurrent executions of __start_tty() and __stop_tty().
> >
> > [...]
> >
> > This is the reason why we are dealing with this bug. Currently we have __start_tty()
> > called with an acquired spinlock and IRQs disabled and the calls chain leads to
> > console_lock() while in atomic context.
>
> If we hit a race window described in that commit
>
> CPU 0 | CPU 1
> stop_tty() |
> lock ctrl_lock |
> tty->stopped = 1 |
> unlock ctrl_lock |
> | start_tty()
> | lock ctrl_lock
> | tty->stopped = 0
> | unlock ctrl_lock
> | driver->start()
> driver->stop() |
>
> In this case, the flow control state now indicates the tty has
> been started, but the actual hardware state has actually been stopped.
>
> , the tty->stopped flag remains 0 despite driver->stop() is called after
> driver->start() finished. tty->stopped (the flow control state) says "not stopped"
> but the actual hardware state is "stopped".
This is clear and it is the reason why I cited that commit. I understand that
__stop_tty() and __start_tty() cannot run concurrently.
I'm sorry if my poor abilities to express complex thoughts in English may
have made you to think that I'm not aware of the real race condition issue
that commit f9e053dcfc02 is trying to address :(
Unfortunately, by fixing the SAC bug with a mere asynchronous execution of
n_hdlc_send_frames() in a work queue, we may still have different sources of
race conditions.
The point is that, when n_hdlc_tty_wakeup() returns to the IOCTL helper (the caller)
that acquired the spinlock, that same spinlock is immediately released without
even knowing whether or not n_hdlc_send_frames() has had the chance to run
and complete. Further __start_tty() or __stop_tty() calls are allowed to acquire that
spinlock again and we lose serialization because they still run concurrently.
Actually, in the final part of your email you say that more than one instance of
__start_tty() cannot run because of a variable that checks that another thread is
still running the same code. I haven't yet checked, however you showed other
steps that can lead to races and I agree that the problem is not yet fixed.
This is why I think that n_hdlc_send_frames(), when runs asynchronously with
the change that Linus suggested, should also signal completions or change a
a condition variable.
Still using locks locks where it is needed, we should somehow use completions
or condition variables to enforce an execution alternation between __stop_tty
and __start_tty.
Either:
1) wait_for_completion() and complete();
Or:
2) wait_event(tty_event, tty->flow.tco_stopped == true) and
wait_event(tty_event, tty->flow.tco_stopped == false) before entering respectively
__start_tty() and __stop_tty().
Regards,
Fabio M. De Francesco
syzbot is reporting that an unprivileged user who logged in from tty
console can crash the system using a reproducer shown below [1], for
n_hdlc_tty_wakeup() is synchronously calling n_hdlc_send_frames().
----------
#include <sys/ioctl.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
const int disc = 0xd;
ioctl(1, TIOCSETD, &disc);
while (1) {
ioctl(1, TCXONC, 0);
write(1, "", 1);
ioctl(1, TCXONC, 1); /* Kernel panic - not syncing: scheduling while atomic */
}
}
----------
Linus suspected that "struct tty_ldisc"->ops->write_wakeup() must not
sleep, and Jiri confirmed it from include/linux/tty_ldisc.h. Thus, defer
n_hdlc_send_frames() from n_hdlc_tty_wakeup() to a WQ context like
net/nfc/nci/uart.c does.
Link: https://syzkaller.appspot.com/bug?extid=5f47a8cea6a12b77a876 [1]
Reported-by: syzbot <[email protected]>
Analyzed-by: Fabio M. De Francesco <[email protected]>
Suggested-by: Linus Torvalds <[email protected]>
Confirmed-by: Jiri Slaby <[email protected]>
Signed-off-by: Tetsuo Handa <[email protected]>
---
drivers/tty/n_hdlc.c | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
index 7e0884ecc74f..23ba1fc99df8 100644
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -140,6 +140,8 @@ struct n_hdlc {
struct n_hdlc_buf_list rx_buf_list;
struct n_hdlc_buf_list tx_free_buf_list;
struct n_hdlc_buf_list rx_free_buf_list;
+ struct work_struct write_work;
+ struct tty_struct *tty_for_write_work;
};
/*
@@ -154,6 +156,7 @@ static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list);
/* Local functions */
static struct n_hdlc *n_hdlc_alloc(void);
+static void n_hdlc_tty_write_work(struct work_struct *work);
/* max frame size for memory allocations */
static int maxframe = 4096;
@@ -210,6 +213,8 @@ static void n_hdlc_tty_close(struct tty_struct *tty)
wake_up_interruptible(&tty->read_wait);
wake_up_interruptible(&tty->write_wait);
+ cancel_work_sync(&n_hdlc->write_work);
+
n_hdlc_free_buf_list(&n_hdlc->rx_free_buf_list);
n_hdlc_free_buf_list(&n_hdlc->tx_free_buf_list);
n_hdlc_free_buf_list(&n_hdlc->rx_buf_list);
@@ -241,6 +246,8 @@ static int n_hdlc_tty_open(struct tty_struct *tty)
return -ENFILE;
}
+ INIT_WORK(&n_hdlc->write_work, n_hdlc_tty_write_work);
+ n_hdlc->tty_for_write_work = tty;
tty->disc_data = n_hdlc;
tty->receive_room = 65536;
@@ -334,6 +341,20 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
goto check_again;
} /* end of n_hdlc_send_frames() */
+/**
+ * n_hdlc_tty_write_work - Asynchronous callback for transmit wakeup
+ * @work: pointer to work_struct
+ *
+ * Called when low level device driver can accept more send data.
+ */
+static void n_hdlc_tty_write_work(struct work_struct *work)
+{
+ struct n_hdlc *n_hdlc = container_of(work, struct n_hdlc, write_work);
+ struct tty_struct *tty = n_hdlc->tty_for_write_work;
+
+ n_hdlc_send_frames(n_hdlc, tty);
+} /* end of n_hdlc_tty_write_work() */
+
/**
* n_hdlc_tty_wakeup - Callback for transmit wakeup
* @tty: pointer to associated tty instance data
@@ -344,7 +365,7 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty)
{
struct n_hdlc *n_hdlc = tty->disc_data;
- n_hdlc_send_frames(n_hdlc, tty);
+ schedule_work(&n_hdlc->write_work);
} /* end of n_hdlc_tty_wakeup() */
/**
--
2.18.4
On Mon, Dec 6, 2021 at 3:45 AM Tetsuo Handa
<[email protected]> wrote:
>
> Linus suspected that "struct tty_ldisc"->ops->write_wakeup() must not
> sleep, and Jiri confirmed it from include/linux/tty_ldisc.h. Thus, defer
> n_hdlc_send_frames() from n_hdlc_tty_wakeup() to a WQ context like
> net/nfc/nci/uart.c does.
Thanks, this looks good to me.
That said, I think there's pretty much the *exact* same pattern in
drivers/net/caif/caif_serial.c:
write_wakeup() causes "handle_tx()", which then calls tty->ops->write().
drivers/net/hamradio/mkiss.c
mkiss_write_wakeup() -> tty->ops->write()
drivers/tty/n_gsm.c:
gsmld_write_wakeup -> gsm_data_kick() -> gsmld_output ->
gsm->tty->ops->write()
so this does seem to be a common bug pattern for code that has never
really seen a lot of testing.
The core tty stuff seems to get it right, but maybe I missed something
in my quick "grep and look for patterns".
So I think this patch is good, but I do wonder if perhaps we should
move the "work_struct" into the tty layer itself, and do the whole
"schedule_work()" at that level.
Some code never wants it (most notably the regular n_tty one), but at
least n_tty doesn't really care, I suspect. n_tty is using
write_wakeup() literally just for fasync handling, so I suspect it's
not exactly going to be performance-critical.
Of course, maybe the fix is to just fix caif_serial/mkiss and n_gsm.
Or mark them broken - does anybody use them?
Linus
Linus
On Monday, December 6, 2021 12:44:38 PM CET Tetsuo Handa wrote:
> syzbot is reporting that an unprivileged user who logged in from tty
> console can crash the system using a reproducer shown below [1], for
> n_hdlc_tty_wakeup() is synchronously calling n_hdlc_send_frames().
>
> ----------
> #include <sys/ioctl.h>
> #include <unistd.h>
>
> int main(int argc, char *argv[])
> {
> const int disc = 0xd;
>
> ioctl(1, TIOCSETD, &disc);
> while (1) {
> ioctl(1, TCXONC, 0);
> write(1, "", 1);
> ioctl(1, TCXONC, 1); /* Kernel panic - not syncing: scheduling while atomic */
> }
> }
> ----------
>
> Linus suspected that "struct tty_ldisc"->ops->write_wakeup() must not
> sleep, and Jiri confirmed it from include/linux/tty_ldisc.h. Thus, defer
> n_hdlc_send_frames() from n_hdlc_tty_wakeup() to a WQ context like
> net/nfc/nci/uart.c does.
>
> Link: https://syzkaller.appspot.com/bug?extid=5f47a8cea6a12b77a876 [1]
> Reported-by: syzbot <[email protected]>
> Analyzed-by: Fabio M. De Francesco <[email protected]>
> Suggested-by: Linus Torvalds <[email protected]>
> Confirmed-by: Jiri Slaby <[email protected]>
> Signed-off-by: Tetsuo Handa <[email protected]>
> ---
> drivers/tty/n_hdlc.c | 23 ++++++++++++++++++++++-
> 1 file changed, 22 insertions(+), 1 deletion(-)
This looks to be the correct solution, at least for fixing the SAC bug that
Syzbot reported.
I say "at least" only because (for the moment) we have lost the synchronization
that the spinlocks in n_hdlc_tty_ioctl() were meant to assure.
As we have discussed, now n_hdlc_tty_wakeup() returns immediately after
calling schedule_work(). Therefore, n_hdlc_tty_ioctl() releases the spinlock
without it being notified whether or not n_hdlc_send_frames() has had the
chance to run and complete.[1][2][3]
Only a minor note: since the purpose of the new "write_work" is to start tty,
I'd have chosen different name, like "start_work" or "start_write_work" and I'd
have used "n_hdlc_tty_start_work()" instead of "n_hdlc_tty_write_work()" for the
callback.
Since I have analyzed and discussed this bug with you and others, I assume that
I got the necessary knowledge of this subject that allows me to review this patch.
Therefore, despite the due reservations about the loss of alternation and
synchronization between __stop_tty () and __start_tty (), this work is useful for
fixing the reported bug, so I'd like to give my tag...
Reviewed-by: Fabio M. De Francesco <[email protected]>
Thanks for your patch,
Fabio M. De Francesco
[1] https://lore.kernel.org/lkml/[email protected]/
[2] https://lore.kernel.org/lkml/[email protected]/
[3] https://lore.kernel.org/lkml/[email protected]/
>
> diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
> index 7e0884ecc74f..23ba1fc99df8 100644
> --- a/drivers/tty/n_hdlc.c
> +++ b/drivers/tty/n_hdlc.c
> @@ -140,6 +140,8 @@ struct n_hdlc {
> struct n_hdlc_buf_list rx_buf_list;
> struct n_hdlc_buf_list tx_free_buf_list;
> struct n_hdlc_buf_list rx_free_buf_list;
> + struct work_struct write_work;
> + struct tty_struct *tty_for_write_work;
> };
>
> /*
> @@ -154,6 +156,7 @@ static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list);
> /* Local functions */
>
> static struct n_hdlc *n_hdlc_alloc(void);
> +static void n_hdlc_tty_write_work(struct work_struct *work);
>
> /* max frame size for memory allocations */
> static int maxframe = 4096;
> @@ -210,6 +213,8 @@ static void n_hdlc_tty_close(struct tty_struct *tty)
> wake_up_interruptible(&tty->read_wait);
> wake_up_interruptible(&tty->write_wait);
>
> + cancel_work_sync(&n_hdlc->write_work);
> +
> n_hdlc_free_buf_list(&n_hdlc->rx_free_buf_list);
> n_hdlc_free_buf_list(&n_hdlc->tx_free_buf_list);
> n_hdlc_free_buf_list(&n_hdlc->rx_buf_list);
> @@ -241,6 +246,8 @@ static int n_hdlc_tty_open(struct tty_struct *tty)
> return -ENFILE;
> }
>
> + INIT_WORK(&n_hdlc->write_work, n_hdlc_tty_write_work);
> + n_hdlc->tty_for_write_work = tty;
> tty->disc_data = n_hdlc;
> tty->receive_room = 65536;
>
> @@ -334,6 +341,20 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
> goto check_again;
> } /* end of n_hdlc_send_frames() */
>
> +/**
> + * n_hdlc_tty_write_work - Asynchronous callback for transmit wakeup
> + * @work: pointer to work_struct
> + *
> + * Called when low level device driver can accept more send data.
> + */
> +static void n_hdlc_tty_write_work(struct work_struct *work)
> +{
> + struct n_hdlc *n_hdlc = container_of(work, struct n_hdlc, write_work);
> + struct tty_struct *tty = n_hdlc->tty_for_write_work;
> +
> + n_hdlc_send_frames(n_hdlc, tty);
> +} /* end of n_hdlc_tty_write_work() */
> +
> /**
> * n_hdlc_tty_wakeup - Callback for transmit wakeup
> * @tty: pointer to associated tty instance data
> @@ -344,7 +365,7 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty)
> {
> struct n_hdlc *n_hdlc = tty->disc_data;
>
> - n_hdlc_send_frames(n_hdlc, tty);
> + schedule_work(&n_hdlc->write_work);
> } /* end of n_hdlc_tty_wakeup() */
>
> /**
> --
> 2.18.4
>
>
>
On 2021/12/07 3:07, Linus Torvalds wrote:
> On Mon, Dec 6, 2021 at 3:45 AM Tetsuo Handa
> <[email protected]> wrote:
>>
>> Linus suspected that "struct tty_ldisc"->ops->write_wakeup() must not
>> sleep, and Jiri confirmed it from include/linux/tty_ldisc.h. Thus, defer
>> n_hdlc_send_frames() from n_hdlc_tty_wakeup() to a WQ context like
>> net/nfc/nci/uart.c does.
>
> Thanks, this looks good to me.
>
> That said, I think there's pretty much the *exact* same pattern in
>
> drivers/net/caif/caif_serial.c:
> write_wakeup() causes "handle_tx()", which then calls tty->ops->write().
>
> drivers/net/hamradio/mkiss.c
> mkiss_write_wakeup() -> tty->ops->write()
>
> drivers/tty/n_gsm.c:
> gsmld_write_wakeup -> gsm_data_kick() -> gsmld_output ->
> gsm->tty->ops->write()
>
> so this does seem to be a common bug pattern for code that has never
> really seen a lot of testing.
Indeed.
>
> The core tty stuff seems to get it right, but maybe I missed something
> in my quick "grep and look for patterns".
handle_tx() in caif_serial.c has a line
/* skb_peek is safe because handle_tx is called after skb_queue_tail */
and I think that this comment is true only when handle_tx() is called from
"struct net_device_ops"->ndo_start_xmit (== caif_xmit()). If handle_tx() is
called from "struct tty_ldisc_ops"->write_wakeup (== ldisc_tx_wakeup()),
handle_tx() might be called before skb_queue_tail() is called?
>
> So I think this patch is good, but I do wonder if perhaps we should
> move the "work_struct" into the tty layer itself, and do the whole
> "schedule_work()" at that level.
I don't know about net_device_ops, but from synchronization point of view,
ser = tty->disc_data;
BUG_ON(ser == NULL);
WARN_ON(ser->tty != tty);
in ldisc_tx_wakeup() makes me feel uneasy, and I can't expect that ldisc_tx_wakeup()
will do safe synchronization by moving the "work_struct" into the tty layer itself.
That is, I think we somehow need to fix caif_serial.c after all.
>
> Some code never wants it (most notably the regular n_tty one), but at
> least n_tty doesn't really care, I suspect. n_tty is using
> write_wakeup() literally just for fasync handling, so I suspect it's
> not exactly going to be performance-critical.
>
> Of course, maybe the fix is to just fix caif_serial/mkiss and n_gsm.
> Or mark them broken - does anybody use them?
I think that fixing individual driver sounds safer choice.
syzbot is reporting that an unprivileged user who logged in from tty
console can crash the system using a reproducer shown below [1], for
n_hdlc_tty_wakeup() is synchronously calling n_hdlc_send_frames().
----------
#include <sys/ioctl.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
const int disc = 0xd;
ioctl(1, TIOCSETD, &disc);
while (1) {
ioctl(1, TCXONC, 0);
write(1, "", 1);
ioctl(1, TCXONC, 1); /* Kernel panic - not syncing: scheduling while atomic */
}
}
----------
Linus suspected that "struct tty_ldisc"->ops->write_wakeup() must not
sleep, and Jiri confirmed it from include/linux/tty_ldisc.h. Thus, defer
n_hdlc_send_frames() from n_hdlc_tty_wakeup() to a WQ context like
net/nfc/nci/uart.c does.
Link: https://syzkaller.appspot.com/bug?extid=5f47a8cea6a12b77a876 [1]
Reported-by: syzbot <[email protected]>
Analyzed-by: Fabio M. De Francesco <[email protected]>
Suggested-by: Linus Torvalds <[email protected]>
Confirmed-by: Jiri Slaby <[email protected]>
Reviewed-by: Fabio M. De Francesco <[email protected]>
Signed-off-by: Tetsuo Handa <[email protected]>
---
drivers/tty/n_hdlc.c | 23 ++++++++++++++++++++++-
1 file changed, 22 insertions(+), 1 deletion(-)
diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
index 7e0884ecc74f..23ba1fc99df8 100644
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -140,6 +140,8 @@ struct n_hdlc {
struct n_hdlc_buf_list rx_buf_list;
struct n_hdlc_buf_list tx_free_buf_list;
struct n_hdlc_buf_list rx_free_buf_list;
+ struct work_struct write_work;
+ struct tty_struct *tty_for_write_work;
};
/*
@@ -154,6 +156,7 @@ static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list);
/* Local functions */
static struct n_hdlc *n_hdlc_alloc(void);
+static void n_hdlc_tty_write_work(struct work_struct *work);
/* max frame size for memory allocations */
static int maxframe = 4096;
@@ -210,6 +213,8 @@ static void n_hdlc_tty_close(struct tty_struct *tty)
wake_up_interruptible(&tty->read_wait);
wake_up_interruptible(&tty->write_wait);
+ cancel_work_sync(&n_hdlc->write_work);
+
n_hdlc_free_buf_list(&n_hdlc->rx_free_buf_list);
n_hdlc_free_buf_list(&n_hdlc->tx_free_buf_list);
n_hdlc_free_buf_list(&n_hdlc->rx_buf_list);
@@ -241,6 +246,8 @@ static int n_hdlc_tty_open(struct tty_struct *tty)
return -ENFILE;
}
+ INIT_WORK(&n_hdlc->write_work, n_hdlc_tty_write_work);
+ n_hdlc->tty_for_write_work = tty;
tty->disc_data = n_hdlc;
tty->receive_room = 65536;
@@ -334,6 +341,20 @@ static void n_hdlc_send_frames(struct n_hdlc *n_hdlc, struct tty_struct *tty)
goto check_again;
} /* end of n_hdlc_send_frames() */
+/**
+ * n_hdlc_tty_write_work - Asynchronous callback for transmit wakeup
+ * @work: pointer to work_struct
+ *
+ * Called when low level device driver can accept more send data.
+ */
+static void n_hdlc_tty_write_work(struct work_struct *work)
+{
+ struct n_hdlc *n_hdlc = container_of(work, struct n_hdlc, write_work);
+ struct tty_struct *tty = n_hdlc->tty_for_write_work;
+
+ n_hdlc_send_frames(n_hdlc, tty);
+} /* end of n_hdlc_tty_write_work() */
+
/**
* n_hdlc_tty_wakeup - Callback for transmit wakeup
* @tty: pointer to associated tty instance data
@@ -344,7 +365,7 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty)
{
struct n_hdlc *n_hdlc = tty->disc_data;
- n_hdlc_send_frames(n_hdlc, tty);
+ schedule_work(&n_hdlc->write_work);
} /* end of n_hdlc_tty_wakeup() */
/**
--
2.18.4