Hi Tytso,
I find a error in misc/fsck.c, if run the fsck -N command, processes
don't execute, just show what would be done. However, the pid whose
value is -1 is added to the instance_list list in the execute
function,if the kill_all function is called later, kill(-1, signum)
is executed, Signals are sent to all processes except the number one
process and itself. Other processes will be killed if they use the
default signal processing function.
execute:
if (noexecute)
pid = -1;
inst->pid = pid;
// Find the end of the list, so we add the instance on at the end.
kill_all:
for (inst = instance_list; inst; inst = inst->next) {
kill(inst->pid, signum);
}
misc/fsck.c:
main:
->PRS
case 'N':
noexecute++;
for (num_devices) {
if (cancel_requested) {
->kill_all(SIGTERM);
}
->fsck_device
->execute
}
(gdb) bt
#0 execute (type=<optimized out>, type@entry=0x412cd0 "ext4",
device=0x412b00 "/root/a.img", mntpt=<optimized out>,
interactive=interactive@entry=1) at fsck.c:523
#1 0x0000000000404959 in fsck_device (fs=fs@entry=0x412ac0,
interactive=interactive@entry=1) at fsck.c:727
#2 0x0000000000402704 in main (argc=<optimized out>, argv=<optimized
out>) at fsck.c:1333
(gdb) p inst->pid
$7 = -1
regards,
Zhan Chengbin
On Thu, Sep 29, 2022 at 03:39:57PM +0800, zhanchengbin wrote:
> Hi Tytso,
> I find a error in misc/fsck.c, if run the fsck -N command, processes
> don't execute, just show what would be done. However, the pid whose
> value is -1 is added to the instance_list list in the execute
> function,if the kill_all function is called later, kill(-1, signum)
> is executed, Signals are sent to all processes except the number one
> process and itself. Other processes will be killed if they use the
> default signal processing function.
Hi,
indeed we'd like to avoid killing the instance that was not ran because
of noexecute. Can you try the following patch?
Thanks!
-Lukas
diff --git a/misc/fsck.c b/misc/fsck.c
index 1f6ec7d9..8fae7730 100644
--- a/misc/fsck.c
+++ b/misc/fsck.c
@@ -497,9 +497,10 @@ static int execute(const char *type, const char *device, const char *mntpt,
}
/* Fork and execute the correct program. */
- if (noexecute)
+ if (noexecute) {
pid = -1;
- else if ((pid = fork()) < 0) {
+ inst->flags |= FLAG_DONE;
+ } else if ((pid = fork()) < 0) {
perror("fork");
free(inst);
return errno;
@@ -544,6 +545,9 @@ static int kill_all(int signum)
struct fsck_instance *inst;
int n = 0;
+ if (noexecute)
+ return 0;
+
for (inst = instance_list; inst; inst = inst->next) {
if (inst->flags & FLAG_DONE)
continue;
>
> execute:
> if (noexecute)
> pid = -1;
> inst->pid = pid;
> // Find the end of the list, so we add the instance on at the end.
>
> kill_all:
> for (inst = instance_list; inst; inst = inst->next) {
> kill(inst->pid, signum);
> }
>
> misc/fsck.c:
> main:
> ->PRS
> case 'N':
> noexecute++;
> for (num_devices) {
> if (cancel_requested) {
> ->kill_all(SIGTERM);
> }
> ->fsck_device
> ->execute
> }
>
> (gdb) bt
> #0 execute (type=<optimized out>, type@entry=0x412cd0 "ext4",
> device=0x412b00 "/root/a.img", mntpt=<optimized out>,
> interactive=interactive@entry=1) at fsck.c:523
> #1 0x0000000000404959 in fsck_device (fs=fs@entry=0x412ac0,
> interactive=interactive@entry=1) at fsck.c:727
> #2 0x0000000000402704 in main (argc=<optimized out>, argv=<optimized out>)
> at fsck.c:1333
> (gdb) p inst->pid
> $7 = -1
>
> regards,
> Zhan Chengbin
>
On 2022/9/29 19:28, Lukas Czerner wrote:
> Hi,
>
> indeed we'd like to avoid killing the instance that was not ran because
> of noexecute. Can you try the following patch?
>
> Thanks!
> -Lukas
Yes, you're right, I think we can fix it in this way.
diff --git a/misc/fsck.c b/misc/fsck.c
index 1f6ec7d9..91edbf17 100644
--- a/misc/fsck.c
+++ b/misc/fsck.c
@@ -547,6 +547,8 @@ static int kill_all(int signum)
for (inst = instance_list; inst; inst = inst->next) {
if (inst->flags & FLAG_DONE)
continue;
+ if (inst->pid == -1)
+ continue;
kill(inst->pid, signum);
n++;
}
>
>
> diff --git a/misc/fsck.c b/misc/fsck.c
> index 1f6ec7d9..8fae7730 100644
> --- a/misc/fsck.c
> +++ b/misc/fsck.c
> @@ -497,9 +497,10 @@ static int execute(const char *type, const char *device, const char *mntpt,
> }
>
> /* Fork and execute the correct program. */
> - if (noexecute)
> + if (noexecute) {
> pid = -1;
> - else if ((pid = fork()) < 0) {
> + inst->flags |= FLAG_DONE;
> + } else if ((pid = fork()) < 0) {
> perror("fork");
> free(inst);
> return errno;
> @@ -544,6 +545,9 @@ static int kill_all(int signum)
> struct fsck_instance *inst;
> int n = 0;
>
> + if (noexecute)
> + return 0;
> +
> for (inst = instance_list; inst; inst = inst->next) {
> if (inst->flags & FLAG_DONE)
> continue;
regards,
Zhan Chengbin
On Fri, Sep 30, 2022 at 09:42:52AM +0800, zhanchengbin wrote:
>
>
> On 2022/9/29 19:28, Lukas Czerner wrote:
> > Hi,
> >
> > indeed we'd like to avoid killing the instance that was not ran because
> > of noexecute. Can you try the following patch?
> >
> > Thanks!
> > -Lukas
>
> Yes, you're right, I think we can fix it in this way.
>
> diff --git a/misc/fsck.c b/misc/fsck.c
> index 1f6ec7d9..91edbf17 100644
> --- a/misc/fsck.c
> +++ b/misc/fsck.c
> @@ -547,6 +547,8 @@ static int kill_all(int signum)
> for (inst = instance_list; inst; inst = inst->next) {
> if (inst->flags & FLAG_DONE)
> continue;
> + if (inst->pid == -1)
> + continue;
Yeah, that works as well although I find the "if (noexecute)" condition
more obvious. We can do both. Also rather than checking for -1 we can
check for <= 0 since anything other than real pid at this point is a bug.
Feel free to send a proper patch.
Thanks!
-Lukas
> kill(inst->pid, signum);
> n++;
> }
> >
> >
> > diff --git a/misc/fsck.c b/misc/fsck.c
> > index 1f6ec7d9..8fae7730 100644
> > --- a/misc/fsck.c
> > +++ b/misc/fsck.c
> > @@ -497,9 +497,10 @@ static int execute(const char *type, const char *device, const char *mntpt,
> > }
> > /* Fork and execute the correct program. */
> > - if (noexecute)
> > + if (noexecute) {
> > pid = -1;
> > - else if ((pid = fork()) < 0) {
> > + inst->flags |= FLAG_DONE;
> > + } else if ((pid = fork()) < 0) {
> > perror("fork");
> > free(inst);
> > return errno;
> > @@ -544,6 +545,9 @@ static int kill_all(int signum)
> > struct fsck_instance *inst;
> > int n = 0;
> > + if (noexecute)
> > + return 0;
> > +
> > for (inst = instance_list; inst; inst = inst->next) {
> > if (inst->flags & FLAG_DONE)
> > continue;
> regards,
> Zhan Chengbin
>
[cc util-linux and karel zak]
TLDR: util-linux's fsck program has an interesting bug in it where if
someone runs "fsck -N", it will set up a fsck_instance context for each
filesystem with inst->pid = -1. If someone sends the fsck process a
SIGINT/SIGTERM before it finishes enumerating filesystems, it will try
to kill all the fsck instances via "kill(inst->pid, ...);" which will
terminate every process on the system.
On Fri, Sep 30, 2022 at 09:20:42AM +0200, Lukas Czerner wrote:
> On Fri, Sep 30, 2022 at 09:42:52AM +0800, zhanchengbin wrote:
> >
> >
> > On 2022/9/29 19:28, Lukas Czerner wrote:
> > > Hi,
> > >
> > > indeed we'd like to avoid killing the instance that was not ran because
> > > of noexecute. Can you try the following patch?
> > >
> > > Thanks!
> > > -Lukas
> >
> > Yes, you're right, I think we can fix it in this way.
> >
> > diff --git a/misc/fsck.c b/misc/fsck.c
> > index 1f6ec7d9..91edbf17 100644
> > --- a/misc/fsck.c
> > +++ b/misc/fsck.c
> > @@ -547,6 +547,8 @@ static int kill_all(int signum)
> > for (inst = instance_list; inst; inst = inst->next) {
> > if (inst->flags & FLAG_DONE)
> > continue;
> > + if (inst->pid == -1)
> > + continue;
>
> Yeah, that works as well although I find the "if (noexecute)" condition
> more obvious. We can do both. Also rather than checking for -1 we can
> check for <= 0 since anything other than real pid at this point is a bug.
>
> Feel free to send a proper patch.
I was about to ask why we even care about misc/fsck.c because it's
clearly a weird old program that has been bitrotting for years and
likely replaced by some other code in util-linux. Then I thought I had
better check util-linux, and...
https://git.kernel.org/pub/scm/utils/util-linux/util-linux.git/tree/disk-utils/fsck.c
/*
* fsck --- A generic, parallelizing front-end for the fsck program.
* It will automatically try to run fsck programs in parallel if the
* devices are on separate spindles. It is based on the same ideas as
* the generic front end for fsck by David Engel and Fred van Kempen,
* but it has been completely rewritten from scratch to support
* parallel execution.
*
* Written by Theodore Ts'o, <[email protected]>
LOL, it's the same source code, and I think it has the same bug, since
"noexecute" mode sets pid = -1 at like 688:
/* Fork and execute the correct program. */
if (noexecute)
pid = -1;
and then sets inst->pid = pid at line 703:
inst->pid = pid;
and kill_all() passes that to kill() at line 733:
for (inst = instance_list; inst; inst = inst->next) {
if (inst->flags & FLAG_DONE)
continue;
kill(inst->pid, signum);
n++;
}
From that I conclude that this is a real bug in util-linux, and we
ought to be talking to them about this. Evidently this has been broken
since e2fsprogs commit 33922999 in January 1999, though it was only
added to util-linux in commit 607c2a72952f in February 2009.
--D
> Thanks!
> -Lukas
>
> > kill(inst->pid, signum);
> > n++;
> > }
> > >
> > >
> > > diff --git a/misc/fsck.c b/misc/fsck.c
> > > index 1f6ec7d9..8fae7730 100644
> > > --- a/misc/fsck.c
> > > +++ b/misc/fsck.c
> > > @@ -497,9 +497,10 @@ static int execute(const char *type, const char *device, const char *mntpt,
> > > }
> > > /* Fork and execute the correct program. */
> > > - if (noexecute)
> > > + if (noexecute) {
> > > pid = -1;
> > > - else if ((pid = fork()) < 0) {
> > > + inst->flags |= FLAG_DONE;
> > > + } else if ((pid = fork()) < 0) {
> > > perror("fork");
> > > free(inst);
> > > return errno;
> > > @@ -544,6 +545,9 @@ static int kill_all(int signum)
> > > struct fsck_instance *inst;
> > > int n = 0;
> > > + if (noexecute)
> > > + return 0;
> > > +
> > > for (inst = instance_list; inst; inst = inst->next) {
> > > if (inst->flags & FLAG_DONE)
> > > continue;
> > regards,
> > Zhan Chengbin
> >
>
On Fri, Sep 30, 2022 at 09:20:42AM +0200, Lukas Czerner wrote:
> On Fri, Sep 30, 2022 at 09:42:52AM +0800, zhanchengbin wrote:
> >
> >
> > On 2022/9/29 19:28, Lukas Czerner wrote:
> > > Hi,
> > >
> > > indeed we'd like to avoid killing the instance that was not ran because
> > > of noexecute. Can you try the following patch?
> > >
> > > Thanks!
> > > -Lukas
> >
> > Yes, you're right, I think we can fix it in this way.
> >
> > diff --git a/misc/fsck.c b/misc/fsck.c
> > index 1f6ec7d9..91edbf17 100644
> > --- a/misc/fsck.c
> > +++ b/misc/fsck.c
> > @@ -547,6 +547,8 @@ static int kill_all(int signum)
> > for (inst = instance_list; inst; inst = inst->next) {
> > if (inst->flags & FLAG_DONE)
> > continue;
> > + if (inst->pid == -1)
> > + continue;
>
> Yeah, that works as well although I find the "if (noexecute)" condition
> more obvious. We can do both. Also rather than checking for -1 we can
> check for <= 0 since anything other than real pid at this point is a bug.
>
> Feel free to send a proper patch.
Yes, please. It would be nice to have the same solution in the both
(e2fsprogs and util-linux) trees.
Karel
--
Karel Zak <[email protected]>
http://karelzak.blogspot.com
在 2022/10/10 16:09, Karel Zak 写道:
> On Fri, Sep 30, 2022 at 09:20:42AM +0200, Lukas Czerner wrote:
>> On Fri, Sep 30, 2022 at 09:42:52AM +0800, zhanchengbin wrote:
>>>
>>>
>>> On 2022/9/29 19:28, Lukas Czerner wrote:
>>>> Hi,
>>>>
>>>> indeed we'd like to avoid killing the instance that was not ran because
>>>> of noexecute. Can you try the following patch?
>>>>
>>>> Thanks!
>>>> -Lukas
>>>
>>> Yes, you're right, I think we can fix it in this way.
>>>
>>> diff --git a/misc/fsck.c b/misc/fsck.c
>>> index 1f6ec7d9..91edbf17 100644
>>> --- a/misc/fsck.c
>>> +++ b/misc/fsck.c
>>> @@ -547,6 +547,8 @@ static int kill_all(int signum)
>>> for (inst = instance_list; inst; inst = inst->next) {
>>> if (inst->flags & FLAG_DONE)
>>> continue;
>>> + if (inst->pid == -1)
>>> + continue;
>>
>> Yeah, that works as well although I find the "if (noexecute)" condition
>> more obvious. We can do both. Also rather than checking for -1 we can
>> check for <= 0 since anything other than real pid at this point is a bug.
>>
>> Feel free to send a proper patch.
>
> Yes, please. It would be nice to have the same solution in the both
> (e2fsprogs and util-linux) trees.
Ok, I have sent the patch, please review it.
-zhanchengbin
>
> Karel
>