2020-04-24 22:40:45

by Joel Fernandes

[permalink] [raw]
Subject: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

Kthread running the test needs to be stopped or it can continue
executing code unloaded by module causing a crash.

Suggested-by: Steven Rostedt <[email protected]>
Reported-by: Xiao Yang <[email protected]>
Link: http://lore.kernel.org/r/[email protected]
Signed-off-by: Joel Fernandes (Google) <[email protected]>
---
kernel/trace/preemptirq_delay_test.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index 31c0fad4cb9e1..1c28ca20e30b6 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -145,10 +145,10 @@ static struct attribute_group attr_group = {
};

static struct kobject *preemptirq_delay_kobj;
+static struct task_struct *test_task;

static int __init preemptirq_delay_init(void)
{
- struct task_struct *test_task;
int retval;

test_task = preemptirq_start_test();
@@ -171,6 +171,9 @@ static int __init preemptirq_delay_init(void)
static void __exit preemptirq_delay_exit(void)
{
kobject_put(preemptirq_delay_kobj);
+
+ if (test_task)
+ kthread_stop(test_task);
}

module_init(preemptirq_delay_init)
--
2.26.2.303.gf8c07b1a785-goog


2020-04-28 10:23:30

by Xiao Yang

[permalink] [raw]
Subject: Re: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

Hi Joel,

Thanks for your quick fix.

Unfortunately, it fixes my original panic but introduces other
issues(two wanings and one panic) on my arm64 vm, as below:
--------------------------------------------------------------------
[ 3465.434942] ------------[ cut here ]------------
[ 3465.435481] refcount_t: addition on 0; use-after-free.
[ 3465.437071] WARNING: CPU: 1 PID: 6708 at lib/refcount.c:25
refcount_warn_saturate+0x9c/0x140
[ 3465.437720] Modules linked in: preemptirq_delay_test(O-) sunrpc vfat
fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
ip_tables xfs libcrc32c virtio_net net_failover failover virtio_mmio
virtio_blk
[ 3465.439787] CPU: 1 PID: 6708 Comm: rmmod Tainted: G O
5.6.0-rc7+ #18
[ 3465.440316] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
02/06/2015
[ 3465.440967] pstate: 40000005 (nZcv daif -PAN -UAO)
[ 3465.441297] pc : refcount_warn_saturate+0x9c/0x140
[ 3465.441592] lr : refcount_warn_saturate+0x9c/0x140
[ 3465.441919] sp : fffffe001382fd70
[ 3465.442160] x29: fffffe001382fd70 x28: fffffc004876d200
[ 3465.442649] x27: 0000000000000000 x26: 0000000000000000
[ 3465.443071] x25: 0000000000000000 x24: fffffe00115bbbf0
[ 3465.443670] x23: 0000000000000000 x22: 0000000000000200
[ 3465.444194] x21: fffffe0011273988 x20: fffffc0063fdd228
[ 3465.444576] x19: fffffc0063fdd200 x18: 0000000000000010
[ 3465.444939] x17: 0000000000000000 x16: 0000000000000000
[ 3465.445329] x15: ffffffffffffffff x14: fffffe0011273988
[ 3465.445698] x13: fffffe009382fa97 x12: fffffe001382fa9f
[ 3465.446116] x11: fffffe00112b0000 x10: fffffe001382fa20
[ 3465.446498] x9 : 00000000ffffffd0 x8 : 6572662d72657466
[ 3465.446941] x7 : 0000000000000149 x6 : fffffe001127cf50
[ 3465.447375] x5 : fffffe001127c000 x4 : 0000000000000000
[ 3465.447757] x3 : fffffe001127cf50 x2 : 0000000000000000
[ 3465.448161] x1 : 2e36d2803fe6b700 x0 : 0000000000000000
[ 3465.448702] Call trace:
[ 3465.448979] refcount_warn_saturate+0x9c/0x140
[ 3465.449330] kthread_stop+0x48/0x278
[ 3465.450144] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
[ 3465.450625] __arm64_sys_delete_module+0x14c/0x298
[ 3465.450998] do_el0_svc+0xf8/0x1e0
[ 3465.451372] el0_sync_handler+0x134/0x1bc
[ 3465.451701] el0_sync+0x140/0x180
[ 3465.452099] ---[ end trace 1a8ec2201af5e8c7 ]---
[ 3465.478208] ------------[ cut here ]------------
[ 3465.478696] WARNING: CPU: 1 PID: 6708 at kernel/kthread.c:400
__kthread_bind_mask+0x34/0x90
[ 3465.479210] Modules linked in: preemptirq_delay_test(O-) sunrpc vfat
fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
ip_tables xfs libcrc32c virtio_net net_failover failover virtio_mmio
virtio_blk
[ 3465.480509] CPU: 1 PID: 6708 Comm: rmmod Tainted: G W O
5.6.0-rc7+ #18
[ 3465.480966] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
02/06/2015
[ 3465.481465] pstate: 40000005 (nZcv daif -PAN -UAO)
[ 3465.481821] pc : __kthread_bind_mask+0x34/0x90
[ 3465.482129] lr : __kthread_bind_mask+0x30/0x90
[ 3465.482419] sp : fffffe001382fd30
[ 3465.482639] x29: fffffe001382fd30 x28: fffffc004876d200
[ 3465.483017] x27: 0000000000000000 x26: 0000000000000000
[ 3465.483385] x25: 0000000000000000 x24: fffffe00115bbbf0
[ 3465.483795] x23: 0000000000000000 x22: 0000000000000200
[ 3465.484251] x21: fffffe0010a9cb88 x20: 0000000000000040
[ 3465.484599] x19: fffffc0063fdd200 x18: 0000000000000010
[ 3465.484957] x17: 0000000000000000 x16: 0000000000000000
[ 3465.485347] x15: ffffffffffffffff x14: fffffe0011273988
[ 3465.485714] x13: fffffe009382fa97 x12: fffffe001382fa9f
[ 3465.486210] x11: fffffe00112b0000 x10: fffffe001382fa20
[ 3465.486599] x9 : 00000000ffffffd0 x8 : 6572662d72657466
[ 3465.487039] x7 : 0000000000000149 x6 : fffffe001127cf50
[ 3465.487447] x5 : fffffe001127c000 x4 : 0000000000000001
[ 3465.487868] x3 : fffffe001127cf50 x2 : 2e36d2803fe6b700
[ 3465.488252] x1 : 0000000000000000 x0 : 0000000000000000
[ 3465.488628] Call trace:
[ 3465.488851] __kthread_bind_mask+0x34/0x90
[ 3465.489192] kthread_unpark+0xa0/0xb0
[ 3465.489456] kthread_stop+0x8c/0x278
[ 3465.489736] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
[ 3465.490216] __arm64_sys_delete_module+0x14c/0x298
[ 3465.490594] do_el0_svc+0xf8/0x1e0
[ 3465.490850] el0_sync_handler+0x134/0x1bc
[ 3465.491157] el0_sync+0x140/0x180
[ 3465.491413] ---[ end trace 1a8ec2201af5e8c8 ]---
[ 3465.504614] Unable to handle kernel NULL pointer dereference at
virtual address 0000000000000000
[ 3465.505340] Mem abort info:
[ 3465.505553] ESR = 0x96000006
[ 3465.505855] EC = 0x25: DABT (current EL), IL = 32 bits
[ 3465.506234] SET = 0, FnV = 0
[ 3465.506525] EA = 0, S1PTW = 0
[ 3465.506786] Data abort info:
[ 3465.507037] ISV = 0, ISS = 0x00000006
[ 3465.507304] CM = 0, WnR = 0
[ 3465.507685] user pgtable: 64k pages, 42-bit VAs, pgdp=0000000082450000
[ 3465.508225] [0000000000000000] pgd=0000000000000000,
pud=0000000000000000, pmd=0000000000000000
[ 3465.509049] Internal error: Oops: 96000006 [#1] PREEMPT SMP
[ 3465.509527] Modules linked in: preemptirq_delay_test(O-) sunrpc vfat
fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
ip_tables xfs libcrc32c virtio_net net_failover failover virtio_mmio
virtio_blk
[ 3465.510964] CPU: 1 PID: 6708 Comm: rmmod Tainted: G W O
5.6.0-rc7+ #18
[ 3465.511527] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
02/06/2015
[ 3465.512031] pstate: 60000085 (nZCv daIf -PAN -UAO)
[ 3465.512392] pc : __list_add_valid+0x18/0xa0
[ 3465.512740] lr : wait_for_completion+0xc8/0x178
[ 3465.513085] sp : fffffe001382fd00
[ 3465.513362] x29: fffffe001382fd00 x28: fffffc004876d200
[ 3465.513769] x27: 0000000000000000 x26: 0000000000000000
[ 3465.514205] x25: 0000000000000000 x24: fffffc0076e616c8
[ 3465.514638] x23: fffffe001382fd68 x22: 0000000000000000
[ 3465.515030] x21: fffffe0011273988 x20: fffffc0076e616c0
[ 3465.515444] x19: fffffc0076e616b8 x18: 0000000000000010
[ 3465.515826] x17: 0000000000000000 x16: 0000000000000000
[ 3465.516184] x15: ffffffffffffffff x14: fffffe0011273988
[ 3465.516584] x13: fffffe009382fa97 x12: fffffe001382fa9f
[ 3465.516976] x11: fffffe00112b0000 x10: fffffe001382fa20
[ 3465.517351] x9 : 00000000ffffffd0 x8 : 6572662d72657466
[ 3465.517750] x7 : 0000000000000149 x6 : fffffe001127cf50
[ 3465.518169] x5 : 0000000000000001 x4 : fffffc0076e616c8
[ 3465.518454] x3 : fffffe0010128b38 x2 : 0000000000000000
[ 3465.518711] x1 : 0000000000000000 x0 : fffffe001382fd68
[ 3465.518985] Call trace:
[ 3465.519157] __list_add_valid+0x18/0xa0
[ 3465.519351] wait_for_completion+0xc8/0x178
[ 3465.519578] kthread_stop+0x9c/0x278
[ 3465.519779] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
[ 3465.520109] __arm64_sys_delete_module+0x14c/0x298
[ 3465.520342] do_el0_svc+0xf8/0x1e0
[ 3465.520520] el0_sync_handler+0x134/0x1bc
[ 3465.520718] el0_sync+0x140/0x180
[ 3465.521177] Code: 910003fd f9400442 eb01005f 54000141 (f9400041)
[ 3465.522258] ---[ end trace 1a8ec2201af5e8c9 ]---
[ 3465.522746] Kernel panic - not syncing: Fatal exception
[ 3465.523242] SMP: stopping secondary CPUs
[ 3465.523898] Kernel Offset: disabled
[ 3465.524423] CPU features: 0x10002,20006082
[ 3465.524939] Memory Limit: none
[ 3465.525534] ---[ end Kernel panic - not syncing: Fatal exception ]---
--------------------------------------------------------------------

I am looking into these issues.

Thanks,
Xiao Yang
On 2020/4/25 6:36, Joel Fernandes (Google) wrote:
> Kthread running the test needs to be stopped or it can continue
> executing code unloaded by module causing a crash.
>
> Suggested-by: Steven Rostedt<[email protected]>
> Reported-by: Xiao Yang<[email protected]>
> Link: http://lore.kernel.org/r/[email protected]
> Signed-off-by: Joel Fernandes (Google)<[email protected]>
> ---
> kernel/trace/preemptirq_delay_test.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
> index 31c0fad4cb9e1..1c28ca20e30b6 100644
> --- a/kernel/trace/preemptirq_delay_test.c
> +++ b/kernel/trace/preemptirq_delay_test.c
> @@ -145,10 +145,10 @@ static struct attribute_group attr_group = {
> };
>
> static struct kobject *preemptirq_delay_kobj;
> +static struct task_struct *test_task;
>
> static int __init preemptirq_delay_init(void)
> {
> - struct task_struct *test_task;
> int retval;
>
> test_task = preemptirq_start_test();
> @@ -171,6 +171,9 @@ static int __init preemptirq_delay_init(void)
> static void __exit preemptirq_delay_exit(void)
> {
> kobject_put(preemptirq_delay_kobj);
> +
> + if (test_task)
> + kthread_stop(test_task);
> }
>
> module_init(preemptirq_delay_init)



2020-04-28 14:19:15

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

On Tue, Apr 28, 2020 at 06:19:05PM +0800, Xiao Yang wrote:
> Hi Joel,
>
> Thanks for your quick fix.
>
> Unfortunately, it fixes my original panic but introduces other
> issues(two wanings and one panic) on my arm64 vm, as below:
> --------------------------------------------------------------------
> [ 3465.434942] ------------[ cut here ]------------
> [ 3465.435481] refcount_t: addition on 0; use-after-free.
> [ 3465.437071] WARNING: CPU: 1 PID: 6708 at lib/refcount.c:25
> refcount_warn_saturate+0x9c/0x140
> [ 3465.437720] Modules linked in: preemptirq_delay_test(O-) sunrpc vfat
> fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
> ip_tables xfs libcrc32c virtio_net net_failover failover virtio_mmio
> virtio_blk
> [ 3465.439787] CPU: 1 PID: 6708 Comm: rmmod Tainted: G O
> 5.6.0-rc7+ #18
> [ 3465.440316] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
> 02/06/2015
> [ 3465.440967] pstate: 40000005 (nZcv daif -PAN -UAO)
> [ 3465.441297] pc : refcount_warn_saturate+0x9c/0x140
> [ 3465.441592] lr : refcount_warn_saturate+0x9c/0x140
> [ 3465.441919] sp : fffffe001382fd70
> [ 3465.442160] x29: fffffe001382fd70 x28: fffffc004876d200
> [ 3465.442649] x27: 0000000000000000 x26: 0000000000000000
> [ 3465.443071] x25: 0000000000000000 x24: fffffe00115bbbf0
> [ 3465.443670] x23: 0000000000000000 x22: 0000000000000200
> [ 3465.444194] x21: fffffe0011273988 x20: fffffc0063fdd228
> [ 3465.444576] x19: fffffc0063fdd200 x18: 0000000000000010
> [ 3465.444939] x17: 0000000000000000 x16: 0000000000000000
> [ 3465.445329] x15: ffffffffffffffff x14: fffffe0011273988
> [ 3465.445698] x13: fffffe009382fa97 x12: fffffe001382fa9f
> [ 3465.446116] x11: fffffe00112b0000 x10: fffffe001382fa20
> [ 3465.446498] x9 : 00000000ffffffd0 x8 : 6572662d72657466
> [ 3465.446941] x7 : 0000000000000149 x6 : fffffe001127cf50
> [ 3465.447375] x5 : fffffe001127c000 x4 : 0000000000000000
> [ 3465.447757] x3 : fffffe001127cf50 x2 : 0000000000000000
> [ 3465.448161] x1 : 2e36d2803fe6b700 x0 : 0000000000000000
> [ 3465.448702] Call trace:
> [ 3465.448979] refcount_warn_saturate+0x9c/0x140
> [ 3465.449330] kthread_stop+0x48/0x278
> [ 3465.450144] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
> [ 3465.450625] __arm64_sys_delete_module+0x14c/0x298
> [ 3465.450998] do_el0_svc+0xf8/0x1e0
> [ 3465.451372] el0_sync_handler+0x134/0x1bc
> [ 3465.451701] el0_sync+0x140/0x180
> [ 3465.452099] ---[ end trace 1a8ec2201af5e8c7 ]---
> [ 3465.478208] ------------[ cut here ]------------
> [ 3465.478696] WARNING: CPU: 1 PID: 6708 at kernel/kthread.c:400
> __kthread_bind_mask+0x34/0x90
> [ 3465.479210] Modules linked in: preemptirq_delay_test(O-) sunrpc vfat
> fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
> ip_tables xfs libcrc32c virtio_net net_failover failover virtio_mmio
> virtio_blk
> [ 3465.480509] CPU: 1 PID: 6708 Comm: rmmod Tainted: G W O
> 5.6.0-rc7+ #18
> [ 3465.480966] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
> 02/06/2015
> [ 3465.481465] pstate: 40000005 (nZcv daif -PAN -UAO)
> [ 3465.481821] pc : __kthread_bind_mask+0x34/0x90
> [ 3465.482129] lr : __kthread_bind_mask+0x30/0x90
> [ 3465.482419] sp : fffffe001382fd30
> [ 3465.482639] x29: fffffe001382fd30 x28: fffffc004876d200
> [ 3465.483017] x27: 0000000000000000 x26: 0000000000000000
> [ 3465.483385] x25: 0000000000000000 x24: fffffe00115bbbf0
> [ 3465.483795] x23: 0000000000000000 x22: 0000000000000200
> [ 3465.484251] x21: fffffe0010a9cb88 x20: 0000000000000040
> [ 3465.484599] x19: fffffc0063fdd200 x18: 0000000000000010
> [ 3465.484957] x17: 0000000000000000 x16: 0000000000000000
> [ 3465.485347] x15: ffffffffffffffff x14: fffffe0011273988
> [ 3465.485714] x13: fffffe009382fa97 x12: fffffe001382fa9f
> [ 3465.486210] x11: fffffe00112b0000 x10: fffffe001382fa20
> [ 3465.486599] x9 : 00000000ffffffd0 x8 : 6572662d72657466
> [ 3465.487039] x7 : 0000000000000149 x6 : fffffe001127cf50
> [ 3465.487447] x5 : fffffe001127c000 x4 : 0000000000000001
> [ 3465.487868] x3 : fffffe001127cf50 x2 : 2e36d2803fe6b700
> [ 3465.488252] x1 : 0000000000000000 x0 : 0000000000000000
> [ 3465.488628] Call trace:
> [ 3465.488851] __kthread_bind_mask+0x34/0x90
> [ 3465.489192] kthread_unpark+0xa0/0xb0
> [ 3465.489456] kthread_stop+0x8c/0x278
> [ 3465.489736] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
> [ 3465.490216] __arm64_sys_delete_module+0x14c/0x298
> [ 3465.490594] do_el0_svc+0xf8/0x1e0
> [ 3465.490850] el0_sync_handler+0x134/0x1bc
> [ 3465.491157] el0_sync+0x140/0x180
> [ 3465.491413] ---[ end trace 1a8ec2201af5e8c8 ]---
> [ 3465.504614] Unable to handle kernel NULL pointer dereference at
> virtual address 0000000000000000
> [ 3465.505340] Mem abort info:
> [ 3465.505553] ESR = 0x96000006
> [ 3465.505855] EC = 0x25: DABT (current EL), IL = 32 bits
> [ 3465.506234] SET = 0, FnV = 0
> [ 3465.506525] EA = 0, S1PTW = 0
> [ 3465.506786] Data abort info:
> [ 3465.507037] ISV = 0, ISS = 0x00000006
> [ 3465.507304] CM = 0, WnR = 0
> [ 3465.507685] user pgtable: 64k pages, 42-bit VAs, pgdp=0000000082450000
> [ 3465.508225] [0000000000000000] pgd=0000000000000000,
> pud=0000000000000000, pmd=0000000000000000
> [ 3465.509049] Internal error: Oops: 96000006 [#1] PREEMPT SMP
> [ 3465.509527] Modules linked in: preemptirq_delay_test(O-) sunrpc vfat
> fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
> ip_tables xfs libcrc32c virtio_net net_failover failover virtio_mmio
> virtio_blk
> [ 3465.510964] CPU: 1 PID: 6708 Comm: rmmod Tainted: G W O
> 5.6.0-rc7+ #18
> [ 3465.511527] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
> 02/06/2015
> [ 3465.512031] pstate: 60000085 (nZCv daIf -PAN -UAO)
> [ 3465.512392] pc : __list_add_valid+0x18/0xa0
> [ 3465.512740] lr : wait_for_completion+0xc8/0x178
> [ 3465.513085] sp : fffffe001382fd00
> [ 3465.513362] x29: fffffe001382fd00 x28: fffffc004876d200
> [ 3465.513769] x27: 0000000000000000 x26: 0000000000000000
> [ 3465.514205] x25: 0000000000000000 x24: fffffc0076e616c8
> [ 3465.514638] x23: fffffe001382fd68 x22: 0000000000000000
> [ 3465.515030] x21: fffffe0011273988 x20: fffffc0076e616c0
> [ 3465.515444] x19: fffffc0076e616b8 x18: 0000000000000010
> [ 3465.515826] x17: 0000000000000000 x16: 0000000000000000
> [ 3465.516184] x15: ffffffffffffffff x14: fffffe0011273988
> [ 3465.516584] x13: fffffe009382fa97 x12: fffffe001382fa9f
> [ 3465.516976] x11: fffffe00112b0000 x10: fffffe001382fa20
> [ 3465.517351] x9 : 00000000ffffffd0 x8 : 6572662d72657466
> [ 3465.517750] x7 : 0000000000000149 x6 : fffffe001127cf50
> [ 3465.518169] x5 : 0000000000000001 x4 : fffffc0076e616c8
> [ 3465.518454] x3 : fffffe0010128b38 x2 : 0000000000000000
> [ 3465.518711] x1 : 0000000000000000 x0 : fffffe001382fd68
> [ 3465.518985] Call trace:
> [ 3465.519157] __list_add_valid+0x18/0xa0
> [ 3465.519351] wait_for_completion+0xc8/0x178
> [ 3465.519578] kthread_stop+0x9c/0x278
> [ 3465.519779] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
> [ 3465.520109] __arm64_sys_delete_module+0x14c/0x298
> [ 3465.520342] do_el0_svc+0xf8/0x1e0
> [ 3465.520520] el0_sync_handler+0x134/0x1bc
> [ 3465.520718] el0_sync+0x140/0x180
> [ 3465.521177] Code: 910003fd f9400442 eb01005f 54000141 (f9400041)
> [ 3465.522258] ---[ end trace 1a8ec2201af5e8c9 ]---
> [ 3465.522746] Kernel panic - not syncing: Fatal exception
> [ 3465.523242] SMP: stopping secondary CPUs
> [ 3465.523898] Kernel Offset: disabled
> [ 3465.524423] CPU features: 0x10002,20006082
> [ 3465.524939] Memory Limit: none
> [ 3465.525534] ---[ end Kernel panic - not syncing: Fatal exception ]---
> --------------------------------------------------------------------
>
> I am looking into these issues.

I am wondering if it is because in your test, the kthread exits too quickly.
We have these comments in kthread_stop():
* If threadfn() may call do_exit() itself, the caller must ensure
* task_struct can't go away.

Does the below diff on top of the previous patch help?

---8<-----------------------

diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index 1c28ca20e30b6..8051946a18989 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -152,6 +152,8 @@ static int __init preemptirq_delay_init(void)
int retval;

test_task = preemptirq_start_test();
+ get_task_struct(test_task);
+
retval = PTR_ERR_OR_ZERO(test_task);
if (retval != 0)
return retval;
@@ -172,8 +174,10 @@ static void __exit preemptirq_delay_exit(void)
{
kobject_put(preemptirq_delay_kobj);

- if (test_task)
+ if (test_task) {
kthread_stop(test_task);
+ put_task_struct(test_task);
+ }
}

module_init(preemptirq_delay_init)

2020-04-28 14:46:05

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

On Tue, 28 Apr 2020 18:19:05 +0800
Xiao Yang <[email protected]> wrote:

> Hi Joel,
>
> Thanks for your quick fix.
>
> Unfortunately, it fixes my original panic but introduces other
> issues(two wanings and one panic) on my arm64 vm, as below:
>

Does the following on top of Joel's patch fix it?

-- Steve

diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index 1c28ca20e30b..6d9131ae7e8c 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -113,15 +113,27 @@ static int preemptirq_delay_run(void *data)

for (i = 0; i < s; i++)
(testfuncs[i])(i);
+
+ while (!kthread_should_stop()) {
+ schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+
+ __set_current_state(TASK_RUNNING);
+
return 0;
}

static struct task_struct *preemptirq_start_test(void)
{
+ struct task_struct *task;
char task_name[50];

snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
- return kthread_run(preemptirq_delay_run, NULL, task_name);
+ task = kthread_run(preemptirq_delay_run, NULL, task_name);
+ if (IS_ERR(task))
+ return NULL;
+ return task;
}



2020-04-28 14:48:04

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

On Tue, 28 Apr 2020 10:44:09 -0400
Steven Rostedt <[email protected]> wrote:

> On Tue, 28 Apr 2020 18:19:05 +0800
> Xiao Yang <[email protected]> wrote:
>
> > Hi Joel,
> >
> > Thanks for your quick fix.
> >
> > Unfortunately, it fixes my original panic but introduces other
> > issues(two wanings and one panic) on my arm64 vm, as below:
> >
>
> Does the following on top of Joel's patch fix it?
>
> -- Steve
>
> diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
> index 1c28ca20e30b..6d9131ae7e8c 100644
> --- a/kernel/trace/preemptirq_delay_test.c
> +++ b/kernel/trace/preemptirq_delay_test.c
> @@ -113,15 +113,27 @@ static int preemptirq_delay_run(void *data)
>
> for (i = 0; i < s; i++)
> (testfuncs[i])(i);
> +
> + while (!kthread_should_stop()) {
> + schedule();
> + set_current_state(TASK_INTERRUPTIBLE);
> + }
> +
> + __set_current_state(TASK_RUNNING);
> +
> return 0;
> }
>


The below isn't needed, as the return does check IS_ERR().

-- Steve

> static struct task_struct *preemptirq_start_test(void)
> {
> + struct task_struct *task;
> char task_name[50];
>
> snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
> - return kthread_run(preemptirq_delay_run, NULL, task_name);
> + task = kthread_run(preemptirq_delay_run, NULL, task_name);
> + if (IS_ERR(task))
> + return NULL;
> + return task;
> }
>
>
>

2020-04-29 11:39:54

by Xiao Yang

[permalink] [raw]
Subject: Re: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

Hi Joel, Steven

First of all, the previous dmesg may not be reliable and I often got the
following dmesg by inserting and remving preemptirq_delay_test in loops:
(btw: I think these dmesgs are caused by the same bug).
---------------------------------------------------
[ 178.484520] ------------[ cut here ]------------
[ 178.510058] refcount_t: addition on 0; use-after-free.
[ 178.511663] WARNING: CPU: 0 PID: 2389 at lib/refcount.c:25
refcount_warn_saturate+0x9c/0x140
[ 178.512266] Modules linked in: preemptirq_delay_test(-) sunrpc vfat
fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
ip_tables xfs libcrc32c virtio_net net_failover virtio_mmio failover
virtio_blk [last unloaded: preemptirq_delay_test]
[ 178.514061] CPU: 0 PID: 2389 Comm: rmmod Not tainted 5.6.0-rc7+ #18
[ 178.514389] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
02/06/2015
[ 178.514988] pstate: 40000005 (nZcv daif -PAN -UAO)
[ 178.515259] pc : refcount_warn_saturate+0x9c/0x140
[ 178.515562] lr : refcount_warn_saturate+0x9c/0x140
[ 178.515865] sp : fffffe001300fd70
[ 178.516082] x29: fffffe001300fd70 x28: fffffc004533e380
[ 178.516366] x27: 0000000000000000 x26: 0000000000000000
[ 178.516611] x25: 0000000000000000 x24: fffffe00115bbbf0
[ 178.516847] x23: 0000000000000000 x22: 0000000000000200
[ 178.517121] x21: fffffe0011273988 x20: fffffc007cf7af28
[ 178.517554] x19: fffffc007cf7af00 x18: 0000000000000010
[ 178.517795] x17: 0000000000000000 x16: 0000000000000000
[ 178.518044] x15: ffffffffffffffff x14: fffffe0011273988
[ 178.518279] x13: fffffe009300fa97 x12: fffffe001300fa9f
[ 178.518518] x11: fffffe00112b0000 x10: fffffe001300fa20
[ 178.518885] x9 : 00000000ffffffd0 x8 : 75203b30206e6f20
[ 178.519316] x7 : 000000000000014e x6 : fffffe001127cf50
[ 178.519661] x5 : fffffe001127c000 x4 : 0000000000000000
[ 178.519978] x3 : fffffe001127cf50 x2 : 0000000000000000
[ 178.520245] x1 : cecd1d29479b5500 x0 : 0000000000000000
[ 178.520659] Call trace:
[ 178.520850] refcount_warn_saturate+0x9c/0x140
[ 178.521084] kthread_stop+0x48/0x278
[ 178.521703] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
[ 178.522006] __arm64_sys_delete_module+0x14c/0x298
[ 178.522226] do_el0_svc+0xf8/0x1e0
[ 178.522403] el0_sync_handler+0x134/0x1bc
[ 178.522598] el0_sync+0x140/0x180
[ 178.522862] ---[ end trace 437c7bc9df5e92dd ]---
[ 178.524159] ------------[ cut here ]------------
[ 178.524420] refcount_t: underflow; use-after-free.
[ 178.524734] WARNING: CPU: 0 PID: 2389 at lib/refcount.c:28
refcount_warn_saturate+0xf0/0x140
[ 178.525079] Modules linked in: preemptirq_delay_test(-) sunrpc vfat
fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
ip_tables xfs libcrc32c virtio_net net_failover virtio_mmio failover
virtio_blk [last unloaded: preemptirq_delay_test]
[ 178.526190] CPU: 0 PID: 2389 Comm: rmmod Tainted: G W
5.6.0-rc7+ #18
[ 178.526541] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
02/06/2015
[ 178.526836] pstate: 40000005 (nZcv daif -PAN -UAO)
[ 178.527060] pc : refcount_warn_saturate+0xf0/0x140
[ 178.527269] lr : refcount_warn_saturate+0xf0/0x140
[ 178.527484] sp : fffffe001300fd70
[ 178.527643] x29: fffffe001300fd70 x28: fffffc004533e380
[ 178.527905] x27: 0000000000000000 x26: 0000000000000000
[ 178.528166] x25: 0000000000000000 x24: fffffe00115bbbf0
[ 178.528489] x23: 0000000000000000 x22: 0000000000000200
[ 178.528808] x21: 0000000000000000 x20: fffffc007cf7af28
[ 178.529080] x19: fffffc007cf7af00 x18: 0000000000000010
[ 178.529429] x17: 0000000000000000 x16: 0000000000000000
[ 178.529747] x15: ffffffffffffffff x14: 0720072007200720
[ 178.530100] x13: 0720072007200720 x12: 0720072007200720
[ 178.530366] x11: 0720072007200720 x10: 0720072007200720
[ 178.530632] x9 : 0720072007200720 x8 : 072007200720072e
[ 178.530891] x7 : 0000000000000170 x6 : fffffe001127cf50
[ 178.531154] x5 : fffffe001127c000 x4 : 0000000000000000
[ 178.531431] x3 : fffffe001127cf50 x2 : 0000000000000000
[ 178.531705] x1 : cecd1d29479b5500 x0 : 0000000000000000
[ 178.532003] Call trace:
[ 178.532166] refcount_warn_saturate+0xf0/0x140
[ 178.532434] kthread_stop+0x254/0x278
[ 178.532644] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
[ 178.532963] __arm64_sys_delete_module+0x14c/0x298
[ 178.533210] do_el0_svc+0xf8/0x1e0
[ 178.533415] el0_sync_handler+0x134/0x1bc
[ 178.533627] el0_sync+0x140/0x180
[ 178.533807] ---[ end trace 437c7bc9df5e92de ]---
[ 178.626471] Unable to handle kernel paging request at virtual address
0000fffffc0043ca
[ 178.627253] Mem abort info:
[ 178.627539] ESR = 0x96000004
[ 178.627927] EC = 0x25: DABT (current EL), IL = 32 bits
[ 178.628414] SET = 0, FnV = 0
[ 178.628806] EA = 0, S1PTW = 0
[ 178.629090] Data abort info:
[ 178.639723] ISV = 0, ISS = 0x00000004
[ 178.640182] CM = 0, WnR = 0
[ 178.640530] [0000fffffc0043ca] address between user and kernel
address ranges
[ 178.641300] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 178.641931] Modules linked in: sunrpc vfat fat ext4 mbcache jbd2
crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce ip_tables xfs
libcrc32c virtio_net net_failover virtio_mmio failover virtio_blk [last
unloaded: preemptirq_delay_test]
[ 178.643547] CPU: 1 PID: 2394 Comm: pmlogger_check Tainted: G W
5.6.0-rc7+ #18
[ 178.644219] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
02/06/2015
[ 178.644782] pstate: 60000005 (nZCv daif -PAN -UAO)
[ 178.645219] pc : __kmalloc+0xc8/0x2e8
[ 178.645538] lr : __kmalloc+0x9c/0x2e8
[ 178.645841] sp : fffffe001312fc20
[ 178.646127] x29: fffffe001312fc20 x28: fffffc0042a0af00
[ 178.646532] x27: fffffc005a7ad000 x26: 00000000cc79f001
[ 178.647037] x25: fffffc0042a0af00 x24: fffffc007d76fc00
[ 178.647431] x23: fffffe00103e95e4 x22: 000000000000001b
[ 178.647895] x21: 0000000000000cc0 x20: fffffc007d76fc00
[ 178.648276] x19: 0000fffffc0043ca x18: 0000000000000000
[ 178.648673] x17: 0000000000000000 x16: 0000000000000000
[ 178.649077] x15: 0000000000000000 x14: 0000000000000010
[ 178.649608] x13: 0000000000000000 x12: 0000000000000000
[ 178.649963] x11: 0000000000000000 x10: 0000000000000000
[ 178.650337] x9 : 0000000000000000 x8 : 0000000000001f18
[ 178.650683] x7 : 0000000000001f18 x6 : fffffc005a7ad1f8
[ 178.651099] x5 : 0000000000000000 x4 : 0000000000000000
[ 178.651557] x3 : 0000000000000000 x2 : fffffe001021d1e0
[ 178.652006] x1 : 0000000000000000 x0 : 0000000000000001
[ 178.652407] Call trace:
[ 178.652694] __kmalloc+0xc8/0x2e8
[ 178.652972] load_elf_binary+0xdc/0xdb8
[ 178.653286] search_binary_handler+0x98/0x268
[ 178.653626] __do_execve_file+0x4ec/0x7c8
[ 178.653985] __arm64_sys_execve+0x40/0x50
[ 178.654310] do_el0_svc+0xf8/0x1e0
[ 178.654578] el0_sync_handler+0x134/0x1bc
[ 178.654962] el0_sync+0x140/0x180
[ 178.655509] Code: b4001073 b9402281 b9401380 11000400 (f8616a7b)
[ 178.656543] ---[ end trace 437c7bc9df5e92df ]---
[ 178.657100] Kernel panic - not syncing: Fatal exception
[ 178.657630] SMP: stopping secondary CPUs
[ 178.658237] Kernel Offset: disabled
[ 178.658644] CPU features: 0x10002,20006082
[ 178.658903] Memory Limit: none
[ 178.659263] ---[ end Kernel panic - not syncing: Fatal exception ]---
---------------------------------------------------

Thanks,
Xiao Yang
On 2020/4/28 18:19, Xiao Yang wrote:
> Hi Joel,
>
> Thanks for your quick fix.
>
> Unfortunately, it fixes my original panic but introduces other
> issues(two wanings and one panic) on my arm64 vm, as below:
> --------------------------------------------------------------------
> [ 3465.434942] ------------[ cut here ]------------
> [ 3465.435481] refcount_t: addition on 0; use-after-free.
> [ 3465.437071] WARNING: CPU: 1 PID: 6708 at lib/refcount.c:25
> refcount_warn_saturate+0x9c/0x140
> [ 3465.437720] Modules linked in: preemptirq_delay_test(O-) sunrpc vfat
> fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
> ip_tables xfs libcrc32c virtio_net net_failover failover virtio_mmio
> virtio_blk
> [ 3465.439787] CPU: 1 PID: 6708 Comm: rmmod Tainted: G O
> 5.6.0-rc7+ #18
> [ 3465.440316] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
> 02/06/2015
> [ 3465.440967] pstate: 40000005 (nZcv daif -PAN -UAO)
> [ 3465.441297] pc : refcount_warn_saturate+0x9c/0x140
> [ 3465.441592] lr : refcount_warn_saturate+0x9c/0x140
> [ 3465.441919] sp : fffffe001382fd70
> [ 3465.442160] x29: fffffe001382fd70 x28: fffffc004876d200
> [ 3465.442649] x27: 0000000000000000 x26: 0000000000000000
> [ 3465.443071] x25: 0000000000000000 x24: fffffe00115bbbf0
> [ 3465.443670] x23: 0000000000000000 x22: 0000000000000200
> [ 3465.444194] x21: fffffe0011273988 x20: fffffc0063fdd228
> [ 3465.444576] x19: fffffc0063fdd200 x18: 0000000000000010
> [ 3465.444939] x17: 0000000000000000 x16: 0000000000000000
> [ 3465.445329] x15: ffffffffffffffff x14: fffffe0011273988
> [ 3465.445698] x13: fffffe009382fa97 x12: fffffe001382fa9f
> [ 3465.446116] x11: fffffe00112b0000 x10: fffffe001382fa20
> [ 3465.446498] x9 : 00000000ffffffd0 x8 : 6572662d72657466
> [ 3465.446941] x7 : 0000000000000149 x6 : fffffe001127cf50
> [ 3465.447375] x5 : fffffe001127c000 x4 : 0000000000000000
> [ 3465.447757] x3 : fffffe001127cf50 x2 : 0000000000000000
> [ 3465.448161] x1 : 2e36d2803fe6b700 x0 : 0000000000000000
> [ 3465.448702] Call trace:
> [ 3465.448979] refcount_warn_saturate+0x9c/0x140
> [ 3465.449330] kthread_stop+0x48/0x278
> [ 3465.450144] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
> [ 3465.450625] __arm64_sys_delete_module+0x14c/0x298
> [ 3465.450998] do_el0_svc+0xf8/0x1e0
> [ 3465.451372] el0_sync_handler+0x134/0x1bc
> [ 3465.451701] el0_sync+0x140/0x180
> [ 3465.452099] ---[ end trace 1a8ec2201af5e8c7 ]---
> [ 3465.478208] ------------[ cut here ]------------
> [ 3465.478696] WARNING: CPU: 1 PID: 6708 at kernel/kthread.c:400
> __kthread_bind_mask+0x34/0x90
> [ 3465.479210] Modules linked in: preemptirq_delay_test(O-) sunrpc vfat
> fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
> ip_tables xfs libcrc32c virtio_net net_failover failover virtio_mmio
> virtio_blk
> [ 3465.480509] CPU: 1 PID: 6708 Comm: rmmod Tainted: G W O
> 5.6.0-rc7+ #18
> [ 3465.480966] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
> 02/06/2015
> [ 3465.481465] pstate: 40000005 (nZcv daif -PAN -UAO)
> [ 3465.481821] pc : __kthread_bind_mask+0x34/0x90
> [ 3465.482129] lr : __kthread_bind_mask+0x30/0x90
> [ 3465.482419] sp : fffffe001382fd30
> [ 3465.482639] x29: fffffe001382fd30 x28: fffffc004876d200
> [ 3465.483017] x27: 0000000000000000 x26: 0000000000000000
> [ 3465.483385] x25: 0000000000000000 x24: fffffe00115bbbf0
> [ 3465.483795] x23: 0000000000000000 x22: 0000000000000200
> [ 3465.484251] x21: fffffe0010a9cb88 x20: 0000000000000040
> [ 3465.484599] x19: fffffc0063fdd200 x18: 0000000000000010
> [ 3465.484957] x17: 0000000000000000 x16: 0000000000000000
> [ 3465.485347] x15: ffffffffffffffff x14: fffffe0011273988
> [ 3465.485714] x13: fffffe009382fa97 x12: fffffe001382fa9f
> [ 3465.486210] x11: fffffe00112b0000 x10: fffffe001382fa20
> [ 3465.486599] x9 : 00000000ffffffd0 x8 : 6572662d72657466
> [ 3465.487039] x7 : 0000000000000149 x6 : fffffe001127cf50
> [ 3465.487447] x5 : fffffe001127c000 x4 : 0000000000000001
> [ 3465.487868] x3 : fffffe001127cf50 x2 : 2e36d2803fe6b700
> [ 3465.488252] x1 : 0000000000000000 x0 : 0000000000000000
> [ 3465.488628] Call trace:
> [ 3465.488851] __kthread_bind_mask+0x34/0x90
> [ 3465.489192] kthread_unpark+0xa0/0xb0
> [ 3465.489456] kthread_stop+0x8c/0x278
> [ 3465.489736] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
> [ 3465.490216] __arm64_sys_delete_module+0x14c/0x298
> [ 3465.490594] do_el0_svc+0xf8/0x1e0
> [ 3465.490850] el0_sync_handler+0x134/0x1bc
> [ 3465.491157] el0_sync+0x140/0x180
> [ 3465.491413] ---[ end trace 1a8ec2201af5e8c8 ]---
> [ 3465.504614] Unable to handle kernel NULL pointer dereference at
> virtual address 0000000000000000
> [ 3465.505340] Mem abort info:
> [ 3465.505553] ESR = 0x96000006
> [ 3465.505855] EC = 0x25: DABT (current EL), IL = 32 bits
> [ 3465.506234] SET = 0, FnV = 0
> [ 3465.506525] EA = 0, S1PTW = 0
> [ 3465.506786] Data abort info:
> [ 3465.507037] ISV = 0, ISS = 0x00000006
> [ 3465.507304] CM = 0, WnR = 0
> [ 3465.507685] user pgtable: 64k pages, 42-bit VAs, pgdp=0000000082450000
> [ 3465.508225] [0000000000000000] pgd=0000000000000000,
> pud=0000000000000000, pmd=0000000000000000
> [ 3465.509049] Internal error: Oops: 96000006 [#1] PREEMPT SMP
> [ 3465.509527] Modules linked in: preemptirq_delay_test(O-) sunrpc vfat
> fat ext4 mbcache jbd2 crct10dif_ce ghash_ce sha2_ce sha256_arm64 sha1_ce
> ip_tables xfs libcrc32c virtio_net net_failover failover virtio_mmio
> virtio_blk
> [ 3465.510964] CPU: 1 PID: 6708 Comm: rmmod Tainted: G W O
> 5.6.0-rc7+ #18
> [ 3465.511527] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0
> 02/06/2015
> [ 3465.512031] pstate: 60000085 (nZCv daIf -PAN -UAO)
> [ 3465.512392] pc : __list_add_valid+0x18/0xa0
> [ 3465.512740] lr : wait_for_completion+0xc8/0x178
> [ 3465.513085] sp : fffffe001382fd00
> [ 3465.513362] x29: fffffe001382fd00 x28: fffffc004876d200
> [ 3465.513769] x27: 0000000000000000 x26: 0000000000000000
> [ 3465.514205] x25: 0000000000000000 x24: fffffc0076e616c8
> [ 3465.514638] x23: fffffe001382fd68 x22: 0000000000000000
> [ 3465.515030] x21: fffffe0011273988 x20: fffffc0076e616c0
> [ 3465.515444] x19: fffffc0076e616b8 x18: 0000000000000010
> [ 3465.515826] x17: 0000000000000000 x16: 0000000000000000
> [ 3465.516184] x15: ffffffffffffffff x14: fffffe0011273988
> [ 3465.516584] x13: fffffe009382fa97 x12: fffffe001382fa9f
> [ 3465.516976] x11: fffffe00112b0000 x10: fffffe001382fa20
> [ 3465.517351] x9 : 00000000ffffffd0 x8 : 6572662d72657466
> [ 3465.517750] x7 : 0000000000000149 x6 : fffffe001127cf50
> [ 3465.518169] x5 : 0000000000000001 x4 : fffffc0076e616c8
> [ 3465.518454] x3 : fffffe0010128b38 x2 : 0000000000000000
> [ 3465.518711] x1 : 0000000000000000 x0 : fffffe001382fd68
> [ 3465.518985] Call trace:
> [ 3465.519157] __list_add_valid+0x18/0xa0
> [ 3465.519351] wait_for_completion+0xc8/0x178
> [ 3465.519578] kthread_stop+0x9c/0x278
> [ 3465.519779] preemptirq_delay_exit+0x28/0xfc8c [preemptirq_delay_test]
> [ 3465.520109] __arm64_sys_delete_module+0x14c/0x298
> [ 3465.520342] do_el0_svc+0xf8/0x1e0
> [ 3465.520520] el0_sync_handler+0x134/0x1bc
> [ 3465.520718] el0_sync+0x140/0x180
> [ 3465.521177] Code: 910003fd f9400442 eb01005f 54000141 (f9400041)
> [ 3465.522258] ---[ end trace 1a8ec2201af5e8c9 ]---
> [ 3465.522746] Kernel panic - not syncing: Fatal exception
> [ 3465.523242] SMP: stopping secondary CPUs
> [ 3465.523898] Kernel Offset: disabled
> [ 3465.524423] CPU features: 0x10002,20006082
> [ 3465.524939] Memory Limit: none
> [ 3465.525534] ---[ end Kernel panic - not syncing: Fatal exception ]---
> --------------------------------------------------------------------
>
> I am looking into these issues.
>
> Thanks,
> Xiao Yang
> On 2020/4/25 6:36, Joel Fernandes (Google) wrote:
>> Kthread running the test needs to be stopped or it can continue
>> executing code unloaded by module causing a crash.
>>
>> Suggested-by: Steven Rostedt<[email protected]>
>> Reported-by: Xiao Yang<[email protected]>
>> Link: http://lore.kernel.org/r/[email protected]
>> Signed-off-by: Joel Fernandes (Google)<[email protected]>
>> ---
>> kernel/trace/preemptirq_delay_test.c | 5 ++++-
>> 1 file changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
>> index 31c0fad4cb9e1..1c28ca20e30b6 100644
>> --- a/kernel/trace/preemptirq_delay_test.c
>> +++ b/kernel/trace/preemptirq_delay_test.c
>> @@ -145,10 +145,10 @@ static struct attribute_group attr_group = {
>> };
>>
>> static struct kobject *preemptirq_delay_kobj;
>> +static struct task_struct *test_task;
>>
>> static int __init preemptirq_delay_init(void)
>> {
>> - struct task_struct *test_task;
>> int retval;
>>
>> test_task = preemptirq_start_test();
>> @@ -171,6 +171,9 @@ static int __init preemptirq_delay_init(void)
>> static void __exit preemptirq_delay_exit(void)
>> {
>> kobject_put(preemptirq_delay_kobj);
>> +
>> + if (test_task)
>> + kthread_stop(test_task);
>> }
>>
>> module_init(preemptirq_delay_init)
>



2020-04-29 11:49:57

by Xiao Yang

[permalink] [raw]
Subject: Re: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

On 2020/4/28 22:15, Joel Fernandes wrote:
> I am wondering if it is because in your test, the kthread exits too quickly.
> We have these comments in kthread_stop():
> * If threadfn() may call do_exit() itself, the caller must ensure
> * task_struct can't go away.
>
> Does the below diff on top of the previous patch help?
>
> ---8<-----------------------
>
> diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
> index 1c28ca20e30b6..8051946a18989 100644
> --- a/kernel/trace/preemptirq_delay_test.c
> +++ b/kernel/trace/preemptirq_delay_test.c
> @@ -152,6 +152,8 @@ static int __init preemptirq_delay_init(void)
> int retval;
>
> test_task = preemptirq_start_test();
> + get_task_struct(test_task);
> +
> retval = PTR_ERR_OR_ZERO(test_task);
> if (retval != 0)
> return retval;
> @@ -172,8 +174,10 @@ static void __exit preemptirq_delay_exit(void)
> {
> kobject_put(preemptirq_delay_kobj);
>
> - if (test_task)
> + if (test_task) {
> kthread_stop(test_task);
> + put_task_struct(test_task);
> + }
> }

Hi Joel,

Thanks for your additional patch.

First, We have to avoid kbuild error by including <linux/sched/task.h>
---------------------------------------
kernel/trace/preemptirq_delay_test.c: In function ‘preemptirq_delay_init’:
kernel/trace/preemptirq_delay_test.c:155:2: error: implicit declaration
of function ‘get_task_struct’; did you mean ‘set_task_cpu’?
[-Werror=implicit-function-declaration]
get_task_struct(test_task);
^~~~~~~~~~~~~~~
set_task_cpu
kernel/trace/preemptirq_delay_test.c: In function ‘preemptirq_delay_exit’:
kernel/trace/preemptirq_delay_test.c:179:3: error: implicit declaration
of function ‘put_task_struct’; did you mean ‘set_task_cpu’?
[-Werror=implicit-function-declaration]
put_task_struct(test_task);
^~~~~~~~~~~~~~~
set_task_cpu
cc1: some warnings being treated as errors
---------------------------------------

Second, I used the following steps to do test and didn't get any
warning/panic after applying your additional patch:
---------------------------------------
for i in $(seq 1 100); do modprobe preemptirq_delay_test test_mode=irq
delay=500000; rmmod preemptirq_delay_test; done
for i in $(seq 1 100); do modprobe preemptirq_delay_test
test_mode=preempt delay=500000; rmmod preemptirq_delay_test; done
---------------------------------------

Thanks,
Xiao Yang


2020-04-29 11:56:22

by Xiao Yang

[permalink] [raw]
Subject: Re: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

On 2020/4/28 22:45, Steven Rostedt wrote:
> diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
>> index 1c28ca20e30b..6d9131ae7e8c 100644
>> --- a/kernel/trace/preemptirq_delay_test.c
>> +++ b/kernel/trace/preemptirq_delay_test.c
>> @@ -113,15 +113,27 @@ static int preemptirq_delay_run(void *data)
>>
>> for (i = 0; i< s; i++)
>> (testfuncs[i])(i);
>> +
>> + while (!kthread_should_stop()) {
>> + schedule();
>> + set_current_state(TASK_INTERRUPTIBLE);
>> + }
>> +
>> + __set_current_state(TASK_RUNNING);
>> +
>> return 0;
>> }
>>
Hi Steven,

Thanks for your patch.

I also used the following steps to do test and didn't get any
warning/panic after applying your patch.
---------------------------------
for i in $(seq 1 100); do modprobe preemptirq_delay_test test_mode=irq
delay=500000; rmmod preemptirq_delay_test; done
for i in $(seq 1 100); do modprobe preemptirq_delay_test
test_mode=preempt delay=500000; rmmod preemptirq_delay_test; done
---------------------------------

But I am not sure which fix(from you and Joel) is better.

Thanks,
Xiao Yang



2020-04-29 16:34:21

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

On Wed, 29 Apr 2020 19:54:16 +0800
Xiao Yang <[email protected]> wrote:

> On 2020/4/28 22:45, Steven Rostedt wrote:
> > diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
> >> index 1c28ca20e30b..6d9131ae7e8c 100644
> >> --- a/kernel/trace/preemptirq_delay_test.c
> >> +++ b/kernel/trace/preemptirq_delay_test.c
> >> @@ -113,15 +113,27 @@ static int preemptirq_delay_run(void *data)
> >>
> >> for (i = 0; i< s; i++)
> >> (testfuncs[i])(i);
> >> +
> >> + while (!kthread_should_stop()) {
> >> + schedule();
> >> + set_current_state(TASK_INTERRUPTIBLE);
> >> + }
> >> +
> >> + __set_current_state(TASK_RUNNING);
> >> +
> >> return 0;
> >> }
> >>
> Hi Steven,
>
> Thanks for your patch.
>
> I also used the following steps to do test and didn't get any
> warning/panic after applying your patch.
> ---------------------------------
> for i in $(seq 1 100); do modprobe preemptirq_delay_test test_mode=irq
> delay=500000; rmmod preemptirq_delay_test; done
> for i in $(seq 1 100); do modprobe preemptirq_delay_test
> test_mode=preempt delay=500000; rmmod preemptirq_delay_test; done
> ---------------------------------
>
> But I am not sure which fix(from you and Joel) is better.

Mine ;-)

The "kthread_should_stop()" is the normal way to end kernel threads. Using
get/put is not the orthodox way.

-- Steve

2020-04-29 19:14:15

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

On Wed, Apr 29, 2020 at 12:31:41PM -0400, Steven Rostedt wrote:
> On Wed, 29 Apr 2020 19:54:16 +0800
> Xiao Yang <[email protected]> wrote:
>
> > On 2020/4/28 22:45, Steven Rostedt wrote:
> > > diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
> > >> index 1c28ca20e30b..6d9131ae7e8c 100644
> > >> --- a/kernel/trace/preemptirq_delay_test.c
> > >> +++ b/kernel/trace/preemptirq_delay_test.c
> > >> @@ -113,15 +113,27 @@ static int preemptirq_delay_run(void *data)
> > >>
> > >> for (i = 0; i< s; i++)
> > >> (testfuncs[i])(i);
> > >> +
> > >> + while (!kthread_should_stop()) {
> > >> + schedule();
> > >> + set_current_state(TASK_INTERRUPTIBLE);
> > >> + }
> > >> +
> > >> + __set_current_state(TASK_RUNNING);
> > >> +
> > >> return 0;
> > >> }
> > >>
> > Hi Steven,
> >
> > Thanks for your patch.
> >
> > I also used the following steps to do test and didn't get any
> > warning/panic after applying your patch.
> > ---------------------------------
> > for i in $(seq 1 100); do modprobe preemptirq_delay_test test_mode=irq
> > delay=500000; rmmod preemptirq_delay_test; done
> > for i in $(seq 1 100); do modprobe preemptirq_delay_test
> > test_mode=preempt delay=500000; rmmod preemptirq_delay_test; done
> > ---------------------------------
> >
> > But I am not sure which fix(from you and Joel) is better.
>
> Mine ;-)
>
> The "kthread_should_stop()" is the normal way to end kernel threads. Using
> get/put is not the orthodox way.

Agreed :-) thanks.

- Joel

2020-05-06 13:42:45

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH] kernel/trace: Stop and wait for kthread on preempt irq module unload

On Wed, 29 Apr 2020 15:12:24 -0400
Joel Fernandes <[email protected]> wrote:

> > Mine ;-)
> >
> > The "kthread_should_stop()" is the normal way to end kernel threads. Using
> > get/put is not the orthodox way.
>
> Agreed :-) thanks.

And while I'm working on getting a new "urgent" branch out, I found this
code is buggier than I thought. I'll have a new patch soon.

-- Steve

2020-05-06 19:10:42

by Steven Rostedt

[permalink] [raw]
Subject: [PATCH] tracing: Wait for preempt irq delay thread to finish

From: "Steven Rostedt (VMware)" <[email protected]>

Running on a slower machine, it is possible that the preempt delay kernel
thread may still be executing if the module was immediately removed after
added, and this can cause the kernel to crash as the kernel thread might be
executing after its code has been removed.

There's no reason that the caller of the code shouldn't just wait for the
delay thread to finish, as the thread can also be created by a trigger in
the sysfs code, which also has the same issues.

Link: http://lore.kernel.org/r/[email protected]

Cc: [email protected]
Fixes: 793937236d1ee ("lib: Add module for testing preemptoff/irqsoff latency tracers")
Reported-by: Xiao Yang <[email protected]>
Signed-off-by: Steven Rostedt (VMware) <[email protected]>
---
kernel/trace/preemptirq_delay_test.c | 30 ++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index 31c0fad4cb9e..c4c86de63cf9 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -113,22 +113,42 @@ static int preemptirq_delay_run(void *data)

for (i = 0; i < s; i++)
(testfuncs[i])(i);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ while (!kthread_should_stop()) {
+ schedule();
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+
+ __set_current_state(TASK_RUNNING);
+
return 0;
}

-static struct task_struct *preemptirq_start_test(void)
+static int preemptirq_run_test(void)
{
+ struct task_struct *task;
+
char task_name[50];

snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
- return kthread_run(preemptirq_delay_run, NULL, task_name);
+ task = kthread_run(preemptirq_delay_run, NULL, task_name);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ if (task)
+ kthread_stop(task);
+ return 0;
}


static ssize_t trigger_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
- preemptirq_start_test();
+ ssize_t ret;
+
+ ret = preemptirq_run_test();
+ if (ret)
+ return ret;
return count;
}

@@ -148,11 +168,9 @@ static struct kobject *preemptirq_delay_kobj;

static int __init preemptirq_delay_init(void)
{
- struct task_struct *test_task;
int retval;

- test_task = preemptirq_start_test();
- retval = PTR_ERR_OR_ZERO(test_task);
+ retval = preemptirq_run_test();
if (retval != 0)
return retval;

--
2.20.1

2020-05-07 10:07:32

by Xiao Yang

[permalink] [raw]
Subject: Re: [PATCH] tracing: Wait for preempt irq delay thread to finish

Hi Steven,

Thanks for your further investigation.

I used the following ways to test your fix patch on my slow vm and
didn't see any issue:
1) Insert and remove preemptirq_delay_test in loops.
2) Insert preemptirq_delay_test, write to
/sys/kernel/preemptirq_delay_test/trigger and remove
preemptirq_delay_test in loops.
3) Ran irqsoff_tracer.tc in loops.

BTW: For irqsoff_tracer.tc, should we extend code to test the burst
feature and the sysfs trigger?

Reviewed-by: Xiao Yang <[email protected]>

Thanks,
Xiao Yang
On 2020/5/6 22:30, Steven Rostedt wrote:
> From: "Steven Rostedt (VMware)"<[email protected]>
>
> Running on a slower machine, it is possible that the preempt delay kernel
> thread may still be executing if the module was immediately removed after
> added, and this can cause the kernel to crash as the kernel thread might be
> executing after its code has been removed.
>
> There's no reason that the caller of the code shouldn't just wait for the
> delay thread to finish, as the thread can also be created by a trigger in
> the sysfs code, which also has the same issues.
>
> Link: http://lore.kernel.org/r/[email protected]
>
> Cc: [email protected]
> Fixes: 793937236d1ee ("lib: Add module for testing preemptoff/irqsoff latency tracers")
> Reported-by: Xiao Yang<[email protected]>
> Signed-off-by: Steven Rostedt (VMware)<[email protected]>
> ---
> kernel/trace/preemptirq_delay_test.c | 30 ++++++++++++++++++++++------
> 1 file changed, 24 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
> index 31c0fad4cb9e..c4c86de63cf9 100644
> --- a/kernel/trace/preemptirq_delay_test.c
> +++ b/kernel/trace/preemptirq_delay_test.c
> @@ -113,22 +113,42 @@ static int preemptirq_delay_run(void *data)
>
> for (i = 0; i< s; i++)
> (testfuncs[i])(i);
> +
> + set_current_state(TASK_INTERRUPTIBLE);
> + while (!kthread_should_stop()) {
> + schedule();
> + set_current_state(TASK_INTERRUPTIBLE);
> + }
> +
> + __set_current_state(TASK_RUNNING);
> +
> return 0;
> }
>
> -static struct task_struct *preemptirq_start_test(void)
> +static int preemptirq_run_test(void)
> {
> + struct task_struct *task;
> +
> char task_name[50];
>
> snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
> - return kthread_run(preemptirq_delay_run, NULL, task_name);
> + task = kthread_run(preemptirq_delay_run, NULL, task_name);
> + if (IS_ERR(task))
> + return PTR_ERR(task);
> + if (task)
> + kthread_stop(task);
> + return 0;
> }
>
>
> static ssize_t trigger_store(struct kobject *kobj, struct kobj_attribute *attr,
> const char *buf, size_t count)
> {
> - preemptirq_start_test();
> + ssize_t ret;
> +
> + ret = preemptirq_run_test();
> + if (ret)
> + return ret;
> return count;
> }
>
> @@ -148,11 +168,9 @@ static struct kobject *preemptirq_delay_kobj;
>
> static int __init preemptirq_delay_init(void)
> {
> - struct task_struct *test_task;
> int retval;
>
> - test_task = preemptirq_start_test();
> - retval = PTR_ERR_OR_ZERO(test_task);
> + retval = preemptirq_run_test();
> if (retval != 0)
> return retval;
>



2020-05-07 12:32:08

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH] tracing: Wait for preempt irq delay thread to finish



On May 7, 2020 6:05:02 AM EDT, Xiao Yang <[email protected]> wrote:
>Hi Steven,
>
>Thanks for your further investigation.
>
>I used the following ways to test your fix patch on my slow vm and
>didn't see any issue:
>1) Insert and remove preemptirq_delay_test in loops.
>2) Insert preemptirq_delay_test, write to
>/sys/kernel/preemptirq_delay_test/trigger and remove
>preemptirq_delay_test in loops.
>3) Ran irqsoff_tracer.tc in loops.
>
>BTW: For irqsoff_tracer.tc, should we extend code to test the burst
>feature and the sysfs trigger?
>
>Reviewed-by: Xiao Yang <[email protected]>
>

Thanks!

Reviewed-by: Joel Fernandes <[email protected]>

- Joel


>Thanks,
>Xiao Yang
>On 2020/5/6 22:30, Steven Rostedt wrote:
>> From: "Steven Rostedt (VMware)"<[email protected]>
>>
>> Running on a slower machine, it is possible that the preempt delay
>kernel
>> thread may still be executing if the module was immediately removed
>after
>> added, and this can cause the kernel to crash as the kernel thread
>might be
>> executing after its code has been removed.
>>
>> There's no reason that the caller of the code shouldn't just wait for
>the
>> delay thread to finish, as the thread can also be created by a
>trigger in
>> the sysfs code, which also has the same issues.
>>
>> Link: http://lore.kernel.org/r/[email protected]
>>
>> Cc: [email protected]
>> Fixes: 793937236d1ee ("lib: Add module for testing preemptoff/irqsoff
>latency tracers")
>> Reported-by: Xiao Yang<[email protected]>
>> Signed-off-by: Steven Rostedt (VMware)<[email protected]>
>> ---
>> kernel/trace/preemptirq_delay_test.c | 30
>++++++++++++++++++++++------
>> 1 file changed, 24 insertions(+), 6 deletions(-)
>>
>> diff --git a/kernel/trace/preemptirq_delay_test.c
>b/kernel/trace/preemptirq_delay_test.c
>> index 31c0fad4cb9e..c4c86de63cf9 100644
>> --- a/kernel/trace/preemptirq_delay_test.c
>> +++ b/kernel/trace/preemptirq_delay_test.c
>> @@ -113,22 +113,42 @@ static int preemptirq_delay_run(void *data)
>>
>> for (i = 0; i< s; i++)
>> (testfuncs[i])(i);
>> +
>> + set_current_state(TASK_INTERRUPTIBLE);
>> + while (!kthread_should_stop()) {
>> + schedule();
>> + set_current_state(TASK_INTERRUPTIBLE);
>> + }
>> +
>> + __set_current_state(TASK_RUNNING);
>> +
>> return 0;
>> }
>>
>> -static struct task_struct *preemptirq_start_test(void)
>> +static int preemptirq_run_test(void)
>> {
>> + struct task_struct *task;
>> +
>> char task_name[50];
>>
>> snprintf(task_name, sizeof(task_name), "%s_test", test_mode);
>> - return kthread_run(preemptirq_delay_run, NULL, task_name);
>> + task = kthread_run(preemptirq_delay_run, NULL, task_name);
>> + if (IS_ERR(task))
>> + return PTR_ERR(task);
>> + if (task)
>> + kthread_stop(task);
>> + return 0;
>> }
>>
>>
>> static ssize_t trigger_store(struct kobject *kobj, struct
>kobj_attribute *attr,
>> const char *buf, size_t count)
>> {
>> - preemptirq_start_test();
>> + ssize_t ret;
>> +
>> + ret = preemptirq_run_test();
>> + if (ret)
>> + return ret;
>> return count;
>> }
>>
>> @@ -148,11 +168,9 @@ static struct kobject *preemptirq_delay_kobj;
>>
>> static int __init preemptirq_delay_init(void)
>> {
>> - struct task_struct *test_task;
>> int retval;
>>
>> - test_task = preemptirq_start_test();
>> - retval = PTR_ERR_OR_ZERO(test_task);
>> + retval = preemptirq_run_test();
>> if (retval != 0)
>> return retval;
>>

--
Sent from my Android device with K-9 Mail. Please excuse my brevity.