2023-05-09 11:17:54

by Nicholas Piggin

[permalink] [raw]
Subject: [RFC PATCH 0/3] Prohibit irq disable when disabled or enable when enabled

Hi,

Any thoughts on tightening this up and adding some warnings for this?
Motivated by a bug I added to powerpc which broke disabled-disable
callers.

Not that I necessarily want archs to be able to depend on this, it
would always be better to be tolerant. But it seems a risky pattern
for random code to be using.

This still fires off quite a few warnings on powerpc, and I haven't
test booted any other arch so probably wouldn't ask to upstream patch
3 just yet, but if there are no objections to the idea I might do a
bit more work on it.

Thanks,
Nick

Nicholas Piggin (3):
hrtimer: balance irq save/restore
init: Require archs call start_kernel with arch irqs disabled
irqflags: Warn on irq disable when disabled and enable when enabled

include/linux/irqflags.h | 26 ++++++++++++++++++++++++--
init/main.c | 6 +++++-
kernel/locking/irqflag-debug.c | 14 ++++++++++++++
kernel/time/hrtimer.c | 22 +++++++++++-----------
4 files changed, 54 insertions(+), 14 deletions(-)

--
2.40.1


2023-05-09 11:21:18

by Nicholas Piggin

[permalink] [raw]
Subject: [RFC PATCH 1/3] hrtimer: balance irq save/restore

hrtimers uses a trick where irq flags are saved initially and then
passed to callers to restore irqs when dropping the lock, which then
re-lock with raw_spin_lock_irq.

This is not actually a bug, but it is one of the few places in the
kernel that may disable irqs when they are already disabled. It would
be nice to have a debug check for that and enforce irq calls are
balanced. It's one thing for core code to be clever, but something like
this in a crufty driver could be a red flag.

Signed-off-by: Nicholas Piggin <[email protected]>
---
kernel/time/hrtimer.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e8c08292defc..8baa36b091f0 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1364,13 +1364,13 @@ static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
* allows the waiter to acquire the lock and make progress.
*/
static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
- unsigned long flags)
+ unsigned long *flags)
{
if (atomic_read(&cpu_base->timer_waiters)) {
- raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_base->lock, *flags);
spin_unlock(&cpu_base->softirq_expiry_lock);
spin_lock(&cpu_base->softirq_expiry_lock);
- raw_spin_lock_irq(&cpu_base->lock);
+ raw_spin_lock_irqsave(&cpu_base->lock, *flags);
}
}

@@ -1424,7 +1424,7 @@ hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
static inline void
hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
- unsigned long flags) { }
+ unsigned long *flags) { }
#endif

/**
@@ -1642,7 +1642,7 @@ EXPORT_SYMBOL_GPL(hrtimer_active);
static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
struct hrtimer_clock_base *base,
struct hrtimer *timer, ktime_t *now,
- unsigned long flags) __must_hold(&cpu_base->lock)
+ unsigned long *flags) __must_hold(&cpu_base->lock)
{
enum hrtimer_restart (*fn)(struct hrtimer *);
bool expires_in_hardirq;
@@ -1678,7 +1678,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
* protected against migration to a different CPU even if the lock
* is dropped.
*/
- raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+ raw_spin_unlock_irqrestore(&cpu_base->lock, *flags);
trace_hrtimer_expire_entry(timer, now);
expires_in_hardirq = lockdep_hrtimer_enter(timer);

@@ -1686,7 +1686,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,

lockdep_hrtimer_exit(expires_in_hardirq);
trace_hrtimer_expire_exit(timer);
- raw_spin_lock_irq(&cpu_base->lock);
+ raw_spin_lock_irqsave(&cpu_base->lock, *flags);

/*
* Note: We clear the running state after enqueue_hrtimer and
@@ -1715,7 +1715,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
}

static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
- unsigned long flags, unsigned int active_mask)
+ unsigned long *flags, unsigned int active_mask)
{
struct hrtimer_clock_base *base;
unsigned int active = cpu_base->active_bases & active_mask;
@@ -1763,7 +1763,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
raw_spin_lock_irqsave(&cpu_base->lock, flags);

now = hrtimer_update_base(cpu_base);
- __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
+ __hrtimer_run_queues(cpu_base, now, &flags, HRTIMER_ACTIVE_SOFT);

cpu_base->softirq_activated = 0;
hrtimer_update_softirq_timer(cpu_base, true);
@@ -1808,7 +1808,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
raise_softirq_irqoff(HRTIMER_SOFTIRQ);
}

- __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
+ __hrtimer_run_queues(cpu_base, now, &flags, HRTIMER_ACTIVE_HARD);

/* Reevaluate the clock bases for the [soft] next expiry */
expires_next = hrtimer_update_next_event(cpu_base);
@@ -1921,7 +1921,7 @@ void hrtimer_run_queues(void)
raise_softirq_irqoff(HRTIMER_SOFTIRQ);
}

- __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
+ __hrtimer_run_queues(cpu_base, now, &flags, HRTIMER_ACTIVE_HARD);
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
}

--
2.40.1

2023-05-09 11:40:52

by Nicholas Piggin

[permalink] [raw]
Subject: [RFC PATCH 3/3] irqflags: Warn on irq disable when disabled and enable when enabled

Add CONFIG_IRQFLAGS_DEBUG checks for local_irq_disable() when irqs
are already disabled, and local_irq_enable() when they are enabled.
This could help catch risky or unbalanced irq manipulation.

Signed-off-by: Nicholas Piggin <[email protected]>
---
include/linux/irqflags.h | 26 ++++++++++++++++++++++++--
kernel/locking/irqflag-debug.c | 14 ++++++++++++++
2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 5ec0fa71399e..82f54cda2c20 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -157,6 +157,18 @@ do { \
#endif

#ifdef CONFIG_DEBUG_IRQFLAGS
+extern void warn_bogus_irq_disable(void);
+#define raw_check_bogus_irq_disable() \
+ do { \
+ if (unlikely(arch_irqs_disabled())) \
+ warn_bogus_irq_disable(); \
+ } while (0)
+extern void warn_bogus_irq_enable(void);
+#define raw_check_bogus_irq_enable() \
+ do { \
+ if (unlikely(!arch_irqs_disabled())) \
+ warn_bogus_irq_enable(); \
+ } while (0)
extern void warn_bogus_irq_restore(void);
#define raw_check_bogus_irq_restore() \
do { \
@@ -164,14 +176,24 @@ extern void warn_bogus_irq_restore(void);
warn_bogus_irq_restore(); \
} while (0)
#else
+#define raw_check_bogus_irq_disable() do { } while (0)
+#define raw_check_bogus_irq_enable() do { } while (0)
#define raw_check_bogus_irq_restore() do { } while (0)
#endif

/*
* Wrap the arch provided IRQ routines to provide appropriate checks.
*/
-#define raw_local_irq_disable() arch_local_irq_disable()
-#define raw_local_irq_enable() arch_local_irq_enable()
+#define raw_local_irq_disable() \
+ do { \
+ raw_check_bogus_irq_disable(); \
+ arch_local_irq_disable(); \
+ } while (0)
+#define raw_local_irq_enable() \
+ do { \
+ raw_check_bogus_irq_enable(); \
+ arch_local_irq_enable(); \
+ } while (0)
#define raw_local_irq_save(flags) \
do { \
typecheck(unsigned long, flags); \
diff --git a/kernel/locking/irqflag-debug.c b/kernel/locking/irqflag-debug.c
index 810b50344d35..20e5e1b9a86f 100644
--- a/kernel/locking/irqflag-debug.c
+++ b/kernel/locking/irqflag-debug.c
@@ -4,6 +4,20 @@
#include <linux/export.h>
#include <linux/irqflags.h>

+noinstr void warn_bogus_irq_disable(void)
+{
+ instrumentation_begin();
+ WARN_ONCE(1, "raw_local_irq_disable() called with IRQs disabled\n");
+ instrumentation_end();
+}
+EXPORT_SYMBOL(warn_bogus_irq_disable);
+noinstr void warn_bogus_irq_enable(void)
+{
+ instrumentation_begin();
+ WARN_ONCE(1, "raw_local_irq_enable() called with IRQs enabled\n");
+ instrumentation_end();
+}
+EXPORT_SYMBOL(warn_bogus_irq_enable);
noinstr void warn_bogus_irq_restore(void)
{
instrumentation_begin();
--
2.40.1

2023-05-22 08:34:54

by Yujie Liu

[permalink] [raw]
Subject: Re: [RFC PATCH 3/3] irqflags: Warn on irq disable when disabled and enable when enabled

Hello,

kernel test robot noticed "WARNING:at_kernel/locking/irqflag-debug.c:#warn_bogus_irq_disable" on:

commit: d91910ca3451a1d633351e3492117ca62054eb92 ("[RFC PATCH 3/3] irqflags: Warn on irq disable when disabled and enable when enabled")
url: https://github.com/intel-lab-lkp/linux/commits/Nicholas-Piggin/hrtimer-balance-irq-save-restore/20230509-191049
base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git 158009f1b4a33bc0f354b994eea361362bd83226
patch link: https://lore.kernel.org/all/[email protected]/
patch subject: [RFC PATCH 3/3] irqflags: Warn on irq disable when disabled and enable when enabled

in testcase: trinity
version:
with following parameters:

runtime: 600s

test-description: Trinity is a linux system call fuzz tester.
test-url: http://codemonkey.org.uk/projects/trinity/

compiler: clang-14
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)


If you fix the issue, kindly add following tag
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-lkp/[email protected]


[ 0.010000][ C0] ------------[ cut here ]------------
[ 0.010000][ C0] raw_local_irq_disable() called with IRQs disabled
[ 0.010000][ C0] WARNING: CPU: 0 PID: 0 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_disable (kernel/locking/irqflag-debug.c:10)
[ 0.010000][ C0] Modules linked in:
[ 0.010000][ C0] CPU: 0 PID: 0 Comm: swapper Not tainted 6.3.0-rc3-00033-gd91910ca3451 #1 4790732644a1b20c085ca6f51351525a5135ad3c
[ 0.010000][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014
[ 0.010000][ C0] EIP: warn_bogus_irq_disable (kernel/locking/irqflag-debug.c:10)
[ 0.010000][ C0] Code: b3 e9 e9 fe ff ff 00 00 55 89 e5 80 3d 7d f1 48 b3 00 74 02 5d c3 c6 05 7d f1 48 b3 01 68 ea 43 d0 b2 e8 81 38 f0 fe 83 c4 04 <0f> 0b 5d c3 90 90 90 90 90 90 90 90 90 90 55 89 e5 80 3d 7e f1 48
All code
========
0: b3 e9 mov $0xe9,%bl
2: e9 fe ff ff 00 jmp 0x1000005
7: 00 55 89 add %dl,-0x77(%rbp)
a: e5 80 in $0x80,%eax
c: 3d 7d f1 48 b3 cmp $0xb348f17d,%eax
11: 00 74 02 5d add %dh,0x5d(%rdx,%rax,1)
15: c3 ret
16: c6 05 7d f1 48 b3 01 movb $0x1,-0x4cb70e83(%rip) # 0xffffffffb348f19a
1d: 68 ea 43 d0 b2 push $0xffffffffb2d043ea
22: e8 81 38 f0 fe call 0xfffffffffef038a8
27: 83 c4 04 add $0x4,%esp
2a:* 0f 0b ud2 <-- trapping instruction
2c: 5d pop %rbp
2d: c3 ret
2e: 90 nop
2f: 90 nop
30: 90 nop
31: 90 nop
32: 90 nop
33: 90 nop
34: 90 nop
35: 90 nop
36: 90 nop
37: 90 nop
38: 55 push %rbp
39: 89 e5 mov %esp,%ebp
3b: 80 .byte 0x80
3c: 3d .byte 0x3d
3d: 7e f1 jle 0x30
3f: 48 rex.W

Code starting with the faulting instruction
===========================================
0: 0f 0b ud2
2: 5d pop %rbp
3: c3 ret
4: 90 nop
5: 90 nop
6: 90 nop
7: 90 nop
8: 90 nop
9: 90 nop
a: 90 nop
b: 90 nop
c: 90 nop
d: 90 nop
e: 55 push %rbp
f: 89 e5 mov %esp,%ebp
11: 80 .byte 0x80
12: 3d .byte 0x3d
13: 7e f1 jle 0x6
15: 48 rex.W
[ 0.010000][ C0] EAX: 00000031 EBX: 00000030 ECX: 00000000 EDX: b2e15db8
[ 0.010000][ C0] ESI: 00200096 EDI: 00000030 EBP: b2e15e64 ESP: b2e15e64
[ 0.010000][ C0] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00210082
[ 0.010000][ C0] CR0: 80050033 CR2: ffd99000 CR3: 035b4000 CR4: 00040690
[ 0.010000][ C0] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[ 0.010000][ C0] DR6: fffe0ff0 DR7: 00000400
[ 0.010000][ C0] Call Trace:
[ 0.010000][ C0] __irq_exit_rcu (kernel/softirq.c:?)
[ 0.010000][ C0] irq_exit_rcu (kernel/softirq.c:664)
[ 0.010000][ C0] common_interrupt (arch/x86/kernel/irq.c:240)
[ 0.010000][ C0] asm_common_interrupt (arch/x86/entry/entry_32.S:636)
[ 0.010000][ C0] EIP: _raw_spin_unlock_irqrestore (kernel/locking/spinlock.c:195)
[ 0.010000][ C0] Code: 74 05 e8 07 dc fb fe c7 45 f4 ff ff ff ff 9c 8f 45 f4 f7 45 f4 00 02 00 00 75 16 f7 c6 00 02 00 00 74 01 fb ff 0d 84 e8 48 b3 <83> c4 04 5e 5f 5d c3 e8 58 49 ff ff f7 c6 00 02 00 00 75 e5 eb e4
All code
========
0: 74 05 je 0x7
2: e8 07 dc fb fe call 0xfffffffffefbdc0e
7: c7 45 f4 ff ff ff ff movl $0xffffffff,-0xc(%rbp)
e: 9c pushf
f: 8f 45 f4 pop -0xc(%rbp)
12: f7 45 f4 00 02 00 00 testl $0x200,-0xc(%rbp)
19: 75 16 jne 0x31
1b: f7 c6 00 02 00 00 test $0x200,%esi
21: 74 01 je 0x24
23: fb sti
24: ff 0d 84 e8 48 b3 decl -0x4cb7177c(%rip) # 0xffffffffb348e8ae
2a:* 83 c4 04 add $0x4,%esp <-- trapping instruction
2d: 5e pop %rsi
2e: 5f pop %rdi
2f: 5d pop %rbp
30: c3 ret
31: e8 58 49 ff ff call 0xffffffffffff498e
36: f7 c6 00 02 00 00 test $0x200,%esi
3c: 75 e5 jne 0x23
3e: eb e4 jmp 0x24

Code starting with the faulting instruction
===========================================
0: 83 c4 04 add $0x4,%esp
3: 5e pop %rsi
4: 5f pop %rdi
5: 5d pop %rbp
6: c3 ret
7: e8 58 49 ff ff call 0xffffffffffff4964
c: f7 c6 00 02 00 00 test $0x200,%esi
12: 75 e5 jne 0xfffffffffffffff9
14: eb e4 jmp 0xfffffffffffffffa
[ 0.010000][ C0] EAX: 00000193 EBX: b5dfd634 ECX: 00000000 EDX: b2e259b0
[ 0.010000][ C0] ESI: 00200282 EDI: b5dfd660 EBP: b2e15f08 ESP: b2e15efc
[ 0.010000][ C0] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 EFLAGS: 00200202
[ 0.010000][ C0] ? _raw_spin_unlock_irqrestore (kernel/locking/spinlock.c:195)
[ 0.010000][ C0] __setup_irq (kernel/irq/internals.h:155)
[ 0.010000][ C0] request_threaded_irq (kernel/irq/manage.c:2201)
[ 0.010000][ C0] setup_default_timer_irq (include/linux/interrupt.h:168 arch/x86/kernel/time.c:70)
[ 0.010000][ C0] hpet_time_init (arch/x86/kernel/time.c:?)
[ 0.010000][ C0] x86_late_time_init (arch/x86/kernel/time.c:100)
[ 0.010000][ C0] start_kernel (init/main.c:1117)
[ 0.010000][ C0] i386_start_kernel (arch/x86/kernel/head32.c:57)
[ 0.010000][ C0] startup_32_smp (arch/x86/kernel/head_32.S:319)
[ 0.010000][ C0] irq event stamp: 404
[ 0.010000][ C0] hardirqs last enabled at (403): _raw_spin_unlock_irqrestore (arch/x86/include/asm/irqflags.h:19 arch/x86/include/asm/irqflags.h:67 arch/x86/include/asm/irqflags.h:127 include/linux/spinlock_api_smp.h:151 kernel/locking/spinlock.c:194)
[ 0.010000][ C0] hardirqs last disabled at (404): common_interrupt (arch/x86/kernel/irq.c:?)
[ 0.010000][ C0] softirqs last enabled at (0): 0x0
[ 0.010000][ C0] softirqs last disabled at (0): 0x0
[ 0.010000][ C0] ---[ end trace 0000000000000000 ]---


To reproduce:

# build kernel
cd linux
cp config-6.3.0-rc3-00033-gd91910ca3451 .config
make HOSTCC=clang-14 CC=clang-14 ARCH=i386 olddefconfig prepare modules_prepare bzImage modules
make HOSTCC=clang-14 CC=clang-14 ARCH=i386 INSTALL_MOD_PATH=<mod-install-dir> modules_install
cd <mod-install-dir>
find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz


git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email

# if come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.


--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


Attachments:
(No filename) (8.69 kB)
config-6.3.0-rc3-00033-gd91910ca3451 (152.67 kB)
job-script (4.45 kB)
dmesg.xz (53.41 kB)
trinity (3.09 kB)
Download all attachments