Subject: [ANNOUNCE] v4.18.12-rt7

Dear RT folks!

I'm pleased to announce the v4.18.12-rt7 patch set.

Changes since v4.18.12-rt6:

- Let the watchdog core expire the watchdog timer in hardirq context.
Otherwise a high priority thread may not allow the watchdog to run.
The user must still adjust the priority of "watchdogd". Reported by
Steffen Trumtrar and Tim Sander, patched by Julia Cartwright.

- Make the kasan-related lock a raw_spinlock_t. Patch by Clark Williams.

- Explicitly initialize a variable in the amba-pl011 driver to avoid
false "uninitialized" warning. Patch by Kurt Kanzenbach.

- Export the __migrate_disabled symbol so modules (like lttng-modules)
don't complain about a missing symbol. Reported and fix suggested by
Jonathan Rajotte-Julien.

Known issues
- A warning triggered in "rcu_note_context_switch" originated from
SyS_timer_gettime(). The issue was always there, it is now
visible. Reported by Grygorii Strashko and Daniel Wagner.

The delta patch against v4.18.12-rt6 is appended below and can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.18/incr/patch-4.18.12-rt6-rt7.patch.xz

You can get this release via the git tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.18.12-rt7

The RT patch against v4.18.12 can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patch-4.18.12-rt7.patch.xz

The split quilt queue is available at:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.18/older/patches-4.18.12-rt7.tar.xz

Sebastian

diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 484861278e9c3..a658214486e76 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2211,7 +2211,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count)
{
struct uart_amba_port *uap = amba_ports[co->index];
unsigned int old_cr = 0, new_cr;
- unsigned long flags;
+ unsigned long flags = 0;
int locked = 1;

clk_enable(uap->clk);
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index ffbdc4642ea55..84f75b5045f66 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -147,7 +147,7 @@ static inline void watchdog_update_worker(struct watchdog_device *wdd)
ktime_t t = watchdog_next_keepalive(wdd);

if (t > 0)
- hrtimer_start(&wd_data->timer, t, HRTIMER_MODE_REL);
+ hrtimer_start(&wd_data->timer, t, HRTIMER_MODE_REL_HARD);
} else {
hrtimer_cancel(&wd_data->timer);
}
@@ -166,7 +166,7 @@ static int __watchdog_ping(struct watchdog_device *wdd)
if (ktime_after(earliest_keepalive, now)) {
hrtimer_start(&wd_data->timer,
ktime_sub(earliest_keepalive, now),
- HRTIMER_MODE_REL);
+ HRTIMER_MODE_REL_HARD);
return 0;
}

@@ -945,7 +945,7 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno)
return -ENODEV;

kthread_init_work(&wd_data->work, watchdog_ping_work);
- hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
wd_data->timer.function = watchdog_timer_expired;

if (wdd->id == 0) {
@@ -992,7 +992,7 @@ static int watchdog_cdev_register(struct watchdog_device *wdd, dev_t devno)
__module_get(wdd->ops->owner);
kref_get(&wd_data->kref);
if (handle_boot_enabled)
- hrtimer_start(&wd_data->timer, 0, HRTIMER_MODE_REL);
+ hrtimer_start(&wd_data->timer, 0, HRTIMER_MODE_REL_HARD);
else
pr_info("watchdog%d running and kernel based pre-userspace handler disabled\n",
wdd->id);
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c1961761311db..ad292898f7f2b 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -85,7 +85,7 @@ enum {

struct kthread_worker {
unsigned int flags;
- spinlock_t lock;
+ raw_spinlock_t lock;
struct list_head work_list;
struct list_head delayed_work_list;
struct task_struct *task;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 486dedbd9af58..c1d9ee6671c67 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -597,7 +597,7 @@ void __kthread_init_worker(struct kthread_worker *worker,
struct lock_class_key *key)
{
memset(worker, 0, sizeof(struct kthread_worker));
- spin_lock_init(&worker->lock);
+ raw_spin_lock_init(&worker->lock);
lockdep_set_class_and_name(&worker->lock, key, name);
INIT_LIST_HEAD(&worker->work_list);
INIT_LIST_HEAD(&worker->delayed_work_list);
@@ -639,21 +639,21 @@ int kthread_worker_fn(void *worker_ptr)

if (kthread_should_stop()) {
__set_current_state(TASK_RUNNING);
- spin_lock_irq(&worker->lock);
+ raw_spin_lock_irq(&worker->lock);
worker->task = NULL;
- spin_unlock_irq(&worker->lock);
+ raw_spin_unlock_irq(&worker->lock);
return 0;
}

work = NULL;
- spin_lock_irq(&worker->lock);
+ raw_spin_lock_irq(&worker->lock);
if (!list_empty(&worker->work_list)) {
work = list_first_entry(&worker->work_list,
struct kthread_work, node);
list_del_init(&work->node);
}
worker->current_work = work;
- spin_unlock_irq(&worker->lock);
+ raw_spin_unlock_irq(&worker->lock);

if (work) {
__set_current_state(TASK_RUNNING);
@@ -810,12 +810,12 @@ bool kthread_queue_work(struct kthread_worker *worker,
bool ret = false;
unsigned long flags;

- spin_lock_irqsave(&worker->lock, flags);
+ raw_spin_lock_irqsave(&worker->lock, flags);
if (!queuing_blocked(worker, work)) {
kthread_insert_work(worker, work, &worker->work_list);
ret = true;
}
- spin_unlock_irqrestore(&worker->lock, flags);
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(kthread_queue_work);
@@ -841,7 +841,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
if (WARN_ON_ONCE(!worker))
return;

- spin_lock(&worker->lock);
+ raw_spin_lock(&worker->lock);
/* Work must not be used with >1 worker, see kthread_queue_work(). */
WARN_ON_ONCE(work->worker != worker);

@@ -850,7 +850,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
list_del_init(&work->node);
kthread_insert_work(worker, work, &worker->work_list);

- spin_unlock(&worker->lock);
+ raw_spin_unlock(&worker->lock);
}
EXPORT_SYMBOL(kthread_delayed_work_timer_fn);

@@ -906,14 +906,14 @@ bool kthread_queue_delayed_work(struct kthread_worker *worker,
unsigned long flags;
bool ret = false;

- spin_lock_irqsave(&worker->lock, flags);
+ raw_spin_lock_irqsave(&worker->lock, flags);

if (!queuing_blocked(worker, work)) {
__kthread_queue_delayed_work(worker, dwork, delay);
ret = true;
}

- spin_unlock_irqrestore(&worker->lock, flags);
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
@@ -949,7 +949,7 @@ void kthread_flush_work(struct kthread_work *work)
if (!worker)
return;

- spin_lock_irq(&worker->lock);
+ raw_spin_lock_irq(&worker->lock);
/* Work must not be used with >1 worker, see kthread_queue_work(). */
WARN_ON_ONCE(work->worker != worker);

@@ -961,7 +961,7 @@ void kthread_flush_work(struct kthread_work *work)
else
noop = true;

- spin_unlock_irq(&worker->lock);
+ raw_spin_unlock_irq(&worker->lock);

if (!noop)
wait_for_completion(&fwork.done);
@@ -994,9 +994,9 @@ static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
* any queuing is blocked by setting the canceling counter.
*/
work->canceling++;
- spin_unlock_irqrestore(&worker->lock, *flags);
+ raw_spin_unlock_irqrestore(&worker->lock, *flags);
del_timer_sync(&dwork->timer);
- spin_lock_irqsave(&worker->lock, *flags);
+ raw_spin_lock_irqsave(&worker->lock, *flags);
work->canceling--;
}

@@ -1043,7 +1043,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
unsigned long flags;
int ret = false;

- spin_lock_irqsave(&worker->lock, flags);
+ raw_spin_lock_irqsave(&worker->lock, flags);

/* Do not bother with canceling when never queued. */
if (!work->worker)
@@ -1060,7 +1060,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
fast_queue:
__kthread_queue_delayed_work(worker, dwork, delay);
out:
- spin_unlock_irqrestore(&worker->lock, flags);
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
@@ -1074,7 +1074,7 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
if (!worker)
goto out;

- spin_lock_irqsave(&worker->lock, flags);
+ raw_spin_lock_irqsave(&worker->lock, flags);
/* Work must not be used with >1 worker, see kthread_queue_work(). */
WARN_ON_ONCE(work->worker != worker);

@@ -1088,13 +1088,13 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
* In the meantime, block any queuing by setting the canceling counter.
*/
work->canceling++;
- spin_unlock_irqrestore(&worker->lock, flags);
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
kthread_flush_work(work);
- spin_lock_irqsave(&worker->lock, flags);
+ raw_spin_lock_irqsave(&worker->lock, flags);
work->canceling--;

out_fast:
- spin_unlock_irqrestore(&worker->lock, flags);
+ raw_spin_unlock_irqrestore(&worker->lock, flags);
out:
return ret;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7d789c1b316b3..4739472fb72ef 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1100,6 +1100,7 @@ int __migrate_disabled(struct task_struct *p)
{
return p->migrate_disable;
}
+EXPORT_SYMBOL_GPL(__migrate_disabled);
#endif

static void __do_set_cpus_allowed_tail(struct task_struct *p,
diff --git a/localversion-rt b/localversion-rt
index 8fc605d806670..045478966e9f1 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt6
+-rt7
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 3a8ddf8baf7dc..b209dbaefde82 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -103,7 +103,7 @@ static int quarantine_head;
static int quarantine_tail;
/* Total size of all objects in global_quarantine across all batches. */
static unsigned long quarantine_size;
-static DEFINE_SPINLOCK(quarantine_lock);
+static DEFINE_RAW_SPINLOCK(quarantine_lock);
DEFINE_STATIC_SRCU(remove_cache_srcu);

/* Maximum size of the global queue. */
@@ -190,7 +190,7 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
qlist_move_all(q, &temp);

- spin_lock(&quarantine_lock);
+ raw_spin_lock(&quarantine_lock);
WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
if (global_quarantine[quarantine_tail].bytes >=
@@ -203,7 +203,7 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
if (new_tail != quarantine_head)
quarantine_tail = new_tail;
}
- spin_unlock(&quarantine_lock);
+ raw_spin_unlock(&quarantine_lock);
}

local_irq_restore(flags);
@@ -230,7 +230,7 @@ void quarantine_reduce(void)
* expected case).
*/
srcu_idx = srcu_read_lock(&remove_cache_srcu);
- spin_lock_irqsave(&quarantine_lock, flags);
+ raw_spin_lock_irqsave(&quarantine_lock, flags);

/*
* Update quarantine size in case of hotplug. Allocate a fraction of
@@ -254,7 +254,7 @@ void quarantine_reduce(void)
quarantine_head = 0;
}

- spin_unlock_irqrestore(&quarantine_lock, flags);
+ raw_spin_unlock_irqrestore(&quarantine_lock, flags);

qlist_free_all(&to_free, NULL);
srcu_read_unlock(&remove_cache_srcu, srcu_idx);
@@ -310,17 +310,17 @@ void quarantine_remove_cache(struct kmem_cache *cache)
*/
on_each_cpu(per_cpu_remove_cache, cache, 1);

- spin_lock_irqsave(&quarantine_lock, flags);
+ raw_spin_lock_irqsave(&quarantine_lock, flags);
for (i = 0; i < QUARANTINE_BATCHES; i++) {
if (qlist_empty(&global_quarantine[i]))
continue;
qlist_move_cache(&global_quarantine[i], &to_free, cache);
/* Scanning whole quarantine can take a while. */
- spin_unlock_irqrestore(&quarantine_lock, flags);
+ raw_spin_unlock_irqrestore(&quarantine_lock, flags);
cond_resched();
- spin_lock_irqsave(&quarantine_lock, flags);
+ raw_spin_lock_irqsave(&quarantine_lock, flags);
}
- spin_unlock_irqrestore(&quarantine_lock, flags);
+ raw_spin_unlock_irqrestore(&quarantine_lock, flags);

qlist_free_all(&to_free, cache);



2018-10-10 15:57:46

by Tim Sander

[permalink] [raw]
Subject: Re: [ANNOUNCE] v4.18.12-rt7 stall

Hi

I just tested this kernel and saw the stall output below. I think there is something
fishy with the ethernet driver. I had one time where it just locked up on
network traffic on issuing "ip a" via serial port on the device. All the problems I see
seem to be related to network traffic via the socfpga-dwmac stmicro/stmmac.
Platform is pretty dated Intel/Altera Cortex A9 socfpga.

I think this problem has been there for a while, but since I had problems due to the
watchdog I was not able to detect it.

Best regards
Tim

[ 251.440019] INFO: rcu_preempt self-detected stall on CPU
[ 251.440036] 1-...!: (21000 ticks this GP) idle=5ae/1/1073741826 softirq=0/0 fqs=0
[ 251.440039] (t=21000 jiffies g=7702 c=7701 q=346)
[ 251.440053] rcu_preempt kthread starved for 21000 jiffies! g7702 c7701 f0x0 RCU_GP_WAIT_FQS(3) ->state=0x402 ->cpu=1
[ 251.440055] RCU grace-period kthread stack dump:
[ 251.440059] rcu_preempt I 0 11 2 0x00000000
[ 251.440066] Backtrace:
[ 251.440086] [<8062d4b0>] (__schedule) from [<8062da30>] (schedule+0x68/0x128)
[ 251.440096] r10:80a1569e r9:87d9a680 r8:80a04100 r7:80055eec r6:87d9a680 r5:80034600
[ 251.440100] r4:80054000
[ 251.440111] [<8062d9c8>] (schedule) from [<806306dc>] (schedule_timeout+0x1cc/0x368)
[ 251.440116] r5:80a06488 r4:fffef04c
[ 251.440128] [<80630510>] (schedule_timeout) from [<80184fdc>] (rcu_gp_kthread+0x750/0xac0)
[ 251.440137] r10:80a1569e r9:80a04100 r8:00000001 r7:00000003 r6:80a15690 r5:80a1569c
[ 251.440140] r4:80a154c0
[ 251.440150] [<8018488c>] (rcu_gp_kthread) from [<801461a8>] (kthread+0x138/0x168)
[ 251.440153] r7:80a154c0
[ 251.440163] [<80146070>] (kthread) from [<801010bc>] (ret_from_fork+0x14/0x38)
[ 251.440168] Exception stack(0x80055fb0 to 0x80055ff8)
[ 251.440174] 5fa0: 00000000 00000000 00000000 00000000
[ 251.440183] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[ 251.440189] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000
[ 251.440198] r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:80146070
[ 251.440202] r4:8150fac0 r3:80054000
[ 251.440215] NMI backtrace for cpu 1
[ 251.440226] CPU: 1 PID: 157 Comm: RawMeasThread Tainted: G W O 4.18.12-rt7 #1
[ 251.440229] Hardware name: Altera SOCFPGA
[ 251.440231] Backtrace:
[ 251.440243] [<8010dda4>] (dump_backtrace) from [<8010e09c>] (show_stack+0x20/0x24)
[ 251.440250] r7:80a573f8 r6:00000000 r5:600d0193 r4:80a573f8
[ 251.440264] [<8010e07c>] (show_stack) from [<80616120>] (dump_stack+0xb0/0xdc)
[ 251.440278] [<80616070>] (dump_stack) from [<8061cb74>] (nmi_cpu_backtrace+0xc0/0xc4)
[ 251.440286] r9:800d0193 r8:00000180 r7:807017c4 r6:00000001 r5:00000000 r4:00000001
[ 251.440296] [<8061cab4>] (nmi_cpu_backtrace) from [<8061ccdc>] (nmi_trigger_cpumask_backtrace+0x164/0x1b0)
[ 251.440301] r5:80a0906c r4:8010fa94
[ 251.440312] [<8061cb78>] (nmi_trigger_cpumask_backtrace) from [<8011077c>] (arch_trigger_cpumask_backtrace+0x20/0x24)
[ 251.440318] r7:80a154c0 r6:807017bc r5:80a06534 r4:80a154c0
[ 251.440328] [<8011075c>] (arch_trigger_cpumask_backtrace) from [<80187944>] (rcu_dump_cpu_stacks+0xac/0xdc)
[ 251.440337] [<80187898>] (rcu_dump_cpu_stacks) from [<801864b0>] (rcu_check_callbacks+0x9e8/0xb08)
[ 251.440346] r10:80a06574 r9:80a154c0 r8:80a06528 r7:80a154c0 r6:07439000 r5:87d9edc0
[ 251.440350] r4:80965dc0 r3:6c2a9c31
[ 251.440360] [<80185ac8>] (rcu_check_callbacks) from [<8018e834>] (update_process_times+0x40/0x6c)
[ 251.440368] r10:801a3024 r9:87d9b1a0 r8:87d9b000 r7:0000003a r6:8afdf535 r5:00000001
[ 251.440372] r4:871baa00
[ 251.440383] [<8018e7f4>] (update_process_times) from [<801a30ac>] (tick_sched_timer+0x88/0xf4)
[ 251.440387] r5:867cffb0 r4:87d9b310
[ 251.440396] [<801a3024>] (tick_sched_timer) from [<8018fc54>] (__hrtimer_run_queues+0x194/0x3e8)
[ 251.440403] r7:80a064b0 r6:867ce000 r5:87d9b060 r4:87d9b310
[ 251.440411] [<8018fac0>] (__hrtimer_run_queues) from [<80190648>] (hrtimer_interrupt+0x138/0x2b0)
[ 251.440419] r10:87d9b00c r9:87d9b1a0 r8:ffffffff r7:7fffffff r6:00000003 r5:200d0193
[ 251.440422] r4:87d9b000
[ 251.440432] [<80190510>] (hrtimer_interrupt) from [<8011140c>] (twd_handler+0x40/0x50)
[ 251.440441] r10:765b03e0 r9:00000010 r8:80a06d3c r7:00000000 r6:8001a500 r5:00000010
[ 251.440444] r4:00000001
[ 251.440454] [<801113cc>] (twd_handler) from [<80178510>] (handle_percpu_devid_irq+0x98/0x2dc)
[ 251.440459] r5:00000010 r4:81503cc0
[ 251.440472] [<80178478>] (handle_percpu_devid_irq) from [<8017230c>] (generic_handle_irq+0x34/0x44)
[ 251.440480] r10:765b03e0 r9:90803100 r8:80009000 r7:00000000 r6:00000000 r5:00000010
[ 251.440484] r4:80965208 r3:80178478
[ 251.440495] [<801722d8>] (generic_handle_irq) from [<801729e0>] (__handle_domain_irq+0x6c/0xc4)
[ 251.440505] [<80172974>] (__handle_domain_irq) from [<80102310>] (gic_handle_irq+0x5c/0xa0)
[ 251.440514] r9:90803100 r8:90802100 r7:867cffb0 r6:9080210c r5:80a06d3c r4:80a3c52c
[ 251.440521] [<801022b4>] (gic_handle_irq) from [<80101e58>] (__irq_usr+0x58/0x80)
[ 251.440525] Exception stack(0x867cffb0 to 0x867cfff8)
[ 251.440532] ffa0: 7ee6fce0 765b9000 0003b810 00000000
[ 251.440541] ffc0: 7ee6fcd8 7ee6fce0 765b9000 765afdf0 00000000 765aff20 765b03e0 7ee6fc28
[ 251.440548] ffe0: 00000000 765afd28 00015c45 000160a2 a00d0030 ffffffff
[ 251.440557] r9:765aff20 r8:10c5387d r7:10c5387d r6:ffffffff r5:a00d0030 r4:000160a2