Hello RT folks!
This patchset brings back many RT-specific fixes that have gone into
subsequent 4.14-rt and 4.16-rt releases.
One of my x86 boxes very intermittently triggers a WARN_ON() on bootup
in migrate_enable(), which I'm still trying to triage. If you can more
reliably reproduce this, please let me know.
This release candidate will not be pushed to the git tree.
To build 4.9.115-rt94-rc1 directly, the following patches should be applied:
http://www.kernel.org/pub/linux/kernel/v4.x/linux-4.9.tar.xz
http://www.kernel.org/pub/linux/kernel/v4.x/patch-4.9.115.xz
http://www.kernel.org/pub/linux/kernel/projects/rt/4.9/patch-4.9.115-rt94-rc1.patch.xz
If all goes well with testing, this rc will be promoted to an official
release on 8/16/2018.
Please go forth and test!
Thanks,
Julia
---
Boqun Feng (1):
rtmutex: Make rt_mutex_futex_unlock() safe for irq-off callsites
Julia Cartwright (4):
locallock: provide {get,put}_locked_ptr() variants
squashfs: make use of local lock in multi_cpu decompressor
seqlock: provide the same ordering semantics as mainline
Linux 4.9.115-rt94-rc1
Paul E. McKenney (1):
rcu: Suppress lockdep false-positive ->boost_mtx complaints
Peter Zijlstra (4):
futex: Fix pi_state->owner serialization
futex: Fix more put_pi_state() vs. exit_pi_state_list() races
futex: Avoid violating the 10th rule of futex
futex: Fix OWNER_DEAD fixup
Sebastian Andrzej Siewior (12):
rcu: Do not include rtmutex_common.h unconditionally
sched, tracing: Fix trace_sched_pi_setprio() for deboosting
crypto: limit more FPU-enabled sections
arm*: disable NEON in kernel mode
mm/slub: close possible memory-leak in kmem_cache_alloc_bulk()
locking: add types.h
net: use task_struct instead of CPU number as the queue owner on -RT
Revert "rt,ntp: Move call to schedule_delayed_work() to helper thread"
Revert "block: blk-mq: Use swait"
block: blk-mq: move blk_queue_usage_counter_release() into process
context
alarmtimer: Prevent live lock in alarm_cancel()
posix-timers: move the rcu head out of the union
arch/arm/Kconfig | 2 +-
arch/arm64/crypto/Kconfig | 14 +-
arch/x86/crypto/camellia_aesni_avx2_glue.c | 20 +++
arch/x86/crypto/camellia_aesni_avx_glue.c | 19 +++
arch/x86/crypto/cast6_avx_glue.c | 24 +++-
arch/x86/crypto/chacha20_glue.c | 9 +-
arch/x86/crypto/serpent_avx2_glue.c | 19 +++
arch/x86/crypto/serpent_avx_glue.c | 23 +++-
arch/x86/crypto/serpent_sse2_glue.c | 23 +++-
arch/x86/crypto/twofish_avx_glue.c | 27 +++-
arch/x86/include/asm/fpu/api.h | 1 +
arch/x86/kernel/fpu/core.c | 12 ++
block/blk-core.c | 22 +++-
block/blk-mq.c | 6 +-
fs/squashfs/decompressor_multi_percpu.c | 16 ++-
include/linux/blkdev.h | 4 +-
include/linux/locallock.h | 10 ++
include/linux/netdevice.h | 54 +++++++-
include/linux/posix-timers.h | 2 +-
include/linux/seqlock.h | 1 +
include/linux/spinlock_types_raw.h | 2 +
include/trace/events/sched.h | 4 +-
kernel/futex.c | 144 ++++++++++++++++-----
kernel/locking/rtmutex.c | 31 +++--
kernel/locking/rtmutex_common.h | 1 +
kernel/rcu/tree_plugin.h | 5 +-
kernel/time/alarmtimer.c | 2 +-
kernel/time/ntp.c | 26 ----
kernel/time/posix-timers.c | 4 +-
localversion-rt | 2 +-
mm/slub.c | 1 +
net/core/dev.c | 6 +-
32 files changed, 412 insertions(+), 124 deletions(-)
--
2.18.0
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
Currently, the squashfs multi_cpu decompressor makes use of
get_cpu_ptr()/put_cpu_ptr(), which unconditionally disable preemption
during decompression.
Because the workload is distributed across CPUs, all CPUs can observe a
very high wakeup latency, which has been seen to be as much as 8000us.
Convert this decompressor to make use of a local lock, which will allow
execution of the decompressor with preemption-enabled, but also ensure
concurrent accesses to the percpu compressor data on the local CPU will
be serialized.
Cc: [email protected]
Reported-by: Alexander Stein <[email protected]>
Tested-by: Alexander Stein <[email protected]>
Signed-off-by: Julia Cartwright <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit c160736542d7b3d67da32848d2f028b8e35730e5)
Signed-off-by: Julia Cartwright <[email protected]>
---
fs/squashfs/decompressor_multi_percpu.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
index 23a9c28ad8ea..6a73c4fa88e7 100644
--- a/fs/squashfs/decompressor_multi_percpu.c
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/percpu.h>
#include <linux/buffer_head.h>
+#include <linux/locallock.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -25,6 +26,8 @@ struct squashfs_stream {
void *stream;
};
+static DEFINE_LOCAL_IRQ_LOCK(stream_lock);
+
void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
void *comp_opts)
{
@@ -79,10 +82,15 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
{
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
- struct squashfs_stream *stream = get_cpu_ptr(percpu);
- int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
- offset, length, output);
- put_cpu_ptr(stream);
+ struct squashfs_stream *stream;
+ int res;
+
+ stream = get_locked_ptr(stream_lock, percpu);
+
+ res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+ offset, length, output);
+
+ put_locked_ptr(stream_lock, stream);
if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",
--
2.18.0
From: Peter Zijlstra <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
[ Upstream commit c1e2f0eaf015fb7076d51a339011f2383e6dd389 ]
Julia reported futex state corruption in the following scenario:
waiter waker stealer (prio > waiter)
futex(WAIT_REQUEUE_PI, uaddr, uaddr2,
timeout=[N ms])
futex_wait_requeue_pi()
futex_wait_queue_me()
freezable_schedule()
<scheduled out>
futex(LOCK_PI, uaddr2)
futex(CMP_REQUEUE_PI, uaddr,
uaddr2, 1, 0)
/* requeues waiter to uaddr2 */
futex(UNLOCK_PI, uaddr2)
wake_futex_pi()
cmp_futex_value_locked(uaddr2, waiter)
wake_up_q()
<woken by waker>
<hrtimer_wakeup() fires,
clears sleeper->task>
futex(LOCK_PI, uaddr2)
__rt_mutex_start_proxy_lock()
try_to_take_rt_mutex() /* steals lock */
rt_mutex_set_owner(lock, stealer)
<preempted>
<scheduled in>
rt_mutex_wait_proxy_lock()
__rt_mutex_slowlock()
try_to_take_rt_mutex() /* fails, lock held by stealer */
if (timeout && !timeout->task)
return -ETIMEDOUT;
fixup_owner()
/* lock wasn't acquired, so,
fixup_pi_state_owner skipped */
return -ETIMEDOUT;
/* At this point, we've returned -ETIMEDOUT to userspace, but the
* futex word shows waiter to be the owner, and the pi_mutex has
* stealer as the owner */
futex_lock(LOCK_PI, uaddr2)
-> bails with EDEADLK, futex word says we're owner.
And suggested that what commit:
73d786bd043e ("futex: Rework inconsistent rt_mutex/futex_q state")
removes from fixup_owner() looks to be just what is needed. And indeed
it is -- I completely missed that requeue_pi could also result in this
case. So we need to restore that, except that subsequent patches, like
commit:
16ffa12d7425 ("futex: Pull rt_mutex_futex_unlock() out from under hb->lock")
changed all the locking rules. Even without that, the sequence:
- if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) {
- locked = 1;
- goto out;
- }
- raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
- owner = rt_mutex_owner(&q->pi_state->pi_mutex);
- if (!owner)
- owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
- raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
- ret = fixup_pi_state_owner(uaddr, q, owner);
already suggests there were races; otherwise we'd never have to look
at next_owner.
So instead of doing 3 consecutive wait_lock sections with who knows
what races, we do it all in a single section. Additionally, the usage
of pi_state->owner in fixup_owner() was only safe because only the
rt_mutex owner would modify it, which this additional case wrecks.
Luckily the values can only change away and not to the value we're
testing, this means we can do a speculative test and double check once
we have the wait_lock.
Fixes: 73d786bd043e ("futex: Rework inconsistent rt_mutex/futex_q state")
Reported-by: Julia Cartwright <[email protected]>
Reported-by: Gratian Crisan <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Julia Cartwright <[email protected]>
Tested-by: Gratian Crisan <[email protected]>
Cc: Darren Hart <[email protected]>
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Julia Cartwright <[email protected]>
---
kernel/futex.c | 83 ++++++++++++++++++++++++++-------
kernel/locking/rtmutex.c | 26 ++++++++---
kernel/locking/rtmutex_common.h | 1 +
3 files changed, 87 insertions(+), 23 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 270148be5647..cdd68ba6e3a6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2287,21 +2287,17 @@ static void unqueue_me_pi(struct futex_q *q)
spin_unlock(q->lock_ptr);
}
-/*
- * Fixup the pi_state owner with the new owner.
- *
- * Must be called with hash bucket lock held and mm->sem held for non
- * private futexes.
- */
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
- struct task_struct *newowner)
+ struct task_struct *argowner)
{
- u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state;
u32 uval, uninitialized_var(curval), newval;
- struct task_struct *oldowner;
+ struct task_struct *oldowner, *newowner;
+ u32 newtid;
int ret;
+ lockdep_assert_held(q->lock_ptr);
+
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
oldowner = pi_state->owner;
@@ -2310,11 +2306,17 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
newtid |= FUTEX_OWNER_DIED;
/*
- * We are here either because we stole the rtmutex from the
- * previous highest priority waiter or we are the highest priority
- * waiter but have failed to get the rtmutex the first time.
+ * We are here because either:
+ *
+ * - we stole the lock and pi_state->owner needs updating to reflect
+ * that (@argowner == current),
*
- * We have to replace the newowner TID in the user space variable.
+ * or:
+ *
+ * - someone stole our lock and we need to fix things to point to the
+ * new owner (@argowner == NULL).
+ *
+ * Either way, we have to replace the TID in the user space variable.
* This must be atomic as we have to preserve the owner died bit here.
*
* Note: We write the user space value _before_ changing the pi_state
@@ -2327,6 +2329,42 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* in the PID check in lookup_pi_state.
*/
retry:
+ if (!argowner) {
+ if (oldowner != current) {
+ /*
+ * We raced against a concurrent self; things are
+ * already fixed up. Nothing to do.
+ */
+ ret = 0;
+ goto out_unlock;
+ }
+
+ if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
+ /* We got the lock after all, nothing to fix. */
+ ret = 0;
+ goto out_unlock;
+ }
+
+ /*
+ * Since we just failed the trylock; there must be an owner.
+ */
+ newowner = rt_mutex_owner(&pi_state->pi_mutex);
+ BUG_ON(!newowner);
+ } else {
+ WARN_ON_ONCE(argowner != current);
+ if (oldowner == current) {
+ /*
+ * We raced against a concurrent self; things are
+ * already fixed up. Nothing to do.
+ */
+ ret = 0;
+ goto out_unlock;
+ }
+ newowner = argowner;
+ }
+
+ newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
+
if (get_futex_value_locked(&uval, uaddr))
goto handle_fault;
@@ -2427,15 +2465,28 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* Got the lock. We might not be the anticipated owner if we
* did a lock-steal - fix up the PI-state in that case:
*
- * We can safely read pi_state->owner without holding wait_lock
- * because we now own the rt_mutex, only the owner will attempt
- * to change it.
+ * Speculative pi_state->owner read (we don't hold wait_lock);
+ * since we own the lock pi_state->owner == current is the
+ * stable state, anything else needs more attention.
*/
if (q->pi_state->owner != current)
ret = fixup_pi_state_owner(uaddr, q, current);
goto out;
}
+ /*
+ * If we didn't get the lock; check if anybody stole it from us. In
+ * that case, we need to fix up the uval to point to them instead of
+ * us, otherwise bad things happen. [10]
+ *
+ * Another speculative read; pi_state->owner == current is unstable
+ * but needs our attention.
+ */
+ if (q->pi_state->owner == current) {
+ ret = fixup_pi_state_owner(uaddr, q, NULL);
+ goto out;
+ }
+
/*
* Paranoia check. If we did not take the lock, then we should not be
* the owner of the rt_mutex.
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3a8b5d44aaf8..57361d631749 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1849,6 +1849,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
return ret;
}
+static inline int __rt_mutex_slowtrylock(struct rt_mutex *lock)
+{
+ int ret = try_to_take_rt_mutex(lock, current, NULL);
+
+ /*
+ * try_to_take_rt_mutex() sets the lock waiters bit
+ * unconditionally. Clean this up.
+ */
+ fixup_rt_mutex_waiters(lock);
+
+ return ret;
+}
+
/*
* Slow path try-lock function:
*/
@@ -1871,13 +1884,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
*/
raw_spin_lock_irqsave(&lock->wait_lock, flags);
- ret = try_to_take_rt_mutex(lock, current, NULL);
-
- /*
- * try_to_take_rt_mutex() sets the lock waiters bit
- * unconditionally. Clean this up.
- */
- fixup_rt_mutex_waiters(lock);
+ ret = __rt_mutex_slowtrylock(lock);
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
@@ -2102,6 +2109,11 @@ int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
return rt_mutex_slowtrylock(lock);
}
+int __sched __rt_mutex_futex_trylock(struct rt_mutex *lock)
+{
+ return __rt_mutex_slowtrylock(lock);
+}
+
/**
* rt_mutex_timed_lock - lock a rt_mutex interruptible
* the timeout structure is provided
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 64d89d780059..50c0a1043556 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -122,6 +122,7 @@ extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter);
extern int rt_mutex_futex_trylock(struct rt_mutex *l);
+extern int __rt_mutex_futex_trylock(struct rt_mutex *l);
extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
[ Upstream commit 4ff648decf4712d39f184fc2df3163f43975575a ]
Since the following commit:
b91473ff6e97 ("sched,tracing: Update trace_sched_pi_setprio()")
the sched_pi_setprio trace point shows the "newprio" during a deboost:
|futex sched_pi_setprio: comm=futex_requeue_p pid"34 oldprio newprio=98
|futex sched_switch: prev_comm=futex_requeue_p prev_pid"34 prev_prio0
This patch open codes __rt_effective_prio() in the tracepoint as the
'newprio' to get the old behaviour back / the correct priority:
|futex sched_pi_setprio: comm=futex_requeue_p pid"20 oldprio newprio=120
|futex sched_switch: prev_comm=futex_requeue_p prev_pid"20 prev_prio0
Peter suggested to open code the new priority so people using tracehook
could get the deadline data out.
Reported-by: Mansky Christian <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Fixes: b91473ff6e97 ("sched,tracing: Update trace_sched_pi_setprio()")
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Julia Cartwright <[email protected]>
---
include/trace/events/sched.h | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 516ae88cddf4..742682079acf 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -429,7 +429,9 @@ TRACE_EVENT(sched_pi_setprio,
memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
__entry->pid = tsk->pid;
__entry->oldprio = tsk->prio;
- __entry->newprio = pi_task ? pi_task->prio : tsk->prio;
+ __entry->newprio = pi_task ?
+ min(tsk->normal_prio, pi_task->prio) :
+ tsk->normal_prio;
/* XXX SCHED_DEADLINE bits missing */
),
--
2.18.0
From: Peter Zijlstra <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
[ Upstream commit a97cb0e7b3f4c6297fd857055ae8e895f402f501 ]
Both Geert and DaveJ reported that the recent futex commit:
c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex")
introduced a problem with setting OWNER_DEAD. We set the bit on an
uninitialized variable and then entirely optimize it away as a
dead-store.
Move the setting of the bit to where it is more useful.
Reported-by: Geert Uytterhoeven <[email protected]>
Reported-by: Dave Jones <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Fixes: c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex")
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Julia Cartwright <[email protected]>
---
kernel/futex.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index cdd68ba6e3a6..57038131ad3f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2301,9 +2301,6 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
oldowner = pi_state->owner;
- /* Owner died? */
- if (!pi_state->owner)
- newtid |= FUTEX_OWNER_DIED;
/*
* We are here because either:
@@ -2364,6 +2361,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
}
newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
+ /* Owner died? */
+ if (!pi_state->owner)
+ newtid |= FUTEX_OWNER_DIED;
if (get_futex_value_locked(&uval, uaddr))
goto handle_fault;
--
2.18.0
From: Boqun Feng <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
[ Upstream commit 6b0ef92fee2a3189eba6d6b827b247cb4f6da7e9 ]
When running rcutorture with TREE03 config, CONFIG_PROVE_LOCKING=y, and
kernel cmdline argument "rcutorture.gp_exp=1", lockdep reports a
HARDIRQ-safe->HARDIRQ-unsafe deadlock:
=============================== WARNING: inconsistent lock state
4.16.0-rc4+ #1 Not tainted
--------------------------------
inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage.
takes:
__schedule+0xbe/0xaf0
{IN-HARDIRQ-W} state was registered at:
_raw_spin_lock+0x2a/0x40
scheduler_tick+0x47/0xf0
...
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(&rq->lock);
<Interrupt>
lock(&rq->lock);
*** DEADLOCK ***
1 lock held by rcu_torture_rea/724:
rcu_torture_read_lock+0x0/0x70
stack backtrace:
CPU: 2 PID: 724 Comm: rcu_torture_rea Not tainted 4.16.0-rc4+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014
Call Trace:
lock_acquire+0x90/0x200
? __schedule+0xbe/0xaf0
_raw_spin_lock+0x2a/0x40
? __schedule+0xbe/0xaf0
__schedule+0xbe/0xaf0
preempt_schedule_irq+0x2f/0x60
retint_kernel+0x1b/0x2d
RIP: 0010:rcu_read_unlock_special+0x0/0x680
? rcu_torture_read_unlock+0x60/0x60
__rcu_read_unlock+0x64/0x70
rcu_torture_read_unlock+0x17/0x60
rcu_torture_reader+0x275/0x450
? rcutorture_booster_init+0x110/0x110
? rcu_torture_stall+0x230/0x230
? kthread+0x10e/0x130
kthread+0x10e/0x130
? kthread_create_worker_on_cpu+0x70/0x70
? call_usermodehelper_exec_async+0x11a/0x150
ret_from_fork+0x3a/0x50
This happens with the following even sequence:
preempt_schedule_irq();
local_irq_enable();
__schedule():
local_irq_disable(); // irq off
...
rcu_note_context_switch():
rcu_note_preempt_context_switch():
rcu_read_unlock_special():
local_irq_save(flags);
...
raw_spin_unlock_irqrestore(...,flags); // irq remains off
rt_mutex_futex_unlock():
raw_spin_lock_irq();
...
raw_spin_unlock_irq(); // accidentally set irq on
<return to __schedule()>
rq_lock():
raw_spin_lock(); // acquiring rq->lock with irq on
which means rq->lock becomes a HARDIRQ-unsafe lock, which can cause
deadlocks in scheduler code.
This problem was introduced by commit 02a7c234e540 ("rcu: Suppress
lockdep false-positive ->boost_mtx complaints"). That brought the user
of rt_mutex_futex_unlock() with irq off.
To fix this, replace the *lock_irq() in rt_mutex_futex_unlock() with
*lock_irq{save,restore}() to make it safe to call rt_mutex_futex_unlock()
with irq off.
Fixes: 02a7c234e540 ("rcu: Suppress lockdep false-positive ->boost_mtx complaints")
Signed-off-by: Boqun Feng <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Josh Triplett <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: "Paul E . McKenney" <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Julia Cartwright <[email protected]>
---
kernel/locking/rtmutex.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 57361d631749..5e15f5c73637 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -2213,11 +2213,12 @@ void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
{
WAKE_Q(wake_q);
WAKE_Q(wake_sleeper_q);
+ unsigned long flags;
bool postunlock;
- raw_spin_lock_irq(&lock->wait_lock);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q);
- raw_spin_unlock_irq(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
if (postunlock)
rt_mutex_postunlock(&wake_q, &wake_sleeper_q);
--
2.18.0
From: Peter Zijlstra <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
[ Upstream commit 51d00899f7e6ded15c89cb4e2cb11a35283bac81 ]
Dmitry (through syzbot) reported being able to trigger the WARN in
get_pi_state() and a use-after-free on:
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
Both are due to this race:
exit_pi_state_list() put_pi_state()
lock(&curr->pi_lock)
while() {
pi_state = list_first_entry(head);
hb = hash_futex(&pi_state->key);
unlock(&curr->pi_lock);
dec_and_test(&pi_state->refcount);
lock(&hb->lock)
lock(&pi_state->pi_mutex.wait_lock) // uaf if pi_state free'd
lock(&curr->pi_lock);
....
unlock(&curr->pi_lock);
get_pi_state(); // WARN; refcount==0
The problem is we take the reference count too late, and don't allow it
being 0. Fix it by using inc_not_zero() and simply retrying the loop
when we fail to get a refcount. In that case put_pi_state() should
remove the entry from the list.
Reported-by: Dmitry Vyukov <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Gratian Crisan <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Cc: syzbot <bot+2af19c9e1ffe4d4ee1d16c56ae7580feaee75765@syzkaller.appspotmail.com>
Cc: [email protected]
Cc: <[email protected]>
Fixes: c74aef2d06a9 ("futex: Fix pi_state->owner serialization")
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Julia Cartwright <[email protected]>
---
kernel/futex.c | 23 ++++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 47e42faad6c5..270148be5647 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -899,11 +899,27 @@ void exit_pi_state_list(struct task_struct *curr)
*/
raw_spin_lock_irq(&curr->pi_lock);
while (!list_empty(head)) {
-
next = head->next;
pi_state = list_entry(next, struct futex_pi_state, list);
key = pi_state->key;
hb = hash_futex(&key);
+
+ /*
+ * We can race against put_pi_state() removing itself from the
+ * list (a waiter going away). put_pi_state() will first
+ * decrement the reference count and then modify the list, so
+ * its possible to see the list entry but fail this reference
+ * acquire.
+ *
+ * In that case; drop the locks to let put_pi_state() make
+ * progress and retry the loop.
+ */
+ if (!atomic_inc_not_zero(&pi_state->refcount)) {
+ raw_spin_unlock_irq(&curr->pi_lock);
+ cpu_relax();
+ raw_spin_lock_irq(&curr->pi_lock);
+ continue;
+ }
raw_spin_unlock_irq(&curr->pi_lock);
spin_lock(&hb->lock);
@@ -914,10 +930,12 @@ void exit_pi_state_list(struct task_struct *curr)
* task still owns the PI-state:
*/
if (head->next != next) {
+ /* retain curr->pi_lock for the loop invariant */
raw_spin_unlock(&curr->pi_lock);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(&hb->lock);
raw_spin_lock_irq(&curr->pi_lock);
+ put_pi_state(pi_state);
continue;
}
@@ -925,9 +943,8 @@ void exit_pi_state_list(struct task_struct *curr)
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
pi_state->owner = NULL;
- raw_spin_unlock(&curr->pi_lock);
- get_pi_state(pi_state);
+ raw_spin_unlock(&curr->pi_lock);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(&hb->lock);
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
In commit ("net: move xmit_recursion to per-task variable on -RT") the
recursion level was changed to be per-task since we can get preempted in
BH on -RT. The lock owner should consequently be recorded as the task
that holds the lock and not the CPU. Otherwise we trigger the "Dead loop
on virtual device" warning on SMP systems.
Cc: [email protected]
Reported-by: Kurt Kanzenbach <[email protected]>
Tested-by: Kurt Kanzenbach <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit d3a66ffd1c4f0253076069b10a8223e7b6e80e38)
Signed-off-by: Julia Cartwright <[email protected]>
---
include/linux/netdevice.h | 54 ++++++++++++++++++++++++++++++++++-----
net/core/dev.c | 6 ++++-
2 files changed, 53 insertions(+), 7 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 85fc72b8a92b..6f1a3f286c4b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -594,7 +594,11 @@ struct netdev_queue {
* write-mostly part
*/
spinlock_t _xmit_lock ____cacheline_aligned_in_smp;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ struct task_struct *xmit_lock_owner;
+#else
int xmit_lock_owner;
+#endif
/*
* Time (in jiffies) of last Tx
*/
@@ -3610,41 +3614,79 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
return (1 << debug_value) - 1;
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu)
+{
+ txq->xmit_lock_owner = current;
+}
+
+static inline void netdev_queue_clear_owner(struct netdev_queue *txq)
+{
+ txq->xmit_lock_owner = NULL;
+}
+
+static inline bool netdev_queue_has_owner(struct netdev_queue *txq)
+{
+ if (txq->xmit_lock_owner != NULL)
+ return true;
+ return false;
+}
+
+#else
+
+static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu)
+{
+ txq->xmit_lock_owner = cpu;
+}
+
+static inline void netdev_queue_clear_owner(struct netdev_queue *txq)
+{
+ txq->xmit_lock_owner = -1;
+}
+
+static inline bool netdev_queue_has_owner(struct netdev_queue *txq)
+{
+ if (txq->xmit_lock_owner != -1)
+ return true;
+ return false;
+}
+#endif
+
static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
{
spin_lock(&txq->_xmit_lock);
- txq->xmit_lock_owner = cpu;
+ netdev_queue_set_owner(txq, cpu);
}
static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
{
spin_lock_bh(&txq->_xmit_lock);
- txq->xmit_lock_owner = smp_processor_id();
+ netdev_queue_set_owner(txq, smp_processor_id());
}
static inline bool __netif_tx_trylock(struct netdev_queue *txq)
{
bool ok = spin_trylock(&txq->_xmit_lock);
if (likely(ok))
- txq->xmit_lock_owner = smp_processor_id();
+ netdev_queue_set_owner(txq, smp_processor_id());
return ok;
}
static inline void __netif_tx_unlock(struct netdev_queue *txq)
{
- txq->xmit_lock_owner = -1;
+ netdev_queue_clear_owner(txq);
spin_unlock(&txq->_xmit_lock);
}
static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
{
- txq->xmit_lock_owner = -1;
+ netdev_queue_clear_owner(txq);
spin_unlock_bh(&txq->_xmit_lock);
}
static inline void txq_trans_update(struct netdev_queue *txq)
{
- if (txq->xmit_lock_owner != -1)
+ if (netdev_queue_has_owner(txq))
txq->trans_start = jiffies;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 93995575d23a..e7dc4700e463 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3449,7 +3449,11 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (txq->xmit_lock_owner != current) {
+#else
if (txq->xmit_lock_owner != cpu) {
+#endif
if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT))
goto recursion_alert;
@@ -7168,7 +7172,7 @@ static void netdev_init_one_queue(struct net_device *dev,
/* Initialize queue lock */
spin_lock_init(&queue->_xmit_lock);
netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
- queue->xmit_lock_owner = -1;
+ netdev_queue_clear_owner(queue);
netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
queue->dev = dev;
#ifdef CONFIG_BQL
--
2.18.0
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
The mainline implementation of read_seqbegin() orders prior loads w.r.t.
the read-side critical section. Fixup the RT writer-boosting
implementation to provide the same guarantee.
Also, while we're here, update the usage of ACCESS_ONCE() to use
READ_ONCE().
Fixes: e69f15cf77c23 ("seqlock: Prevent rt starvation")
Cc: [email protected]
Signed-off-by: Julia Cartwright <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit afa4c06b89a3c0fb7784ff900ccd707bef519cb7)
Signed-off-by: Julia Cartwright <[email protected]>
---
include/linux/seqlock.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 3d7223ffdd3b..de04d6d0face 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -461,6 +461,7 @@ static inline unsigned read_seqbegin(seqlock_t *sl)
spin_unlock_wait(&sl->lock);
goto repeat;
}
+ smp_rmb();
return ret;
}
#endif
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
I've been looking at this in v3.10-RT where it got in. The patch
description says
|The ntp code for notify_cmos_timer() is called from a hard interrupt
|context.
I see only one caller of ntp_notify_cmos_timer() and that is
do_adjtimex() after "raw_spin_unlock_irqrestore()".
I see a few callers of do_adjtimex() which is SYS_adjtimex() (+compat)
and posix_clock_realtime_adj() which in turn is called by
SYS_clock_adjtime().
Reverting the patch.
Cc: [email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit 932c5783d4434250a1019f49ae81b80731dfd4cd)
Signed-off-by: Julia Cartwright <[email protected]>
---
kernel/time/ntp.c | 26 --------------------------
1 file changed, 26 deletions(-)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 05b7391bf9bd..6df8927c58a5 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -17,7 +17,6 @@
#include <linux/module.h>
#include <linux/rtc.h>
#include <linux/math64.h>
-#include <linux/swork.h>
#include "ntp_internal.h"
#include "timekeeping_internal.h"
@@ -569,35 +568,10 @@ static void sync_cmos_clock(struct work_struct *work)
&sync_cmos_work, timespec64_to_jiffies(&next));
}
-#ifdef CONFIG_PREEMPT_RT_FULL
-
-static void run_clock_set_delay(struct swork_event *event)
-{
- queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
-}
-
-static struct swork_event ntp_cmos_swork;
-
-void ntp_notify_cmos_timer(void)
-{
- swork_queue(&ntp_cmos_swork);
-}
-
-static __init int create_cmos_delay_thread(void)
-{
- WARN_ON(swork_get());
- INIT_SWORK(&ntp_cmos_swork, run_clock_set_delay);
- return 0;
-}
-early_initcall(create_cmos_delay_thread);
-
-#else
-
void ntp_notify_cmos_timer(void)
{
queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
}
-#endif /* CONFIG_PREEMPT_RT_FULL */
#else
void ntp_notify_cmos_timer(void) { }
--
2.18.0
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
---
localversion-rt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/localversion-rt b/localversion-rt
index e98a1fe050bd..dcc2fd2ca155 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt93
+-rt94-rc1
--
2.18.0
From: "Paul E. McKenney" <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
[ Upstream commit 02a7c234e54052101164368ff981bd72f7acdd65 ]
RCU priority boosting uses rt_mutex_init_proxy_locked() to initialize an
rt_mutex structure in locked state held by some other task. When that
other task releases it, lockdep complains (quite accurately, but a bit
uselessly) that the other task never acquired it. This complaint can
suppress other, more helpful, lockdep complaints, and in any case it is
a false positive.
This commit therefore switches from rt_mutex_unlock() to
rt_mutex_futex_unlock(), thereby avoiding the lockdep annotations.
Of course, if lockdep ever learns about rt_mutex_init_proxy_locked(),
addtional adjustments will be required.
Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Julia Cartwright <[email protected]>
---
kernel/rcu/tree_plugin.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3d18d08e8382..510de72ad8a3 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -486,7 +486,7 @@ void rcu_read_unlock_special(struct task_struct *t)
/* Unboost if we were boosted. */
if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex)
- rt_mutex_unlock(&rnp->boost_mtx);
+ rt_mutex_futex_unlock(&rnp->boost_mtx);
/*
* If this was the last task on the expedited lists,
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
Those crypto drivers use SSE/AVX/… for their crypto work and in order to
do so in kernel they need to enable the "FPU" in kernel mode which
disables preemption.
There are two problems with the way they are used:
- the while loop which processes X bytes may create latency spikes and
should be avoided or limited.
- the cipher-walk-next part may allocate/free memory and may use
kmap_atomic().
The whole kernel_fpu_begin()/end() processing isn't probably that cheap.
It most likely makes sense to process as much of those as possible in one
go. The new *_fpu_sched_rt() schedules only if a RT task is pending.
Probably we should measure the performance those ciphers in pure SW
mode and with this optimisations to see if it makes sense to keep them
for RT.
This kernel_fpu_resched() makes the code more preemptible which might hurt
performance.
Cc: [email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit 0dcc4c1693ef37e166da420ef7c68c7047c996f1)
Signed-off-by: Julia Cartwright <[email protected]>
---
arch/x86/crypto/camellia_aesni_avx2_glue.c | 20 ++++++++++++++++
arch/x86/crypto/camellia_aesni_avx_glue.c | 19 +++++++++++++++
arch/x86/crypto/cast6_avx_glue.c | 24 +++++++++++++++----
arch/x86/crypto/chacha20_glue.c | 9 ++++----
arch/x86/crypto/serpent_avx2_glue.c | 19 +++++++++++++++
arch/x86/crypto/serpent_avx_glue.c | 23 ++++++++++++++----
arch/x86/crypto/serpent_sse2_glue.c | 23 ++++++++++++++----
arch/x86/crypto/twofish_avx_glue.c | 27 ++++++++++++++++++++--
arch/x86/include/asm/fpu/api.h | 1 +
arch/x86/kernel/fpu/core.c | 12 ++++++++++
10 files changed, 158 insertions(+), 19 deletions(-)
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 60907c139c4e..0902db7d326a 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -206,6 +206,20 @@ struct crypt_priv {
bool fpu_enabled;
};
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void camellia_fpu_end_rt(struct crypt_priv *ctx)
+{
+ bool fpu_enabled = ctx->fpu_enabled;
+
+ if (!fpu_enabled)
+ return;
+ camellia_fpu_end(fpu_enabled);
+ ctx->fpu_enabled = false;
+}
+#else
+static void camellia_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
@@ -221,16 +235,19 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
}
if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
+ kernel_fpu_resched();
camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
+ kernel_fpu_resched();
camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
+ camellia_fpu_end_rt(ctx);
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_enc_blk(ctx->ctx, srcdst, srcdst);
@@ -251,16 +268,19 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
}
if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
+ kernel_fpu_resched();
camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
+ kernel_fpu_resched();
camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
+ camellia_fpu_end_rt(ctx);
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_dec_blk(ctx->ctx, srcdst, srcdst);
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index d96429da88eb..3b8e91841039 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -210,6 +210,21 @@ struct crypt_priv {
bool fpu_enabled;
};
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void camellia_fpu_end_rt(struct crypt_priv *ctx)
+{
+ bool fpu_enabled = ctx->fpu_enabled;
+
+ if (!fpu_enabled)
+ return;
+ camellia_fpu_end(fpu_enabled);
+ ctx->fpu_enabled = false;
+}
+
+#else
+static void camellia_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
@@ -225,10 +240,12 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
+ kernel_fpu_resched();
camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
+ camellia_fpu_end_rt(ctx);
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_enc_blk(ctx->ctx, srcdst, srcdst);
@@ -249,10 +266,12 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
}
while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
+ kernel_fpu_resched();
camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
}
+ camellia_fpu_end_rt(ctx);
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
camellia_dec_blk(ctx->ctx, srcdst, srcdst);
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 50e684768c55..8caf9ba8c1da 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -205,19 +205,33 @@ struct crypt_priv {
bool fpu_enabled;
};
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void cast6_fpu_end_rt(struct crypt_priv *ctx)
+{
+ bool fpu_enabled = ctx->fpu_enabled;
+
+ if (!fpu_enabled)
+ return;
+ cast6_fpu_end(fpu_enabled);
+ ctx->fpu_enabled = false;
+}
+
+#else
+static void cast6_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = CAST6_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
- ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+ ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
+ cast6_fpu_end_rt(ctx);
return;
}
-
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
__cast6_encrypt(ctx->ctx, srcdst, srcdst);
}
@@ -228,10 +242,10 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
struct crypt_priv *ctx = priv;
int i;
- ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+ ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
+ cast6_fpu_end_rt(ctx);
return;
}
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index f910d1d449f0..f9e5820ae06a 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -80,23 +80,24 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
- kernel_fpu_begin();
-
while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+ kernel_fpu_begin();
+
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+ kernel_fpu_end();
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % CHACHA20_BLOCK_SIZE);
}
if (walk.nbytes) {
+ kernel_fpu_begin();
chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
walk.nbytes);
+ kernel_fpu_end();
err = blkcipher_walk_done(desc, &walk, 0);
}
- kernel_fpu_end();
-
return err;
}
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 870f6d812a2d..5c806bf39f1d 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -184,6 +184,21 @@ struct crypt_priv {
bool fpu_enabled;
};
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void serpent_fpu_end_rt(struct crypt_priv *ctx)
+{
+ bool fpu_enabled = ctx->fpu_enabled;
+
+ if (!fpu_enabled)
+ return;
+ serpent_fpu_end(fpu_enabled);
+ ctx->fpu_enabled = false;
+}
+
+#else
+static void serpent_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = SERPENT_BLOCK_SIZE;
@@ -199,10 +214,12 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
}
while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
+ kernel_fpu_resched();
serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
}
+ serpent_fpu_end_rt(ctx);
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
__serpent_encrypt(ctx->ctx, srcdst, srcdst);
@@ -223,10 +240,12 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
}
while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
+ kernel_fpu_resched();
serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
}
+ serpent_fpu_end_rt(ctx);
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
__serpent_decrypt(ctx->ctx, srcdst, srcdst);
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 6f778d3daa22..46dcbdbd0518 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -218,16 +218,31 @@ struct crypt_priv {
bool fpu_enabled;
};
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void serpent_fpu_end_rt(struct crypt_priv *ctx)
+{
+ bool fpu_enabled = ctx->fpu_enabled;
+
+ if (!fpu_enabled)
+ return;
+ serpent_fpu_end(fpu_enabled);
+ ctx->fpu_enabled = false;
+}
+
+#else
+static void serpent_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = SERPENT_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+ ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
+ serpent_fpu_end_rt(ctx);
return;
}
@@ -241,10 +256,10 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
struct crypt_priv *ctx = priv;
int i;
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+ ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
+ serpent_fpu_end_rt(ctx);
return;
}
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 644f97ab8cac..7b135b4f6262 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -187,16 +187,31 @@ struct crypt_priv {
bool fpu_enabled;
};
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void serpent_fpu_end_rt(struct crypt_priv *ctx)
+{
+ bool fpu_enabled = ctx->fpu_enabled;
+
+ if (!fpu_enabled)
+ return;
+ serpent_fpu_end(fpu_enabled);
+ ctx->fpu_enabled = false;
+}
+
+#else
+static void serpent_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = SERPENT_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+ ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+ serpent_fpu_end_rt(ctx);
return;
}
@@ -210,10 +225,10 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
struct crypt_priv *ctx = priv;
int i;
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+ ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+ serpent_fpu_end_rt(ctx);
return;
}
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index b7a3904b953c..de00fe24927e 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -218,6 +218,21 @@ struct crypt_priv {
bool fpu_enabled;
};
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void twofish_fpu_end_rt(struct crypt_priv *ctx)
+{
+ bool fpu_enabled = ctx->fpu_enabled;
+
+ if (!fpu_enabled)
+ return;
+ twofish_fpu_end(fpu_enabled);
+ ctx->fpu_enabled = false;
+}
+
+#else
+static void twofish_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = TF_BLOCK_SIZE;
@@ -228,12 +243,16 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
+ twofish_fpu_end_rt(ctx);
return;
}
- for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
+ for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) {
+ kernel_fpu_resched();
twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
+ }
+ twofish_fpu_end_rt(ctx);
nbytes %= bsize * 3;
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
@@ -250,11 +269,15 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
+ twofish_fpu_end_rt(ctx);
return;
}
- for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
+ for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) {
+ kernel_fpu_resched();
twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
+ }
+ twofish_fpu_end_rt(ctx);
nbytes %= bsize * 3;
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 1429a7c736db..85428df40a22 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -24,6 +24,7 @@ extern void __kernel_fpu_begin(void);
extern void __kernel_fpu_end(void);
extern void kernel_fpu_begin(void);
extern void kernel_fpu_end(void);
+extern void kernel_fpu_resched(void);
extern bool irq_fpu_usable(void);
/*
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 96d80dfac383..6e473a44afb7 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -158,6 +158,18 @@ void kernel_fpu_end(void)
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
+void kernel_fpu_resched(void)
+{
+ WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+ if (should_resched(PREEMPT_OFFSET)) {
+ kernel_fpu_end();
+ cond_resched();
+ kernel_fpu_begin();
+ }
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_resched);
+
/*
* CR0::TS save/restore functions:
*/
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
During the stable update the arm architecture did not compile anymore
due to missing definition of u16/32.
Cc: [email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit 1289b06974d64f244a26455fab699c6a1332f4bc)
Signed-off-by: Julia Cartwright <[email protected]>
---
include/linux/spinlock_types_raw.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h
index edffc4d53fc9..03235b475b77 100644
--- a/include/linux/spinlock_types_raw.h
+++ b/include/linux/spinlock_types_raw.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
#define __LINUX_SPINLOCK_TYPES_RAW_H
+#include <linux/types.h>
+
#if defined(CONFIG_SMP)
# include <asm/spinlock_types.h>
#else
--
2.18.0
From: Peter Zijlstra <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
[ Upstream commit c74aef2d06a9f59cece89093eecc552933cba72a ]
There was a reported suspicion about a race between exit_pi_state_list()
and put_pi_state(). The same report mentioned the comment with
put_pi_state() said it should be called with hb->lock held, and it no
longer is in all places.
As it turns out, the pi_state->owner serialization is indeed broken. As per
the new rules:
734009e96d19 ("futex: Change locking rules")
pi_state->owner should be serialized by pi_state->pi_mutex.wait_lock.
For the sites setting pi_state->owner we already hold wait_lock (where
required) but exit_pi_state_list() and put_pi_state() were not and
raced on clearing it.
Fixes: 734009e96d19 ("futex: Change locking rules")
Reported-by: Gratian Crisan <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Julia Cartwright <[email protected]>
---
kernel/futex.c | 34 ++++++++++++++++++++++------------
1 file changed, 22 insertions(+), 12 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 8ab0ddd4cf8f..47e42faad6c5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -819,8 +819,6 @@ static void get_pi_state(struct futex_pi_state *pi_state)
/*
* Drops a reference to the pi_state object and frees or caches it
* when the last reference is gone.
- *
- * Must be called with the hb lock held.
*/
static void put_pi_state(struct futex_pi_state *pi_state)
{
@@ -835,16 +833,22 @@ static void put_pi_state(struct futex_pi_state *pi_state)
* and has cleaned up the pi_state already
*/
if (pi_state->owner) {
- raw_spin_lock_irq(&pi_state->owner->pi_lock);
- list_del_init(&pi_state->list);
- raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+ struct task_struct *owner;
- rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ owner = pi_state->owner;
+ if (owner) {
+ raw_spin_lock(&owner->pi_lock);
+ list_del_init(&pi_state->list);
+ raw_spin_unlock(&owner->pi_lock);
+ }
+ rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
}
- if (current->pi_state_cache)
+ if (current->pi_state_cache) {
kfree(pi_state);
- else {
+ } else {
/*
* pi_state->list is already empty.
* clear pi_state->owner.
@@ -903,14 +907,15 @@ void exit_pi_state_list(struct task_struct *curr)
raw_spin_unlock_irq(&curr->pi_lock);
spin_lock(&hb->lock);
-
- raw_spin_lock_irq(&curr->pi_lock);
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+ raw_spin_lock(&curr->pi_lock);
/*
* We dropped the pi-lock, so re-check whether this
* task still owns the PI-state:
*/
if (head->next != next) {
- raw_spin_unlock_irq(&curr->pi_lock);
+ raw_spin_unlock(&curr->pi_lock);
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(&hb->lock);
raw_spin_lock_irq(&curr->pi_lock);
continue;
@@ -920,9 +925,10 @@ void exit_pi_state_list(struct task_struct *curr)
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
pi_state->owner = NULL;
- raw_spin_unlock_irq(&curr->pi_lock);
+ raw_spin_unlock(&curr->pi_lock);
get_pi_state(pi_state);
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(&hb->lock);
rt_mutex_futex_unlock(&pi_state->pi_mutex);
@@ -1204,6 +1210,10 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &p->pi_state_list);
+ /*
+ * Assignment without holding pi_state->pi_mutex.wait_lock is safe
+ * because there is no concurrency as the object is not published yet.
+ */
pi_state->owner = p;
raw_spin_unlock_irq(&p->pi_lock);
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
NEON in kernel mode is used by the crypto algorithms and raid6 code.
While the raid6 code looks okay, the crypto algorithms do not: NEON
is enabled on first invocation and may allocate/free/map memory before
the NEON mode is disabled again.
This needs to be changed until it can be enabled.
On ARM NEON in kernel mode can be simply disabled. on ARM64 it needs to
stay on due to possible EFI callbacks so here I disable each algorithm.
Cc: [email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit b3a776555e0d465df138d254d6dc3ac1b718ac6d)
Signed-off-by: Julia Cartwright <[email protected]>
---
arch/arm/Kconfig | 2 +-
arch/arm64/crypto/Kconfig | 14 +++++++-------
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 5715844e83e3..8c40f7d73251 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2158,7 +2158,7 @@ config NEON
config KERNEL_MODE_NEON
bool "Support for NEON in kernel mode"
- depends on NEON && AEABI
+ depends on NEON && AEABI && !PREEMPT_RT_BASE
help
Say Y to include support for NEON in kernel mode.
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 2cf32e9887e1..cd71b3432720 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -10,41 +10,41 @@ if ARM64_CRYPTO
config CRYPTO_SHA1_ARM64_CE
tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
config CRYPTO_SHA2_ARM64_CE
tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_HASH
config CRYPTO_AES_ARM64_CE
tristate "AES core cipher using ARMv8 Crypto Extensions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_ALGAPI
config CRYPTO_AES_ARM64_CE_CCM
tristate "AES in CCM mode using ARMv8 Crypto Extensions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_ALGAPI
select CRYPTO_AES_ARM64_CE
select CRYPTO_AEAD
config CRYPTO_AES_ARM64_CE_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_AES_ARM64_CE
select CRYPTO_ABLK_HELPER
config CRYPTO_AES_ARM64_NEON_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
- depends on ARM64 && KERNEL_MODE_NEON
+ depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE
select CRYPTO_BLKCIPHER
select CRYPTO_AES
select CRYPTO_ABLK_HELPER
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
| BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914
| in_atomic(): 1, irqs_disabled(): 0, pid: 255, name: kworker/u257:6
| 5 locks held by kworker/u257:6/255:
| #0: ("events_unbound"){.+.+.+}, at: [<ffffffff8108edf1>] process_one_work+0x171/0x5e0
| #1: ((&entry->work)){+.+.+.}, at: [<ffffffff8108edf1>] process_one_work+0x171/0x5e0
| #2: (&shost->scan_mutex){+.+.+.}, at: [<ffffffffa000faa3>] __scsi_add_device+0xa3/0x130 [scsi_mod]
| #3: (&set->tag_list_lock){+.+...}, at: [<ffffffff812f09fa>] blk_mq_init_queue+0x96a/0xa50
| #4: (rcu_read_lock_sched){......}, at: [<ffffffff8132887d>] percpu_ref_kill_and_confirm+0x1d/0x120
| Preemption disabled at:[<ffffffff812eff76>] blk_mq_freeze_queue_start+0x56/0x70
|
| CPU: 2 PID: 255 Comm: kworker/u257:6 Not tainted 3.18.7-rt0+ #1
| Workqueue: events_unbound async_run_entry_fn
| 0000000000000003 ffff8800bc29f998 ffffffff815b3a12 0000000000000000
| 0000000000000000 ffff8800bc29f9b8 ffffffff8109aa16 ffff8800bc29fa28
| ffff8800bc5d1bc8 ffff8800bc29f9e8 ffffffff815b8dd4 ffff880000000000
| Call Trace:
| [<ffffffff815b3a12>] dump_stack+0x4f/0x7c
| [<ffffffff8109aa16>] __might_sleep+0x116/0x190
| [<ffffffff815b8dd4>] rt_spin_lock+0x24/0x60
| [<ffffffff810b6089>] __wake_up+0x29/0x60
| [<ffffffff812ee06e>] blk_mq_usage_counter_release+0x1e/0x20
| [<ffffffff81328966>] percpu_ref_kill_and_confirm+0x106/0x120
| [<ffffffff812eff76>] blk_mq_freeze_queue_start+0x56/0x70
| [<ffffffff812f0000>] blk_mq_update_tag_set_depth+0x40/0xd0
| [<ffffffff812f0a1c>] blk_mq_init_queue+0x98c/0xa50
| [<ffffffffa000dcf0>] scsi_mq_alloc_queue+0x20/0x60 [scsi_mod]
| [<ffffffffa000ea35>] scsi_alloc_sdev+0x2f5/0x370 [scsi_mod]
| [<ffffffffa000f494>] scsi_probe_and_add_lun+0x9e4/0xdd0 [scsi_mod]
| [<ffffffffa000fb26>] __scsi_add_device+0x126/0x130 [scsi_mod]
| [<ffffffffa013033f>] ata_scsi_scan_host+0xaf/0x200 [libata]
| [<ffffffffa012b5b6>] async_port_probe+0x46/0x60 [libata]
| [<ffffffff810978fb>] async_run_entry_fn+0x3b/0xf0
| [<ffffffff8108ee81>] process_one_work+0x201/0x5e0
percpu_ref_kill_and_confirm() invokes blk_mq_usage_counter_release() in
a rcu-sched region. swait based wake queue can't be used due to
wake_up_all() usage and disabled interrupts in !RT configs (as reported
by Corey Minyard).
The wq_has_sleeper() check has been suggested by Peter Zijlstra.
Cc: [email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit 2d701058d614554cce412a787f00568b9fdffade)
Signed-off-by: Julia Cartwright <[email protected]>
---
block/blk-core.c | 14 +++++++++++++-
include/linux/blkdev.h | 2 ++
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 87d3e0a503e5..346d5bba3948 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -675,12 +675,21 @@ void blk_queue_exit(struct request_queue *q)
percpu_ref_put(&q->q_usage_counter);
}
+static void blk_queue_usage_counter_release_swork(struct swork_event *sev)
+{
+ struct request_queue *q =
+ container_of(sev, struct request_queue, mq_pcpu_wake);
+
+ wake_up_all(&q->mq_freeze_wq);
+}
+
static void blk_queue_usage_counter_release(struct percpu_ref *ref)
{
struct request_queue *q =
container_of(ref, struct request_queue, q_usage_counter);
- wake_up_all(&q->mq_freeze_wq);
+ if (wq_has_sleeper(&q->mq_freeze_wq))
+ swork_queue(&q->mq_pcpu_wake);
}
static void blk_rq_timed_out_timer(unsigned long data)
@@ -751,6 +760,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
init_waitqueue_head(&q->mq_freeze_wq);
+ INIT_SWORK(&q->mq_pcpu_wake, blk_queue_usage_counter_release_swork);
/*
* Init percpu_ref in atomic mode so that it's faster to shutdown.
@@ -3556,6 +3566,8 @@ int __init blk_dev_init(void)
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
+ BUG_ON(swork_get());
+
request_cachep = kmem_cache_create("blkdev_requests",
sizeof(struct request), 0, SLAB_PANIC, NULL);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fdb449fe3ff7..ab039211ab9f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -24,6 +24,7 @@
#include <linux/rcupdate.h>
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
+#include <linux/swork.h>
struct module;
struct scsi_ioctl_command;
@@ -469,6 +470,7 @@ struct request_queue {
#endif
struct rcu_head rcu_head;
wait_queue_head_t mq_freeze_wq;
+ struct swork_event mq_pcpu_wake;
struct percpu_ref q_usage_counter;
struct list_head all_q_node;
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
[ upstream commit b88697810d7c1d102a529990f9071b0f14cfe6df ]
This commit adjusts include files and provides definitions in preparation
for suppressing lockdep false-positive ->boost_mtx complaints. Without
this preparation, architectures not supporting rt_mutex will get build
failures.
Reported-by: kbuild test robot <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
Signed-off-by: Julia Cartwright <[email protected]>
---
kernel/rcu/tree_plugin.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a1a7bafcce15..3d18d08e8382 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -37,6 +37,7 @@
* This probably needs to be excluded from -rt builds.
*/
#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; })
+#define rt_mutex_futex_unlock(x) WARN_ON_ONCE(1)
#endif /* #else #ifdef CONFIG_RCU_BOOST */
@@ -834,8 +835,6 @@ static void rcu_cpu_kthread_setup(unsigned int cpu)
#ifdef CONFIG_RCU_BOOST
-#include "../locking/rtmutex_common.h"
-
#ifdef CONFIG_RCU_TRACE
static void rcu_initiate_boost_trace(struct rcu_node *rnp)
--
2.18.0
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
Provide a set of locallocked accessors for pointers to per-CPU data;
this is useful for dynamically-allocated per-CPU regions, for example.
These are symmetric with the {get,put}_cpu_ptr() per-CPU accessor
variants.
Signed-off-by: Julia Cartwright <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit 3d45cf23db4f76cd356ebb0aa4cdaa7d92d1a64e)
Signed-off-by: Julia Cartwright <[email protected]>
---
include/linux/locallock.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/include/linux/locallock.h b/include/linux/locallock.h
index 280f884a05a3..0c3ff5b23f6a 100644
--- a/include/linux/locallock.h
+++ b/include/linux/locallock.h
@@ -238,6 +238,14 @@ static inline int __local_unlock_irqrestore(struct local_irq_lock *lv,
#define put_locked_var(lvar, var) local_unlock(lvar);
+#define get_locked_ptr(lvar, var) \
+ ({ \
+ local_lock(lvar); \
+ this_cpu_ptr(var); \
+ })
+
+#define put_locked_ptr(lvar, var) local_unlock(lvar);
+
#define local_lock_cpu(lvar) \
({ \
local_lock(lvar); \
@@ -278,6 +286,8 @@ static inline void local_irq_lock_init(int lvar) { }
#define get_locked_var(lvar, var) get_cpu_var(var)
#define put_locked_var(lvar, var) put_cpu_var(var)
+#define get_locked_ptr(lvar, var) get_cpu_ptr(var)
+#define put_locked_ptr(lvar, var) put_cpu_ptr(var)
#define local_lock_cpu(lvar) get_cpu()
#define local_unlock_cpu(lvar) put_cpu()
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
On RT the timer can be preempted while running and therefore we wait
with timer_wait_for_callback() for the timer to complete (instead of
busy looping). The RCU-readlock is held to ensure that this posix timer
is not removed while we wait on it.
If the timer is removed then it invokes call_rcu() with a pointer that
is shared with the hrtimer because it is part of the same union.
In order to avoid any possible side effects I am moving the rcu pointer
out of the union.
Cc: [email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit b8401365af110949f12c7cf1fa86b4c0ea069bbd)
Signed-off-by: Julia Cartwright <[email protected]>
---
include/linux/posix-timers.h | 2 +-
kernel/time/posix-timers.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 62d44c176071..cbd3f9334543 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -92,8 +92,8 @@ struct k_itimer {
struct alarm alarmtimer;
ktime_t interval;
} alarm;
- struct rcu_head rcu;
} it;
+ struct rcu_head rcu;
};
struct k_clock {
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 6084618436fd..45d8033caec4 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -568,7 +568,7 @@ static struct k_itimer * alloc_posix_timer(void)
static void k_itimer_rcu_free(struct rcu_head *head)
{
- struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
+ struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);
kmem_cache_free(posix_timers_cache, tmr);
}
@@ -585,7 +585,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
}
put_pid(tmr->it_pid);
sigqueue_free(tmr->sigq);
- call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
+ call_rcu(&tmr->rcu, k_itimer_rcu_free);
}
static struct k_clock *clockid_to_kclock(const clockid_t id)
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
Under certain circumstances we could leak elements which were moved to
the local "to_free" list. The damage is limited since I can't find
any users here.
Cc: [email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit 5022166d3b225bf5e343efb3ea01b3c5a41d69ba)
Signed-off-by: Julia Cartwright <[email protected]>
---
mm/slub.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/mm/slub.c b/mm/slub.c
index 67eb368b9314..738b2bccbd5f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3209,6 +3209,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
return i;
error:
local_irq_enable();
+ free_delayed(&to_free);
slab_post_alloc_hook(s, flags, i, p);
__kmem_cache_free_bulk(s, i, p);
return 0;
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
If alarm_try_to_cancel() requires a retry, then depending on the
priority setting the retry loop might prevent timer callback completion
on RT. Prevent that by waiting for completion on RT, no change for a
non RT kernel.
Cc: [email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry picked from commit 51e376c469bf05f32cb1ceb9e39d31bb92f1f6c8)
Signed-off-by: Julia Cartwright <[email protected]>
---
kernel/time/alarmtimer.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index d67ef56ca9bc..61b20a656863 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -407,7 +407,7 @@ int alarm_cancel(struct alarm *alarm)
int ret = alarm_try_to_cancel(alarm);
if (ret >= 0)
return ret;
- cpu_relax();
+ hrtimer_wait_for_timer(&alarm->timer);
}
}
EXPORT_SYMBOL_GPL(alarm_cancel);
--
2.18.0
From: Sebastian Andrzej Siewior <[email protected]>
4.9.115-rt94-rc1 stable review patch.
If you have any objection to the inclusion of this patch, let me know.
--- 8< --- 8< --- 8< ---
This reverts commit "block: blk-mq: Use swait". The issue remains but
will be fixed differently.
Cc: [email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
(cherry-picked from ca3fd6cf836739fd59eac2f7a9b0261365e818bb)
Signed-off-by: Julia Cartwright <[email protected]>
---
block/blk-core.c | 10 +++++-----
block/blk-mq.c | 6 +++---
include/linux/blkdev.h | 2 +-
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index e4ac43392875..87d3e0a503e5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -662,9 +662,9 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
if (nowait)
return -EBUSY;
- swait_event(q->mq_freeze_wq,
- !atomic_read(&q->mq_freeze_depth) ||
- blk_queue_dying(q));
+ wait_event(q->mq_freeze_wq,
+ !atomic_read(&q->mq_freeze_depth) ||
+ blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
}
@@ -680,7 +680,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
struct request_queue *q =
container_of(ref, struct request_queue, q_usage_counter);
- swake_up_all(&q->mq_freeze_wq);
+ wake_up_all(&q->mq_freeze_wq);
}
static void blk_rq_timed_out_timer(unsigned long data)
@@ -750,7 +750,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
q->bypass_depth = 1;
__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
- init_swait_queue_head(&q->mq_freeze_wq);
+ init_waitqueue_head(&q->mq_freeze_wq);
/*
* Init percpu_ref in atomic mode so that it's faster to shutdown.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e0a804ab5420..3a49552974ec 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
static void blk_mq_freeze_queue_wait(struct request_queue *q)
{
- swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
+ wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
}
/*
@@ -110,7 +110,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
WARN_ON_ONCE(freeze_depth < 0);
if (!freeze_depth) {
percpu_ref_reinit(&q->q_usage_counter);
- swake_up_all(&q->mq_freeze_wq);
+ wake_up_all(&q->mq_freeze_wq);
}
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
@@ -129,7 +129,7 @@ void blk_mq_wake_waiters(struct request_queue *q)
* dying, we need to ensure that processes currently waiting on
* the queue are notified as well.
*/
- swake_up_all(&q->mq_freeze_wq);
+ wake_up_all(&q->mq_freeze_wq);
}
bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 34fd1ed9845e..fdb449fe3ff7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -468,7 +468,7 @@ struct request_queue {
struct throtl_data *td;
#endif
struct rcu_head rcu_head;
- struct swait_queue_head mq_freeze_wq;
+ wait_queue_head_t mq_freeze_wq;
struct percpu_ref q_usage_counter;
struct list_head all_q_node;
--
2.18.0