-stable review patch. If anyone has any objections, please let us know.
---------------------
From: Thomas Gleixner <[email protected]>
Not all architectures implement futex_atomic_cmpxchg_inatomic(). The default
implementation returns -ENOSYS, which is currently not handled inside of the
futex guts.
Futex PI calls and robust list exits with a held futex result in an endless
loop in the futex code on architectures which have no support.
Fixing up every place where futex_atomic_cmpxchg_inatomic() is called would
add a fair amount of extra if/else constructs to the already complex code. It
is also not possible to disable the robust feature before user space tries to
register robust lists.
Compile time disabling is not a good idea either, as there are already
architectures with runtime detection of futex_atomic_cmpxchg_inatomic support.
Detect the functionality at runtime instead by calling
cmpxchg_futex_value_locked() with a NULL pointer from the futex initialization
code. This is guaranteed to fail, but the call of
futex_atomic_cmpxchg_inatomic() happens with pagefaults disabled.
On architectures, which use the asm-generic implementation or have a runtime
CPU feature detection, a -ENOSYS return value disables the PI/robust features.
On architectures with a working implementation the call returns -EFAULT and
the PI/robust features are enabled.
The relevant syscalls return -ENOSYS and the robust list exit code is blocked,
when the detection fails.
Fixes http://lkml.org/lkml/2008/2/11/149
Originally reported by: Lennart Buytenhek
Signed-off-by: Thomas Gleixner <[email protected]>
Acked-by: Ingo Molnar <[email protected]>
Cc: Lennert Buytenhek <[email protected]>
Cc: Riku Voipio <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Chris Wright <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
---
include/linux/futex.h | 1 +
kernel/futex.c | 38 ++++++++++++++++++++++++++++++++++----
kernel/futex_compat.c | 9 +++++++++
3 files changed, 44 insertions(+), 4 deletions(-)
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -153,6 +153,7 @@ union futex_key {
#ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr);
extern void exit_pi_state_list(struct task_struct *curr);
+extern int futex_cmpxchg_enabled;
#else
static inline void exit_robust_list(struct task_struct *curr)
{
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -60,6 +60,8 @@
#include "rtmutex_common.h"
+int __read_mostly futex_cmpxchg_enabled;
+
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
/*
@@ -466,6 +468,8 @@ void exit_pi_state_list(struct task_stru
struct futex_hash_bucket *hb;
union futex_key key;
+ if (!futex_cmpxchg_enabled)
+ return;
/*
* We are a ZOMBIE and nobody can enqueue itself on
* pi_state_list anymore, but we have to be careful
@@ -1854,6 +1858,8 @@ asmlinkage long
sys_set_robust_list(struct robust_list_head __user *head,
size_t len)
{
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
/*
* The kernel knows only one size for now:
*/
@@ -1878,6 +1884,9 @@ sys_get_robust_list(int pid, struct robu
struct robust_list_head __user *head;
unsigned long ret;
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
if (!pid)
head = current->robust_list;
else {
@@ -1980,6 +1989,9 @@ void exit_robust_list(struct task_struct
unsigned long futex_offset;
int rc;
+ if (!futex_cmpxchg_enabled)
+ return;
+
/*
* Fetch the list head (which was registered earlier, via
* sys_set_robust_list()):
@@ -2034,7 +2046,7 @@ void exit_robust_list(struct task_struct
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3)
{
- int ret;
+ int ret = -ENOSYS;
int cmd = op & FUTEX_CMD_MASK;
struct rw_semaphore *fshared = NULL;
@@ -2062,13 +2074,16 @@ long do_futex(u32 __user *uaddr, int op,
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
break;
case FUTEX_LOCK_PI:
- ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
+ if (futex_cmpxchg_enabled)
+ ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
break;
case FUTEX_UNLOCK_PI:
- ret = futex_unlock_pi(uaddr, fshared);
+ if (futex_cmpxchg_enabled)
+ ret = futex_unlock_pi(uaddr, fshared);
break;
case FUTEX_TRYLOCK_PI:
- ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
+ if (futex_cmpxchg_enabled)
+ ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
break;
default:
ret = -ENOSYS;
@@ -2123,8 +2138,23 @@ static struct file_system_type futex_fs_
static int __init init(void)
{
+ u32 curval;
int i;
+ /*
+ * This will fail and we want it. Some arch implementations do
+ * runtime detection of the futex_atomic_cmpxchg_inatomic()
+ * functionality. We want to know that before we call in any
+ * of the complex code paths. Also we want to prevent
+ * registration of robust lists in that case. NULL is
+ * guaranteed to fault and we get -EFAULT on functional
+ * implementation, the non functional ones will return
+ * -ENOSYS.
+ */
+ curval = cmpxchg_futex_value_locked(NULL, 0, 0);
+ if (curval == -EFAULT)
+ futex_cmpxchg_enabled = 1;
+
for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
spin_lock_init(&futex_queues[i].lock);
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -54,6 +54,9 @@ void compat_exit_robust_list(struct task
compat_long_t futex_offset;
int rc;
+ if (!futex_cmpxchg_enabled)
+ return;
+
/*
* Fetch the list head (which was registered earlier, via
* sys_set_robust_list()):
@@ -115,6 +118,9 @@ asmlinkage long
compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
compat_size_t len)
{
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
if (unlikely(len != sizeof(*head)))
return -EINVAL;
@@ -130,6 +136,9 @@ compat_sys_get_robust_list(int pid, comp
struct compat_robust_list_head __user *head;
unsigned long ret;
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
if (!pid)
head = current->compat_robust_list;
else {
--
On Fri, 21 Mar 2008, Chris Wright wrote:
> -stable review patch. If anyone has any objections, please let us know.
> ---------------------
For this one you need the S390 fixes:
504e75d0ed3389747e40a4aaa4b1f4e569576665
Author: Heiko Carstens <[email protected]>
Date: Thu Mar 20 17:33:38 2008 +0100
[S390] futex: let futex_atomic_cmpxchg_pt survive early functional tests.
commit d5b02b3ff1d9a2e1074f559c84ed378cfa6fc3c0
Author: Heiko Carstens <[email protected]>
Date: Tue Feb 19 15:29:34 2008 +0100
[S390] Fix futex_atomic_cmpxchg_std inline assembly.
Thanks,
tglx
On Sat, Mar 22, 2008 at 08:37:07AM +0100, Thomas Gleixner wrote:
> On Fri, 21 Mar 2008, Chris Wright wrote:
>
> > -stable review patch. If anyone has any objections, please let us know.
> > ---------------------
>
> For this one you need the S390 fixes:
>
> 504e75d0ed3389747e40a4aaa4b1f4e569576665
> Author: Heiko Carstens <[email protected]>
> Date: Thu Mar 20 17:33:38 2008 +0100
>
> [S390] futex: let futex_atomic_cmpxchg_pt survive early functional tests.
Yes, forgot to add -stable to the cc list. Sorry.
> commit d5b02b3ff1d9a2e1074f559c84ed378cfa6fc3c0
> Author: Heiko Carstens <[email protected]>
> Date: Tue Feb 19 15:29:34 2008 +0100
>
> [S390] Fix futex_atomic_cmpxchg_std inline assembly.
This one is already in 2.6.24.3.
* Heiko Carstens ([email protected]) wrote:
> > 504e75d0ed3389747e40a4aaa4b1f4e569576665
> > Author: Heiko Carstens <[email protected]>
> > Date: Thu Mar 20 17:33:38 2008 +0100
> >
> > [S390] futex: let futex_atomic_cmpxchg_pt survive early functional tests.
>
> Yes, forgot to add -stable to the cc list. Sorry.
Got it, thanks.
-chris
From: Thomas Gleixner <[email protected]>
Date: Sat, 22 Mar 2008 08:37:07 +0100 (CET)
> On Fri, 21 Mar 2008, Chris Wright wrote:
>
> > -stable review patch. If anyone has any objections, please let us know.
> > ---------------------
>
> For this one you need the S390 fixes:
And sparc64 needed similar fixes, but they did get submitted in time
thankfully.
I think this futex change should have gotten more exposure, and proper
coordination with arch maintainers so that it didn't go into -stable
unless the necessary arch level fixups were known to go in at the same
time.
On Fri, 2008-03-21 at 15:43 -0700, Chris Wright wrote:
> plain text document attachment
> (futex-runtime-enable-pi-and-robust-functionality.patch)
> -stable review patch. If anyone has any objections, please let us know.
> ---------------------
>
> From: Thomas Gleixner <[email protected]>
>
> Not all architectures implement futex_atomic_cmpxchg_inatomic(). The default
> implementation returns -ENOSYS, which is currently not handled inside of the
> futex guts.
I think that breaks some embedded PowerPC platforms. It's also very
fishy... So early, we haven't activated an mm yet, it's like trying
to access user memory from a kernel thread... might happen to give
you -EFAULT on x86 but you are making pretty big assumptions on how
low level mm works in architectures...
Ben.
> Futex PI calls and robust list exits with a held futex result in an endless
> loop in the futex code on architectures which have no support.
>
> Fixing up every place where futex_atomic_cmpxchg_inatomic() is called would
> add a fair amount of extra if/else constructs to the already complex code. It
> is also not possible to disable the robust feature before user space tries to
> register robust lists.
>
> Compile time disabling is not a good idea either, as there are already
> architectures with runtime detection of futex_atomic_cmpxchg_inatomic support.
>
> Detect the functionality at runtime instead by calling
> cmpxchg_futex_value_locked() with a NULL pointer from the futex initialization
> code. This is guaranteed to fail, but the call of
> futex_atomic_cmpxchg_inatomic() happens with pagefaults disabled.
>
> On architectures, which use the asm-generic implementation or have a runtime
> CPU feature detection, a -ENOSYS return value disables the PI/robust features.
>
> On architectures with a working implementation the call returns -EFAULT and
> the PI/robust features are enabled.
>
> The relevant syscalls return -ENOSYS and the robust list exit code is blocked,
> when the detection fails.
>
> Fixes http://lkml.org/lkml/2008/2/11/149
> Originally reported by: Lennart Buytenhek
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Acked-by: Ingo Molnar <[email protected]>
> Cc: Lennert Buytenhek <[email protected]>
> Cc: Riku Voipio <[email protected]>
> Signed-off-by: Andrew Morton <[email protected]>
> Signed-off-by: Linus Torvalds <[email protected]>
> Signed-off-by: Chris Wright <[email protected]>
> Signed-off-by: Greg Kroah-Hartman <[email protected]>
> ---
> include/linux/futex.h | 1 +
> kernel/futex.c | 38 ++++++++++++++++++++++++++++++++++----
> kernel/futex_compat.c | 9 +++++++++
> 3 files changed, 44 insertions(+), 4 deletions(-)
>
> --- a/include/linux/futex.h
> +++ b/include/linux/futex.h
> @@ -153,6 +153,7 @@ union futex_key {
> #ifdef CONFIG_FUTEX
> extern void exit_robust_list(struct task_struct *curr);
> extern void exit_pi_state_list(struct task_struct *curr);
> +extern int futex_cmpxchg_enabled;
> #else
> static inline void exit_robust_list(struct task_struct *curr)
> {
> --- a/kernel/futex.c
> +++ b/kernel/futex.c
> @@ -60,6 +60,8 @@
>
> #include "rtmutex_common.h"
>
> +int __read_mostly futex_cmpxchg_enabled;
> +
> #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
>
> /*
> @@ -466,6 +468,8 @@ void exit_pi_state_list(struct task_stru
> struct futex_hash_bucket *hb;
> union futex_key key;
>
> + if (!futex_cmpxchg_enabled)
> + return;
> /*
> * We are a ZOMBIE and nobody can enqueue itself on
> * pi_state_list anymore, but we have to be careful
> @@ -1854,6 +1858,8 @@ asmlinkage long
> sys_set_robust_list(struct robust_list_head __user *head,
> size_t len)
> {
> + if (!futex_cmpxchg_enabled)
> + return -ENOSYS;
> /*
> * The kernel knows only one size for now:
> */
> @@ -1878,6 +1884,9 @@ sys_get_robust_list(int pid, struct robu
> struct robust_list_head __user *head;
> unsigned long ret;
>
> + if (!futex_cmpxchg_enabled)
> + return -ENOSYS;
> +
> if (!pid)
> head = current->robust_list;
> else {
> @@ -1980,6 +1989,9 @@ void exit_robust_list(struct task_struct
> unsigned long futex_offset;
> int rc;
>
> + if (!futex_cmpxchg_enabled)
> + return;
> +
> /*
> * Fetch the list head (which was registered earlier, via
> * sys_set_robust_list()):
> @@ -2034,7 +2046,7 @@ void exit_robust_list(struct task_struct
> long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
> u32 __user *uaddr2, u32 val2, u32 val3)
> {
> - int ret;
> + int ret = -ENOSYS;
> int cmd = op & FUTEX_CMD_MASK;
> struct rw_semaphore *fshared = NULL;
>
> @@ -2062,13 +2074,16 @@ long do_futex(u32 __user *uaddr, int op,
> ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
> break;
> case FUTEX_LOCK_PI:
> - ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
> + if (futex_cmpxchg_enabled)
> + ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
> break;
> case FUTEX_UNLOCK_PI:
> - ret = futex_unlock_pi(uaddr, fshared);
> + if (futex_cmpxchg_enabled)
> + ret = futex_unlock_pi(uaddr, fshared);
> break;
> case FUTEX_TRYLOCK_PI:
> - ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
> + if (futex_cmpxchg_enabled)
> + ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
> break;
> default:
> ret = -ENOSYS;
> @@ -2123,8 +2138,23 @@ static struct file_system_type futex_fs_
>
> static int __init init(void)
> {
> + u32 curval;
> int i;
>
> + /*
> + * This will fail and we want it. Some arch implementations do
> + * runtime detection of the futex_atomic_cmpxchg_inatomic()
> + * functionality. We want to know that before we call in any
> + * of the complex code paths. Also we want to prevent
> + * registration of robust lists in that case. NULL is
> + * guaranteed to fault and we get -EFAULT on functional
> + * implementation, the non functional ones will return
> + * -ENOSYS.
> + */
> + curval = cmpxchg_futex_value_locked(NULL, 0, 0);
> + if (curval == -EFAULT)
> + futex_cmpxchg_enabled = 1;
> +
> for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
> plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
> spin_lock_init(&futex_queues[i].lock);
> --- a/kernel/futex_compat.c
> +++ b/kernel/futex_compat.c
> @@ -54,6 +54,9 @@ void compat_exit_robust_list(struct task
> compat_long_t futex_offset;
> int rc;
>
> + if (!futex_cmpxchg_enabled)
> + return;
> +
> /*
> * Fetch the list head (which was registered earlier, via
> * sys_set_robust_list()):
> @@ -115,6 +118,9 @@ asmlinkage long
> compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
> compat_size_t len)
> {
> + if (!futex_cmpxchg_enabled)
> + return -ENOSYS;
> +
> if (unlikely(len != sizeof(*head)))
> return -EINVAL;
>
> @@ -130,6 +136,9 @@ compat_sys_get_robust_list(int pid, comp
> struct compat_robust_list_head __user *head;
> unsigned long ret;
>
> + if (!futex_cmpxchg_enabled)
> + return -ENOSYS;
> +
> if (!pid)
> head = current->compat_robust_list;
> else {
>