2006-10-16 11:28:18

by Peter Zijlstra

[permalink] [raw]
Subject: [RFC][PATCH] ->signal->tty locking

Oleg wrote:
"Historically ->signal/->sighand (both ptrs and their contents) were globally
protected by tasklist_lock. 'current' can use these pointers lockless, they
can't be changed under him.

Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock.
Unless you are current, you can't lock ->siglock directly (without holding
tasklist_lock), you should use lock_task_sighand()."

Then, to be consistent with the rest of the kernel, ->signal->tty
locking should look like so:

mutex_lock(&tty_mutex)
read_lock(&tasklist_lock)
lock_task_sighand(p, &flags)

However, lock_task_sighand() is a conditional lock: p might not have a
->sighand, in which case it returns NULL. What would that mean for
->signal? Can I then still modify it?

struct sighand_struct *sighand = lock_task_sighand(p, &flags);
if (sighand) {
/* modify/use ->signal->tty */
unlock_task_sighand(p, &flags);
} else
/* now what !? */

The same problem appears in fs/proc/array.c:do_task_stat(), there the
locking doesn't look quite right either.

Before realizing this I came this far:
---
drivers/char/tty_io.c | 86 ++++++++++++++++++++++++++++++++++++++------------
kernel/auditsc.c | 1
kernel/exit.c | 3 +
kernel/sys.c | 4 ++
4 files changed, 75 insertions(+), 19 deletions(-)

Index: linux-2.6/drivers/char/tty_io.c
===================================================================
--- linux-2.6.orig/drivers/char/tty_io.c
+++ linux-2.6/drivers/char/tty_io.c
@@ -63,6 +63,12 @@
*
* Move do_SAK() into process context. Less stack use in devfs functions.
* alloc_tty_struct() always uses kmalloc() -- Andrew Morton <[email protected]> 17Mar01
+ *
+ * A word on (struct task)::signal->tty locking
+ *
+ * mutex_lock(&tty_mutex)
+ * read_lock(&tasklist_lock)
+ * lock_task_sighand()
*/

#include <linux/types.h>
@@ -1282,6 +1288,7 @@ static void do_tty_hangup(void *data)
struct task_struct *p;
struct tty_ldisc *ld;
int closecount = 0, n;
+ unsigned long flags;

if (!tty)
return;
@@ -1350,20 +1357,26 @@ static void do_tty_hangup(void *data)
This should get done automatically when the port closes and
tty_release is called */

+ mutex_lock(&tty_mutex);
read_lock(&tasklist_lock);
if (tty->session > 0) {
do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+ lock_task_sighand(p, &flags);
if (p->signal->tty == tty)
p->signal->tty = NULL;
+ unlock_task_sighand(p, &flags);
if (!p->signal->leader)
continue;
group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+ lock_task_sighand(p, &flags);
if (tty->pgrp > 0)
p->signal->tty_old_pgrp = tty->pgrp;
+ unlock_task_sighand(p, &flags);
} while_each_task_pid(tty->session, PIDTYPE_SID, p);
}
read_unlock(&tasklist_lock);
+ mutex_unlock(&tty_mutex);

tty->flags = 0;
tty->session = 0;
@@ -1479,10 +1492,12 @@ void disassociate_ctty(int on_exit)
struct tty_struct *tty;
struct task_struct *p;
int tty_pgrp = -1;
+ unsigned long flags;

lock_kernel();

mutex_lock(&tty_mutex);
+ /* XXX is this safe wrt siglock!? */
tty = current->signal->tty;
if (tty) {
tty_pgrp = tty->pgrp;
@@ -1490,9 +1505,10 @@ void disassociate_ctty(int on_exit)
if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
tty_vhangup(tty);
} else {
- if (current->signal->tty_old_pgrp) {
- kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
- kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
+ pid_t old_pgrp = current->signal->tty_old_pgrp;
+ if (old_pgrp) {
+ kill_pg(old_pgrp, SIGHUP, on_exit);
+ kill_pg(old_pgrp, SIGCONT, on_exit);
}
mutex_unlock(&tty_mutex);
unlock_kernel();
@@ -1506,14 +1522,27 @@ void disassociate_ctty(int on_exit)

/* Must lock changes to tty_old_pgrp */
mutex_lock(&tty_mutex);
+ lock_task_sighand(current, &flags);
current->signal->tty_old_pgrp = 0;
- tty->session = 0;
- tty->pgrp = -1;
+
+ /* It is possible that do_tty_hangup has free'd this tty */
+ if (current->signal->tty) {
+ current->signal->tty->session = 0;
+ current->signal->tty->pgrp = 0;
+ } else {
+#ifdef TTY_DEBUG_HANGUP
+ printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
+ " = NULL", tty);
+#endif
+ }
+ unlock_task_sighand(current, &flags);

/* Now clear signal->tty under the lock */
read_lock(&tasklist_lock);
do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
+ lock_task_sighand(p, &flags);
p->signal->tty = NULL;
+ unlock_task_sighand(p, &flags);
} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
read_unlock(&tasklist_lock);
mutex_unlock(&tty_mutex);
@@ -2340,11 +2369,15 @@ static void release_dev(struct file * fi

read_lock(&tasklist_lock);
do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+ lock_task_sighand(p, &flags);
p->signal->tty = NULL;
+ unlock_task_sighand(p, &flags);
} while_each_task_pid(tty->session, PIDTYPE_SID, p);
if (o_tty)
do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
+ lock_task_sighand(p, &flags);
p->signal->tty = NULL;
+ unlock_task_sighand(p, &flags);
} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
read_unlock(&tasklist_lock);
}
@@ -2455,6 +2488,7 @@ static int tty_open(struct inode * inode
int index;
dev_t device = inode->i_rdev;
unsigned short saved_flags = filp->f_flags;
+ unsigned long flags;

nonseekable_open(inode, filp);

@@ -2466,7 +2500,9 @@ retry_open:
mutex_lock(&tty_mutex);

if (device == MKDEV(TTYAUX_MAJOR,0)) {
+ lock_task_sighand(current, &flags);
if (!current->signal->tty) {
+ unlock_task_sighand(current, &flags);
mutex_unlock(&tty_mutex);
return -ENXIO;
}
@@ -2474,6 +2510,7 @@ retry_open:
index = current->signal->tty->index;
filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
/* noctty = 1; */
+ unlock_task_sighand(current, &flags);
goto got_driver;
}
#ifdef CONFIG_VT
@@ -2546,17 +2583,21 @@ got_driver:
filp->f_op = &tty_fops;
goto retry_open;
}
+
+ mutex_lock(&tty_mutex);
+ lock_task_sighand(current, &flags);
if (!noctty &&
current->signal->leader &&
!current->signal->tty &&
tty->session == 0) {
- task_lock(current);
current->signal->tty = tty;
- task_unlock(current);
current->signal->tty_old_pgrp = 0;
tty->session = current->signal->session;
tty->pgrp = process_group(current);
}
+ /* don't we leak tty in the else case !? */
+ unlock_task_sighand(current, &flags);
+ mutex_unlock(&tty_mutex);
return 0;
}

@@ -2888,6 +2929,7 @@ static int fionbio(struct file *file, in
static int tiocsctty(struct tty_struct *tty, int arg)
{
struct task_struct *p;
+ unsigned long flags;

if (current->signal->leader &&
(current->signal->session == tty->session))
@@ -2898,6 +2940,7 @@ static int tiocsctty(struct tty_struct *
*/
if (!current->signal->leader || current->signal->tty)
return -EPERM;
+ mutex_lock(&tty_mutex);
if (tty->session > 0) {
/*
* This tty is already the controlling
@@ -2910,20 +2953,23 @@ static int tiocsctty(struct tty_struct *

read_lock(&tasklist_lock);
do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+ lock_task_sighand(p, &flags);
p->signal->tty = NULL;
+ unlock_task_sighand(p, &flags);
} while_each_task_pid(tty->session, PIDTYPE_SID, p);
read_unlock(&tasklist_lock);
- } else
+ } else {
+ mutex_unlock(&tty_mutex);
return -EPERM;
+ }
}
- mutex_lock(&tty_mutex);
- task_lock(current);
- current->signal->tty = tty;
- task_unlock(current);
- mutex_unlock(&tty_mutex);
- current->signal->tty_old_pgrp = 0;
tty->session = current->signal->session;
tty->pgrp = process_group(current);
+ lock_task_sighand(current, &flags);
+ current->signal->tty = tty;
+ current->signal->tty_old_pgrp = 0;
+ unlock_task_sighand(current, &flags);
+ mutex_unlock(&tty_mutex);
return 0;
}

@@ -2936,7 +2982,7 @@ static int tiocsctty(struct tty_struct *
* Obtain the process group of the tty. If there is no process group
* return an error.
*
- * Locking: none. Reference to ->signal->tty is safe.
+ * Locking: none. Reference to current->signal->tty is safe.
*/

static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -2994,7 +3040,7 @@ static int tiocspgrp(struct tty_struct *
* Obtain the session id of the tty. If there is no session
* return an error.
*
- * Locking: none. Reference to ->signal->tty is safe.
+ * Locking: none. Reference to current->signal->tty is safe.
*/

static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -3139,6 +3185,7 @@ int tty_ioctl(struct inode * inode, stru
void __user *p = (void __user *)arg;
int retval;
struct tty_ldisc *ld;
+ unsigned long flags;

tty = (struct tty_struct *)file->private_data;
if (tty_paranoia_check(tty, inode, "tty_ioctl"))
@@ -3213,14 +3260,15 @@ int tty_ioctl(struct inode * inode, stru
clear_bit(TTY_EXCLUSIVE, &tty->flags);
return 0;
case TIOCNOTTY:
- /* FIXME: taks lock or tty_mutex ? */
if (current->signal->tty != tty)
return -ENOTTY;
if (current->signal->leader)
disassociate_ctty(0);
- task_lock(current);
+ mutex_lock(&tty_mutex);
+ lock_task_sighand(current, &flags)
current->signal->tty = NULL;
- task_unlock(current);
+ unlock_task_sighand(current, &flags);
+ mutex_unlock(&tty_mutex);
return 0;
case TIOCSCTTY:
return tiocsctty(tty, arg);
Index: linux-2.6/kernel/auditsc.c
===================================================================
--- linux-2.6.orig/kernel/auditsc.c
+++ linux-2.6/kernel/auditsc.c
@@ -823,6 +823,7 @@ static void audit_log_exit(struct audit_
context->return_code);

mutex_lock(&tty_mutex);
+ /* XXX doesn't this race like mad? */
if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
tty = tsk->signal->tty->name;
else
Index: linux-2.6/kernel/exit.c
===================================================================
--- linux-2.6.orig/kernel/exit.c
+++ linux-2.6/kernel/exit.c
@@ -370,6 +370,7 @@ void daemonize(const char *name, ...)
va_list args;
struct fs_struct *fs;
sigset_t blocked;
+ unsigned long flags;

va_start(args, name);
vsnprintf(current->comm, sizeof(current->comm), name, args);
@@ -384,7 +385,9 @@ void daemonize(const char *name, ...)

set_special_pids(1, 1);
mutex_lock(&tty_mutex);
+ lock_task_sighand(current, &flags);
current->signal->tty = NULL;
+ unlock_task_sighand(current, &flags);
mutex_unlock(&tty_mutex);

/* Block and flush all signals */
Index: linux-2.6/kernel/sys.c
===================================================================
--- linux-2.6.orig/kernel/sys.c
+++ linux-2.6/kernel/sys.c
@@ -1483,6 +1483,7 @@ asmlinkage long sys_setsid(void)
struct task_struct *group_leader = current->group_leader;
pid_t session;
int err = -EPERM;
+ unsigned long flags;

mutex_lock(&tty_mutex);
write_lock_irq(&tasklist_lock);
@@ -1504,8 +1505,11 @@ asmlinkage long sys_setsid(void)

group_leader->signal->leader = 1;
__set_special_pids(session, session);
+ /* XXX move this lock up 2 lines? */
+ lock_task_sighand(group_leader, &flags);
group_leader->signal->tty = NULL;
group_leader->signal->tty_old_pgrp = 0;
+ unlock_task_sighand(group_leader, &flags);
err = process_group(group_leader);
out:
write_unlock_irq(&tasklist_lock);



2006-10-16 13:39:34

by Prarit Bhargava

[permalink] [raw]
Subject: Re: [RFC][PATCH] ->signal->tty locking



Peter Zijlstra wrote:
> Oleg wrote:
> "Historically ->signal/->sighand (both ptrs and their contents) were globally
> protected by tasklist_lock. 'current' can use these pointers lockless, they
> can't be changed under him.
>
> Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock.
> Unless you are current, you can't lock ->siglock directly (without holding
> tasklist_lock), you should use lock_task_sighand()."
>
> Then, to be consistent with the rest of the kernel, ->signal->tty
> locking should look like so:
>
> mutex_lock(&tty_mutex)
> read_lock(&tasklist_lock)
> lock_task_sighand(p, &flags)
>

It would be nice if we could clean up some of the complicated locking in
this code. For example, from do_tty_hangup,

*
* Locking:
* BKL
* redirect lock for undoing redirection
* file list lock for manipulating list of ttys
* tty_ldisc_lock from called functions
* termios_sem resetting termios data
* tasklist_lock to walk task list for hangup event

P.

2006-10-17 08:10:32

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [RFC][PATCH] ->signal->tty locking

On 10/16, Peter Zijlstra wrote:
>
> Oleg wrote:
> "Historically ->signal/->sighand (both ptrs and their contents) were globally
> protected by tasklist_lock. 'current' can use these pointers lockless, they
> can't be changed under him.
>
> Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock.
> Unless you are current, you can't lock ->siglock directly (without holding
> tasklist_lock), you should use lock_task_sighand()."
>
> Then, to be consistent with the rest of the kernel, ->signal->tty
> locking should look like so:
>
> mutex_lock(&tty_mutex)
> read_lock(&tasklist_lock)
> lock_task_sighand(p, &flags)

I've also started similar patches, but have no time to finish them.

I don't think we need tasklist_lock. I think ->sighand->siglock is enough.

So do_task_stat() doesn't need to take tty_mutex at all.

However, tty_mutex protects ->tty from release_dev(tty), so it is also
possible to do:

mutex_lock(&tty_mutex);
tty = task->signal->tty;
barrier();
if (tty) {
// ->tty could be changed/cleared from under us,
// but it can't be released while we are holding
// tty_mutex
do_something(tty);
}
...

But first I think we should kill task_lock(). This is changelog for
the first patch in unfinished series:

Taking task_lock() when updating/using signal->tty doesn't help because

- it is used only in some random places

- signal->tty is per-process, while ->alloc_lock is per-thread

We can improve this if we take task_lock(task->group_leader), but I think
this is not the best option and we should use sighand->siglock instead,
because

- task_lock() interacts badly with write_lock_irq(&tasklist_lock),
sys_setsid() won't be happy.

- unless we are 'current' or tasklist_lock is held, we anyway need
->siglock to access ->signal. Actually, even reading ->group_leader
is not safe unless we know the task was not released.

- most of signal_struct's contents is already protected by ->siglock,
why ->tty isn't?

> However, lock_task_sighand(), is a conditional lock, p might not have a
> ->sighand, in which case it returns NULL. What would that mean for
> ->signal, can I then still modify it?

This means the task is already dead; it no longer has a ->signal.

> struct sighand_struct *sighand = lock_task_sighand(p, &flags);
> if (sighand) {
> /* modify/use ->signal->tty */
> unlock_task_sighand(p, &flags);
> } else
> /* now what !? */

see above.

> The same problem appears in fs/proc/array.c:do_task_stat(), there the
> locking doesn't look quite right either.

Hmm. I think do_task_stat() is fine, it does nothing when lock_task_sighand()
fails.

> --- linux-2.6.orig/drivers/char/tty_io.c
> +++ linux-2.6/drivers/char/tty_io.c
> @@ -63,6 +63,12 @@
> *
> * Move do_SAK() into process context. Less stack use in devfs functions.
> * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <[email protected]> 17Mar01
> + *
> + * A word on (struct task)::signal->tty locking
> + *
> + * mutex_lock(&tty_mutex)
> + * read_lock(&tasklist_lock)
> + * lock_task_sighand()
> */
>
> #include <linux/types.h>
> @@ -1282,6 +1288,7 @@ static void do_tty_hangup(void *data)
> struct task_struct *p;
> struct tty_ldisc *ld;
> int closecount = 0, n;
> + unsigned long flags;
>
> if (!tty)
> return;
> @@ -1350,20 +1357,26 @@ static void do_tty_hangup(void *data)
> This should get done automatically when the port closes and
> tty_release is called */
>
> + mutex_lock(&tty_mutex);

I am not sure it is needed.

> read_lock(&tasklist_lock);
> if (tty->session > 0) {
> do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> + lock_task_sighand(p, &flags);
> if (p->signal->tty == tty)
> p->signal->tty = NULL;
> + unlock_task_sighand(p, &flags);

We don't need lock_task_sighand() here, we can use spin_lock_irq(->siglock).

We are holding tasklist_lock. This means that all tasks found by
do_each_task_pid() have a valid ->signal/->sighand != NULL.
tasklist_lock protects against release_task()->__exit_signal() and
from changing ->sighand by de_thread().

> @@ -1506,14 +1522,27 @@ void disassociate_ctty(int on_exit)
>
> /* Must lock changes to tty_old_pgrp */
> mutex_lock(&tty_mutex);
> + lock_task_sighand(current, &flags);

Again, we can use spin_lock_irq(&current->sighand->siglock). It is safe to
use current->sighand directly; it can't be freed or changed from under us.

> current->signal->tty_old_pgrp = 0;
> - tty->session = 0;
> - tty->pgrp = -1;
> +
> + /* It is possible that do_tty_hangup has free'd this tty */
> + if (current->signal->tty) {
> + current->signal->tty->session = 0;
> + current->signal->tty->pgrp = 0;
> + } else {
> +#ifdef TTY_DEBUG_HANGUP
> + printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
> + " = NULL", tty);
> +#endif
> + }
> + unlock_task_sighand(current, &flags);
>
> /* Now clear signal->tty under the lock */
> read_lock(&tasklist_lock);
> do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
> + lock_task_sighand(p, &flags);
> p->signal->tty = NULL;
> + unlock_task_sighand(p, &flags);
> } while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
> read_unlock(&tasklist_lock);
> mutex_unlock(&tty_mutex);
> @@ -2340,11 +2369,15 @@ static void release_dev(struct file * fi
>
> read_lock(&tasklist_lock);
> do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> + lock_task_sighand(p, &flags);
> p->signal->tty = NULL;
> + unlock_task_sighand(p, &flags);
> } while_each_task_pid(tty->session, PIDTYPE_SID, p);
> if (o_tty)
> do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
> + lock_task_sighand(p, &flags);
> p->signal->tty = NULL;
> + unlock_task_sighand(p, &flags);
> } while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
> read_unlock(&tasklist_lock);

While making similar changes, I introduced a couple of simple
helpers:

/*
 * proc_clear_tty - drop the controlling-tty pointer of one task.
 * @p: task whose ->signal->tty is set to NULL.
 *
 * The store is done while holding p->sighand->siglock, taken with
 * spin_lock_irq().
 *
 * NOTE(review): p->sighand is dereferenced without a NULL check. The
 * surrounding discussion expects the caller to hold tasklist_lock (or p to
 * be current) so that ->sighand/->signal are valid -- confirm per call site.
 */
static inline void proc_clear_tty(struct task_struct *p)
{
spin_lock_irq(&p->sighand->siglock);
p->signal->tty = NULL;
spin_unlock_irq(&p->sighand->siglock);
}

/*
 * session_clear_tty - clear ->signal->tty for the tasks of a session.
 * @session: session id handed to do_each_task_pid() with PIDTYPE_SID.
 *
 * Calls proc_clear_tty() on every task the iteration yields. No lock is
 * taken here for the walk itself; the callers shown in this thread wrap
 * the call in read_lock(&tasklist_lock)/read_unlock(&tasklist_lock).
 */
static void session_clear_tty(pid_t session)
{
struct task_struct *p;

do_each_task_pid(session, PIDTYPE_SID, p) {
proc_clear_tty(p);
} while_each_task_pid(session, PIDTYPE_SID, p);
}

I think it makes sense.

> static int tiocsctty(struct tty_struct *tty, int arg)
> {
> struct task_struct *p;
> + unsigned long flags;
>
> if (current->signal->leader &&
> (current->signal->session == tty->session))
> @@ -2898,6 +2940,7 @@ static int tiocsctty(struct tty_struct *
> */
> if (!current->signal->leader || current->signal->tty)
> return -EPERM;
> + mutex_lock(&tty_mutex);
> if (tty->session > 0) {
> /*
> * This tty is already the controlling
> @@ -2910,20 +2953,23 @@ static int tiocsctty(struct tty_struct *
>
> read_lock(&tasklist_lock);
> do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> + lock_task_sighand(p, &flags);
> p->signal->tty = NULL;
> + unlock_task_sighand(p, &flags);
> } while_each_task_pid(tty->session, PIDTYPE_SID, p);
> read_unlock(&tasklist_lock);
> - } else
> + } else {
> + mutex_unlock(&tty_mutex);
> return -EPERM;
> + }
> }
> - mutex_lock(&tty_mutex);
> - task_lock(current);
> - current->signal->tty = tty;
> - task_unlock(current);
> - mutex_unlock(&tty_mutex);
> - current->signal->tty_old_pgrp = 0;
> tty->session = current->signal->session;
> tty->pgrp = process_group(current);
> + lock_task_sighand(current, &flags);
> + current->signal->tty = tty;
> + current->signal->tty_old_pgrp = 0;
> + unlock_task_sighand(current, &flags);
> + mutex_unlock(&tty_mutex);
> return 0;
> }

There is very similar code in tty_open(); we probably need another
helper, proc_set_tty().

But I am not sure about locking. I think we should check
->signal->leader/->signal->tty and set ->tty in proc_set_tty()
under ->siglock, this way we can remove tty_mutex from sys_setsid().

Oleg.

2006-10-17 12:18:23

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH] ->signal->tty locking

On Tue, 2006-10-17 at 12:10 +0400, Oleg Nesterov wrote:
> On 10/16, Peter Zijlstra wrote:
> >
> > Oleg wrote:
> > "Historically ->signal/->sighand (both ptrs and their contents) were globally
> > protected by tasklist_lock. 'current' can use these pointers lockless, they
> > can't be changed under him.
> >
> > Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock.
> > Unless you are current, you can't lock ->siglock directly (without holding
> > tasklist_lock), you should use lock_task_sighand()."
> >
> > Then, to be consistent with the rest of the kernel, ->signal->tty
> > locking should look like so:
> >
> > mutex_lock(&tty_mutex)
> > read_lock(&tasklist_lock)
> > lock_task_sighand(p, &flags)
>
> I've also started similar patches, but have no time to finish it.
>
> I don't think we need tasklist_lock. I think ->sighand->siglock is enough.

Right, sys_unshare() makes tasklist_lock meaningless wrt ->siglock.

> So do_task_stat() doesn't need to take tty_mutex at all.
>
> However, tty_mutex protects ->tty from release_dev(tty), so it is also
> possible to do:
>
> mutex_lock(&tty_mutex);
> tty = task->signal->tty;
> barrier();
> if (tty) {
> // ->tty could be changed/cleared from under us,
> // but it can't be released while we are holding
> // tty_mutex
> do_something(tty);
> }
> ...

Nice, I think we have to convert all those callers like sys_vhangup() to
this form.


> > @@ -1350,20 +1357,26 @@ static void do_tty_hangup(void *data)
> > This should get done automatically when the port closes and
> > tty_release is called */
> >
> > + mutex_lock(&tty_mutex);
>
> I am not sure it is needed.

Right, this would only be needed when using the tty, not when changing
signal->tty.

> > read_lock(&tasklist_lock);
> > if (tty->session > 0) {
> > do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> > + lock_task_sighand(p, &flags);
> > if (p->signal->tty == tty)
> > p->signal->tty = NULL;
> > + unlock_task_sighand(p, &flags);
>
> We don't need lock_task_sighand() here, we can use spin_lock_irq(->siglock).
>
> We are holding tasklist_lock. This means that all tasks found by
> do_each_task_pid() have a valid ->signal/->sighand != NULL.
> tasklist_lock protects against release_task()->__exit_signal() and
> from changing ->sighand by de_thread().

I think sys_unshare() spoils the game here; it changes ->sighand in
midair without holding tasklist_lock. So any ->sighand but current's is
fair game.

Hmm, either sys_unshare() is broken in that it doesn't take the
tasklist_lock or a lot of other code is broken.

let us take send_sig_info() vs. sys_unshare()

1 2

read_lock(&tasklist_lock)
spin_lock_irqsave(&p->sighand->siglock, flags);

rcu_assign_pointer(current->sighand, new_sigh)

spin_unlock_irqrestore(&p->sighand->siglock, flags);
read_unlock(&tasklist_lock);

what happens when 2's current is 1's p....

> > @@ -2910,20 +2953,23 @@ static int tiocsctty(struct tty_struct *
> >
> > read_lock(&tasklist_lock);
> > do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> > + lock_task_sighand(p, &flags);
> > p->signal->tty = NULL;
> > + unlock_task_sighand(p, &flags);
> > } while_each_task_pid(tty->session, PIDTYPE_SID, p);
> > read_unlock(&tasklist_lock);
> > - } else
> > + } else {
> > + mutex_unlock(&tty_mutex);
> > return -EPERM;
> > + }
> > }
> > - mutex_lock(&tty_mutex);
> > - task_lock(current);
> > - current->signal->tty = tty;
> > - task_unlock(current);
> > - mutex_unlock(&tty_mutex);
> > - current->signal->tty_old_pgrp = 0;
> > tty->session = current->signal->session;
> > tty->pgrp = process_group(current);
> > + lock_task_sighand(current, &flags);
> > + current->signal->tty = tty;
> > + current->signal->tty_old_pgrp = 0;
> > + unlock_task_sighand(current, &flags);
> > + mutex_unlock(&tty_mutex);
> > return 0;
> > }
>
> There is a very similar code in tty_open(), probably we need another
> helper, proc_set_tty().
>
> But I am not sure about locking. I think we should check
> ->signal->leader/->signal->tty and set ->tty in proc_set_tty()
> under ->siglock, this way we can remove tty_mutex from sys_setsid().

Right, use tty_mutex when using the tty, use ->sighand when changing
signal->tty.

2006-10-17 12:31:31

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [RFC][PATCH] ->signal->tty locking

On 10/17, Peter Zijlstra wrote:
>
> On Tue, 2006-10-17 at 12:10 +0400, Oleg Nesterov wrote:
> >
> > We don't need lock_task_sighand() here, we can use spin_lock_irq(->siglock).
> >
> > We are holding tasklist_lock. This means that all tasks found by
> > do_each_task_pid() have a valid ->signal/->sighand != NULL.
> > tasklist_lock protects against release_task()->__exit_signal() and
> > from changing ->sighand by de_thread().
>
> I think sys_unshare() spoils the game here; it changes ->sighand in
> midair without holding tasklist_lock. So any ->sighand but current's is
> fair game.
>
> Hmm, either sys_unshare() is broken in that it doesn't take the
> tasklist_lock or a lot of other code is broken.

Yes, it is broken, please look at
http://marc.theaimsgroup.com/?t=114253118100003

I sent a patch,
http://marc.theaimsgroup.com/?l=linux-kernel&m=114268787415193

but it was ignored. Probably I should re-send it.

> Right, use tty_mutex when using the tty, use ->sighand when changing
> signal->tty.

I think that things like do_task_stat()/do_acct_process() do not need
global tty_mutex, they can use ->siglock.

Oleg.

2006-10-17 12:59:52

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH] ->signal->tty locking

On Tue, 2006-10-17 at 16:33 +0400, Oleg Nesterov wrote:
> On 10/17, Peter Zijlstra wrote:

> > Hmm, either sys_unshare() is broken in that it doesn't take the
> > tasklist_lock or a lot of other code is broken.
>
> Yes, it is broken, please look at

Ah good :-)

How about something like this? I'm still shaky on the lifetime rules of
tty objects; I'm about to add a refcount and a spinlock/mutex to
tty_struct. This is madness....

---
arch/sparc64/solaris/misc.c | 4 -
arch/um/kernel/exec.c | 7 +-
drivers/char/tty_io.c | 126 ++++++++++++++++++++++++--------------------
drivers/s390/char/fs3270.c | 10 ++-
fs/dquot.c | 14 ++--
fs/open.c | 1
include/linux/tty.h | 28 +++++++++
kernel/acct.c | 9 +--
kernel/auditsc.c | 2
kernel/exit.c | 4 -
kernel/sys.c | 6 +-
security/selinux/hooks.c | 15 +++--
12 files changed, 141 insertions(+), 85 deletions(-)

Index: linux-2.6.18.noarch/drivers/char/tty_io.c
===================================================================
--- linux-2.6.18.noarch.orig/drivers/char/tty_io.c
+++ linux-2.6.18.noarch/drivers/char/tty_io.c
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(tty_std_termios);

LIST_HEAD(tty_drivers); /* linked list of tty drivers */

-/* Semaphore to protect creating and releasing a tty. This is shared with
+/* Mutex to protect creating and releasing a tty. This is shared with
vt.c for deeply disgusting hack reasons */
DEFINE_MUTEX(tty_mutex);
EXPORT_SYMBOL(tty_mutex);
@@ -616,7 +616,7 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_strin
* they are not on hot paths so a little discipline won't do
* any harm.
*
- * Locking: takes termios_sem
+ * Locking: takes termios_mutex
*/

static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
@@ -917,7 +917,7 @@ static void tty_ldisc_enable(struct tty_
* context.
*
* Locking: takes tty_ldisc_lock.
- * called functions take termios_sem
+ * called functions take termios_mutex
*/

static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
@@ -1269,12 +1269,12 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
*
* Locking:
* BKL
- * redirect lock for undoing redirection
- * file list lock for manipulating list of ttys
- * tty_ldisc_lock from called functions
- * termios_sem resetting termios data
- * tasklist_lock to walk task list for hangup event
- *
+ * redirect lock for undoing redirection
+ * file list lock for manipulating list of ttys
+ * tty_ldisc_lock from called functions
+ * termios_mutex resetting termios data
+ * tasklist_lock to walk task list for hangup event
+ * ->siglock to protect ->signal/->sighand
*/
static void do_tty_hangup(void *data)
{
@@ -1355,14 +1355,18 @@ static void do_tty_hangup(void *data)
read_lock(&tasklist_lock);
if (tty->session > 0) {
do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+ spin_lock_irq(&p->sighand->siglock);
if (p->signal->tty == tty)
p->signal->tty = NULL;
- if (!p->signal->leader)
+ if (!p->signal->leader) {
+ spin_unlock_irq(&p->sighand->siglock);
continue;
- group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
- group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+ }
+ __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
+ __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
if (tty->pgrp > 0)
p->signal->tty_old_pgrp = tty->pgrp;
+ spin_unlock_irq(&p->sighand->siglock);
} while_each_task_pid(tty->session, PIDTYPE_SID, p);
}
read_unlock(&tasklist_lock);
@@ -1470,10 +1474,12 @@ EXPORT_SYMBOL(tty_hung_up_p);
* The argument on_exit is set to 1 if called when a process is
* exiting; it is 0 if called by the ioctl TIOCNOTTY.
*
- * Locking: tty_mutex is taken to protect current->signal->tty
+ * Locking:
* BKL is taken for hysterical raisins
- * Tasklist lock is taken (under tty_mutex) to walk process
- * lists for the session.
+ * tty_mutex is taken to protect tty
+ * ->siglock is taken to protect ->signal/->sighand
+ * tasklist_lock is taken to walk process list for sessions
+ * ->siglock is taken to protect ->signal/->sighand
*/

void disassociate_ctty(int on_exit)
@@ -1481,20 +1487,23 @@ void disassociate_ctty(int on_exit)
struct tty_struct *tty;
struct task_struct *p;
int tty_pgrp = -1;
+ int session;

lock_kernel();

mutex_lock(&tty_mutex);
- tty = current->signal->tty;
+ tty = current_get_tty();
if (tty) {
tty_pgrp = tty->pgrp;
mutex_unlock(&tty_mutex);
+ /* XXX: here we race, there is nothing protecting tty */
if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
tty_vhangup(tty);
} else {
- if (current->signal->tty_old_pgrp) {
- kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
- kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
+ pid_t old_pgrp = current->signal->tty_old_pgrp;
+ if (old_pgrp) {
+ kill_pg(old_pgrp, SIGHUP, on_exit);
+ kill_pg(old_pgrp, SIGCONT, on_exit);
}
mutex_unlock(&tty_mutex);
unlock_kernel();
@@ -1506,19 +1515,29 @@ void disassociate_ctty(int on_exit)
kill_pg(tty_pgrp, SIGCONT, on_exit);
}

- /* Must lock changes to tty_old_pgrp */
- mutex_lock(&tty_mutex);
+ spin_lock_irq(&current->sighand->siglock);
current->signal->tty_old_pgrp = 0;
- tty->session = 0;
- tty->pgrp = -1;
+ session = current->signal->session;
+ spin_unlock_irq(&current->sighand->siglock);
+
+ mutex_lock(&tty_mutex);
+ /* It is possible that do_tty_hangup has free'd this tty */
+ tty = current_get_tty();
+ if (tty) {
+ tty->session = 0;
+ tty->pgrp = 0;
+ } else {
+#ifdef TTY_DEBUG_HANGUP
+ printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
+ " = NULL", tty);
+#endif
+ }
+ mutex_unlock(&tty_mutex);

/* Now clear signal->tty under the lock */
read_lock(&tasklist_lock);
- do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
- p->signal->tty = NULL;
- } while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
+ session_clear_tty(session);
read_unlock(&tasklist_lock);
- mutex_unlock(&tty_mutex);
unlock_kernel();
}

@@ -2340,13 +2359,9 @@ static void release_dev(struct file * fi
struct task_struct *p;

read_lock(&tasklist_lock);
- do_each_task_pid(tty->session, PIDTYPE_SID, p) {
- p->signal->tty = NULL;
- } while_each_task_pid(tty->session, PIDTYPE_SID, p);
+ session_clear_tty(tty->session);
if (o_tty)
- do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
- p->signal->tty = NULL;
- } while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
+ session_clear_tty(o_tty->session);
read_unlock(&tasklist_lock);
}

@@ -2467,12 +2482,13 @@ retry_open:
mutex_lock(&tty_mutex);

if (device == MKDEV(TTYAUX_MAJOR,0)) {
- if (!current->signal->tty) {
+ tty = current_get_tty();
+ if (!tty) {
mutex_unlock(&tty_mutex);
return -ENXIO;
}
- driver = current->signal->tty->driver;
- index = current->signal->tty->index;
+ driver = tty->driver;
+ index = tty->index;
filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
/* noctty = 1; */
goto got_driver;
@@ -2547,17 +2563,21 @@ got_driver:
filp->f_op = &tty_fops;
goto retry_open;
}
+
+ mutex_lock(&tty_mutex);
+ spin_lock_irq(&current->sighand->siglock);
if (!noctty &&
current->signal->leader &&
!current->signal->tty &&
tty->session == 0) {
- task_lock(current);
current->signal->tty = tty;
- task_unlock(current);
current->signal->tty_old_pgrp = 0;
tty->session = current->signal->session;
tty->pgrp = process_group(current);
}
+ /* don't we leak tty in the else case !? */
+ spin_unlock_irq(&current->sighand->siglock);
+ mutex_unlock(&tty_mutex);
return 0;
}

@@ -2899,6 +2919,7 @@ static int tiocsctty(struct tty_struct *
*/
if (!current->signal->leader || current->signal->tty)
return -EPERM;
+ mutex_lock(&tty_mutex);
if (tty->session > 0) {
/*
* This tty is already the controlling
@@ -2908,23 +2929,21 @@ static int tiocsctty(struct tty_struct *
/*
* Steal it away
*/
-
read_lock(&tasklist_lock);
- do_each_task_pid(tty->session, PIDTYPE_SID, p) {
- p->signal->tty = NULL;
- } while_each_task_pid(tty->session, PIDTYPE_SID, p);
+ session_clear_tty(tty->session);
read_unlock(&tasklist_lock);
- } else
+ } else {
+ mutex_unlock(&tty_mutex);
return -EPERM;
+ }
}
- mutex_lock(&tty_mutex);
- task_lock(current);
- current->signal->tty = tty;
- task_unlock(current);
- mutex_unlock(&tty_mutex);
- current->signal->tty_old_pgrp = 0;
+ spin_lock_irq(&current->sighand->siglock);
tty->session = current->signal->session;
tty->pgrp = process_group(current);
+ current->signal->tty = tty;
+ current->signal->tty_old_pgrp = 0;
+ spin_unlock_irq(&current->sighand->siglock);
+ mutex_unlock(&tty_mutex);
return 0;
}

@@ -2937,7 +2956,7 @@ static int tiocsctty(struct tty_struct *
* Obtain the process group of the tty. If there is no process group
* return an error.
*
- * Locking: none. Reference to ->signal->tty is safe.
+ * Locking: none. Reference to current->signal->tty is safe.
*/

static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -2995,7 +3014,7 @@ static int tiocspgrp(struct tty_struct *
* Obtain the session id of the tty. If there is no session
* return an error.
*
- * Locking: none. Reference to ->signal->tty is safe.
+ * Locking: none. Reference to current->signal->tty is safe.
*/

static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -3214,14 +3233,11 @@ int tty_ioctl(struct inode * inode, stru
clear_bit(TTY_EXCLUSIVE, &tty->flags);
return 0;
case TIOCNOTTY:
- /* FIXME: taks lock or tty_mutex ? */
if (current->signal->tty != tty)
return -ENOTTY;
if (current->signal->leader)
disassociate_ctty(0);
- task_lock(current);
- current->signal->tty = NULL;
- task_unlock(current);
+ proc_set_tty(current, NULL);
return 0;
case TIOCSCTTY:
return tiocsctty(tty, arg);
Index: linux-2.6.18.noarch/kernel/auditsc.c
===================================================================
--- linux-2.6.18.noarch.orig/kernel/auditsc.c
+++ linux-2.6.18.noarch/kernel/auditsc.c
@@ -819,10 +819,12 @@ static void audit_log_exit(struct audit_
context->return_code);

mutex_lock(&tty_mutex);
+ read_lock(&tasklist_lock);
if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
tty = tsk->signal->tty->name;
else
tty = "(none)";
+ read_unlock(&tasklist_lock);
audit_log_format(ab,
" a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
" ppid=%d pid=%d auid=%u uid=%u gid=%u"
Index: linux-2.6.18.noarch/kernel/exit.c
===================================================================
--- linux-2.6.18.noarch.orig/kernel/exit.c
+++ linux-2.6.18.noarch/kernel/exit.c
@@ -382,9 +382,7 @@ void daemonize(const char *name, ...)
exit_mm(current);

set_special_pids(1, 1);
- mutex_lock(&tty_mutex);
- current->signal->tty = NULL;
- mutex_unlock(&tty_mutex);
+ proc_set_tty(current, NULL);

/* Block and flush all signals */
sigfillset(&blocked);
Index: linux-2.6.18.noarch/kernel/sys.c
===================================================================
--- linux-2.6.18.noarch.orig/kernel/sys.c
+++ linux-2.6.18.noarch/kernel/sys.c
@@ -1379,7 +1379,6 @@ asmlinkage long sys_setsid(void)
pid_t session;
int err = -EPERM;

- mutex_lock(&tty_mutex);
write_lock_irq(&tasklist_lock);

/* Fail if I am already a session leader */
@@ -1399,12 +1398,15 @@ asmlinkage long sys_setsid(void)

group_leader->signal->leader = 1;
__set_special_pids(session, session);
+
+ spin_lock(&group_leader->sighand->siglock);
group_leader->signal->tty = NULL;
group_leader->signal->tty_old_pgrp = 0;
+ spin_unlock(&group_leader->sighand->siglock);
+
err = process_group(group_leader);
out:
write_unlock_irq(&tasklist_lock);
- mutex_unlock(&tty_mutex);
return err;
}

Index: linux-2.6.18.noarch/arch/sparc64/solaris/misc.c
===================================================================
--- linux-2.6.18.noarch.orig/arch/sparc64/solaris/misc.c
+++ linux-2.6.18.noarch/arch/sparc64/solaris/misc.c
@@ -423,9 +423,7 @@ asmlinkage int solaris_procids(int cmd,
Solaris setpgrp and setsid? */
ret = sys_setpgid(0, 0);
if (ret) return ret;
- mutex_lock(&tty_mutex);
- current->signal->tty = NULL;
- mutex_unlock(&tty_mutex);
+ proc_set_tty(current, NULL);
return process_group(current);
}
case 2: /* getsid */
Index: linux-2.6.18.noarch/arch/um/kernel/exec.c
===================================================================
--- linux-2.6.18.noarch.orig/arch/um/kernel/exec.c
+++ linux-2.6.18.noarch/arch/um/kernel/exec.c
@@ -39,12 +39,13 @@ static long execve1(char *file, char __u
char __user *__user *env)
{
long error;
+ struct tty_struct *tty;

#ifdef CONFIG_TTY_LOG
mutex_lock(&tty_mutex);
- task_lock(current); /* FIXME: is this needed ? */
- log_exec(argv, current->signal->tty);
- task_unlock(current);
+ tty = current_get_tty();
+ if (tty)
+ log_exec(argv, tty);
mutex_unlock(&tty_mutex);
#endif
error = do_execve(file, argv, env, &current->thread.regs);
Index: linux-2.6.18.noarch/drivers/s390/char/fs3270.c
===================================================================
--- linux-2.6.18.noarch.orig/drivers/s390/char/fs3270.c
+++ linux-2.6.18.noarch/drivers/s390/char/fs3270.c
@@ -425,11 +425,15 @@ fs3270_open(struct inode *inode, struct
minor = iminor(filp->f_dentry->d_inode);
/* Check for minor 0 multiplexer. */
if (minor == 0) {
- if (!current->signal->tty)
+ struct tty_struct *tty;
+ mutex_lock(&tty_mutex);
+ tty = current_get_tty();
+ if (!tty)
return -ENODEV;
- if (current->signal->tty->driver->major != IBM_TTY3270_MAJOR)
+ if (tty->driver->major != IBM_TTY3270_MAJOR)
return -ENODEV;
- minor = current->signal->tty->index + RAW3270_FIRSTMINOR;
+ minor = tty->index + RAW3270_FIRSTMINOR;
+ mutex_unlock(&tty_mutex);
}
/* Check if some other program is already using fullscreen mode. */
fp = (struct fs3270 *) raw3270_find_view(&fs3270_fn, minor);
Index: linux-2.6.18.noarch/fs/dquot.c
===================================================================
--- linux-2.6.18.noarch.orig/fs/dquot.c
+++ linux-2.6.18.noarch/fs/dquot.c
@@ -828,6 +828,7 @@ static inline int need_print_warning(str
static void print_warning(struct dquot *dquot, const char warntype)
{
char *msg = NULL;
+ struct tty_struct *tty;
int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS_B :
((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? DQ_INODES_B : 0);

@@ -835,14 +836,15 @@ static void print_warning(struct dquot *
return;

mutex_lock(&tty_mutex);
- if (!current->signal->tty)
+ tty = current_get_tty();
+ if (!tty)
goto out_lock;
- tty_write_message(current->signal->tty, dquot->dq_sb->s_id);
+ tty_write_message(tty, dquot->dq_sb->s_id);
if (warntype == ISOFTWARN || warntype == BSOFTWARN)
- tty_write_message(current->signal->tty, ": warning, ");
+ tty_write_message(tty, ": warning, ");
else
- tty_write_message(current->signal->tty, ": write failed, ");
- tty_write_message(current->signal->tty, quotatypes[dquot->dq_type]);
+ tty_write_message(tty, ": write failed, ");
+ tty_write_message(tty, quotatypes[dquot->dq_type]);
switch (warntype) {
case IHARDWARN:
msg = " file limit reached.\r\n";
@@ -863,7 +865,7 @@ static void print_warning(struct dquot *
msg = " block quota exceeded.\r\n";
break;
}
- tty_write_message(current->signal->tty, msg);
+ tty_write_message(tty, msg);
out_lock:
mutex_unlock(&tty_mutex);
}
Index: linux-2.6.18.noarch/fs/open.c
===================================================================
--- linux-2.6.18.noarch.orig/fs/open.c
+++ linux-2.6.18.noarch/fs/open.c
@@ -1204,6 +1204,7 @@ EXPORT_SYMBOL(sys_close);
asmlinkage long sys_vhangup(void)
{
if (capable(CAP_SYS_TTY_CONFIG)) {
+ /* XXX: this needs locking */
tty_vhangup(current->signal->tty);
return 0;
}
Index: linux-2.6.18.noarch/include/linux/tty.h
===================================================================
--- linux-2.6.18.noarch.orig/include/linux/tty.h
+++ linux-2.6.18.noarch/include/linux/tty.h
@@ -338,5 +338,33 @@ static inline dev_t tty_devnum(struct tt
return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
}

+static inline void proc_set_tty(struct task_struct *p, struct tty_struct *tty)
+{
+ spin_lock_irq(&p->sighand->siglock);
+ p->signal->tty = tty;
+ spin_unlock_irq(&p->sighand->siglock);
+}
+
+static inline void session_clear_tty(pid_t session)
+{
+ struct task_struct *p;
+ do_each_task_pid(session, PIDTYPE_SID, p) {
+ proc_set_tty(p, NULL);
+ } while_each_task_pid(session, PIDTYPE_SID, p);
+}
+
+static inline void current_get_tty(void)
+{
+ struct tty_struct *tty;
+ WARN_ON_ONCE(!mutex_locked(&tty_mutex));
+ tty = current->session->tty;
+ barrier();
+ /*
+ * ->tty could be changed/cleared from under us, but it can't be
+ * released while we are holding tty_mutex
+ */
+ return tty;
+}
+
#endif /* __KERNEL__ */
#endif
Index: linux-2.6.18.noarch/kernel/acct.c
===================================================================
--- linux-2.6.18.noarch.orig/kernel/acct.c
+++ linux-2.6.18.noarch/kernel/acct.c
@@ -427,6 +427,7 @@ static void do_acct_process(struct file
u64 elapsed;
u64 run_time;
struct timespec uptime;
+ struct tty_struct *tty;

/*
* First check to see if there is enough free_space to continue
@@ -484,12 +485,8 @@ static void do_acct_process(struct file
#endif

mutex_lock(&tty_mutex);
- /* FIXME: Whoever is responsible for current->signal locking needs
- to use the same locking all over the kernel and document it */
- read_lock(&tasklist_lock);
- ac.ac_tty = current->signal->tty ?
- old_encode_dev(tty_devnum(current->signal->tty)) : 0;
- read_unlock(&tasklist_lock);
+ tty = current_get_tty();
+ ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
mutex_unlock(&tty_mutex);

spin_lock_irq(&current->sighand->siglock);
Index: linux-2.6.18.noarch/security/selinux/hooks.c
===================================================================
--- linux-2.6.18.noarch.orig/security/selinux/hooks.c
+++ linux-2.6.18.noarch/security/selinux/hooks.c
@@ -1708,9 +1708,10 @@ static inline void flush_unauthorized_fi
struct tty_struct *tty;
struct fdtable *fdt;
long j = -1;
+ int drop_tty = 0;

mutex_lock(&tty_mutex);
- tty = current->signal->tty;
+ tty = current_get_tty();
if (tty) {
file_list_lock();
file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list);
@@ -1723,12 +1724,18 @@ static inline void flush_unauthorized_fi
struct inode *inode = file->f_dentry->d_inode;
if (inode_has_perm(current, inode,
FILE__READ | FILE__WRITE, NULL)) {
- /* Reset controlling tty. */
- current->signal->tty = NULL;
- current->signal->tty_old_pgrp = 0;
+ drop_tty = 1;
}
}
file_list_unlock();
+
+ if (drop_tty) {
+ /* Reset controlling tty. */
+ spin_lock_irq(&current->sighand->siglock);
+ current->signal->tty = NULL;
+ current->signal->tty_old_pgrp = 0;
+ spin_unlock_irq(&current->sighand->siglock);
+ }
}
mutex_unlock(&tty_mutex);



2006-10-17 13:03:08

by Alan

[permalink] [raw]
Subject: Re: [RFC][PATCH] ->signal->tty locking

Ar Maw, 2006-10-17 am 16:33 +0400, ysgrifennodd Oleg Nesterov:
> I sent a patch,
> http://marc.theaimsgroup.com/?l=linux-kernel&m=114268787415193
>
> but it was ignored. Probably I should re-send it.

Definitely - we still see reports of tty slab scribbles

> > Right, use tty_mutex when using the tty, use ->sighand when changing
> > signal->tty.
>
> I think that things like do_task_stat()/do_acct_process() do not need
> global tty_mutex, they can use ->siglock.

Please keep the tty_mutex as it will protect against other stuff later.
Once tty is a bit saner then someone brave can refcount it properly and
that'll make it much prettier.

2006-10-17 13:41:24

by Alan

[permalink] [raw]
Subject: Re: [RFC][PATCH] ->signal->tty locking

Ar Maw, 2006-10-17 am 15:00 +0200, ysgrifennodd Peter Zijlstra:
> How about something like this; I'm still shaky on the lifetime rules of
> tty objects, I'm about to add a refcount and spinlock/mutex to
> tty_struct, this is madness....

It has two already.

I wouldn't worry about being shaky about the lifetime rules, that's a
forensics job at the moment.

> + tty = current_get_tty();

Sensible way to go whichever path we use, and once it returns a
refcount-bumped object in future it'll clean up a lot more.
get_current_tty() or just current_tty() would fit the naming in the kernel better.


I agree entirely with the path this patch is taking.

Alan

2006-10-18 16:56:13

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [RFC][PATCH] ->signal->tty locking

On 10/17, Peter Zijlstra wrote:
>
> How about something like this; I'm still shaky on the lifetime rules of
> tty objects, I'm about to add a refcount and spinlock/mutex to
> tty_struct, this is madness....

Sorry for the delay, a couple of minor nits...

> static void do_tty_hangup(void *data)
> {
> @@ -1355,14 +1355,18 @@ static void do_tty_hangup(void *data)
> read_lock(&tasklist_lock);
> if (tty->session > 0) {
> do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> + spin_lock_irq(&p->sighand->siglock);
> if (p->signal->tty == tty)
> p->signal->tty = NULL;
> - if (!p->signal->leader)
> + if (!p->signal->leader) {
> + spin_unlock_irq(&p->sighand->siglock);
> continue;
> - group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
> - group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
> + }
> + __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
> + __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);

So we are skipping security_task_kill() and audit_signal_info().
I don't claim this is bad, I just don't know.

> @@ -2899,6 +2919,7 @@ static int tiocsctty(struct tty_struct *
> */
> if (!current->signal->leader || current->signal->tty)
> return -EPERM;
> + mutex_lock(&tty_mutex);

This is still racy (consider 2 threads doing tiocsctty() at the same time),
probably it is better to take tty_mutex before the check?

> --- linux-2.6.18.noarch.orig/include/linux/tty.h
> +++ linux-2.6.18.noarch/include/linux/tty.h
> @@ -338,5 +338,33 @@ static inline dev_t tty_devnum(struct tt
> return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
> }
>
> +static inline void proc_set_tty(struct task_struct *p, struct tty_struct *tty)
> +{
> + spin_lock_irq(&p->sighand->siglock);
> + p->signal->tty = tty;
> + spin_unlock_irq(&p->sighand->siglock);
> +}

Note that it is always called with tty == NULL. That is why
I proposed proc_clear_tty(struct task_struct *p). We can't use this
helper for tiocsctty/tty_open anyway.

> +static inline void session_clear_tty(pid_t session)
> +{
> + struct task_struct *p;
> + do_each_task_pid(session, PIDTYPE_SID, p) {
> + proc_set_tty(p, NULL);
> + } while_each_task_pid(session, PIDTYPE_SID, p);
> +}
> +

I'd suggest moving it to tty_io.c and making it static (not inline).

> ===================================================================
> --- linux-2.6.18.noarch.orig/security/selinux/hooks.c
> +++ linux-2.6.18.noarch/security/selinux/hooks.c
> @@ -1708,9 +1708,10 @@ static inline void flush_unauthorized_fi
> struct tty_struct *tty;
> struct fdtable *fdt;
> long j = -1;
> + int drop_tty = 0;
>
> mutex_lock(&tty_mutex);
> - tty = current->signal->tty;
> + tty = current_get_tty();
> if (tty) {
> file_list_lock();
> file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list);
> @@ -1723,12 +1724,18 @@ static inline void flush_unauthorized_fi
> struct inode *inode = file->f_dentry->d_inode;
> if (inode_has_perm(current, inode,
> FILE__READ | FILE__WRITE, NULL)) {
> - /* Reset controlling tty. */
> - current->signal->tty = NULL;
> - current->signal->tty_old_pgrp = 0;
> + drop_tty = 1;
> }
> }
> file_list_unlock();
> +
> + if (drop_tty) {
> + /* Reset controlling tty. */
> + spin_lock_irq(&current->sighand->siglock);
> + current->signal->tty = NULL;
> + current->signal->tty_old_pgrp = 0;

Probably the last line should go to proc_clear_tty() ?

On the other hand, when signal->tty != NULL, ->tty_old_pgrp
should be == 0, so maybe it is unneeded.

In any case, I think we should use proc_set_tty() here.

Oleg.

2006-10-18 17:21:12

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [RFC][PATCH] ->signal->tty locking

On 10/17, Alan Cox wrote:
>
> Ar Maw, 2006-10-17 am 16:33 +0400, ysgrifennodd Oleg Nesterov:
> > I sent a patch,
> > http://marc.theaimsgroup.com/?l=linux-kernel&m=114268787415193
> >
> > but it was ignored. Probably I should re-send it.
>
> Definitely - we still see reports of tty slab scribbles

This patch can't fix anything, sorry for the confusion.

Yes, the 'if (new_sigh) {}' code in sys_unshare() is broken, but it is
never executed, because unshare_sighand() never populates new_sighp.

Probably it is better to just remove this code.

> > > Right, use tty_mutex when using the tty, use ->sighand when changing
> > > signal->tty.
> >
> > I think that things like do_task_stat()/do_acct_process() do not need
> > global tty_mutex, they can use ->siglock.
>
> Please keep the tty_mutex as it will protect against other stuff later.
> Once tty is a bit saner then someone brave can refcount it properly and
> that'll make it much prettier.

Oh, but it is silly to take the global tty_mutex in do_task_stat().
Why do we need it if ->siglock protects ->signal->tty ? We are only
reading the tty_struct, tty->driver can't go away ...

Oleg.