2010-07-07 12:24:14

by Nick Piggin

[permalink] [raw]
Subject: [patch 1/2] fs: cleanup files_lock locking

Al, would you consider merging these two patches please?

Thanks,
Nick
--

fs: cleanup files_lock locking

Lock tty_files with a new spinlock, tty_files_lock; provide helpers to
manipulate the per-sb files list; unexport the files_lock spinlock.

Acked-by: Andi Kleen <[email protected]>
Acked-by: Greg Kroah-Hartman <[email protected]>
Signed-off-by: Nick Piggin <[email protected]>
---
drivers/char/pty.c | 6 +++++-
drivers/char/tty_io.c | 26 ++++++++++++++++++--------
fs/file_table.c | 42 ++++++++++++++++++------------------------
fs/open.c | 4 ++--
include/linux/fs.h | 7 ++-----
include/linux/tty.h | 1 +
security/selinux/hooks.c | 4 ++--
7 files changed, 48 insertions(+), 42 deletions(-)

Index: linux-2.6/security/selinux/hooks.c
===================================================================
--- linux-2.6.orig/security/selinux/hooks.c
+++ linux-2.6/security/selinux/hooks.c
@@ -2219,7 +2219,7 @@ static inline void flush_unauthorized_fi

tty = get_current_tty();
if (tty) {
- file_list_lock();
+ spin_lock(&tty_files_lock);
if (!list_empty(&tty->tty_files)) {
struct inode *inode;

@@ -2235,7 +2235,7 @@ static inline void flush_unauthorized_fi
drop_tty = 1;
}
}
- file_list_unlock();
+ spin_unlock(&tty_files_lock);
tty_kref_put(tty);
}
/* Reset controlling tty. */
Index: linux-2.6/drivers/char/pty.c
===================================================================
--- linux-2.6.orig/drivers/char/pty.c
+++ linux-2.6/drivers/char/pty.c
@@ -650,7 +650,11 @@ static int __ptmx_open(struct inode *ino

set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
filp->private_data = tty;
- file_move(filp, &tty->tty_files);
+
+ file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
+ spin_lock(&tty_files_lock);
+ list_add(&filp->f_u.fu_list, &tty->tty_files);
+ spin_unlock(&tty_files_lock);

retval = devpts_pty_new(inode, tty->link);
if (retval)
Index: linux-2.6/drivers/char/tty_io.c
===================================================================
--- linux-2.6.orig/drivers/char/tty_io.c
+++ linux-2.6/drivers/char/tty_io.c
@@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list
DEFINE_MUTEX(tty_mutex);
EXPORT_SYMBOL(tty_mutex);

+/* Spinlock to protect the tty->tty_files list */
+DEFINE_SPINLOCK(tty_files_lock);
+
static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
ssize_t redirected_tty_write(struct file *, const char __user *,
@@ -234,11 +237,11 @@ static int check_tty_count(struct tty_st
struct list_head *p;
int count = 0;

- file_list_lock();
+ spin_lock(&tty_files_lock);
list_for_each(p, &tty->tty_files) {
count++;
}
- file_list_unlock();
+ spin_unlock(&tty_files_lock);
if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
tty->driver->subtype == PTY_TYPE_SLAVE &&
tty->link && tty->link->count)
@@ -517,7 +520,7 @@ static void do_tty_hangup(struct work_st
lock_kernel();
check_tty_count(tty, "do_tty_hangup");

- file_list_lock();
+ spin_lock(&tty_files_lock);
/* This breaks for file handles being sent over AF_UNIX sockets ? */
list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
if (filp->f_op->write == redirected_tty_write)
@@ -528,7 +531,7 @@ static void do_tty_hangup(struct work_st
tty_fasync(-1, filp, 0); /* can't block */
filp->f_op = &hung_up_tty_fops;
}
- file_list_unlock();
+ spin_unlock(&tty_files_lock);

tty_ldisc_hangup(tty);

@@ -1419,9 +1422,9 @@ static void release_one_tty(struct work_
tty_driver_kref_put(driver);
module_put(driver->owner);

- file_list_lock();
+ spin_lock(&tty_files_lock);
list_del_init(&tty->tty_files);
- file_list_unlock();
+ spin_unlock(&tty_files_lock);

put_pid(tty->pgrp);
put_pid(tty->session);
@@ -1666,7 +1669,10 @@ int tty_release(struct inode *inode, str
* - do_tty_hangup no longer sees this file descriptor as
* something that needs to be handled for hangups.
*/
- file_kill(filp);
+ spin_lock(&tty_files_lock);
+ BUG_ON(list_empty(&filp->f_u.fu_list));
+ list_del_init(&filp->f_u.fu_list);
+ spin_unlock(&tty_files_lock);
filp->private_data = NULL;

/*
@@ -1835,7 +1841,11 @@ got_driver:
}

filp->private_data = tty;
- file_move(filp, &tty->tty_files);
+ BUG_ON(list_empty(&filp->f_u.fu_list));
+ file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
+ spin_lock(&tty_files_lock);
+ list_add(&filp->f_u.fu_list, &tty->tty_files);
+ spin_unlock(&tty_files_lock);
check_tty_count(tty, "tty_open");
if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
tty->driver->subtype == PTY_TYPE_MASTER)
Index: linux-2.6/fs/file_table.c
===================================================================
--- linux-2.6.orig/fs/file_table.c
+++ linux-2.6/fs/file_table.c
@@ -32,8 +32,7 @@ struct files_stat_struct files_stat = {
.max_files = NR_FILE
};

-/* public. Not pretty! */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);

/* SLAB cache for file structures */
static struct kmem_cache *filp_cachep __read_mostly;
@@ -249,7 +248,7 @@ static void __fput(struct file *file)
cdev_put(inode->i_cdev);
fops_put(file->f_op);
put_pid(file->f_owner.pid);
- file_kill(file);
+ file_sb_list_del(file);
if (file->f_mode & FMODE_WRITE)
drop_file_write_access(file);
file->f_path.dentry = NULL;
@@ -319,31 +318,29 @@ struct file *fget_light(unsigned int fd,
return file;
}

-
void put_filp(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
security_file_free(file);
- file_kill(file);
+ file_sb_list_del(file);
file_free(file);
}
}

-void file_move(struct file *file, struct list_head *list)
+void file_sb_list_add(struct file *file, struct super_block *sb)
{
- if (!list)
- return;
- file_list_lock();
- list_move(&file->f_u.fu_list, list);
- file_list_unlock();
+ spin_lock(&files_lock);
+ BUG_ON(!list_empty(&file->f_u.fu_list));
+ list_add(&file->f_u.fu_list, &sb->s_files);
+ spin_unlock(&files_lock);
}

-void file_kill(struct file *file)
+void file_sb_list_del(struct file *file)
{
if (!list_empty(&file->f_u.fu_list)) {
- file_list_lock();
+ spin_lock(&files_lock);
list_del_init(&file->f_u.fu_list);
- file_list_unlock();
+ spin_unlock(&files_lock);
}
}

@@ -352,7 +349,7 @@ int fs_may_remount_ro(struct super_block
struct file *file;

/* Check that no files are currently opened for writing. */
- file_list_lock();
+ spin_lock(&files_lock);
list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
struct inode *inode = file->f_path.dentry->d_inode;

@@ -364,10 +361,10 @@ int fs_may_remount_ro(struct super_block
if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
goto too_bad;
}
- file_list_unlock();
+ spin_unlock(&files_lock);
return 1; /* Tis' cool bro. */
too_bad:
- file_list_unlock();
+ spin_unlock(&files_lock);
return 0;
}

@@ -383,7 +380,7 @@ void mark_files_ro(struct super_block *s
struct file *f;

retry:
- file_list_lock();
+ spin_lock(&files_lock);
list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
struct vfsmount *mnt;
if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
@@ -399,16 +396,13 @@ retry:
continue;
file_release_write(f);
mnt = mntget(f->f_path.mnt);
- file_list_unlock();
- /*
- * This can sleep, so we can't hold
- * the file_list_lock() spinlock.
- */
+ /* This can sleep, so we can't hold the spinlock. */
+ spin_unlock(&files_lock);
mnt_drop_write(mnt);
mntput(mnt);
goto retry;
}
- file_list_unlock();
+ spin_unlock(&files_lock);
}

void __init files_init(unsigned long mempages)
Index: linux-2.6/fs/open.c
===================================================================
--- linux-2.6.orig/fs/open.c
+++ linux-2.6/fs/open.c
@@ -675,7 +675,7 @@ static struct file *__dentry_open(struct
f->f_path.mnt = mnt;
f->f_pos = 0;
f->f_op = fops_get(inode->i_fop);
- file_move(f, &inode->i_sb->s_files);
+ file_sb_list_add(f, inode->i_sb);

error = security_dentry_open(f, cred);
if (error)
@@ -721,7 +721,7 @@ cleanup_all:
mnt_drop_write(mnt);
}
}
- file_kill(f);
+ file_sb_list_del(f);
f->f_path.dentry = NULL;
f->f_path.mnt = NULL;
cleanup_file:
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -949,9 +949,6 @@ struct file {
unsigned long f_mnt_write_state;
#endif
};
-extern spinlock_t files_lock;
-#define file_list_lock() spin_lock(&files_lock);
-#define file_list_unlock() spin_unlock(&files_lock);

#define get_file(x) atomic_long_inc(&(x)->f_count)
#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
@@ -2182,8 +2179,8 @@ static inline void insert_inode_hash(str
__insert_inode_hash(inode, inode->i_ino);
}

-extern void file_move(struct file *f, struct list_head *list);
-extern void file_kill(struct file *f);
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
#ifdef CONFIG_BLOCK
struct bio;
extern void submit_bio(int, struct bio *);
Index: linux-2.6/include/linux/tty.h
===================================================================
--- linux-2.6.orig/include/linux/tty.h
+++ linux-2.6/include/linux/tty.h
@@ -467,6 +467,7 @@ extern struct tty_struct *tty_pair_get_t
extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);

extern struct mutex tty_mutex;
+extern spinlock_t tty_files_lock;

extern void tty_write_unlock(struct tty_struct *tty);
extern int tty_write_lock(struct tty_struct *tty, int ndelay);


2010-07-07 12:25:17

by Nick Piggin

[permalink] [raw]
Subject: [patch 2/2] tty fix fu_list abuse

tty: fix fu_list abuse

tty code abuses fu_list, which causes a bug in remount,ro handling.

If a tty device node is opened on a filesystem and its name is then unlinked,
the filesystem will still be allowed to be remounted read-only. This is because
fs_may_remount_ro does not find the 0-link tty inode via the sb files list
(the tty code incorrectly removed the file from that list so that it could
reuse fu_list for its own purpose). This can result in a filesystem with errors
after it is marked "clean". So add a new private list for ttys and leave tty
files on the sb list so they are caught by this check. This makes tty nodes
behave the same way as other device nodes.

The next step is to allocate a tty private structure at private_data and get
rid of f_ttyonly_list, but the error handling is not trivial in the tty code.
Hence this intermediate step.

[ Arguably the on-disk device inode would not be referenced by the driver's
pseudo inode once it is open, but in practice it's not clear whether that will
ever be worth implementing. ]

Signed-off-by: Nick Piggin <[email protected]>
---
drivers/char/pty.c | 3 +--
drivers/char/tty_io.c | 9 +++------
fs/internal.h | 2 ++
include/linux/fs.h | 3 +--
security/selinux/hooks.c | 3 ++-
5 files changed, 9 insertions(+), 11 deletions(-)

Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -939,6 +939,7 @@ struct file {
#endif
/* needed for tty driver, and maybe others */
void *private_data;
+ struct list_head f_ttyonly_list; /* No new users! */

#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
@@ -2179,8 +2180,6 @@ static inline void insert_inode_hash(str
__insert_inode_hash(inode, inode->i_ino);
}

-extern void file_sb_list_add(struct file *f, struct super_block *sb);
-extern void file_sb_list_del(struct file *f);
#ifdef CONFIG_BLOCK
struct bio;
extern void submit_bio(int, struct bio *);
Index: linux-2.6/drivers/char/pty.c
===================================================================
--- linux-2.6.orig/drivers/char/pty.c
+++ linux-2.6/drivers/char/pty.c
@@ -651,9 +651,8 @@ static int __ptmx_open(struct inode *ino
set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
filp->private_data = tty;

- file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
spin_lock(&tty_files_lock);
- list_add(&filp->f_u.fu_list, &tty->tty_files);
+ list_add(&filp->f_ttyonly_list, &tty->tty_files);
spin_unlock(&tty_files_lock);

retval = devpts_pty_new(inode, tty->link);
Index: linux-2.6/drivers/char/tty_io.c
===================================================================
--- linux-2.6.orig/drivers/char/tty_io.c
+++ linux-2.6/drivers/char/tty_io.c
@@ -522,7 +522,7 @@ static void do_tty_hangup(struct work_st

spin_lock(&tty_files_lock);
/* This breaks for file handles being sent over AF_UNIX sockets ? */
- list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
+ list_for_each_entry(filp, &tty->tty_files, f_ttyonly_list) {
if (filp->f_op->write == redirected_tty_write)
cons_filp = filp;
if (filp->f_op->write != tty_write)
@@ -1670,8 +1670,7 @@ int tty_release(struct inode *inode, str
* something that needs to be handled for hangups.
*/
spin_lock(&tty_files_lock);
- BUG_ON(list_empty(&filp->f_u.fu_list));
- list_del_init(&filp->f_u.fu_list);
+ list_del(&filp->f_ttyonly_list);
spin_unlock(&tty_files_lock);
filp->private_data = NULL;

@@ -1841,10 +1840,8 @@ got_driver:
}

filp->private_data = tty;
- BUG_ON(list_empty(&filp->f_u.fu_list));
- file_sb_list_del(filp); /* __dentry_open has put it on the sb list */
spin_lock(&tty_files_lock);
- list_add(&filp->f_u.fu_list, &tty->tty_files);
+ list_add(&filp->f_ttyonly_list, &tty->tty_files);
spin_unlock(&tty_files_lock);
check_tty_count(tty, "tty_open");
if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
Index: linux-2.6/security/selinux/hooks.c
===================================================================
--- linux-2.6.orig/security/selinux/hooks.c
+++ linux-2.6/security/selinux/hooks.c
@@ -2228,7 +2228,8 @@ static inline void flush_unauthorized_fi
than using file_has_perm, as this particular open
file may belong to another process and we are only
interested in the inode-based check here. */
- file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list);
+ file = list_first_entry(&tty->tty_files,
+ struct file, f_ttyonly_list);
inode = file->f_path.dentry->d_inode;
if (inode_has_perm(cred, inode,
FILE__READ | FILE__WRITE, NULL)) {
Index: linux-2.6/fs/internal.h
===================================================================
--- linux-2.6.orig/fs/internal.h
+++ linux-2.6/fs/internal.h
@@ -80,6 +80,8 @@ extern void chroot_fs_refs(struct path *
/*
* file_table.c
*/
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
extern void mark_files_ro(struct super_block *);
extern struct file *get_empty_filp(void);

2010-07-07 16:08:57

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [patch 2/2] tty fix fu_list abuse

On Wed, Jul 07, 2010 at 10:25:10PM +1000, Nick Piggin wrote:
> tty: fix fu_list abuse
>
> tty code abuses fu_list, which causes a bug in remount,ro handling.
>
> If a tty device node is opened on a filesystem, then the name unlinked, the
> filesystem will be allowed to be remounted readonly. This is because
> fs_may_remount_ro does not find the 0 link tty inode on the file sb list
> (because the tty code incorrectly removed it to use for its own purpose). This
> can result in a filesystem with errors after it is marked "clean". So add a
> new private list for ttys and leave tty files on the sb list so they are caught
> by this check. This makes tty nodes behave the same way as other device nodes.
>
> The next step is to allocate a tty private structure at private_data and get
> rid of f_ttyonly_list, but the error handling is not trivial in the tty code.
> Hence this intermediate step.

Growing struct file by two pointers for the tty special case simply
isn't acceptable.

2010-07-07 16:17:26

by Nick Piggin

[permalink] [raw]
Subject: Re: [patch 2/2] tty fix fu_list abuse

On Wed, Jul 07, 2010 at 06:08:35PM +0200, Christoph Hellwig wrote:
> On Wed, Jul 07, 2010 at 10:25:10PM +1000, Nick Piggin wrote:
> > tty: fix fu_list abuse
> >
> > tty code abuses fu_list, which causes a bug in remount,ro handling.
> >
> > If a tty device node is opened on a filesystem, then the name unlinked, the
> > filesystem will be allowed to be remounted readonly. This is because
> > fs_may_remount_ro does not find the 0 link tty inode on the file sb list
> > (because the tty code incorrectly removed it to use for its own purpose). This
> > can result in a filesystem with errors after it is marked "clean". So add a
> > new private list for ttys and leave tty files on the sb list so they are caught
> > by this check. This makes tty nodes behave the same way as other device nodes.
> >
> > The next step is to allocate a tty private structure at private_data and get
> > rid of f_ttyonly_list, but the error handling is not trivial in the tty code.
> > Hence this intermediate step.
>
> Growing struct file by two pointers for the tty special case simply
> isn't acceptable.

I agree, so it can be removed when tty gets its act together. But tty
breaks the s_files list right now, which is not acceptable either. So I
think it is better to take the simple fix that can be easily applied. I
liked your patch, but error handling was totally broken in it, which
indicates it is going to require some maturing in the tty tree.