2010-06-24 03:16:45

by Nick Piggin

Subject: [patch 29/52] fs: icache lock i_count

Protect inode->i_count with i_lock, rather than having it atomic.
Next step should also be to move things together (eg. the refcount increment
into d_instantiate, which will remove a lock/unlock cycle on i_lock).

Signed-off-by: Nick Piggin <[email protected]>
---
arch/powerpc/platforms/cell/spufs/file.c | 2 -
fs/affs/inode.c | 4 ++-
fs/afs/dir.c | 4 ++-
fs/anon_inodes.c | 4 ++-
fs/bfs/dir.c | 4 ++-
fs/block_dev.c | 15 ++++++++++--
fs/btrfs/inode.c | 4 ++-
fs/cifs/inode.c | 2 -
fs/coda/dir.c | 4 ++-
fs/exofs/inode.c | 12 +++++++---
fs/exofs/namei.c | 4 ++-
fs/ext2/namei.c | 4 ++-
fs/ext3/ialloc.c | 4 +--
fs/ext3/namei.c | 4 ++-
fs/ext4/ialloc.c | 4 +--
fs/ext4/namei.c | 4 ++-
fs/fs-writeback.c | 4 +--
fs/gfs2/ops_inode.c | 4 ++-
fs/hfsplus/dir.c | 4 ++-
fs/hpfs/inode.c | 2 -
fs/inode.c | 37 ++++++++++++++++++++-----------
fs/jffs2/dir.c | 8 +++++-
fs/jfs/jfs_txnmgr.c | 4 ++-
fs/jfs/namei.c | 4 ++-
fs/libfs.c | 4 ++-
fs/locks.c | 3 --
fs/minix/namei.c | 4 ++-
fs/namei.c | 7 ++++-
fs/nfs/dir.c | 4 ++-
fs/nfs/getroot.c | 4 ++-
fs/nfs/inode.c | 4 +--
fs/nilfs2/mdt.c | 2 -
fs/nilfs2/namei.c | 4 ++-
fs/notify/inode_mark.c | 22 +++++++++++-------
fs/notify/inotify/inotify.c | 28 +++++++++++++----------
fs/ntfs/super.c | 4 ++-
fs/ocfs2/namei.c | 4 ++-
fs/reiserfs/file.c | 4 +--
fs/reiserfs/namei.c | 4 ++-
fs/reiserfs/stree.c | 2 -
fs/sysv/namei.c | 4 ++-
fs/ubifs/dir.c | 4 ++-
fs/ubifs/super.c | 2 -
fs/udf/namei.c | 4 ++-
fs/ufs/namei.c | 4 ++-
fs/xfs/linux-2.6/xfs_iops.c | 4 ++-
fs/xfs/xfs_iget.c | 2 -
fs/xfs/xfs_inode.h | 6 +++--
include/linux/fs.h | 2 -
ipc/mqueue.c | 7 ++++-
kernel/futex.c | 4 ++-
mm/shmem.c | 4 ++-
52 files changed, 201 insertions(+), 96 deletions(-)

Index: linux-2.6/arch/powerpc/platforms/cell/spufs/file.c
===================================================================
--- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/file.c
+++ linux-2.6/arch/powerpc/platforms/cell/spufs/file.c
@@ -1549,7 +1549,7 @@ static int spufs_mfc_open(struct inode *
if (ctx->owner != current->mm)
return -EINVAL;

- if (atomic_read(&inode->i_count) != 1)
+ if (inode->i_count != 1)
return -EBUSY;

mutex_lock(&ctx->mapping_lock);
Index: linux-2.6/fs/affs/inode.c
===================================================================
--- linux-2.6.orig/fs/affs/inode.c
+++ linux-2.6/fs/affs/inode.c
@@ -380,7 +380,9 @@ affs_add_entry(struct inode *dir, struct
affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
mark_buffer_dirty_inode(inode_bh, inode);
inode->i_nlink = 2;
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
}
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
Index: linux-2.6/fs/afs/dir.c
===================================================================
--- linux-2.6.orig/fs/afs/dir.c
+++ linux-2.6/fs/afs/dir.c
@@ -1002,7 +1002,9 @@ static int afs_link(struct dentry *from,
if (ret < 0)
goto link_error;

- atomic_inc(&vnode->vfs_inode.i_count);
+ spin_lock(&vnode->vfs_inode.i_lock);
+ vnode->vfs_inode.i_count++;
+ spin_unlock(&vnode->vfs_inode.i_lock);
d_instantiate(dentry, &vnode->vfs_inode);
key_put(key);
_leave(" = 0");
Index: linux-2.6/fs/anon_inodes.c
===================================================================
--- linux-2.6.orig/fs/anon_inodes.c
+++ linux-2.6/fs/anon_inodes.c
@@ -114,7 +114,9 @@ struct file *anon_inode_getfile(const ch
* so we can avoid doing an igrab() and we can use an open-coded
* atomic_inc().
*/
- atomic_inc(&anon_inode_inode->i_count);
+ spin_lock(&anon_inode_inode->i_lock);
+ anon_inode_inode->i_count++;
+ spin_unlock(&anon_inode_inode->i_lock);

path.dentry->d_op = &anon_inodefs_dentry_operations;
d_instantiate(path.dentry, anon_inode_inode);
Index: linux-2.6/fs/block_dev.c
===================================================================
--- linux-2.6.orig/fs/block_dev.c
+++ linux-2.6/fs/block_dev.c
@@ -549,7 +549,12 @@ EXPORT_SYMBOL(bdget);
*/
struct block_device *bdgrab(struct block_device *bdev)
{
- atomic_inc(&bdev->bd_inode->i_count);
+ struct inode *inode = bdev->bd_inode;
+
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
+
return bdev;
}

@@ -579,7 +584,9 @@ static struct block_device *bd_acquire(s
spin_lock(&bdev_lock);
bdev = inode->i_bdev;
if (bdev) {
- atomic_inc(&bdev->bd_inode->i_count);
+ spin_lock(&inode->i_lock);
+ bdev->bd_inode->i_count++;
+ spin_unlock(&inode->i_lock);
spin_unlock(&bdev_lock);
return bdev;
}
@@ -595,7 +602,9 @@ static struct block_device *bd_acquire(s
* So, we can access it via ->i_mapping always
* without igrab().
*/
- atomic_inc(&bdev->bd_inode->i_count);
+ spin_lock(&inode->i_lock);
+ bdev->bd_inode->i_count++;
+ spin_unlock(&inode->i_lock);
inode->i_bdev = bdev;
inode->i_mapping = bdev->bd_inode->i_mapping;
list_add(&inode->i_devices, &bdev->bd_inodes);
Index: linux-2.6/fs/ext2/namei.c
===================================================================
--- linux-2.6.orig/fs/ext2/namei.c
+++ linux-2.6/fs/ext2/namei.c
@@ -206,7 +206,9 @@ static int ext2_link (struct dentry * ol

inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);

err = ext2_add_link(dentry, inode);
if (!err) {
Index: linux-2.6/fs/ext3/ialloc.c
===================================================================
--- linux-2.6.orig/fs/ext3/ialloc.c
+++ linux-2.6/fs/ext3/ialloc.c
@@ -100,9 +100,9 @@ void ext3_free_inode (handle_t *handle,
struct ext3_sb_info *sbi;
int fatal = 0, err;

- if (atomic_read(&inode->i_count) > 1) {
+ if (inode->i_count > 1) {
printk ("ext3_free_inode: inode has count=%d\n",
- atomic_read(&inode->i_count));
+ inode->i_count);
return;
}
if (inode->i_nlink) {
Index: linux-2.6/fs/ext3/namei.c
===================================================================
--- linux-2.6.orig/fs/ext3/namei.c
+++ linux-2.6/fs/ext3/namei.c
@@ -2261,7 +2261,9 @@ retry:

inode->i_ctime = CURRENT_TIME_SEC;
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);

err = ext3_add_entry(handle, dentry, inode);
if (!err) {
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c
+++ linux-2.6/fs/fs-writeback.c
@@ -427,7 +427,7 @@ writeback_single_inode(struct inode *ino
unsigned dirty;
int ret;

- if (!atomic_read(&inode->i_count))
+ if (!inode->i_count)
WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
else
WARN_ON(inode->i_state & I_WILL_FREE);
@@ -551,7 +551,7 @@ select_queue:
inode->i_state |= I_DIRTY_PAGES;
redirty_tail(inode);
}
- } else if (atomic_read(&inode->i_count)) {
+ } else if (inode->i_count) {
/*
* The inode is clean, inuse
*/
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c
+++ linux-2.6/fs/inode.c
@@ -33,14 +33,13 @@
* inode_hash_lock protects:
* inode hash table, i_hash
* inode->i_lock protects:
- * i_state
+ * i_state, i_count
*
* Ordering:
* inode_lock
* sb_inode_list_lock
* inode->i_lock
- * inode_lock
- * inode_hash_lock
+ * inode_hash_lock
*/
/*
* This is needed for the following functions:
@@ -151,7 +150,7 @@ int inode_init_always(struct super_block
inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0;
- atomic_set(&inode->i_count, 1);
+ inode->i_count = 1;
inode->i_op = &empty_iops;
inode->i_fop = &empty_fops;
inode->i_nlink = 1;
@@ -306,7 +305,8 @@ void __iget(struct inode *inode)
{
assert_spin_locked(&inode->i_lock);

- if (atomic_inc_return(&inode->i_count) != 1)
+ inode->i_count++;
+ if (inode->i_count > 1)
return;

if (!(inode->i_state & (I_DIRTY|I_SYNC)))
@@ -412,7 +412,7 @@ static int invalidate_list(struct list_h
continue;
}
invalidate_inode_buffers(inode);
- if (!atomic_read(&inode->i_count)) {
+ if (!inode->i_count) {
list_move(&inode->i_list, dispose);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
@@ -463,7 +463,7 @@ static int can_unuse(struct inode *inode
return 0;
if (inode_has_buffers(inode))
return 0;
- if (atomic_read(&inode->i_count))
+ if (inode->i_count)
return 0;
if (inode->i_data.nrpages)
return 0;
@@ -501,7 +501,7 @@ static void prune_icache(int nr_to_scan)
inode = list_entry(inode_unused.prev, struct inode, i_list);

spin_lock(&inode->i_lock);
- if (inode->i_state || atomic_read(&inode->i_count)) {
+ if (inode->i_state || inode->i_count) {
list_move(&inode->i_list, &inode_unused);
spin_unlock(&inode->i_lock);
continue;
@@ -1290,8 +1290,6 @@ void generic_delete_inode(struct inode *
{
const struct super_operations *op = inode->i_sb->s_op;

- spin_lock(&sb_inode_list_lock);
- spin_lock(&inode->i_lock);
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
spin_unlock(&sb_inode_list_lock);
@@ -1336,8 +1334,6 @@ int generic_detach_inode(struct inode *i
{
struct super_block *sb = inode->i_sb;

- spin_lock(&sb_inode_list_lock);
- spin_lock(&inode->i_lock);
if (!hlist_unhashed(&inode->i_hash)) {
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
list_move(&inode->i_list, &inode_unused);
@@ -1436,8 +1432,24 @@ void iput(struct inode *inode)
if (inode) {
BUG_ON(inode->i_state == I_CLEAR);

- if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
+retry:
+ spin_lock(&inode->i_lock);
+ if (inode->i_count == 1) {
+ if (!spin_trylock(&inode_lock)) {
+ spin_unlock(&inode->i_lock);
+ goto retry;
+ }
+ if (!spin_trylock(&sb_inode_list_lock)) {
+ spin_unlock(&inode_lock);
+ spin_unlock(&inode->i_lock);
+ goto retry;
+ }
+ inode->i_count--;
iput_final(inode);
+ } else {
+ inode->i_count--;
+ spin_unlock(&inode->i_lock);
+ }
}
}
EXPORT_SYMBOL(iput);
Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c
+++ linux-2.6/fs/libfs.c
@@ -277,7 +277,9 @@ int simple_link(struct dentry *old_dentr

inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
dget(dentry);
d_instantiate(dentry, inode);
return 0;
Index: linux-2.6/fs/locks.c
===================================================================
--- linux-2.6.orig/fs/locks.c
+++ linux-2.6/fs/locks.c
@@ -1375,8 +1375,7 @@ int generic_setlease(struct file *filp,
if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
goto out;
if ((arg == F_WRLCK)
- && (dentry->d_count > 1
- || (atomic_read(&inode->i_count) > 1)))
+ && (dentry->d_count > 1 || inode->i_count > 1))
goto out;
}

Index: linux-2.6/fs/namei.c
===================================================================
--- linux-2.6.orig/fs/namei.c
+++ linux-2.6/fs/namei.c
@@ -2306,8 +2306,11 @@ static long do_unlinkat(int dfd, const c
if (nd.last.name[nd.last.len])
goto slashes;
inode = dentry->d_inode;
- if (inode)
- atomic_inc(&inode->i_count);
+ if (inode) {
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
+ }
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit2;
Index: linux-2.6/fs/nfs/dir.c
===================================================================
--- linux-2.6.orig/fs/nfs/dir.c
+++ linux-2.6/fs/nfs/dir.c
@@ -1570,7 +1570,9 @@ nfs_link(struct dentry *old_dentry, stru
d_drop(dentry);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
if (error == 0) {
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
d_add(dentry, inode);
}
return error;
Index: linux-2.6/fs/nfs/getroot.c
===================================================================
--- linux-2.6.orig/fs/nfs/getroot.c
+++ linux-2.6/fs/nfs/getroot.c
@@ -55,7 +55,9 @@ static int nfs_superblock_set_dummy_root
return -ENOMEM;
}
/* Circumvent igrab(): we know the inode is not being freed */
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
/*
* Ensure that this dentry is invisible to d_find_alias().
* Otherwise, it may be spliced into the tree by
Index: linux-2.6/fs/notify/inotify/inotify.c
===================================================================
--- linux-2.6.orig/fs/notify/inotify/inotify.c
+++ linux-2.6/fs/notify/inotify/inotify.c
@@ -404,23 +404,28 @@ void inotify_unmount_inodes(struct list_
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
- if (!atomic_read(&inode->i_count))
+ if (!inode->i_count)
continue;

need_iput_tmp = need_iput;
need_iput = NULL;
/* In case inotify_remove_watch_locked() drops a reference. */
- if (inode != need_iput_tmp)
+ if (inode != need_iput_tmp) {
+ spin_lock(&inode->i_lock);
__iget(inode);
- else
+ spin_unlock(&inode->i_lock);
+ } else
need_iput_tmp = NULL;
/* In case the dropping of a reference would nuke next_i. */
- if ((&next_i->i_sb_list != list) &&
- atomic_read(&next_i->i_count) &&
- !(next_i->i_state & (I_CLEAR | I_FREEING |
- I_WILL_FREE))) {
- __iget(next_i);
- need_iput = next_i;
+ if (&next_i->i_sb_list != list) {
+ spin_lock(&next_i->i_lock);
+ if (next_i->i_count &&
+ !(next_i->i_state &
+ (I_CLEAR|I_FREEING|I_WILL_FREE))) {
+ __iget(next_i);
+ need_iput = next_i;
+ }
+ spin_unlock(&next_i->i_lock);
}

/*
@@ -439,11 +444,10 @@ void inotify_unmount_inodes(struct list_
mutex_lock(&inode->inotify_mutex);
watches = &inode->inotify_watches;
list_for_each_entry_safe(watch, next_w, watches, i_list) {
- struct inotify_handle *ih= watch->ih;
+ struct inotify_handle *ih = watch->ih;
get_inotify_watch(watch);
mutex_lock(&ih->mutex);
- ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
- NULL, NULL);
+ ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL);
inotify_remove_watch_locked(ih, watch);
mutex_unlock(&ih->mutex);
put_inotify_watch(watch);
Index: linux-2.6/fs/xfs/linux-2.6/xfs_iops.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_iops.c
+++ linux-2.6/fs/xfs/linux-2.6/xfs_iops.c
@@ -360,7 +360,9 @@ xfs_vn_link(
if (unlikely(error))
return -error;

- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
d_instantiate(dentry, inode);
return 0;
}
Index: linux-2.6/fs/xfs/xfs_inode.h
===================================================================
--- linux-2.6.orig/fs/xfs/xfs_inode.h
+++ linux-2.6/fs/xfs/xfs_inode.h
@@ -483,8 +483,10 @@ void xfs_mark_inode_dirty_sync(xfs_inod

#define IHOLD(ip) \
do { \
- ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
- atomic_inc(&(VFS_I(ip)->i_count)); \
+ spin_lock(&VFS_I(ip)->i_lock); \
+ ASSERT(VFS_I(ip)->i_count > 0) ; \
+ VFS_I(ip)->i_count++; \
+ spin_unlock(&VFS_I(ip)->i_lock); \
trace_xfs_ihold(ip, _THIS_IP_); \
} while (0)

Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -729,7 +729,7 @@ struct inode {
struct list_head i_sb_list;
struct list_head i_dentry;
unsigned long i_ino;
- atomic_t i_count;
+ unsigned int i_count;
unsigned int i_nlink;
uid_t i_uid;
gid_t i_gid;
Index: linux-2.6/ipc/mqueue.c
===================================================================
--- linux-2.6.orig/ipc/mqueue.c
+++ linux-2.6/ipc/mqueue.c
@@ -769,8 +769,11 @@ SYSCALL_DEFINE1(mq_unlink, const char __
}

inode = dentry->d_inode;
- if (inode)
- atomic_inc(&inode->i_count);
+ if (inode) {
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
+ }
err = mnt_want_write(ipc_ns->mq_mnt);
if (err)
goto out_err;
Index: linux-2.6/kernel/futex.c
===================================================================
--- linux-2.6.orig/kernel/futex.c
+++ linux-2.6/kernel/futex.c
@@ -168,7 +168,9 @@ static void get_futex_key_refs(union fut

switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
- atomic_inc(&key->shared.inode->i_count);
+ spin_lock(&key->shared.inode->i_lock);
+ key->shared.inode->i_count++;
+ spin_unlock(&key->shared.inode->i_lock);
break;
case FUT_OFF_MMSHARED:
atomic_inc(&key->private.mm->mm_count);
Index: linux-2.6/mm/shmem.c
===================================================================
--- linux-2.6.orig/mm/shmem.c
+++ linux-2.6/mm/shmem.c
@@ -1877,7 +1877,9 @@ static int shmem_link(struct dentry *old
dir->i_size += BOGO_DIRENT_SIZE;
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inc_nlink(inode);
- atomic_inc(&inode->i_count); /* New dentry reference */
+ spin_lock(&inode->i_lock);
+ inode->i_count++; /* New dentry reference */
+ spin_unlock(&inode->i_lock);
dget(dentry); /* Extra pinning count for the created dentry */
d_instantiate(dentry, inode);
out:
Index: linux-2.6/fs/bfs/dir.c
===================================================================
--- linux-2.6.orig/fs/bfs/dir.c
+++ linux-2.6/fs/bfs/dir.c
@@ -176,7 +176,9 @@ static int bfs_link(struct dentry *old,
inc_nlink(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
d_instantiate(new, inode);
mutex_unlock(&info->bfs_lock);
return 0;
Index: linux-2.6/fs/btrfs/inode.c
===================================================================
--- linux-2.6.orig/fs/btrfs/inode.c
+++ linux-2.6/fs/btrfs/inode.c
@@ -4753,7 +4753,9 @@ static int btrfs_link(struct dentry *old
}

btrfs_set_trans_block_group(trans, dir);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);

err = btrfs_add_nondir(trans, dentry, inode, 1, index);

Index: linux-2.6/fs/coda/dir.c
===================================================================
--- linux-2.6.orig/fs/coda/dir.c
+++ linux-2.6/fs/coda/dir.c
@@ -303,7 +303,9 @@ static int coda_link(struct dentry *sour
}

coda_dir_update_mtime(dir_inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
d_instantiate(de, inode);
inc_nlink(inode);

Index: linux-2.6/fs/exofs/inode.c
===================================================================
--- linux-2.6.orig/fs/exofs/inode.c
+++ linux-2.6/fs/exofs/inode.c
@@ -1124,7 +1124,9 @@ static void create_done(struct exofs_io_

set_obj_created(oi);

- atomic_dec(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count--;
+ spin_unlock(&inode->i_lock);
wake_up(&oi->i_wq);
}

@@ -1177,14 +1179,18 @@ struct inode *exofs_new_inode(struct ino
/* increment the refcount so that the inode will still be around when we
* reach the callback
*/
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);

ios->done = create_done;
ios->private = inode;
ios->cred = oi->i_cred;
ret = exofs_sbi_create(ios);
if (ret) {
- atomic_dec(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count--;
+ spin_unlock(&inode->i_lock);
exofs_put_io_state(ios);
return ERR_PTR(ret);
}
Index: linux-2.6/fs/exofs/namei.c
===================================================================
--- linux-2.6.orig/fs/exofs/namei.c
+++ linux-2.6/fs/exofs/namei.c
@@ -153,7 +153,9 @@ static int exofs_link(struct dentry *old

inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);

return exofs_add_nondir(dentry, inode);
}
Index: linux-2.6/fs/ext4/ialloc.c
===================================================================
--- linux-2.6.orig/fs/ext4/ialloc.c
+++ linux-2.6/fs/ext4/ialloc.c
@@ -189,9 +189,9 @@ void ext4_free_inode(handle_t *handle, s
struct ext4_sb_info *sbi;
int fatal = 0, err, count, cleared;

- if (atomic_read(&inode->i_count) > 1) {
+ if (inode->i_count > 1) {
printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
- atomic_read(&inode->i_count));
+ inode->i_count);
return;
}
if (inode->i_nlink) {
Index: linux-2.6/fs/ext4/namei.c
===================================================================
--- linux-2.6.orig/fs/ext4/namei.c
+++ linux-2.6/fs/ext4/namei.c
@@ -2340,7 +2340,9 @@ retry:

inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);

err = ext4_add_entry(handle, dentry, inode);
if (!err) {
Index: linux-2.6/fs/gfs2/ops_inode.c
===================================================================
--- linux-2.6.orig/fs/gfs2/ops_inode.c
+++ linux-2.6/fs/gfs2/ops_inode.c
@@ -253,7 +253,9 @@ out_parent:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
if (!error) {
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
d_instantiate(dentry, inode);
mark_inode_dirty(inode);
}
Index: linux-2.6/fs/hfsplus/dir.c
===================================================================
--- linux-2.6.orig/fs/hfsplus/dir.c
+++ linux-2.6/fs/hfsplus/dir.c
@@ -301,7 +301,9 @@ static int hfsplus_link(struct dentry *s

inc_nlink(inode);
hfsplus_instantiate(dst_dentry, inode, cnid);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
HFSPLUS_SB(sb).file_count++;
Index: linux-2.6/fs/hpfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hpfs/inode.c
+++ linux-2.6/fs/hpfs/inode.c
@@ -183,7 +183,7 @@ void hpfs_write_inode(struct inode *i)
struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
struct inode *parent;
if (i->i_ino == hpfs_sb(i->i_sb)->sb_root) return;
- if (hpfs_inode->i_rddir_off && !atomic_read(&i->i_count)) {
+ if (hpfs_inode->i_rddir_off && !i->i_count) {
if (*hpfs_inode->i_rddir_off) printk("HPFS: write_inode: some position still there\n");
kfree(hpfs_inode->i_rddir_off);
hpfs_inode->i_rddir_off = NULL;
Index: linux-2.6/fs/jffs2/dir.c
===================================================================
--- linux-2.6.orig/fs/jffs2/dir.c
+++ linux-2.6/fs/jffs2/dir.c
@@ -290,7 +290,9 @@ static int jffs2_link (struct dentry *ol
mutex_unlock(&f->sem);
d_instantiate(dentry, old_dentry->d_inode);
dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
- atomic_inc(&old_dentry->d_inode->i_count);
+ spin_lock(&old_dentry->d_inode->i_lock);
+ old_dentry->d_inode->i_count++;
+ spin_unlock(&old_dentry->d_inode->i_lock);
}
return ret;
}
@@ -871,7 +873,9 @@ static int jffs2_rename (struct inode *o
printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
/* Might as well let the VFS know */
d_instantiate(new_dentry, old_dentry->d_inode);
- atomic_inc(&old_dentry->d_inode->i_count);
+ spin_lock(&old_dentry->d_inode->i_lock);
+ old_dentry->d_inode->i_count++;
+ spin_unlock(&old_dentry->d_inode->i_lock);
new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
return ret;
}
Index: linux-2.6/fs/jfs/jfs_txnmgr.c
===================================================================
--- linux-2.6.orig/fs/jfs/jfs_txnmgr.c
+++ linux-2.6/fs/jfs/jfs_txnmgr.c
@@ -1279,7 +1279,9 @@ int txCommit(tid_t tid, /* transaction
* lazy commit thread finishes processing
*/
if (tblk->xflag & COMMIT_DELETE) {
- atomic_inc(&tblk->u.ip->i_count);
+ spin_lock(&tblk->u.ip->i_lock);
+ tblk->u.ip->i_count++;
+ spin_unlock(&tblk->u.ip->i_lock);
/*
* Avoid a rare deadlock
*
Index: linux-2.6/fs/jfs/namei.c
===================================================================
--- linux-2.6.orig/fs/jfs/namei.c
+++ linux-2.6/fs/jfs/namei.c
@@ -839,7 +839,9 @@ static int jfs_link(struct dentry *old_d
ip->i_ctime = CURRENT_TIME;
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
mark_inode_dirty(dir);
- atomic_inc(&ip->i_count);
+ spin_lock(&ip->i_lock);
+ ip->i_count++;
+ spin_unlock(&ip->i_lock);

iplist[0] = ip;
iplist[1] = dir;
Index: linux-2.6/fs/minix/namei.c
===================================================================
--- linux-2.6.orig/fs/minix/namei.c
+++ linux-2.6/fs/minix/namei.c
@@ -101,7 +101,9 @@ static int minix_link(struct dentry * ol

inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
return add_nondir(dentry, inode);
}

Index: linux-2.6/fs/nfs/inode.c
===================================================================
--- linux-2.6.orig/fs/nfs/inode.c
+++ linux-2.6/fs/nfs/inode.c
@@ -377,7 +377,7 @@ nfs_fhget(struct super_block *sb, struct
dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
- atomic_read(&inode->i_count));
+ inode->i_count);

out:
return inode;
@@ -1123,7 +1123,7 @@ static int nfs_update_inode(struct inode

dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
__func__, inode->i_sb->s_id, inode->i_ino,
- atomic_read(&inode->i_count), fattr->valid);
+ inode->i_count, fattr->valid);

if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
goto out_fileid;
Index: linux-2.6/fs/nilfs2/mdt.c
===================================================================
--- linux-2.6.orig/fs/nilfs2/mdt.c
+++ linux-2.6/fs/nilfs2/mdt.c
@@ -479,7 +479,7 @@ nilfs_mdt_new_common(struct the_nilfs *n
inode->i_sb = sb; /* sb may be NULL for some meta data files */
inode->i_blkbits = nilfs->ns_blocksize_bits;
inode->i_flags = 0;
- atomic_set(&inode->i_count, 1);
+ inode->i_count = 1;
inode->i_nlink = 1;
inode->i_ino = ino;
inode->i_mode = S_IFREG;
Index: linux-2.6/fs/nilfs2/namei.c
===================================================================
--- linux-2.6.orig/fs/nilfs2/namei.c
+++ linux-2.6/fs/nilfs2/namei.c
@@ -219,7 +219,9 @@ static int nilfs_link(struct dentry *old

inode->i_ctime = CURRENT_TIME;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);

err = nilfs_add_nondir(dentry, inode);
if (!err)
Index: linux-2.6/fs/ocfs2/namei.c
===================================================================
--- linux-2.6.orig/fs/ocfs2/namei.c
+++ linux-2.6/fs/ocfs2/namei.c
@@ -722,7 +722,9 @@ static int ocfs2_link(struct dentry *old
goto out_commit;
}

- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);

Index: linux-2.6/fs/reiserfs/file.c
===================================================================
--- linux-2.6.orig/fs/reiserfs/file.c
+++ linux-2.6/fs/reiserfs/file.c
@@ -39,7 +39,7 @@ static int reiserfs_file_release(struct
BUG_ON(!S_ISREG(inode->i_mode));

/* fast out for when nothing needs to be done */
- if ((atomic_read(&inode->i_count) > 1 ||
+ if ((inode->i_count > 1 ||
!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
!tail_has_to_be_packed(inode)) &&
REISERFS_I(inode)->i_prealloc_count <= 0) {
@@ -94,7 +94,7 @@ static int reiserfs_file_release(struct
if (!err)
err = jbegin_failure;

- if (!err && atomic_read(&inode->i_count) <= 1 &&
+ if (!err && inode->i_count <= 1 &&
(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
tail_has_to_be_packed(inode)) {
/* if regular file is released by last holder and it has been
Index: linux-2.6/fs/reiserfs/namei.c
===================================================================
--- linux-2.6.orig/fs/reiserfs/namei.c
+++ linux-2.6/fs/reiserfs/namei.c
@@ -1156,7 +1156,9 @@ static int reiserfs_link(struct dentry *
inode->i_ctime = CURRENT_TIME_SEC;
reiserfs_update_sd(&th, inode);

- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
d_instantiate(dentry, inode);
retval = journal_end(&th, dir->i_sb, jbegin_count);
reiserfs_write_unlock(dir->i_sb);
Index: linux-2.6/fs/reiserfs/stree.c
===================================================================
--- linux-2.6.orig/fs/reiserfs/stree.c
+++ linux-2.6/fs/reiserfs/stree.c
@@ -1477,7 +1477,7 @@ static int maybe_indirect_to_direct(stru
** reading in the last block. The user will hit problems trying to
** read the file, but for now we just skip the indirect2direct
*/
- if (atomic_read(&inode->i_count) > 1 ||
+ if (inode->i_count > 1 ||
!tail_has_to_be_packed(inode) ||
!page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
/* leave tail in an unformatted node */
Index: linux-2.6/fs/sysv/namei.c
===================================================================
--- linux-2.6.orig/fs/sysv/namei.c
+++ linux-2.6/fs/sysv/namei.c
@@ -126,7 +126,9 @@ static int sysv_link(struct dentry * old

inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);

return add_nondir(dentry, inode);
}
Index: linux-2.6/fs/ubifs/dir.c
===================================================================
--- linux-2.6.orig/fs/ubifs/dir.c
+++ linux-2.6/fs/ubifs/dir.c
@@ -550,7 +550,9 @@ static int ubifs_link(struct dentry *old

lock_2_inodes(dir, inode);
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
inode->i_ctime = ubifs_current_time(inode);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
Index: linux-2.6/fs/ubifs/super.c
===================================================================
--- linux-2.6.orig/fs/ubifs/super.c
+++ linux-2.6/fs/ubifs/super.c
@@ -342,7 +342,7 @@ static void ubifs_delete_inode(struct in
goto out;

dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
- ubifs_assert(!atomic_read(&inode->i_count));
+ ubifs_assert(!inode->i_count);
ubifs_assert(inode->i_nlink == 0);

truncate_inode_pages(&inode->i_data, 0);
Index: linux-2.6/fs/udf/namei.c
===================================================================
--- linux-2.6.orig/fs/udf/namei.c
+++ linux-2.6/fs/udf/namei.c
@@ -1101,7 +1101,9 @@ static int udf_link(struct dentry *old_d
inc_nlink(inode);
inode->i_ctime = current_fs_time(inode->i_sb);
mark_inode_dirty(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);
d_instantiate(dentry, inode);
unlock_kernel();

Index: linux-2.6/fs/ufs/namei.c
===================================================================
--- linux-2.6.orig/fs/ufs/namei.c
+++ linux-2.6/fs/ufs/namei.c
@@ -180,7 +180,9 @@ static int ufs_link (struct dentry * old

inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
- atomic_inc(&inode->i_count);
+ spin_lock(&inode->i_lock);
+ inode->i_count++;
+ spin_unlock(&inode->i_lock);

error = ufs_add_nondir(dentry, inode);
unlock_kernel();
Index: linux-2.6/fs/notify/inode_mark.c
===================================================================
--- linux-2.6.orig/fs/notify/inode_mark.c
+++ linux-2.6/fs/notify/inode_mark.c
@@ -382,24 +382,30 @@ void fsnotify_unmount_inodes(struct list
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
*/
- if (!atomic_read(&inode->i_count))
+ if (!inode->i_count)
continue;

need_iput_tmp = need_iput;
need_iput = NULL;

/* In case fsnotify_inode_delete() drops a reference. */
- if (inode != need_iput_tmp)
+ if (inode != need_iput_tmp) {
+ spin_lock(&inode->i_lock);
__iget(inode);
- else
+ spin_unlock(&inode->i_lock);
+ } else
need_iput_tmp = NULL;

/* In case the dropping of a reference would nuke next_i. */
- if ((&next_i->i_sb_list != list) &&
- atomic_read(&next_i->i_count) &&
- !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) {
- __iget(next_i);
- need_iput = next_i;
+ if (&next_i->i_sb_list != list) {
+ spin_lock(&next_i->i_lock);
+ if (next_i->i_count &&
+ !(next_i->i_state &
+ (I_CLEAR | I_FREEING | I_WILL_FREE))) {
+ __iget(next_i);
+ need_iput = next_i;
+ }
+ spin_unlock(&next_i->i_lock);
}

/*
Index: linux-2.6/fs/ntfs/super.c
===================================================================
--- linux-2.6.orig/fs/ntfs/super.c
+++ linux-2.6/fs/ntfs/super.c
@@ -2930,7 +2930,9 @@ static int ntfs_fill_super(struct super_
}
if ((sb->s_root = d_alloc_root(vol->root_ino))) {
/* We increment i_count simulating an ntfs_iget(). */
- atomic_inc(&vol->root_ino->i_count);
+ spin_lock(&vol->root_ino->i_lock);
+ vol->root_ino->i_count++;
+ spin_unlock(&vol->root_ino->i_lock);
ntfs_debug("Exiting, status successful.");
/* Release the default upcase if it has no users. */
mutex_lock(&ntfs_lock);
Index: linux-2.6/fs/cifs/inode.c
===================================================================
--- linux-2.6.orig/fs/cifs/inode.c
+++ linux-2.6/fs/cifs/inode.c
@@ -1612,7 +1612,7 @@ int cifs_revalidate_dentry(struct dentry
}

cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld "
- "jiffies %ld", full_path, inode, inode->i_count.counter,
+ "jiffies %ld", full_path, inode, inode->i_count,
dentry, dentry->d_time, jiffies);

if (CIFS_SB(sb)->tcon->unix_ext)
Index: linux-2.6/fs/xfs/linux-2.6/xfs_trace.h
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_trace.h
+++ linux-2.6/fs/xfs/linux-2.6/xfs_trace.h
@@ -576,7 +576,7 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
- __entry->count = atomic_read(&VFS_I(ip)->i_count);
+ __entry->count = VFS_I(ip)->i_count;
__entry->pincount = atomic_read(&ip->i_pincount);
__entry->caller_ip = caller_ip;
),
Index: linux-2.6/net/socket.c
===================================================================
--- linux-2.6.orig/net/socket.c
+++ linux-2.6/net/socket.c
@@ -386,7 +386,9 @@ static int sock_alloc_file(struct socket
&socket_file_ops);
if (unlikely(!file)) {
/* drop dentry, keep inode */
- atomic_inc(&path.dentry->d_inode->i_count);
+ spin_lock(&path.dentry->d_inode->i_lock);
+ path.dentry->d_inode->i_count++;
+ spin_unlock(&path.dentry->d_inode->i_lock);
path_put(&path);
put_unused_fd(fd);
return -ENFILE;


2010-06-30 07:27:57

by Dave Chinner

Subject: Re: [patch 29/52] fs: icache lock i_count

On Thu, Jun 24, 2010 at 01:02:41PM +1000, [email protected] wrote:
> Protect inode->i_count with i_lock, rather than having it atomic.
> Next step should also be to move things together (eg. the refcount increment
> into d_instantiate, which will remove a lock/unlock cycle on i_lock).
.....
> Index: linux-2.6/fs/inode.c
> ===================================================================
> --- linux-2.6.orig/fs/inode.c
> +++ linux-2.6/fs/inode.c
> @@ -33,14 +33,13 @@
> * inode_hash_lock protects:
> * inode hash table, i_hash
> * inode->i_lock protects:
> - * i_state
> + * i_state, i_count
> *
> * Ordering:
> * inode_lock
> * sb_inode_list_lock
> * inode->i_lock
> - * inode_lock
> - * inode_hash_lock
> + * inode_hash_lock
> */

I thought that the rule governing the use of inode->i_lock was that
it can be used anywhere as long as it is the innermost lock.

Hmmm, no references in the code or documentation. Google gives a
pretty good reference:

http://www.mail-archive.com/[email protected]/msg02584.html

Perhaps a different/new lock needs to be used here?

Cheers,

Dave.
--
Dave Chinner
[email protected]

2010-06-30 14:33:09

by Nick Piggin

Subject: Re: [patch 29/52] fs: icache lock i_count

On Wed, Jun 30, 2010 at 05:27:02PM +1000, Dave Chinner wrote:
> On Thu, Jun 24, 2010 at 01:02:41PM +1000, [email protected] wrote:
> > Protect inode->i_count with i_lock, rather than having it atomic.
> > Next step should also be to move things together (eg. the refcount increment
> > into d_instantiate, which will remove a lock/unlock cycle on i_lock).
> .....
> > Index: linux-2.6/fs/inode.c
> > ===================================================================
> > --- linux-2.6.orig/fs/inode.c
> > +++ linux-2.6/fs/inode.c
> > @@ -33,14 +33,13 @@
> > * inode_hash_lock protects:
> > * inode hash table, i_hash
> > * inode->i_lock protects:
> > - * i_state
> > + * i_state, i_count
> > *
> > * Ordering:
> > * inode_lock
> > * sb_inode_list_lock
> > * inode->i_lock
> > - * inode_lock
> > - * inode_hash_lock
> > + * inode_hash_lock
> > */
>
> I thought that the rule governing the use of inode->i_lock was that
> it can be used anywhere as long as it is the innermost lock.
>
> Hmmm, no references in the code or documentation. Google gives a
> pretty good reference:
>
> http://www.mail-archive.com/[email protected]/msg02584.html
>
> Perhaps a different/new lock needs to be used here?

Well I just changed the order (and documented it to boot :)). It's
pretty easy to verify that LOR is no problem. inode hash is only
taken in a very few places so other code outside inode.c is fine to
use i_lock as an innermost lock.
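
For example (sketch only; fs_private_lock below is a hypothetical
filesystem-internal lock, and the pattern simply mirrors the IHOLD()
change in the patch), a filesystem can keep nesting i_lock innermost
under its own locking:

	spin_lock(&fs_private_lock);	/* hypothetical fs-internal lock */
	spin_lock(&inode->i_lock);	/* i_lock remains innermost here */
	inode->i_count++;		/* i_count is protected by i_lock */
	spin_unlock(&inode->i_lock);
	spin_unlock(&fs_private_lock);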

2010-07-01 02:36:35

by Dave Chinner

Subject: Re: [patch 29/52] fs: icache lock i_count

On Wed, Jun 30, 2010 at 10:05:02PM +1000, Nick Piggin wrote:
> On Wed, Jun 30, 2010 at 05:27:02PM +1000, Dave Chinner wrote:
> > On Thu, Jun 24, 2010 at 01:02:41PM +1000, [email protected] wrote:
> > > Protect inode->i_count with i_lock, rather than having it atomic.
> > > Next step should also be to move things together (eg. the refcount increment
> > > into d_instantiate, which will remove a lock/unlock cycle on i_lock).
> > .....
> > > Index: linux-2.6/fs/inode.c
> > > ===================================================================
> > > --- linux-2.6.orig/fs/inode.c
> > > +++ linux-2.6/fs/inode.c
> > > @@ -33,14 +33,13 @@
> > > * inode_hash_lock protects:
> > > * inode hash table, i_hash
> > > * inode->i_lock protects:
> > > - * i_state
> > > + * i_state, i_count
> > > *
> > > * Ordering:
> > > * inode_lock
> > > * sb_inode_list_lock
> > > * inode->i_lock
> > > - * inode_lock
> > > - * inode_hash_lock
> > > + * inode_hash_lock
> > > */
> >
> > I thought that the rule governing the use of inode->i_lock was that
> > it can be used anywhere as long as it is the innermost lock.
> >
> > Hmmm, no references in the code or documentation. Google gives a
> > pretty good reference:
> >
> > http://www.mail-archive.com/[email protected]/msg02584.html
> >
> > Perhaps a different/new lock needs to be used here?
>
> Well I just changed the order (and documented it to boot :)). It's
> pretty easy to verify that LOR is no problem. inode hash is only
> taken in a very few places so other code outside inode.c is fine to
> use i_lock as an innermost lock.

It's not just the inode_hash_lock - you move four or five other
locks under inode->i_lock as the series progresses. IOWs, there are
now many paths and locking orders where the i_lock is not innermost.
If we go forward with this, it's only going to get more complex and
eventually somewhere we'll need a new lock for an innermost
operation because inode->i_lock is no longer safe to use....

Seriously: use a new lock for high level inode operations you are
optimising - don't repurpose an existing lock with different usage
rules just because it's convenient.

Cheers,

Dave.
--
Dave Chinner
[email protected]

2010-07-01 07:54:33

by Nick Piggin

Subject: Re: [patch 29/52] fs: icache lock i_count

On Thu, Jul 01, 2010 at 12:36:18PM +1000, Dave Chinner wrote:
> On Wed, Jun 30, 2010 at 10:05:02PM +1000, Nick Piggin wrote:
> > On Wed, Jun 30, 2010 at 05:27:02PM +1000, Dave Chinner wrote:
> > > On Thu, Jun 24, 2010 at 01:02:41PM +1000, [email protected] wrote:
> > > > Protect inode->i_count with i_lock, rather than having it atomic.
> > > > Next step should also be to move things together (eg. the refcount increment
> > > > into d_instantiate, which will remove a lock/unlock cycle on i_lock).
> > > .....
> > > > Index: linux-2.6/fs/inode.c
> > > > ===================================================================
> > > > --- linux-2.6.orig/fs/inode.c
> > > > +++ linux-2.6/fs/inode.c
> > > > @@ -33,14 +33,13 @@
> > > > * inode_hash_lock protects:
> > > > * inode hash table, i_hash
> > > > * inode->i_lock protects:
> > > > - * i_state
> > > > + * i_state, i_count
> > > > *
> > > > * Ordering:
> > > > * inode_lock
> > > > * sb_inode_list_lock
> > > > * inode->i_lock
> > > > - * inode_lock
> > > > - * inode_hash_lock
> > > > + * inode_hash_lock
> > > > */
> > >
> > > I thought that the rule governing the use of inode->i_lock was that
> > > it can be used anywhere as long as it is the innermost lock.
> > >
> > > Hmmm, no references in the code or documentation. Google gives a
> > > pretty good reference:
> > >
> > > http://www.mail-archive.com/[email protected]/msg02584.html
> > >
> > > Perhaps a different/new lock needs to be used here?
> >
> > Well I just changed the order (and documented it to boot :)). It's
> > pretty easy to verify that LOR is no problem. inode hash is only
> > taken in a very few places so other code outside inode.c is fine to
> > use i_lock as an innermost lock.
>
> It's not just the inode_hash_lock - you move four or five other
> locks under inode->i_lock as the series progresses. IOWs, there's
> now many paths and locking orders where the i_lock is not innermost.
> If we go forward with this, it's only going to get more complex and
> eventually somewhere we'll need a new lock for an innermost
> operation because inode->i_lock is no longer safe to use....

OK, yes, it's more than one lock, but I don't quite see the problem.
The locks are mostly confined to inode.c and fs-writeback.c, and
filesystems can basically use i_lock as the innermost lock for their
purposes. If they get it wrong, lockdep will tell them pretty quickly.
And it's documented, to boot.


> Seriously: use a new lock for high level inode operations you are
> optimising - don't repurpose an existing lock with different usage
> rules just because it's convenient.

That's what scalability development is all about, I'm afraid. Just
adding more and more locks is what makes things more complex, so
you have to juggle around or change locks when possible. If a
difficulty with locking pops up in the future, I'd prefer to look at it
then.

I don't think any filesystems cared at all when I converted them.

2010-07-01 09:36:11

by Nick Piggin

Subject: Re: [patch 29/52] fs: icache lock i_count

On Thu, Jul 01, 2010 at 05:54:26PM +1000, Nick Piggin wrote:
> On Thu, Jul 01, 2010 at 12:36:18PM +1000, Dave Chinner wrote:
> If a
> difficulty with locking pops up in the future, I'd prefer to look at it
> then.
>
> I don't think any filesystems cared at all when I converted them.

What I mean by this is that _today_ no filesystems seem to have
any problems with how I did it. I did touch the quota and notify code,
which iterate the inode sb lists, but that was pretty trivial. Not many
others care about inode locking details enough to be concerned with any
of the locks in fs/inode.c.

And so instead of adding another lock now, when I already have (IMO)
nice, working code, I would prefer to wait until some fs development
actually runs into a problem with the locking.

There are several things that can be done. Using RCU for more of
the inode lists is a possibility, and can improve lock order problems
while actually reducing the amount of locking rather than adding
locks.

2010-07-01 16:22:10

by Frank Mayhar

Subject: Re: [patch 29/52] fs: icache lock i_count

On Thu, 2010-07-01 at 17:54 +1000, Nick Piggin wrote:
> On Thu, Jul 01, 2010 at 12:36:18PM +1000, Dave Chinner wrote:
> > Seriously: use a new lock for high level inode operations you are
> > optimising - don't repurpose an existing lock with different usage
> > rules just because it's convenient.
>
> That's what scalability development is all about, I'm afraid. Just
> adding more and more locks is what makes things more complex, so
> you have to juggle around or change locks when possible. If a
> difficulty with locking pops up in the future, I'd prefer to look at it
> then.

I must agree strongly with Nick here. Lock proliferation is a Bad
Thing; adding a new lock here just adds complexity without really
improving anything, since you still have to deal with lock ordering
_and_ add a new one to the mix. It also makes struct inode bigger for
no real gain. Since i_lock is already well understood and its use is
pretty isolated, it seems ideal to extend it to more general use while
keeping the isolation intact as much as possible. Hell, if the code
were designed with this kind of scalability in mind, i_lock would be
doing all the locking directly related to struct inode. Much as it is
after Nick's work.

My argument here is that it's not just convenient, it makes the
implementation simpler since instead of dealing with two locks there's
just one.
--
Frank Mayhar <[email protected]>
Google, Inc.

2010-07-03 02:05:12

by Andrew Morton

Subject: Re: [patch 29/52] fs: icache lock i_count

On Wed, 30 Jun 2010 22:05:02 +1000 Nick Piggin <[email protected]> wrote:

> On Wed, Jun 30, 2010 at 05:27:02PM +1000, Dave Chinner wrote:
> > On Thu, Jun 24, 2010 at 01:02:41PM +1000, [email protected] wrote:
> > > Protect inode->i_count with i_lock, rather than having it atomic.
> > > Next step should also be to move things together (eg. the refcount increment
> > > into d_instantiate, which will remove a lock/unlock cycle on i_lock).
> > .....
> > > Index: linux-2.6/fs/inode.c
> > > ===================================================================
> > > --- linux-2.6.orig/fs/inode.c
> > > +++ linux-2.6/fs/inode.c
> > > @@ -33,14 +33,13 @@
> > > * inode_hash_lock protects:
> > > * inode hash table, i_hash
> > > * inode->i_lock protects:
> > > - * i_state
> > > + * i_state, i_count
> > > *
> > > * Ordering:
> > > * inode_lock
> > > * sb_inode_list_lock
> > > * inode->i_lock
> > > - * inode_lock
> > > - * inode_hash_lock
> > > + * inode_hash_lock
> > > */
> >
> > I thought that the rule governing the use of inode->i_lock was that
> > it can be used anywhere as long as it is the innermost lock.
> >
> > Hmmm, no references in the code or documentation. Google gives a
> > pretty good reference:
> >
> > http://www.mail-archive.com/[email protected]/msg02584.html
> >
> > Perhaps a different/new lock needs to be used here?
>
> Well I just changed the order (and documented it to boot :)). It's
> pretty easy to verify that LOR is no problem. inode hash is only
> taken in a very few places so other code outside inode.c is fine to
> use i_lock as an innermost lock.

um, nesting a kernel-wide singleton lock inside a per-inode lock is
plain nutty.

2010-07-03 03:41:33

by Nick Piggin

Subject: Re: [patch 29/52] fs: icache lock i_count

On Fri, Jul 02, 2010 at 07:03:55PM -0700, Andrew Morton wrote:
> On Wed, 30 Jun 2010 22:05:02 +1000 Nick Piggin <[email protected]> wrote:
>
> > On Wed, Jun 30, 2010 at 05:27:02PM +1000, Dave Chinner wrote:
> > > On Thu, Jun 24, 2010 at 01:02:41PM +1000, [email protected] wrote:
> > > > Protect inode->i_count with i_lock, rather than having it atomic.
> > > > Next step should also be to move things together (eg. the refcount increment
> > > > into d_instantiate, which will remove a lock/unlock cycle on i_lock).
> > > .....
> > > > Index: linux-2.6/fs/inode.c
> > > > ===================================================================
> > > > --- linux-2.6.orig/fs/inode.c
> > > > +++ linux-2.6/fs/inode.c
> > > > @@ -33,14 +33,13 @@
> > > > * inode_hash_lock protects:
> > > > * inode hash table, i_hash
> > > > * inode->i_lock protects:
> > > > - * i_state
> > > > + * i_state, i_count
> > > > *
> > > > * Ordering:
> > > > * inode_lock
> > > > * sb_inode_list_lock
> > > > * inode->i_lock
> > > > - * inode_lock
> > > > - * inode_hash_lock
> > > > + * inode_hash_lock
> > > > */
> > >
> > > I thought that the rule governing the use of inode->i_lock was that
> > > it can be used anywhere as long as it is the innermost lock.
> > >
> > > Hmmm, no references in the code or documentation. Google gives a
> > > pretty good reference:
> > >
> > > http://www.mail-archive.com/[email protected]/msg02584.html
> > >
> > > Perhaps a different/new lock needs to be used here?
> >
> > Well I just changed the order (and documented it to boot :)). It's
> > pretty easy to verify that LOR is no problem. inode hash is only
> > taken in a very few places so other code outside inode.c is fine to
> > use i_lock as an innermost lock.
>
> um, nesting a kernel-wide singleton lock inside a per-inode lock is
> plain nutty.

I think it worked out OK. Because the kernel-wide locks are really
restricted in where they are to be used (ie. not in filesystems). So
they're really pretty constrained to the inode management subsystem.
So filesystems still get to really use i_lock as an innermost lock
for their purposes.

And filesystems get to take i_lock and stop all manipulation of inode
now. No changing of flags, moving it on/off lists etc behind its back.
It really is about locking the data rather than the code.

The final ordering outcome looks like this:

* inode->i_lock
* inode_list_lglock
* zone->inode_lru_lock
* wb->b_lock
* inode_hash_bucket lock

And it works like that because when you want to add or remove an inode
from various data structures, you take the i_lock and then take each
of these locks in turn, inside it. The alternative is to build a bigger
lock ordering graph, and take all the locks up-front before taking
i_lock. I did actually try that and it ended up being worse, so I went
this route.
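
As a concrete sketch (not code from the series; the writeback and hash
bucket names below are simplified stand-ins for the locks listed
above), moving an inode onto a dirty list and into the hash under this
ordering looks roughly like:

	spin_lock(&inode->i_lock);		/* per-inode lock taken first */

	spin_lock(&wb->b_lock);			/* per-writeback list lock */
	list_move(&inode->i_list, &wb->b_dirty);
	spin_unlock(&wb->b_lock);

	spin_lock(&bucket->lock);		/* hypothetical per-bucket hash lock */
	hlist_add_head(&inode->i_hash, &bucket->head);
	spin_unlock(&bucket->lock);

	spin_unlock(&inode->i_lock);

Each container lock is held only for the list manipulation itself,
while i_lock pins the inode's state across the whole update.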

I think taking a global lock in mark_inode_dirty is nutty (especially
when that global lock is shared with hash management, LRU scanning,
writeback, i_flags access... :) It's just a question of which is less
nutty.

2010-07-03 04:32:08

by Andrew Morton

Subject: Re: [patch 29/52] fs: icache lock i_count

On Sat, 3 Jul 2010 13:41:23 +1000 Nick Piggin <[email protected]> wrote:

> On Fri, Jul 02, 2010 at 07:03:55PM -0700, Andrew Morton wrote:
> > On Wed, 30 Jun 2010 22:05:02 +1000 Nick Piggin <[email protected]> wrote:
> >
> > > On Wed, Jun 30, 2010 at 05:27:02PM +1000, Dave Chinner wrote:
> > > > On Thu, Jun 24, 2010 at 01:02:41PM +1000, [email protected] wrote:
> > > > > Protect inode->i_count with i_lock, rather than having it atomic.
> > > > > Next step should also be to move things together (eg. the refcount increment
> > > > > into d_instantiate, which will remove a lock/unlock cycle on i_lock).
> > > > .....
> > > > > Index: linux-2.6/fs/inode.c
> > > > > ===================================================================
> > > > > --- linux-2.6.orig/fs/inode.c
> > > > > +++ linux-2.6/fs/inode.c
> > > > > @@ -33,14 +33,13 @@
> > > > > * inode_hash_lock protects:
> > > > > * inode hash table, i_hash
> > > > > * inode->i_lock protects:
> > > > > - * i_state
> > > > > + * i_state, i_count
> > > > > *
> > > > > * Ordering:
> > > > > * inode_lock
> > > > > * sb_inode_list_lock
> > > > > * inode->i_lock
> > > > > - * inode_lock
> > > > > - * inode_hash_lock
> > > > > + * inode_hash_lock
> > > > > */
> > > >
> > > > I thought that the rule governing the use of inode->i_lock was that
> > > > it can be used anywhere as long as it is the innermost lock.
> > > >
> > > > Hmmm, no references in the code or documentation. Google gives a
> > > > pretty good reference:
> > > >
> > > > http://www.mail-archive.com/[email protected]/msg02584.html
> > > >
> > > > Perhaps a different/new lock needs to be used here?
> > >
> > > Well I just changed the order (and documented it to boot :)). It's
> > > pretty easy to verify that LOR is no problem. inode hash is only
> > > taken in a very few places so other code outside inode.c is fine to
> > > use i_lock as an innermost lock.
> >
> > um, nesting a kernel-wide singleton lock inside a per-inode lock is
> > plain nutty.
>
> I think it worked out OK. Because the kernel-wide locks are really
> restricted in where they are to be used (ie. not in filesystems). So
> they're really pretty constrained to the inode management subsystem.
> So filesystems still get to really use i_lock as an innermost lock
> for their purposes.
>
> And filesystems get to take i_lock and stop all manipulation of inode
> now. No changing of flags, moving it on/off lists etc behind its back.
> It really is about locking the data rather than the code.
>
> The final ordering outcome looks like this:
>
> * inode->i_lock
> * inode_list_lglock
> * zone->inode_lru_lock
> * wb->b_lock
> * inode_hash_bucket lock

Apart from the conceptual vandalism, it means that any contention times
and cache transfer times on those singleton locks will increase
worst-case hold times of our nice, fine-grained i_lock.

> And it works like that because when you want to add or remove an inode
> from various data structures, you take the i_lock

Well that would be wrong. i_lock protects things *within* its inode.
It's nonsensical to take i_lock when the inode is being added to or
removed from external containers because i_lock doesn't protect those
containers!

> and then take each
> of these locks in turn, inside it. The alternative is to build a bigger
> lock ordering graph, and take all the locks up-front before taking
> i_lock. I did actually try that and it ended up being worse, so I went
> this route.
>
> I think taking a global lock in mark_inode_dirty is nutty (especially
> when that global lock is shared with hash management, LRU scanning,
> writeback, i_flags access... :) It's just a question of which is less
> nutty.

Yes, inode_lock is bad news.

2010-07-03 05:07:12

by Nick Piggin

Subject: Re: [patch 29/52] fs: icache lock i_count

On Fri, Jul 02, 2010 at 09:31:49PM -0700, Andrew Morton wrote:
> On Sat, 3 Jul 2010 13:41:23 +1000 Nick Piggin <[email protected]> wrote:
> > On Fri, Jul 02, 2010 at 07:03:55PM -0700, Andrew Morton wrote:
> > > um, nesting a kernel-wide singleton lock inside a per-inode lock is
> > > plain nutty.
> >
> > I think it worked out OK. Because the kernel-wide locks are really
> > restricted in where they are to be used (ie. not in filesystems). So
> > they're really pretty constrained to the inode management subsystem.
> > So filesystems still get to really use i_lock as an innermost lock
> > for their purposes.
> >
> > And filesystems get to take i_lock and stop all manipulation of inode
> > now. No changing of flags, moving it on/off lists etc behind its back.
> > It really is about locking the data rather than the code.
> >
> > The final ordering outcome looks like this:
> >
> > * inode->i_lock
> > * inode_list_lglock
> > * zone->inode_lru_lock
> > * wb->b_lock
> > * inode_hash_bucket lock
>
> Apart from the conceptual vandalism, it means that any contention times
> and cache transfer times on those singleton locks will increase
> worst-case hold times of our nice, fine-grained i_lock.

Yes you are quite right about contention times. I'll answer in
two parts. First, why I don't think it should prove to be a big
problem; second, what we can do to improve it if it does.

So a lot of things that previously required the much worse inode_lock
can now use i_lock (or the other fine-grained locks above). Also, the
contention only comes into play if we actually happen to hit the
same i_lock at the same time, so the throughput-oriented mainline
should generally be OK, and -rt has priority inheritance, which
improves that situation.

If this proves to be a problem -- I doubt it will; in fact I think that
worst-case contention experienced by filesystems and the VFS will go
down significantly -- we can easily fix it up.

That is because all of the above data structures can be traversed using
RCU (the hash list already is). That would mean the locks are really
only taken to put an inode on or off a list.
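
As a rough sketch of what that looks like for the hash (assuming inodes
are freed via RCU; the helper name and the exact state checks are
illustrative, not code from this series), a lookup takes i_lock only
once it has a candidate:

static struct inode *find_inode_rcu_sketch(struct super_block *sb,
					   struct hlist_head *head,
					   unsigned long ino)
{
	struct inode *inode;
	struct hlist_node *node;

	rcu_read_lock();
	hlist_for_each_entry_rcu(inode, node, head, i_hash) {
		if (inode->i_ino != ino || inode->i_sb != sb)
			continue;
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);	/* i_count is protected by i_lock */
		spin_unlock(&inode->i_lock);
		rcu_read_unlock();
		return inode;
	}
	rcu_read_unlock();
	return NULL;
}

The read side then needs no bucket lock at all; writers take the bucket
lock (nested inside i_lock) only to add or remove hash entries.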

The other thing that can be trivially done is to introduce different
locks. I don't have a problem with that, but like any other data
structure, I just would like to wait and see where we have problems.
The same argument applies to a lot of places where we use a single lock
to lock different properties of an object. We use the page lock to
protect page membership on or off the LRU and pagecache lists, for
example, as well as various state transitions (eg. to writeback).

In summary, if there is a lock hold problem, it is easy to use RCU to
reduce lock widths, or introduce a new per-inode lock to protect
different parts of the inode structure.


> > And it works like that because when you want to add or remove an inode
> > from various data structures, you take the i_lock
>
> Well that would be wrong. i_lock protects things *within* its inode.
> It's nonsensical to take i_lock when the inode is being added to or
> removed from external containers because i_lock doesn't protect those
> containers!

Membership in a data structure is a property of the item, conceptually
and literally when you're messing with list entries and such. There is
no conceptual vandalism here, as far as I can tell.

And it just works better this way when lifting inode_lock (and
dcache_lock). Currently, we do things like:

spin_lock(&inode_lock);
add_to_list1(inode);
add_to_list2(inode);
inode->blah = something;
spin_unlock(&inode_lock);

If you lock list1, list2, and blah separately, it is no longer an
equivalent transformation: other code could find an inode on list1 that
is now not on list2 and blah is not set.
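
As a sketch of the equivalent transformation (reusing the hypothetical
add_to_list1/add_to_list2/blah names from the fragment above, with
list1_lock and list2_lock standing in for the per-list locks), i_lock is
held across the whole update and the list locks nest inside it:

spin_lock(&inode->i_lock);
spin_lock(&list1_lock);
add_to_list1(inode);
spin_unlock(&list1_lock);
spin_lock(&list2_lock);
add_to_list2(inode);
spin_unlock(&list2_lock);
inode->blah = something;        /* other i_lock holders see a consistent inode */
spin_unlock(&inode->i_lock);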

The real data object of interest in all cases is the inode object.
Other structures are just in aid of finding inode objects that have
a particular property.

So it makes a lot of sense to have a lock to rule the inode (as opposed
to now we have a lock to rule *all* inodes).

It is possible that locking can be reduced if some things are verified
and carefully shown not to matter. I just don't see the need yet and it
would make things overly complicated I think. Introducing any more
complexity will sink this patchset.


> > and then take each
> > of these locks in turn, inside it. The alternative is to build a bigger
> > lock ordering graph, and take all the locks up-front before taking
> > i_lock. I did actually try that and it ended up being worse, so I went
> > this route.
> >
> > I think taking a global lock in mark_inode_dirty is nutty (especially
> > when that global lock is shared with hash management, LRU scanning,
> > writeback, i_flags access... :) It's just a question of which is less
> > nutty.
>
> Yes, inode_lock is bad news.

Hopefully not for long :)

2010-07-03 05:18:22

by Nick Piggin

[permalink] [raw]
Subject: Re: [patch 29/52] fs: icache lock i_count

On Sat, Jul 03, 2010 at 03:06:52PM +1000, Nick Piggin wrote:
> It is possible that locking can be reduced if some things are verified
> and carefully shown not to matter. I just don't see the need yet and it
> would make things overly complicated I think. Introducing any more
> complexity will sink this patchset.

By overly complicated, I mean for this patchset, where the locking has
already been rewritten. It would then be no more complicated (actually
far less so) than equivalently trying to lift inode_lock out of the
parts of the code where it is causing contention.

2010-07-05 22:41:33

by Dave Chinner

[permalink] [raw]
Subject: Re: [patch 29/52] fs: icache lock i_count

On Sat, Jul 03, 2010 at 03:06:52PM +1000, Nick Piggin wrote:
> So it makes a lot of sense to have a lock to rule the inode (as opposed
> to now we have a lock to rule *all* inodes).

I don't disagree with this approach - I object to the fact that you
repurpose an existing lock and change its locking rules to "rule
the inode". We don't have any one lock that "rules the inode",
anyway, so adding a new "i_list_lock" for the new VFS level locking
strategies makes it a lot more self-contained. Fundamentally I'm
less concerned about the additional memory usage than I am about
having landmines planted around i_lock...

Cheers,

Dave.
--
Dave Chinner
[email protected]

2010-07-06 04:35:01

by Nick Piggin

[permalink] [raw]
Subject: Re: [patch 29/52] fs: icache lock i_count

On Tue, Jul 06, 2010 at 08:41:06AM +1000, Dave Chinner wrote:
> On Sat, Jul 03, 2010 at 03:06:52PM +1000, Nick Piggin wrote:
> > So it makes a lot of sense to have a lock to rule the inode (as opposed
> > to now we have a lock to rule *all* inodes).
>
> I don't disagree with this approach - I object to the fact that you
> repurpose an existing lock and change its locking rules to "rule
> the inode". We don't have any one lock that "rules the inode",

"rule the inode" was in regard to the inode cache / dirty / writeout
handling; ie. what inode_lock is currently for. The idea is that in
these code paths, we tend to want to take an inode, lock it, and then
manipulate it (including putting it on or taking it off data
structures).


> anyway, so adding a new "i_list_lock" for the new VFS level locking
> strategies makes it a lot more self-contained. Fundamentally I'm
> less concerned about the additional memory usage than I am about
> having landmines planted around i_lock...

(it's not just the lists; it's the refcount and i_state too)

I just don't see a problem. _No_ filesystem takes any of the locks
that nest inside i_lock. They may call some inode.c functions, but
shouldn't do so while holding i_lock if they are treating i_lock as
an innermost lock. So they can keep using i_lock. I did go through
and attempt to look at all filesystems.
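
As an illustration (a made-up fragment, not from any real filesystem:
foo_fs_inode_info, FOO_FS_I() and fs_flags are hypothetical names), a
filesystem that treats i_lock as an innermost lock for its own per-inode
state keeps working unchanged, as long as it drops i_lock before calling
back into generic code:

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>

/* Hypothetical fs-private inode, embedding the VFS inode as usual. */
struct foo_fs_inode_info {
        unsigned int    fs_flags;
        struct inode    vfs_inode;
};

static inline struct foo_fs_inode_info *FOO_FS_I(struct inode *inode)
{
        return container_of(inode, struct foo_fs_inode_info, vfs_inode);
}

static void foo_fs_set_flag(struct inode *inode, unsigned int flag)
{
        spin_lock(&inode->i_lock);              /* innermost: nothing taken inside */
        FOO_FS_I(inode)->fs_flags |= flag;
        spin_unlock(&inode->i_lock);

        mark_inode_dirty(inode);                /* generic code called with i_lock dropped */
}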

Within inode.c, lock ordering is 2 levels deep, same as it was when
we had inode_lock and i_lock.

If some filesystem introduces a lock ordering problem from not
reading the newly added documentation, lockdep should catch it pretty
quick.

So I don't see a landmine here that filesystems don't already have (in
much larger doses) with i_mutex, reentrant reclaim, the page lock, the
buffer lock, mmap_sem, etc.

2010-07-06 10:38:37

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [patch 29/52] fs: icache lock i_count


On Jul 6, 2010, at 12:34 AM, Nick Piggin wrote:
> On Tue, Jul 06, 2010 at 08:41:06AM +1000, Dave Chinner wrote:
>>
>>
>> I don't disagree with this approach - I object to the fact that you
>> repurpose an existing lock and change its locking rules to "rule
>> the inode". We don't have any one lock that "rules the inode",
>> anyway, so adding a new "i_list_lock" for the new VFS level locking
>> strategies makes it a lot more self-contained. Fundamentally I'm
>> less concerned about the additional memory usage than I am about
>> having landmines planted around i_lock...
>
> If some filesystem introduces a lock ordering problem from not
> reading the newly added documentation, lockdep should catch it pretty
> quick.

I assume you mean inline documentation in the source, because I
quickly scanned the source and couldn't find any significant changes
to any files in Documentation.

It would be nice if the new state of affairs is documented in a single file,
so that people who want to understand this new locking system don't
have to go crawling through the code, or searching mailing list archives
to figure out what's going on.

A lot of the text in this mail thread, including your discussion of the new
locking hierarchy, and why things are the way they are, would be good
fodder for a new documentation file. And if you don't want to rename
i_lock, because no better name can be found, we should at least
document that starting as of 2.6.35/36 the meaning of i_lock changed.

-- Ted

2010-07-06 13:04:46

by Nick Piggin

[permalink] [raw]
Subject: Re: [patch 29/52] fs: icache lock i_count

On Tue, Jul 06, 2010 at 06:38:28AM -0400, Theodore Tso wrote:
>
> On Jul 6, 2010, at 12:34 AM, Nick Piggin wrote:
> > On Tue, Jul 06, 2010 at 08:41:06AM +1000, Dave Chinner wrote:
> >>
> >>
> >> I don't disagree with this approach - I object to the fact that you
> >> repurpose an existing lock and change its locking rules to "rule
> >> the inode". We don't have any one lock that "rules the inode",
> >> anyway, so adding a new "i_list_lock" for the new VFS level locking
> >> strategies makes it a lot more self-contained. Fundamentally I'm
> >> less concerned about the additional memory usage than I am about
> >> having landmines planted around i_lock...
> >
> > If some filesystem introduces a lock ordering problem from not
> > reading the newly added documentation, lockdep should catch it pretty
> > quick.
>
> I assume you mean inline documentation in the source, because I
> quickly scanned the source and couldn't find any significant changes
> to any files in Documentation.
>
> It would be nice if the new state of affairs is documented in a single file,
> so that people who want to understand this new locking system don't
> have to go crawling through the code, or searching mailing list archives
> to figure out what's going on.

These kinds of internal lock-ordering details work best, I think, in the
source files themselves (see rmap.c and filemap.c for examples), so the
documentation sits a little closer to the code. Here that means inode.c
and dcache.c.

Locking for the filesystem callback APIs, I agree, is just as well
placed in Documentation/filesystems/Locking (which I need to update a
bit), but that file has never been used for this kind of internal detail
before. I'm always open to suggestions on how to document it better,
though.


> A lot of the text in this mail thread, including your discussion of the new
> locking hierarchy, and why things are the way they are, would be good
> fodder for a new documentation file. And if you don't want to rename
> i_lock, because no better name can be found, we should at least
> document that starting as of 2.6.35/36 the meaning of i_lock changed.

Well, there is not much of a definition of what i_lock is. It is really
not an "innermost" lock anyway (whatever that means exactly): CEPH even
takes dcache_lock inside i_lock, and NFS uses it pretty extensively too.

So as far as I can see, it really is *the* non-sleeping lock for
protecting inode fields.

As far as filesystems go, these inode locking changes matter very
little really, but the best I can do is to comment and document the
locking and try to audit each filesystem.

2010-07-07 17:01:24

by Frank Mayhar

[permalink] [raw]
Subject: Re: [patch 29/52] fs: icache lock i_count

On Tue, 2010-07-06 at 06:38 -0400, Theodore Tso wrote:
> A lot of the text in this mail thread, including your discussion of the new
> locking hierarchy, and why things are the way they are, would be good
> fodder for a new documentation file. And if you don't want to rename
> i_lock, because no better name can be found, we should at least
> document that starting as of 2.6.35/36 the meaning of i_lock changed.

Actually I like the idea of renaming i_lock. It adds a bit of code
churn, but renaming it to more clearly reflect its function makes sense
to me, and it might catch uses of the lock that were missed earlier.
--
Frank Mayhar <[email protected]>
Google, Inc.