During path walking we do follow_mount or follow_down, which use
dcache_lock for serialisation. vfsmount-related operations also use
dcache_lock for all updates. I think we can use a separate lock for
vfsmount-related work and thereby improve path walking.
The following two patches do just that. The first replaces
dcache_lock with a new vfsmount_lock in namespace.c; the lock is
local to namespace.c and is not needed outside it. The second patch
uses RCU to make lookup_mnt() lock-free. The patches are quite simple
and straightforward.
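To make the caller-visible change concrete, here is the post-patch shape of
__follow_down(), condensed from the fs/namei.c hunk in the first patch (no
new code beyond what the patch itself contains): lookup_mnt() now takes
vfsmount_lock internally and returns with a reference already held, so the
path-walking callers no longer touch dcache_lock.

static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
	struct vfsmount *mounted;

	mounted = lookup_mnt(*mnt, *dentry);	/* no locking by the caller */
	if (mounted) {
		*mnt = mounted;			/* reference held by lookup_mnt() */
		dput(*dentry);
		mntput(mounted->mnt_parent);
		*dentry = dget(mounted->mnt_root);
		return 1;
	}
	return 0;
}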
The lockmeter results show reduced contention and fewer lock acquisitions
for dcache_lock while running dcachebench* on a 4-way SMP box:
SPINLOCKS       HOLD                  WAIT
 UTIL  CON    MEAN(  MAX )   MEAN(  MAX )(% CPU)     TOTAL NOWAIT  SPIN RJECT  NAME

baselkm-2569:
20.7% 20.9%  0.5us(  146us)  2.9us(  144us)(0.81%)  31590840  79.1% 20.9%   0%  dcache_lock

mntlkm-2569:
14.3% 13.6%  0.4us(  170us)  2.9us(  187us)(0.42%)  23071746  86.4% 13.6%   0%  dcache_lock
We get more than an 8% improvement on the 4-way SMP box and a 44% improvement
on a 16-way NUMAQ while running dcachebench*.
                     Average (usecs/iteration)   Std. Deviation
                     (lower is better)
4-way SMP
  2.5.69              15739.3                     470.90
  2.5.69-mnt          14459.6                     298.51

16-way NUMAQ
  2.5.69             120426.5                     363.78
  2.5.69-mnt          63225.8                     427.60
*dcachebench is a microbenchmark written by Bill Hartner and is available at
http://www-124.ibm.com/developerworks/opensource/linuxperf/dcachebench/dcachebench.html
vfsmount_lock.patch
-------------------
- Patch for replacing dcache_lock with the new vfsmount_lock for all mount
related operations. This removes the need to take dcache_lock while
doing follow_mount or follow_down operations in path walking.
fs/namei.c | 24 +++++++++------------
fs/namespace.c | 64 ++++++++++++++++++++++++++++++++-------------------------
2 files changed, 48 insertions(+), 40 deletions(-)
diff -puN fs/namespace.c~vfsmount_lock fs/namespace.c
--- linux-2.5.69/fs/namespace.c~vfsmount_lock 2003-05-21 10:53:00.000000000 +0530
+++ linux-2.5.69-maneesh/fs/namespace.c 2003-05-21 11:02:47.000000000 +0530
@@ -28,6 +28,8 @@ extern int do_remount_sb(struct super_bl
extern int __init init_rootfs(void);
extern int __init fs_subsys_init(void);
+/* spinlock for vfsmount related operation, inplace of dcache_lock */
+static spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
static struct list_head *mount_hashtable;
static int hash_mask, hash_bits;
static kmem_cache_t *mnt_cache;
@@ -69,30 +71,38 @@ void free_vfsmnt(struct vfsmount *mnt)
kmem_cache_free(mnt_cache, mnt);
}
+/*
+ * Now, lookup_mnt increments the ref count before returning
+ * the vfsmount struct.
+ */
struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
struct list_head * head = mount_hashtable + hash(mnt, dentry);
struct list_head * tmp = head;
- struct vfsmount *p;
+ struct vfsmount *p, *found = NULL;
+ spin_lock(&vfsmount_lock);
for (;;) {
tmp = tmp->next;
p = NULL;
if (tmp == head)
break;
p = list_entry(tmp, struct vfsmount, mnt_hash);
- if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry)
+ if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
+ found = mntget(p);
break;
+ }
}
- return p;
+ spin_lock(&vfsmount_lock);
+ return found;
}
static int check_mnt(struct vfsmount *mnt)
{
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
while (mnt->mnt_parent != mnt)
mnt = mnt->mnt_parent;
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
return mnt == current->namespace->root;
}
@@ -273,15 +283,15 @@ void umount_tree(struct vfsmount *mnt)
mnt = list_entry(kill.next, struct vfsmount, mnt_list);
list_del_init(&mnt->mnt_list);
if (mnt->mnt_parent == mnt) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
} else {
struct nameidata old_nd;
detach_mnt(mnt, &old_nd);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
path_release(&old_nd);
}
mntput(mnt);
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
}
}
@@ -334,17 +344,17 @@ static int do_umount(struct vfsmount *mn
}
down_write(&current->namespace->sem);
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
if (atomic_read(&sb->s_active) == 1) {
/* last instance - try to be smart */
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
lock_kernel();
DQUOT_OFF(sb);
acct_auto_close(sb);
unlock_kernel();
security_sb_umount_close(mnt);
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
}
retval = -EBUSY;
if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
@@ -352,7 +362,7 @@ static int do_umount(struct vfsmount *mn
umount_tree(mnt);
retval = 0;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
if (retval)
security_sb_umount_busy(mnt);
up_write(&current->namespace->sem);
@@ -442,17 +452,17 @@ static struct vfsmount *copy_tree(struct
q = clone_mnt(p, p->mnt_root);
if (!q)
goto Enomem;
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
list_add_tail(&q->mnt_list, &res->mnt_list);
attach_mnt(q, &nd);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
}
return res;
Enomem:
if (res) {
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
umount_tree(res);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
}
return NULL;
}
@@ -476,7 +486,7 @@ static int graft_tree(struct vfsmount *m
if (err)
goto out_unlock;
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
struct list_head head;
attach_mnt(mnt, nd);
@@ -485,7 +495,7 @@ static int graft_tree(struct vfsmount *m
mntget(mnt);
err = 0;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
out_unlock:
up(&nd->dentry->d_inode->i_sem);
if (!err)
@@ -522,9 +532,9 @@ static int do_loopback(struct nameidata
if (mnt) {
err = graft_tree(mnt, nd);
if (err) {
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
umount_tree(mnt);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
} else
mntput(mnt);
}
@@ -589,7 +599,7 @@ static int do_move_mount(struct nameidat
if (IS_DEADDIR(nd->dentry->d_inode))
goto out1;
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
goto out2;
@@ -613,7 +623,7 @@ static int do_move_mount(struct nameidat
detach_mnt(old_nd.mnt, &parent_nd);
attach_mnt(old_nd.mnt, nd);
out2:
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
out1:
up(&nd->dentry->d_inode->i_sem);
out:
@@ -794,9 +804,9 @@ int copy_namespace(int flags, struct tas
down_write(&tsk->namespace->sem);
/* First pass: copy the tree topology */
new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
/* Second pass: switch the tsk->fs->* elements */
if (fs) {
@@ -1017,7 +1027,7 @@ asmlinkage long sys_pivot_root(const cha
if (new_nd.mnt->mnt_root != new_nd.dentry)
goto out2; /* not a mountpoint */
tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
if (tmp != new_nd.mnt) {
for (;;) {
if (tmp->mnt_parent == tmp)
@@ -1034,7 +1044,7 @@ asmlinkage long sys_pivot_root(const cha
detach_mnt(user_nd.mnt, &root_parent);
attach_mnt(user_nd.mnt, &old_nd);
attach_mnt(new_nd.mnt, &root_parent);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
chroot_fs_refs(&user_nd, &new_nd);
security_sb_post_pivotroot(&user_nd, &new_nd);
error = 0;
@@ -1051,7 +1061,7 @@ out0:
unlock_kernel();
return error;
out3:
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
goto out2;
}
diff -puN fs/namei.c~vfsmount_lock fs/namei.c
--- linux-2.5.69/fs/namei.c~vfsmount_lock 2003-05-21 10:53:00.000000000 +0530
+++ linux-2.5.69-maneesh/fs/namei.c 2003-05-21 10:53:00.000000000 +0530
@@ -434,19 +434,17 @@ int follow_up(struct vfsmount **mnt, str
return 1;
}
+/* no need for dcache_lock, as serialization is taken care in
+ * namespace.c
+ */
static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
{
int res = 0;
while (d_mountpoint(*dentry)) {
- struct vfsmount *mounted;
- spin_lock(&dcache_lock);
- mounted = lookup_mnt(*mnt, *dentry);
- if (!mounted) {
- spin_unlock(&dcache_lock);
+ struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
+ if (!mounted)
break;
- }
- *mnt = mntget(mounted);
- spin_unlock(&dcache_lock);
+ *mnt = mounted;
dput(*dentry);
mntput(mounted->mnt_parent);
*dentry = dget(mounted->mnt_root);
@@ -455,21 +453,21 @@ static int follow_mount(struct vfsmount
return res;
}
+/* no need for dcache_lock, as serialization is taken care in
+ * namespace.c
+ */
static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
struct vfsmount *mounted;
-
- spin_lock(&dcache_lock);
+
mounted = lookup_mnt(*mnt, *dentry);
if (mounted) {
- *mnt = mntget(mounted);
- spin_unlock(&dcache_lock);
+ *mnt = mounted;
dput(*dentry);
mntput(mounted->mnt_parent);
*dentry = dget(mounted->mnt_root);
return 1;
}
- spin_unlock(&dcache_lock);
return 0;
}
_
--
Maneesh Soni
IBM Linux Technology Center,
IBM India Software Lab, Bangalore.
Phone: +91-80-5044999 email: [email protected]
http://lse.sourceforge.net/
- The following patch uses synchronize_kernel() in detach_mnt() and facilitates
lock-free lookup_mnt().
- diff'ed over vfsmount_lock.patch
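In short, the reader/writer choreography the patch introduces looks like this
(a condensed sketch of the hunks below; all names are from the patch):

/* Reader side, in lookup_mnt(): walk the hash chain under
 * rcu_read_lock() only; the match is pinned with mntget(). */
	rcu_read_lock();
	/* ... scan mount_hashtable + hash(mnt, dentry), mntget() the match ... */
	rcu_read_unlock();

/* Writer side, in detach_mnt(), entered with vfsmount_lock held:
 * unhash with the RCU list primitive, then wait for in-flight
 * lock-free lookups to finish before reusing the entry's fields. */
	list_del_rcu(&mnt->mnt_hash);
	spin_unlock(&vfsmount_lock);
	synchronize_kernel();		/* let existing lookup_mnt() users drain */
	spin_lock(&vfsmount_lock);
	/* now safe to reset mnt_parent, mnt_mountpoint, mnt_child, ... */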
fs/namespace.c | 22 ++++++++++++++++++----
1 files changed, 18 insertions(+), 4 deletions(-)
diff -puN fs/namespace.c~lookup_mnt-rcu fs/namespace.c
--- linux-2.5.69/fs/namespace.c~lookup_mnt-rcu 2003-05-21 10:53:27.000000000 +0530
+++ linux-2.5.69-maneesh/fs/namespace.c 2003-05-21 10:56:57.000000000 +0530
@@ -74,6 +74,11 @@ void free_vfsmnt(struct vfsmount *mnt)
/*
* Now, lookup_mnt increments the ref count before returning
* the vfsmount struct.
+ *
+ * lookup_mnt can be done without taking any lock, as now we
+ * do synchronize_kernel() while removing vfsmount struct
+ * from mnt_hash list. rcu_read_(un)lock is required for
+ * pre-emptive kernels.
*/
struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
@@ -81,7 +86,7 @@ struct vfsmount *lookup_mnt(struct vfsmo
struct list_head * tmp = head;
struct vfsmount *p, *found = NULL;
- spin_lock(&vfsmount_lock);
+ rcu_read_lock();
for (;;) {
tmp = tmp->next;
p = NULL;
@@ -93,7 +98,7 @@ struct vfsmount *lookup_mnt(struct vfsmo
break;
}
}
- spin_lock(&vfsmount_lock);
+ rcu_read_unlock();
return found;
}
@@ -110,10 +115,19 @@ static void detach_mnt(struct vfsmount *
{
old_nd->dentry = mnt->mnt_mountpoint;
old_nd->mnt = mnt->mnt_parent;
+
+ /* remove from the hash_list, before other things */
+ list_del_rcu(&mnt->mnt_hash);
+ spin_unlock(&vfsmount_lock);
+
+ /* There could be existing users doing lookup_mnt, let
+ * them finish their work.
+ */
+ synchronize_kernel();
+ spin_lock(&vfsmount_lock);
mnt->mnt_parent = mnt;
mnt->mnt_mountpoint = mnt->mnt_root;
list_del_init(&mnt->mnt_child);
- list_del_init(&mnt->mnt_hash);
old_nd->dentry->d_mounted--;
}
@@ -121,7 +135,7 @@ static void attach_mnt(struct vfsmount *
{
mnt->mnt_parent = mntget(nd->mnt);
mnt->mnt_mountpoint = dget(nd->dentry);
- list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+ list_add_rcu(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
nd->dentry->d_mounted++;
}
_
--
Maneesh Soni
IBM Linux Technology Center,
IBM India Software Lab, Bangalore.
Phone: +91-80-5044999 email: [email protected]
http://lse.sourceforge.net/
spin_unlock ???
> > - return p;
> > + spin_lock(&vfsmount_lock);
> > + return found;
> > }
>
> err, how many times do you want to spin that lock?
>
Paul
Maneesh Soni <[email protected]> wrote:
>
> struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
> {
> struct list_head * head = mount_hashtable + hash(mnt, dentry);
> struct list_head * tmp = head;
> - struct vfsmount *p;
> + struct vfsmount *p, *found = NULL;
>
> + spin_lock(&vfsmount_lock);
> for (;;) {
> tmp = tmp->next;
> p = NULL;
> if (tmp == head)
> break;
> p = list_entry(tmp, struct vfsmount, mnt_hash);
> - if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry)
> + if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
> + found = mntget(p);
> break;
> + }
> }
> - return p;
> + spin_lock(&vfsmount_lock);
> + return found;
> }
err, how many times do you want to spin that lock?
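What the review comment is getting at: the closing spin_lock() in the posted
lookup_mnt() should be spin_unlock(). As posted, the function never releases
vfsmount_lock, and on SMP the second spin_lock() simply spins on the lock this
CPU already holds. A minimal sketch of the broken versus intended shape, using
only names from the patch:

	spin_lock(&vfsmount_lock);
	/* ... search the hash chain ... */
	spin_lock(&vfsmount_lock);	/* as posted: should be spin_unlock() */
	return found;

	spin_lock(&vfsmount_lock);
	/* ... search the hash chain ... */
	spin_unlock(&vfsmount_lock);	/* intended, as in the corrected patch below */
	return found;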
On Wed, May 21, 2003 at 02:35:23AM -0700, Andrew Morton wrote:
> > }
> > - return p;
> > + spin_lock(&vfsmount_lock);
> > + return found;
> > }
>
> err, how many times do you want to spin that lock?
Sorry, I did not test the patches separately. Corrected patches follow.
- Patch for replacing dcache_lock with the new vfsmount_lock for all mount
related operations. This removes the need to take dcache_lock while
doing follow_mount or follow_down operations in path walking.
fs/namei.c | 24 +++++++++------------
fs/namespace.c | 64 ++++++++++++++++++++++++++++++++-------------------------
2 files changed, 48 insertions(+), 40 deletions(-)
diff -puN fs/namespace.c~vfsmount_lock fs/namespace.c
--- linux-2.5.69/fs/namespace.c~vfsmount_lock 2003-05-21 10:53:00.000000000 +0530
+++ linux-2.5.69-maneesh/fs/namespace.c 2003-05-21 15:10:37.000000000 +0530
@@ -28,6 +28,8 @@ extern int do_remount_sb(struct super_bl
extern int __init init_rootfs(void);
extern int __init fs_subsys_init(void);
+/* spinlock for vfsmount related operation, inplace of dcache_lock */
+static spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
static struct list_head *mount_hashtable;
static int hash_mask, hash_bits;
static kmem_cache_t *mnt_cache;
@@ -69,30 +71,38 @@ void free_vfsmnt(struct vfsmount *mnt)
kmem_cache_free(mnt_cache, mnt);
}
+/*
+ * Now, lookup_mnt increments the ref count before returning
+ * the vfsmount struct.
+ */
struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
struct list_head * head = mount_hashtable + hash(mnt, dentry);
struct list_head * tmp = head;
- struct vfsmount *p;
+ struct vfsmount *p, *found = NULL;
+ spin_lock(&vfsmount_lock);
for (;;) {
tmp = tmp->next;
p = NULL;
if (tmp == head)
break;
p = list_entry(tmp, struct vfsmount, mnt_hash);
- if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry)
+ if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
+ found = mntget(p);
break;
+ }
}
- return p;
+ spin_unlock(&vfsmount_lock);
+ return found;
}
static int check_mnt(struct vfsmount *mnt)
{
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
while (mnt->mnt_parent != mnt)
mnt = mnt->mnt_parent;
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
return mnt == current->namespace->root;
}
@@ -273,15 +283,15 @@ void umount_tree(struct vfsmount *mnt)
mnt = list_entry(kill.next, struct vfsmount, mnt_list);
list_del_init(&mnt->mnt_list);
if (mnt->mnt_parent == mnt) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
} else {
struct nameidata old_nd;
detach_mnt(mnt, &old_nd);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
path_release(&old_nd);
}
mntput(mnt);
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
}
}
@@ -334,17 +344,17 @@ static int do_umount(struct vfsmount *mn
}
down_write(&current->namespace->sem);
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
if (atomic_read(&sb->s_active) == 1) {
/* last instance - try to be smart */
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
lock_kernel();
DQUOT_OFF(sb);
acct_auto_close(sb);
unlock_kernel();
security_sb_umount_close(mnt);
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
}
retval = -EBUSY;
if (atomic_read(&mnt->mnt_count) == 2 || flags & MNT_DETACH) {
@@ -352,7 +362,7 @@ static int do_umount(struct vfsmount *mn
umount_tree(mnt);
retval = 0;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
if (retval)
security_sb_umount_busy(mnt);
up_write(&current->namespace->sem);
@@ -442,17 +452,17 @@ static struct vfsmount *copy_tree(struct
q = clone_mnt(p, p->mnt_root);
if (!q)
goto Enomem;
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
list_add_tail(&q->mnt_list, &res->mnt_list);
attach_mnt(q, &nd);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
}
return res;
Enomem:
if (res) {
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
umount_tree(res);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
}
return NULL;
}
@@ -476,7 +486,7 @@ static int graft_tree(struct vfsmount *m
if (err)
goto out_unlock;
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
struct list_head head;
attach_mnt(mnt, nd);
@@ -485,7 +495,7 @@ static int graft_tree(struct vfsmount *m
mntget(mnt);
err = 0;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
out_unlock:
up(&nd->dentry->d_inode->i_sem);
if (!err)
@@ -522,9 +532,9 @@ static int do_loopback(struct nameidata
if (mnt) {
err = graft_tree(mnt, nd);
if (err) {
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
umount_tree(mnt);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
} else
mntput(mnt);
}
@@ -589,7 +599,7 @@ static int do_move_mount(struct nameidat
if (IS_DEADDIR(nd->dentry->d_inode))
goto out1;
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
if (!IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
goto out2;
@@ -613,7 +623,7 @@ static int do_move_mount(struct nameidat
detach_mnt(old_nd.mnt, &parent_nd);
attach_mnt(old_nd.mnt, nd);
out2:
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
out1:
up(&nd->dentry->d_inode->i_sem);
out:
@@ -794,9 +804,9 @@ int copy_namespace(int flags, struct tas
down_write(&tsk->namespace->sem);
/* First pass: copy the tree topology */
new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
/* Second pass: switch the tsk->fs->* elements */
if (fs) {
@@ -1017,7 +1027,7 @@ asmlinkage long sys_pivot_root(const cha
if (new_nd.mnt->mnt_root != new_nd.dentry)
goto out2; /* not a mountpoint */
tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
- spin_lock(&dcache_lock);
+ spin_lock(&vfsmount_lock);
if (tmp != new_nd.mnt) {
for (;;) {
if (tmp->mnt_parent == tmp)
@@ -1034,7 +1044,7 @@ asmlinkage long sys_pivot_root(const cha
detach_mnt(user_nd.mnt, &root_parent);
attach_mnt(user_nd.mnt, &old_nd);
attach_mnt(new_nd.mnt, &root_parent);
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
chroot_fs_refs(&user_nd, &new_nd);
security_sb_post_pivotroot(&user_nd, &new_nd);
error = 0;
@@ -1051,7 +1061,7 @@ out0:
unlock_kernel();
return error;
out3:
- spin_unlock(&dcache_lock);
+ spin_unlock(&vfsmount_lock);
goto out2;
}
diff -puN fs/namei.c~vfsmount_lock fs/namei.c
--- linux-2.5.69/fs/namei.c~vfsmount_lock 2003-05-21 10:53:00.000000000 +0530
+++ linux-2.5.69-maneesh/fs/namei.c 2003-05-21 10:53:00.000000000 +0530
@@ -434,19 +434,17 @@ int follow_up(struct vfsmount **mnt, str
return 1;
}
+/* no need for dcache_lock, as serialization is taken care in
+ * namespace.c
+ */
static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
{
int res = 0;
while (d_mountpoint(*dentry)) {
- struct vfsmount *mounted;
- spin_lock(&dcache_lock);
- mounted = lookup_mnt(*mnt, *dentry);
- if (!mounted) {
- spin_unlock(&dcache_lock);
+ struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
+ if (!mounted)
break;
- }
- *mnt = mntget(mounted);
- spin_unlock(&dcache_lock);
+ *mnt = mounted;
dput(*dentry);
mntput(mounted->mnt_parent);
*dentry = dget(mounted->mnt_root);
@@ -455,21 +453,21 @@ static int follow_mount(struct vfsmount
return res;
}
+/* no need for dcache_lock, as serialization is taken care in
+ * namespace.c
+ */
static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
struct vfsmount *mounted;
-
- spin_lock(&dcache_lock);
+
mounted = lookup_mnt(*mnt, *dentry);
if (mounted) {
- *mnt = mntget(mounted);
- spin_unlock(&dcache_lock);
+ *mnt = mounted;
dput(*dentry);
mntput(mounted->mnt_parent);
*dentry = dget(mounted->mnt_root);
return 1;
}
- spin_unlock(&dcache_lock);
return 0;
}
_
--
Maneesh Soni
IBM Linux Technology Center,
IBM India Software Lab, Bangalore.
Phone: +91-80-5044999 email: [email protected]
http://lse.sourceforge.net/
On Wed, May 21, 2003 at 02:35:23AM -0700, Andrew Morton wrote:
> Maneesh Soni <[email protected]> wrote:
> >
> > struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
> > {
> > + spin_lock(&vfsmount_lock);
> > for (;;) {
> > tmp = tmp->next;
> > p = NULL;
> > if (tmp == head)
> > break;
> > p = list_entry(tmp, struct vfsmount, mnt_hash);
> > - if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry)
> > + if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
> > + found = mntget(p);
> > break;
> > + }
> > }
> > - return p;
> > + spin_lock(&vfsmount_lock);
> > + return found;
> > }
>
> err, how many times do you want to spin that lock?
None, if you apply the second patch ;-)
Thanks
Dipankar
On Wed, May 21, 2003 at 03:13:40PM +0530, Maneesh Soni wrote:
> On Wed, May 21, 2003 at 02:35:23AM -0700, Andrew Morton wrote:
> > > }
> > > - return p;
> > > + spin_lock(&vfsmount_lock);
> > > + return found;
> > > }
> >
> > err, how many times do you want to spin that lock?
>
> Sorry, I did not test the patches separately. Corrected patches follow.
- The following patch uses synchronize_kernel() in detach_mnt() and facilitates
lock-free lookup_mnt().
- diff'ed over vfsmount_lock.patch
fs/namespace.c | 22 ++++++++++++++++++----
1 files changed, 18 insertions(+), 4 deletions(-)
diff -puN fs/namespace.c~lookup_mnt-rcu fs/namespace.c
--- linux-2.5.69/fs/namespace.c~lookup_mnt-rcu 2003-05-21 10:53:27.000000000 +0530
+++ linux-2.5.69-maneesh/fs/namespace.c 2003-05-21 10:56:57.000000000 +0530
@@ -74,6 +74,11 @@ void free_vfsmnt(struct vfsmount *mnt)
/*
* Now, lookup_mnt increments the ref count before returning
* the vfsmount struct.
+ *
+ * lookup_mnt can be done without taking any lock, as now we
+ * do synchronize_kernel() while removing vfsmount struct
+ * from mnt_hash list. rcu_read_(un)lock is required for
+ * pre-emptive kernels.
*/
struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
@@ -81,7 +86,7 @@ struct vfsmount *lookup_mnt(struct vfsmo
struct list_head * tmp = head;
struct vfsmount *p, *found = NULL;
- spin_lock(&vfsmount_lock);
+ rcu_read_lock();
for (;;) {
tmp = tmp->next;
p = NULL;
@@ -93,7 +98,7 @@ struct vfsmount *lookup_mnt(struct vfsmo
break;
}
}
- spin_unlock(&vfsmount_lock);
+ rcu_read_unlock();
return found;
}
@@ -110,10 +115,19 @@ static void detach_mnt(struct vfsmount *
{
old_nd->dentry = mnt->mnt_mountpoint;
old_nd->mnt = mnt->mnt_parent;
+
+ /* remove from the hash_list, before other things */
+ list_del_rcu(&mnt->mnt_hash);
+ spin_unlock(&vfsmount_lock);
+
+ /* There could be existing users doing lookup_mnt, let
+ * them finish their work.
+ */
+ synchronize_kernel();
+ spin_lock(&vfsmount_lock);
mnt->mnt_parent = mnt;
mnt->mnt_mountpoint = mnt->mnt_root;
list_del_init(&mnt->mnt_child);
- list_del_init(&mnt->mnt_hash);
old_nd->dentry->d_mounted--;
}
@@ -121,7 +135,7 @@ static void attach_mnt(struct vfsmount *
{
mnt->mnt_parent = mntget(nd->mnt);
mnt->mnt_mountpoint = dget(nd->dentry);
- list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+ list_add_rcu(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);
nd->dentry->d_mounted++;
}
_
--
Maneesh Soni
IBM Linux Technology Center,
IBM India Software Lab, Bangalore.
Phone: +91-80-5044999 email: [email protected]
http://lse.sourceforge.net/