2002-07-12 14:01:39

by Maneesh Soni

[permalink] [raw]
Subject: [RFC] dcache scalability patch (2.4.17)

Here is the dcache scalability patch (cleaned up) as discussed in
the previous post to lkml by Dipankar. The patch uses RCU for doing fast
dcache lookup. It also does lazy updates to lru list of dentries to
avoid doing write operations while doing lookup.

Following changes were done in this version

o Removed the d_next_hash hack. Dentries are unhashed using list_del
instead of list_del_init, and that too using d_drop interface
o Used d_drop_locked and d_unhashed instead of directly manipulating hash list
using list macros
o Changed DCACHE_UNLINKED to DCACHE_UNHASHED

Regards,
Maneesh

diff -urN linux-2.4.17-base/fs/autofs4/root.c linux-2.4.17-dc8/fs/autofs4/root.c
--- linux-2.4.17-base/fs/autofs4/root.c Tue Oct 24 10:27:38 2000
+++ linux-2.4.17-dc8/fs/autofs4/root.c Fri Jul 12 10:59:38 2002
@@ -403,7 +403,7 @@
spin_unlock(&dcache_lock);
return -ENOTEMPTY;
}
- list_del_init(&dentry->d_hash);
+ d_drop_locked(dentry);
spin_unlock(&dcache_lock);

dput(ino->dentry);
diff -urN linux-2.4.17-base/fs/dcache.c linux-2.4.17-dc8/fs/dcache.c
--- linux-2.4.17-base/fs/dcache.c Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/dcache.c Fri Jul 12 16:18:39 2002
@@ -25,6 +25,7 @@
#include <linux/module.h>

#include <asm/uaccess.h>
+#include <linux/rcupdate.h>

#define DCACHE_PARANOIA 1
/* #define DCACHE_DEBUG 1 */
@@ -55,14 +56,21 @@
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {0, 0, 45, 0,};

+static void d_callback(void *arg)
+{
+ struct dentry * dentry = (struct dentry *)arg;
+
+ if (dname_external(dentry))
+ kfree((void *) dentry->d_name.name);
+ kmem_cache_free(dentry_cache, dentry);
+}
+
/* no dcache_lock, please */
static inline void d_free(struct dentry *dentry)
{
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
- if (dname_external(dentry))
- kfree(dentry->d_name.name);
- kmem_cache_free(dentry_cache, dentry);
+ call_rcu(&dentry->d_rcu, d_callback, dentry);
dentry_stat.nr_dentry--;
}

@@ -124,9 +132,13 @@
if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
return;

- /* dput on a free dentry? */
- if (!list_empty(&dentry->d_lru))
- BUG();
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ return;
+ }
+
/*
* AV: ->d_delete() is _NOT_ allowed to block now.
*/
@@ -135,18 +147,28 @@
goto unhash_it;
}
/* Unreachable? Get rid of it */
- if (list_empty(&dentry->d_hash))
+ if (d_unhashed(dentry))
goto kill_it;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+
+ if (list_empty(&dentry->d_lru)) {
+ dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
return;

unhash_it:
- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);

kill_it: {
struct dentry *parent;
+ spin_unlock(&dentry->d_lock);
+ if (!list_empty(&dentry->d_lru)) {
+ list_del(&dentry->d_lru);
+ dentry_stat.nr_unused--;
+ }
list_del(&dentry->d_child);
/* drops the lock, at that point nobody can reach this dentry */
dentry_iput(dentry);
@@ -177,7 +199,7 @@
* If it's already been dropped, return OK.
*/
spin_lock(&dcache_lock);
- if (list_empty(&dentry->d_hash)) {
+ if (d_unhashed(dentry)) {
spin_unlock(&dcache_lock);
return 0;
}
@@ -201,15 +223,18 @@
* we might still populate it if it was a
* working directory or similar).
*/
+ spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count) > 1) {
if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
return -EBUSY;
}
}
-
- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
+
return 0;
}

@@ -217,11 +242,14 @@

static inline struct dentry * __dget_locked(struct dentry *dentry)
{
+ spin_lock(&dentry->d_lock);
atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
if (atomic_read(&dentry->d_count) == 1) {
dentry_stat.nr_unused--;
list_del_init(&dentry->d_lru);
}
+ spin_unlock(&dentry->d_lock);
return dentry;
}

@@ -252,8 +280,8 @@
tmp = next;
next = tmp->next;
alias = list_entry(tmp, struct dentry, d_alias);
- if (!list_empty(&alias->d_hash)) {
- __dget_locked(alias);
+ if (!d_unhashed(alias)) {
+ dget(alias);
spin_unlock(&dcache_lock);
return alias;
}
@@ -263,7 +291,7 @@
}

/*
- * Try to kill dentries associated with this inode.
+ * Try to kill dentries associated with this inode.
* WARNING: you must own a reference to inode.
*/
void d_prune_aliases(struct inode *inode)
@@ -274,13 +302,16 @@
tmp = head;
while ((tmp = tmp->next) != head) {
struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+ spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
- __dget_locked(dentry);
+ __dget(dentry);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
- d_drop(dentry);
dput(dentry);
goto restart;
}
+ spin_unlock(&dentry->d_lock);
}
spin_unlock(&dcache_lock);
}
@@ -295,7 +326,8 @@
{
struct dentry * parent;

- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
list_del(&dentry->d_child);
dentry_iput(dentry);
parent = dentry->d_parent;
@@ -330,19 +362,20 @@
if (tmp == &dentry_unused)
break;
list_del_init(tmp);
+ dentry_stat.nr_unused--;
dentry = list_entry(tmp, struct dentry, d_lru);

+ spin_lock(&dentry->d_lock);
/* If the dentry was recently referenced, don't free it. */
if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
+ if (!atomic_read(&dentry->d_count)) {
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+ spin_unlock(&dentry->d_lock);
continue;
}
- dentry_stat.nr_unused--;
-
- /* Unused dentry with a count? */
- if (atomic_read(&dentry->d_count))
- BUG();

prune_one_dentry(dentry);
if (!--count)
@@ -405,10 +438,13 @@
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
- if (atomic_read(&dentry->d_count))
- continue;
+ spin_lock(&dentry->d_lock);
dentry_stat.nr_unused--;
list_del_init(tmp);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ continue;
+ }
prune_one_dentry(dentry);
goto repeat;
}
@@ -488,11 +524,13 @@
struct list_head *tmp = next;
struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
+ list_del_init(&dentry->d_lru);
+ spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
- list_del(&dentry->d_lru);
list_add(&dentry->d_lru, dentry_unused.prev);
found++;
}
+ spin_unlock(&dentry->d_lock);
/*
* Descend a level if the d_subdirs list is non-empty.
*/
@@ -606,8 +644,9 @@
str[name->len] = 0;

atomic_set(&dentry->d_count, 1);
- dentry->d_vfs_flags = 0;
+ dentry->d_vfs_flags = DCACHE_UNHASHED;
dentry->d_flags = 0;
+ dentry->d_lock = SPIN_LOCK_UNLOCKED;
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
@@ -708,8 +747,9 @@
const unsigned char *str = name->name;
struct list_head *head = d_hash(parent,hash);
struct list_head *tmp;
+ struct dentry * found = NULL;

- spin_lock(&dcache_lock);
+ /* rcu_read_lock(); for pre-emptible kernel */
tmp = head->next;
for (;;) {
struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -729,13 +769,16 @@
if (memcmp(dentry->d_name.name, str, len))
continue;
}
- __dget_locked(dentry);
- dentry->d_vfs_flags |= DCACHE_REFERENCED;
- spin_unlock(&dcache_lock);
- return dentry;
+ spin_lock(&dentry->d_lock);
+ if (!(dentry->d_vfs_flags & DCACHE_UNHASHED)) {
+ found = __dget(dentry);
+ }
+ spin_unlock(&dentry->d_lock);
+ /* rcu_read_unlock(); for pre-emptible kernel */
+ return found;
}
- spin_unlock(&dcache_lock);
- return NULL;
+ /* rcu_read_unlock(); for pre-emptible kernel */
+ return found;
}

/**
@@ -774,7 +817,7 @@
lhp = base = d_hash(dparent, dentry->d_name.hash);
while ((lhp = lhp->next) != base) {
if (dentry == list_entry(lhp, struct dentry, d_hash)) {
- __dget_locked(dentry);
+ dget(dentry);
spin_unlock(&dcache_lock);
return 1;
}
@@ -834,9 +877,12 @@
void d_rehash(struct dentry * entry)
{
struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
- if (!list_empty(&entry->d_hash)) BUG();
spin_lock(&dcache_lock);
+ spin_lock(&entry->d_lock);
+ if (!list_empty(&entry->d_hash) && !d_unhashed(entry)) BUG();
list_add(&entry->d_hash, list);
+ entry->d_vfs_flags &= ~DCACHE_UNHASHED;
+ spin_unlock(&entry->d_lock);
spin_unlock(&dcache_lock);
}

@@ -909,7 +955,7 @@
list_add(&dentry->d_hash, &target->d_hash);

/* Unhash the target: dput() will then get rid of it */
- list_del_init(&target->d_hash);
+ d_drop_locked(target);

list_del(&dentry->d_child);
list_del(&target->d_child);
@@ -951,7 +997,7 @@

*--end = '\0';
buflen--;
- if (!IS_ROOT(dentry) && list_empty(&dentry->d_hash)) {
+ if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
buflen -= 10;
end -= 10;
memcpy(end, " (deleted)", 10);
@@ -1034,7 +1080,7 @@
error = -ENOENT;
/* Has the current directory has been unlinked? */
spin_lock(&dcache_lock);
- if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
+ if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
unsigned long len;
char * cwd;

diff -urN linux-2.4.17-base/fs/intermezzo/journal.c linux-2.4.17-dc8/fs/intermezzo/journal.c
--- linux-2.4.17-base/fs/intermezzo/journal.c Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/intermezzo/journal.c Mon Jul 8 16:18:43 2002
@@ -186,7 +186,7 @@

*--end = '\0';
buflen--;
- if (dentry->d_parent != dentry && list_empty(&dentry->d_hash)) {
+ if (dentry->d_parent != dentry && d_unhashed(dentry)) {
buflen -= 10;
end -= 10;
memcpy(end, " (deleted)", 10);
diff -urN linux-2.4.17-base/fs/nfsd/nfsfh.c linux-2.4.17-dc8/fs/nfsd/nfsfh.c
--- linux-2.4.17-base/fs/nfsd/nfsfh.c Thu Oct 4 11:29:22 2001
+++ linux-2.4.17-dc8/fs/nfsd/nfsfh.c Mon Jul 8 16:18:43 2002
@@ -348,7 +348,7 @@
spin_lock(&dcache_lock);
list_for_each(lp, &child->d_inode->i_dentry) {
struct dentry *tmp = list_entry(lp,struct dentry, d_alias);
- if (!list_empty(&tmp->d_hash) &&
+ if (!d_unhashed(tmp) &&
tmp->d_parent == parent) {
child = dget_locked(tmp);
spin_unlock(&dcache_lock);
diff -urN linux-2.4.17-base/fs/readdir.c linux-2.4.17-dc8/fs/readdir.c
--- linux-2.4.17-base/fs/readdir.c Mon Aug 13 03:29:08 2001
+++ linux-2.4.17-dc8/fs/readdir.c Mon Jul 8 16:18:43 2002
@@ -79,7 +79,7 @@
while(1) {
struct dentry *de = list_entry(list, struct dentry, d_child);

- if (!list_empty(&de->d_hash) && de->d_inode) {
+ if (!d_unhashed(de) && de->d_inode) {
spin_unlock(&dcache_lock);
if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0)
break;
diff -urN linux-2.4.17-base/include/linux/dcache.h linux-2.4.17-dc8/include/linux/dcache.h
--- linux-2.4.17-base/include/linux/dcache.h Fri Nov 23 01:16:18 2001
+++ linux-2.4.17-dc8/include/linux/dcache.h Fri Jul 12 10:59:09 2002
@@ -5,6 +5,8 @@

#include <asm/atomic.h>
#include <linux/mount.h>
+#include <linux/rcupdate.h>
+#include <asm/system.h>

/*
* linux/include/linux/dcache.h
@@ -65,11 +67,13 @@

struct dentry {
atomic_t d_count;
+ spinlock_t d_lock; /* lock for d_count & d_vfs_flags */
+ unsigned long d_vfs_flags; /* moved here to be on same cacheline */
unsigned int d_flags;
struct inode * d_inode; /* Where the name belongs to - NULL is negative */
struct dentry * d_parent; /* parent directory */
struct list_head d_hash; /* lookup hash list */
- struct list_head d_lru; /* d_count = 0 LRU list */
+ struct list_head d_lru; /* LRU list */
struct list_head d_child; /* child of parent list */
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
@@ -78,8 +82,8 @@
unsigned long d_time; /* used by d_revalidate */
struct dentry_operations *d_op;
struct super_block * d_sb; /* The root of the dentry tree */
- unsigned long d_vfs_flags;
void * d_fsdata; /* fs-specific data */
+ struct rcu_head d_rcu;
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
};

@@ -123,10 +127,23 @@
* s_nfsd_free_path semaphore will be down
*/
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
+#define DCACHE_UNHASHED 0x0010

extern spinlock_t dcache_lock;

/**
+ * d_unhashed - is dentry hashed
+ * @dentry: entry to check
+ *
+ * Returns true if the dentry passed is not currently hashed.
+ */
+
+static __inline__ int d_unhashed(struct dentry *dentry)
+{
+ return (dentry->d_vfs_flags & DCACHE_UNHASHED);
+}
+
+/**
* d_drop - drop a dentry
* @dentry: dentry to drop
*
@@ -143,14 +160,32 @@
* timeouts or autofs deletes).
*/

+static __inline__ void __d_drop(struct dentry * dentry)
+{
+ if (!d_unhashed(dentry)) {
+ dentry->d_vfs_flags |= DCACHE_UNHASHED;
+ wmb();
+ list_del(&dentry->d_hash);
+ wmb();
+ }
+}
+
static __inline__ void d_drop(struct dentry * dentry)
{
spin_lock(&dcache_lock);
- list_del(&dentry->d_hash);
- INIT_LIST_HEAD(&dentry->d_hash);
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
}

+static __inline__ void d_drop_locked(struct dentry * dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
static __inline__ int dname_external(struct dentry *d)
{
return d->d_name.name != d->d_iname;
@@ -243,27 +278,25 @@
static __inline__ struct dentry * dget(struct dentry *dentry)
{
if (dentry) {
- if (!atomic_read(&dentry->d_count))
- BUG();
+ spin_lock(&dentry->d_lock);
atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ spin_unlock(&dentry->d_lock);
}
return dentry;
}

-extern struct dentry * dget_locked(struct dentry *);
-
-/**
- * d_unhashed - is dentry hashed
- * @dentry: entry to check
- *
- * Returns true if the dentry passed is not currently hashed.
- */
-
-static __inline__ int d_unhashed(struct dentry *dentry)
+static __inline__ struct dentry * __dget(struct dentry *dentry)
{
- return list_empty(&dentry->d_hash);
+ if (dentry) {
+ atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ }
+ return dentry;
}

+extern struct dentry * dget_locked(struct dentry *);
+
extern void dput(struct dentry *);

static __inline__ int d_mountpoint(struct dentry *dentry)

--
Maneesh Soni
IBM Linux Technology Center,
IBM India Software Lab, Bangalore.
Phone: +91-80-5044999 email: [email protected]
http://lse.sourceforge.net/locking/rcupdate.html


2002-07-12 14:10:44

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [Lse-tech] [RFC] dcache scalability patch (2.4.17)

> diff -urN linux-2.4.17-base/fs/dcache.c linux-2.4.17-dc8/fs/dcache.c
> --- linux-2.4.17-base/fs/dcache.c Fri Dec 21 23:11:55 2001
> +++ linux-2.4.17-dc8/fs/dcache.c Fri Jul 12 16:18:39 2002
> @@ -25,6 +25,7 @@
> #include <linux/module.h>
>
> #include <asm/uaccess.h>
> +#include <linux/rcupdate.h>

Please try to include <linux/*.h> before <asm/*.h> headers.


> +static void d_callback(void *arg)
> +{
> + struct dentry * dentry = (struct dentry *)arg;
> +
> + if (dname_external(dentry))
> + kfree((void *) dentry->d_name.name);
> + kmem_cache_free(dentry_cache, dentry);
> +}

why do you cast to void * before calling kfree?

> - /* dput on a free dentry? */
> - if (!list_empty(&dentry->d_lru))
> - BUG();
> + spin_lock(&dentry->d_lock);
> + if (atomic_read(&dentry->d_count)) {
> + spin_unlock(&dentry->d_lock);
> + spin_unlock(&dcache_lock);
> + return;
> + }
> +

Please use tabs instead of eight spaces in kernel code.

Another implementation detail is whether we shouldn't spin on a bit of
->d_vfs_flags instead of increasing struct dentry further. Maybe the
spin_lock_bit interface that wli prototypes might be a good choice.

Else the patch looks fine to me, although I'm wondering why you target 2.4.17

2002-07-12 14:27:20

by Alexander Viro

[permalink] [raw]
Subject: Re: [RFC] dcache scalability patch (2.4.17)



On Fri, 12 Jul 2002, Maneesh Soni wrote:

> Here is the dcache scalability patch (cleaned up) as disscussed in
> the previous post to lkml by Dipankar. The patch uses RCU for doing fast
> dcache lookup. It also does lazy updates to lru list of dentries to
> avoid doing write operations while doing lookup.

Where is
* version for 2.5.<current>
* analysis of benefits in real-world situations for 2.5 version?

Patch adds complexity and unless you can show that it gives significant
benefits outside of pathological situations, it's not going in.

Note: measurements on 2.4 do not make sense; reduction of cacheline
bouncing between 2.4 and 2.5 will change the results anyway and
if any of these patches are going to be applied to 2.4, reduction of
cacheline bouncing on ->d_count is going to go in before that one.

2002-07-12 16:03:07

by Dipankar Sarma

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)

On Fri, Jul 12, 2002 at 10:29:53AM -0400, Alexander Viro wrote:
> On Fri, 12 Jul 2002, Maneesh Soni wrote:
>
> > Here is the dcache scalability patch (cleaned up) as disscussed in
> > the previous post to lkml by Dipankar. The patch uses RCU for doing fast
> > dcache lookup. It also does lazy updates to lru list of dentries to
> > avoid doing write operations while doing lookup.
>
> Where is
> * version for 2.5.<current>
> * analysis of benefits in real-world situations for 2.5 version?

I know that 2.5 patches are available, but Maneesh will probably
respond to this on Monday.

I am working on getting 2.5 measurements done. BTW, would you consider
specweb99 reasonably real-world ? If not, do you have any suggestions
for benchmarks ? I suspect that dbench wouldn't cut it ;-).

>
> Patch adds complexity and unless you can show that it gives significant
> benefits outside of pathological situations, it's not going in.

Fair enough.

>
> Note: measurements on 2.4 do not make sense; reduction of cacheline
> bouncing between 2.4 and 2.5 will change the results anyway and

Quite possible. Our performance measurements have been far
behind and we are catching up now. You may expect 2.5 numbers soon.

> if any of these patches are going to be applied to 2.4, reduction of
> cacheline bouncing on ->d_count is going to go in before that one.

That is an issue we need to work on. We can do some cache event
profiling to understand the extent of the d_count cacheline bouncing.
At the same time, it seems that the dcache_lock cacheline is also
bouncing around and it is probably more shared than the dentries
for / or /usr. One thing for sure - RCU based lookup of dcache
makes it difficult to optimize on dget()s. We will have to figure
out a way to do this.

Thanks
--
Dipankar Sarma <[email protected]> http://lse.sourceforge.net
Linux Technology Center, IBM Software Lab, Bangalore, India.

2002-07-12 16:13:43

by Dipankar Sarma

[permalink] [raw]
Subject: Re: [Lse-tech] [RFC] dcache scalability patch (2.4.17)

On Fri, Jul 12, 2002 at 03:13:22PM +0100, Christoph Hellwig wrote:
>
> Else the patch looks fine to me, although I'm wondering why you target 2.4.17

Just to clarify from the project standpoint - we are *not* targeting
2.4.X. 2.4.X is what was used for ongoing performance measurement
work and we had to hop on to that bandwagon. It was just a proof
of concept.

We will publish the 2.5 stuff soon.

Thanks
--
Dipankar Sarma <[email protected]> http://lse.sourceforge.net
Linux Technology Center, IBM Software Lab, Bangalore, India.

2002-07-12 17:33:34

by Paul Menage

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)

>
>Note: measurements on 2.4 do not make sense; reduction of cacheline
>bouncing between 2.4 and 2.5 will change the results anyway and
>if any of these patches are going to be applied to 2.4, reduction of
>cacheline bouncing on ->d_count is going to go in before that one.

I think there are some other possibilities for cache-bounce removal in
struct dentry. The most obvious one is d_vfs_flags - not only does it
get written to on every d_lookup (to set DCACHE_REFERENCED) but it also
shares a cache line (on Pentium IV) with d_op, d_iname and part of
d_name (along with d_sb and d_fsdata, but these don't seem to be so
crucial).

Some quick stats gathering suggested that DCACHE_REFERENCED is already
set 95%-98% of the time, so this cache bounce is not even doing anything
useful. I submitted this patch a while ago making the DCACHE_REFERENCED
bit setting be conditional on it not being already set, which didn't
generate any interest. One problem with this patch would be the
additional branch prediction misses (on some architectures?) that would
work against the benefits of not dirtying a cache line.

Maybe we should have a function definition something like the following:

static __inline__ void __ensure_bit_set(int nr, volatile unsigned long * addr)
{
#if defined (CONFIG_BIG_SMP) || defined(ARCH_HAVE_PREDICATED_WRITE)
if(!test_bit(nr, addr))
#endif
set_bit(nr, addr);
}

so that architectures that support conditional writes (arm and ia64?) and
for SMP systems with enough processors that cache-bouncing is an issue,
the test can be performed, and for others where the branch prediction
miss would hurt us more than the cache dirtying it would just do it
unconditionally.

--- linux-2.5.13/fs/dcache.c Thu May 2 17:22:42 2002
+++ linux-2.5.13-nodref/fs/dcache.c Sat May 4 16:36:38 2002
@@ -883,7 +883,8 @@
if (memcmp(dentry->d_name.name, str, len))
continue;
}
- dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ if(!(dentry->d_vfs_flags & DCACHE_REFERENCED))
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
return dentry;
}
return NULL;


Perhaps another solution is to rearrange struct dentry - put the
volatile stuff in one cache line (or set of lines), and the constant
stuff in another. So probably d_count, d_lru and d_vfs_flags
would be in the volatile line, and most other stuff in the the other.

It would probably make sense to ensure that d_name and d_iname share a
cache line if possible, even on smaller cache-line architectures, and
maybe also d_hash and d_parent on that same line.

Paul



2002-07-12 17:55:17

by Dipankar Sarma

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)

On Fri, Jul 12, 2002 at 09:40:08PM +0530, Dipankar Sarma wrote:
> On Fri, Jul 12, 2002 at 10:29:53AM -0400, Alexander Viro wrote:
> > Where is
> > * version for 2.5.<current>
> > * analysis of benefits in real-world situations for 2.5 version?
>
> I know that 2.5 patches are available, but Maneesh will probably
> respond to this on Monday.
>
> I am working on getting 2.5 measurements done. BTW, would you consider
> specweb99 reasonably real-world ? If not, do you have any suggestions
> for benchmarks ? I suspect that dbench wouldn't cut it ;-).

Hi Al,

Mark Hahn made a good point over private email that real-worldness
also includes hardware. I will dig around and see if we can work out
a setup with different dual/4CPU hardware to do webserver benchmarks
and analyze results for 2.5 patches.

Thanks
--
Dipankar Sarma <[email protected]> http://lse.sourceforge.net
Linux Technology Center, IBM Software Lab, Bangalore, India.

2002-07-12 18:04:06

by Hanna Linder

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)

--On Friday, July 12, 2002 23:32:05 +0530 Dipankar Sarma <[email protected]> wrote:

>
> Hi Al,
>
> Mark Hahn made a good point over private email that real-worldness
> also includes hardware. I will dig around and see if we can work out
> a setup with different dual/4CPU hardware to do webserver benchmarks
> and analyze results for 2.5 patches.
>

Dipankar,

I just loaded a 2-way PII 400Mhz with 256Meg of ram which
is a lot more real world than most the other systems we test. It
is on the net (behind the ibm firewall of course) so you can use
it if you want.

Hanna

2002-07-12 22:45:49

by Hanna Linder

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)

--On Friday, July 12, 2002 10:29:53 -0400 Alexander Viro <[email protected]> wrote:
>
>
> On Fri, 12 Jul 2002, Maneesh Soni wrote:
>
>> Here is the dcache scalability patch (cleaned up) as disscussed in
>> the previous post to lkml by Dipankar. The patch uses RCU for doing fast
>> dcache lookup. It also does lazy updates to lru list of dentries to
>> avoid doing write operations while doing lookup.
>
> Where is
> * version for 2.5.<current>
> * analysis of benefits in real-world situations for 2.5 version?
>
> Patch adds complexity and unless you can show that it gives significant
> benefits outside of pathological situations, it's not going in.


Here are the slides where I presented, among other things, some
performance results of fastwalk compared to using rcu with lazy
updating of the d_lru list. The results are similar to what Dipankar
just published but there are a few more data points.

http://lse.sf.net/locking

Thanks.

Hanna


2002-07-13 08:49:42

by Alexander Viro

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)



On Fri, 12 Jul 2002, Paul Menage wrote:

> >Note: measurements on 2.4 do not make sense; reduction of cacheline
> >bouncing between 2.4 and 2.5 will change the results anyway and
> >if any of these patches are going to be applied to 2.4, reduction of
> >cacheline bouncing on ->d_count is going to go in before that one.
>
> I think there are some other possibilities for cache-bounce removal in
> struct dentry. The most obvious one is d_vfs_flags - not only does it
> get written to on every d_lookup (to set DCACHE_REFERENCED) but it also
> shares a cache line (on Pentium IV) with d_op, d_iname and part of
> d_name (along with d_sb and d_fsdata, but these don't seem to be so
> crucial).
>
> Some quick stats gathering suggested that DCACHE_REFERENCED is already
> set 95%-98% of the time, so this cache bounce is not even doing anything
> useful. I submitted this patch a while ago making the DCACHE_REFERENCED
> bit setting be conditional on it not being already set, which didn't
> generate any interest. One problem with this patch would be the
> additional branch prediction misses (on some architectures?) that would
> work against the benefits of not dirtying a cache line.

Frankly, I'd rather moved setting DCACHE_REFERENCED to dput(). We don't
care for that bit for dentries with positive ->d_count.

So I'd just do

vi fs/dcache.c -c '/|= DCACHE_R/d|/nr_un/pu|<|x'

and be done with that. Linus?

2002-07-13 17:23:36

by Paul Menage

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)

>
>Frankly, I'd rather moved setting DCACHE_REFERENCED to dput(). We don't
>care for that bit for dentries with positive ->d_count.
>
>So I'd just do
>
>vi fs/dcache.c -c '/|= DCACHE_R/d|/nr_un/pu|<|x'
>
>and be done with that. Linus?
>

Some possibly minor issues with that:

- accessing foo/../bar, won't mark foo as referenced, even though it
might be being referenced frequently. Probably not a common case for foo
to be accessed exclusively in this way, but it could be fixed by marking
a dentry referenced when following ".."

- currently, negative dentries start off unreferenced and get marked
referenced the second and subsequent time that they're used. This would
change to starting off referenced (by the ref count set in lock_nd()
after the ->lookup()) but then not being marked referenced ever again,
as they're always looked at under dcache_lock, and no count is taken on
them. So used-once negative dentries would hang around longer, and
frequently-used negative dentries would be cleaned up sooner.

- referenced bit will be set possibly long after the reference is
actually taken/used, which might make dentry pruning a little less
accurate.

I was considering suggesting moving the reference bit setting to
unlock_nd(), since that's another place where we're already changing
d_count, but that still has the first two problems that I mentioned.
Either way, moving d_vfsflags to the same cacheline as d_count would
probably be a good idea.

Paul

2002-07-13 17:30:25

by Alexander Viro

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)



On Sat, 13 Jul 2002, Paul Menage wrote:

> - accessing foo/../bar, won't mark foo as referenced, even though it
> might be being referenced frequently. Probably not a common case for foo
> to be accessed exclusively in this way, but it could be fixed by marking
> a dentry referenced when following ".."

It certainly will. Look - until ->d_count hits zero referenced bit is
not touched or looked at. At all.

Look at the code. There is _no_ aging for dentries with positive ->d_count.
They start aging only when after they enter unused_list...

2002-07-13 17:48:51

by Paul Menage

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)

>
>
>On Sat, 13 Jul 2002, Paul Menage wrote:
>
>> - accessing foo/../bar, won't mark foo as referenced, even though it
>> might be being referenced frequently. Probably not a common case for foo
>> to be accessed exclusively in this way, but it could be fixed by marking
>> a dentry referenced when following ".."
>
>It certainly will. Look - until ->d_count hits zero referenced bit is
>not touched or looked at. At all.
>
>Look at the code. There is _no_ aging for dentries with positive ->d_count.
>They start aging only when after they enter unused_list...
>

Yes, but with the fastwalk lookup, accessing foo/../bar won't do a
dget() for foo (assuming it was cached), and hence will never do a
dput() on it either. So if the only references to foo are as foo/../bar
then its d_count will stay zero and it will never be marked referenced.
(Or am I missing something?)

Paul

2002-07-13 17:51:43

by Alexander Viro

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)



On Sat, 13 Jul 2002, Alexander Viro wrote:

>
>
> On Sat, 13 Jul 2002, Paul Menage wrote:
>
> > - accessing foo/../bar, won't mark foo as referenced, even though it
> > might be being referenced frequently. Probably not a common case for foo
> > to be accessed exclusively in this way, but it could be fixed by marking
> > a dentry referenced when following ".."
>
> It certainly will. Look - until ->d_count hits zero referenced bit is
> not touched or looked at. At all.
>
> Look at the code. There is _no_ aging for dentries with positive ->d_count.
> They start aging only when after they enter unused_list...

On the second thought, I should apply that advice myself. It's true
that they start aging only after they hit the unused list, but they are born
old. Hrm...

OK, it kills that variant and it really looks like there's no way
around dirtying dentry first time we find it on d_lookup(). Which means
that we probably want to put that |= under if - without any ifdefs...

2002-07-15 00:05:19

by Linus Torvalds

[permalink] [raw]
Subject: Re: [Lse-tech] Re: [RFC] dcache scalability patch (2.4.17)



On Sat, 13 Jul 2002, Alexander Viro wrote:
>
> So I'd just do
>
> vi fs/dcache.c -c '/|= DCACHE_R/d|/nr_un/pu|<|x'
>
> and be done with that. Linus?

Done.

For future reference - don't anybody else try to send patches as vi
scripts, please. Yes, it's manly, but let's face it, so is bungee-jumping
with the cord tied to your testicles.

Linus

2002-07-15 07:40:55

by Maneesh Soni

[permalink] [raw]
Subject: Re: [RFC] dcache scalability patch (2.4.17)

On Fri, Jul 12, 2002 at 10:29:53AM -0400, Alexander Viro wrote:
>
>
> On Fri, 12 Jul 2002, Maneesh Soni wrote:
>
> > Here is the dcache scalability patch (cleaned up) as disscussed in
> > the previous post to lkml by Dipankar. The patch uses RCU for doing fast
> > dcache lookup. It also does lazy updates to lru list of dentries to
> > avoid doing write operations while doing lookup.
>
> Where is
> * version for 2.5.<current>
> * analysis of benefits in real-world situations for 2.5 version?
>
> Patch adds complexity and unless you can show that it gives significant
> benefits outside of pathological situations, it's not going in.
>
> Note: measurements on 2.4 do not make sense; reduction of cacheline
> bouncing between 2.4 and 2.5 will change the results anyway and
> if any of these patches are going to be applied to 2.4, reduction of
> cacheline bouncing on ->d_count is going to go in before that one.

Hi Viro,

The 2.4 tests we did also included the fastwalk patch ported from 2.5. Though
fastwalk performed better (throughput improved by 1%) than base 2.4,
but I think because of increased hold and wait times on dcache_lock, results
are not as good as using only dcache_rcu.
http://marc.theaimsgroup.com/?l=linux-kernel&m=102645767914212&w=2

For 2.5 I tried to merge fastwalk and dcache_rcu but the two do not seem
to complement each other. fastwalk takes dcache_lock much earlier than
d_lookup.

On 2.5 we get better results from dcache_rcu when we remove
fastwalk and put the dcache code back to 2.5.10ish level of code. Probably
this is not the correct way as it involves lots of code changes and we need
to workout some other way.

There are some numbers from 2.5.20-base vs dcache_rcu(no fastwalk) done by
Anton Blanchard with dbench.
http://samba.org/~anton/linux/2.5.20/

Regards,
Maneesh

--
Maneesh Soni
IBM Linux Technology Center,
IBM India Software Lab, Bangalore.
Phone: +91-80-5044999 email: [email protected]
http://lse.sourceforge.net/locking/rcupdate.html

2002-07-15 08:01:15

by Maneesh Soni

[permalink] [raw]
Subject: Re: [Lse-tech] [RFC] dcache scalability patch (2.4.17)

On Fri, Jul 12, 2002 at 03:13:22PM +0100, Christoph Hellwig wrote:
> > diff -urN linux-2.4.17-base/fs/dcache.c linux-2.4.17-dc8/fs/dcache.c
> > --- linux-2.4.17-base/fs/dcache.c Fri Dec 21 23:11:55 2001
> > +++ linux-2.4.17-dc8/fs/dcache.c Fri Jul 12 16:18:39 2002
> > @@ -25,6 +25,7 @@
> > #include <linux/module.h>
> >
> > #include <asm/uaccess.h>
> > +#include <linux/rcupdate.h>
>
> Please try to include <linux/*.h> before <asm/*.h> headers.
Ok.. no problem.

>
> > +static void d_callback(void *arg)
> > +{
> > + struct dentry * dentry = (struct dentry *)arg;
> > +
> > + if (dname_external(dentry))
> > + kfree((void *) dentry->d_name.name);
> > + kmem_cache_free(dentry_cache, dentry);
> > +}
>
> why do you cast to void * before calling kfree?
I think this got left over because of some intermediate trials.. I have
removed this now.

>
> > - /* dput on a free dentry? */
> > - if (!list_empty(&dentry->d_lru))
> > - BUG();
> > + spin_lock(&dentry->d_lock);
> > + if (atomic_read(&dentry->d_count)) {
> > + spin_unlock(&dentry->d_lock);
> > + spin_unlock(&dcache_lock);
> > + return;
> > + }
> > +
>
> Please use tabs instead of eight spaces in kernel code.
Sure..

> Another implementation details is whether we shouldn't spin on a bit of
> ->d_vfs_flags instead of increasing struct dentry further. Maybe the
> spin_lock_bit interface that wli prototypes might be a godd choise.
I have not seen this but it should not be impossible.

> Else the patch looks fine to me, although I'm wondering why you target 2.4.17

Thanks for the review. I corrected the above two problems.. new patch is as
below. As Dipankar said, the 2.4 version can be taken just as a proof of
concept and for discussion.

Regards,
Maneesh


diff -urN linux-2.4.17-base/fs/autofs4/root.c linux-2.4.17-dc8/fs/autofs4/root.c
--- linux-2.4.17-base/fs/autofs4/root.c Tue Oct 24 10:27:38 2000
+++ linux-2.4.17-dc8/fs/autofs4/root.c Fri Jul 12 10:59:38 2002
@@ -403,7 +403,7 @@
spin_unlock(&dcache_lock);
return -ENOTEMPTY;
}
- list_del_init(&dentry->d_hash);
+ d_drop_locked(dentry);
spin_unlock(&dcache_lock);

dput(ino->dentry);
diff -urN linux-2.4.17-base/fs/dcache.c linux-2.4.17-dc8/fs/dcache.c
--- linux-2.4.17-base/fs/dcache.c Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/dcache.c Mon Jul 15 13:20:29 2002
@@ -23,6 +23,7 @@
#include <linux/smp_lock.h>
#include <linux/cache.h>
#include <linux/module.h>
+#include <linux/rcupdate.h>

#include <asm/uaccess.h>

@@ -55,14 +56,21 @@
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {0, 0, 45, 0,};

+static void d_callback(void *arg)
+{
+ struct dentry * dentry = (struct dentry *)arg;
+
+ if (dname_external(dentry))
+ kfree(dentry->d_name.name);
+ kmem_cache_free(dentry_cache, dentry);
+}
+
/* no dcache_lock, please */
static inline void d_free(struct dentry *dentry)
{
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
- if (dname_external(dentry))
- kfree(dentry->d_name.name);
- kmem_cache_free(dentry_cache, dentry);
+ call_rcu(&dentry->d_rcu, d_callback, dentry);
dentry_stat.nr_dentry--;
}

@@ -124,9 +132,13 @@
if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
return;

- /* dput on a free dentry? */
- if (!list_empty(&dentry->d_lru))
- BUG();
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ return;
+ }
+
/*
* AV: ->d_delete() is _NOT_ allowed to block now.
*/
@@ -135,18 +147,28 @@
goto unhash_it;
}
/* Unreachable? Get rid of it */
- if (list_empty(&dentry->d_hash))
+ if (d_unhashed(dentry))
goto kill_it;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+
+ if (list_empty(&dentry->d_lru)) {
+ dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
return;

unhash_it:
- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);

kill_it: {
struct dentry *parent;
+ spin_unlock(&dentry->d_lock);
+ if (!list_empty(&dentry->d_lru)) {
+ list_del(&dentry->d_lru);
+ dentry_stat.nr_unused--;
+ }
list_del(&dentry->d_child);
/* drops the lock, at that point nobody can reach this dentry */
dentry_iput(dentry);
@@ -177,7 +199,7 @@
* If it's already been dropped, return OK.
*/
spin_lock(&dcache_lock);
- if (list_empty(&dentry->d_hash)) {
+ if (d_unhashed(dentry)) {
spin_unlock(&dcache_lock);
return 0;
}
@@ -201,15 +223,18 @@
* we might still populate it if it was a
* working directory or similar).
*/
+ spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count) > 1) {
if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
return -EBUSY;
}
}
-
- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
+
return 0;
}

@@ -217,11 +242,14 @@

static inline struct dentry * __dget_locked(struct dentry *dentry)
{
+ spin_lock(&dentry->d_lock);
atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
if (atomic_read(&dentry->d_count) == 1) {
dentry_stat.nr_unused--;
list_del_init(&dentry->d_lru);
}
+ spin_unlock(&dentry->d_lock);
return dentry;
}

@@ -252,8 +280,8 @@
tmp = next;
next = tmp->next;
alias = list_entry(tmp, struct dentry, d_alias);
- if (!list_empty(&alias->d_hash)) {
- __dget_locked(alias);
+ if (!d_unhashed(alias)) {
+ dget(alias);
spin_unlock(&dcache_lock);
return alias;
}
@@ -263,7 +291,7 @@
}

/*
- * Try to kill dentries associated with this inode.
+ * Try to kill dentries associated with this inode.
* WARNING: you must own a reference to inode.
*/
void d_prune_aliases(struct inode *inode)
@@ -274,13 +302,16 @@
tmp = head;
while ((tmp = tmp->next) != head) {
struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+ spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
- __dget_locked(dentry);
+ __dget(dentry);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
- d_drop(dentry);
dput(dentry);
goto restart;
}
+ spin_unlock(&dentry->d_lock);
}
spin_unlock(&dcache_lock);
}
@@ -295,7 +326,8 @@
{
struct dentry * parent;

- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
list_del(&dentry->d_child);
dentry_iput(dentry);
parent = dentry->d_parent;
@@ -330,19 +362,20 @@
if (tmp == &dentry_unused)
break;
list_del_init(tmp);
+ dentry_stat.nr_unused--;
dentry = list_entry(tmp, struct dentry, d_lru);

+ spin_lock(&dentry->d_lock);
/* If the dentry was recently referenced, don't free it. */
if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
+ if (!atomic_read(&dentry->d_count)) {
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+ spin_unlock(&dentry->d_lock);
continue;
}
- dentry_stat.nr_unused--;
-
- /* Unused dentry with a count? */
- if (atomic_read(&dentry->d_count))
- BUG();

prune_one_dentry(dentry);
if (!--count)
@@ -405,10 +438,13 @@
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
- if (atomic_read(&dentry->d_count))
- continue;
+ spin_lock(&dentry->d_lock);
dentry_stat.nr_unused--;
list_del_init(tmp);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ continue;
+ }
prune_one_dentry(dentry);
goto repeat;
}
@@ -488,11 +524,13 @@
struct list_head *tmp = next;
struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
+ list_del_init(&dentry->d_lru);
+ spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
- list_del(&dentry->d_lru);
list_add(&dentry->d_lru, dentry_unused.prev);
found++;
}
+ spin_unlock(&dentry->d_lock);
/*
* Descend a level if the d_subdirs list is non-empty.
*/
@@ -606,8 +644,9 @@
str[name->len] = 0;

atomic_set(&dentry->d_count, 1);
- dentry->d_vfs_flags = 0;
+ dentry->d_vfs_flags = DCACHE_UNHASHED;
dentry->d_flags = 0;
+ dentry->d_lock = SPIN_LOCK_UNLOCKED;
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
@@ -709,7 +748,7 @@
struct list_head *head = d_hash(parent,hash);
struct list_head *tmp;

- spin_lock(&dcache_lock);
+ /* rcu_read_lock(); for pre-emptible kernel */
tmp = head->next;
for (;;) {
struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -729,12 +768,15 @@
if (memcmp(dentry->d_name.name, str, len))
continue;
}
- __dget_locked(dentry);
- dentry->d_vfs_flags |= DCACHE_REFERENCED;
- spin_unlock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
+ if (!(dentry->d_vfs_flags & DCACHE_UNHASHED)) {
+ dentry = __dget(dentry);
+ }
+ spin_unlock(&dentry->d_lock);
+ /* rcu_read_unlock(); for pre-emptible kernel */
return dentry;
}
- spin_unlock(&dcache_lock);
+ /* rcu_read_unlock(); for pre-emptible kernel */
return NULL;
}

@@ -774,7 +816,7 @@
lhp = base = d_hash(dparent, dentry->d_name.hash);
while ((lhp = lhp->next) != base) {
if (dentry == list_entry(lhp, struct dentry, d_hash)) {
- __dget_locked(dentry);
+ dget(dentry);
spin_unlock(&dcache_lock);
return 1;
}
@@ -834,9 +876,12 @@
void d_rehash(struct dentry * entry)
{
struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
- if (!list_empty(&entry->d_hash)) BUG();
spin_lock(&dcache_lock);
+ spin_lock(&entry->d_lock);
+ if (!list_empty(&entry->d_hash) && !d_unhashed(entry)) BUG();
list_add(&entry->d_hash, list);
+ entry->d_vfs_flags &= ~DCACHE_UNHASHED;
+ spin_unlock(&entry->d_lock);
spin_unlock(&dcache_lock);
}

@@ -909,7 +954,7 @@
list_add(&dentry->d_hash, &target->d_hash);

/* Unhash the target: dput() will then get rid of it */
- list_del_init(&target->d_hash);
+ d_drop_locked(target);

list_del(&dentry->d_child);
list_del(&target->d_child);
@@ -951,7 +996,7 @@

*--end = '\0';
buflen--;
- if (!IS_ROOT(dentry) && list_empty(&dentry->d_hash)) {
+ if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
buflen -= 10;
end -= 10;
memcpy(end, " (deleted)", 10);
@@ -1034,7 +1079,7 @@
error = -ENOENT;
/* Has the current directory has been unlinked? */
spin_lock(&dcache_lock);
- if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
+ if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
unsigned long len;
char * cwd;

diff -urN linux-2.4.17-base/fs/intermezzo/journal.c linux-2.4.17-dc8/fs/intermezzo/journal.c
--- linux-2.4.17-base/fs/intermezzo/journal.c Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/intermezzo/journal.c Mon Jul 8 16:18:43 2002
@@ -186,7 +186,7 @@

*--end = '\0';
buflen--;
- if (dentry->d_parent != dentry && list_empty(&dentry->d_hash)) {
+ if (dentry->d_parent != dentry && d_unhashed(dentry)) {
buflen -= 10;
end -= 10;
memcpy(end, " (deleted)", 10);
diff -urN linux-2.4.17-base/fs/nfsd/nfsfh.c linux-2.4.17-dc8/fs/nfsd/nfsfh.c
--- linux-2.4.17-base/fs/nfsd/nfsfh.c Thu Oct 4 11:29:22 2001
+++ linux-2.4.17-dc8/fs/nfsd/nfsfh.c Mon Jul 8 16:18:43 2002
@@ -348,7 +348,7 @@
spin_lock(&dcache_lock);
list_for_each(lp, &child->d_inode->i_dentry) {
struct dentry *tmp = list_entry(lp,struct dentry, d_alias);
- if (!list_empty(&tmp->d_hash) &&
+ if (!d_unhashed(tmp) &&
tmp->d_parent == parent) {
child = dget_locked(tmp);
spin_unlock(&dcache_lock);
diff -urN linux-2.4.17-base/fs/readdir.c linux-2.4.17-dc8/fs/readdir.c
--- linux-2.4.17-base/fs/readdir.c Mon Aug 13 03:29:08 2001
+++ linux-2.4.17-dc8/fs/readdir.c Mon Jul 8 16:18:43 2002
@@ -79,7 +79,7 @@
while(1) {
struct dentry *de = list_entry(list, struct dentry, d_child);

- if (!list_empty(&de->d_hash) && de->d_inode) {
+ if (!d_unhashed(de) && de->d_inode) {
spin_unlock(&dcache_lock);
if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0)
break;
diff -urN linux-2.4.17-base/include/linux/dcache.h linux-2.4.17-dc8/include/linux/dcache.h
--- linux-2.4.17-base/include/linux/dcache.h Fri Nov 23 01:16:18 2001
+++ linux-2.4.17-dc8/include/linux/dcache.h Fri Jul 12 10:59:09 2002
@@ -5,6 +5,8 @@

#include <asm/atomic.h>
#include <linux/mount.h>
+#include <linux/rcupdate.h>
+#include <asm/system.h>

/*
* linux/include/linux/dcache.h
@@ -65,11 +67,13 @@

struct dentry {
atomic_t d_count;
+ spinlock_t d_lock; /* lock for d_count & d_vfs_flags */
+ unsigned long d_vfs_flags; /* moved here to be on same cacheline */
unsigned int d_flags;
struct inode * d_inode; /* Where the name belongs to - NULL is negative */
struct dentry * d_parent; /* parent directory */
struct list_head d_hash; /* lookup hash list */
- struct list_head d_lru; /* d_count = 0 LRU list */
+ struct list_head d_lru; /* LRU list */
struct list_head d_child; /* child of parent list */
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
@@ -78,8 +82,8 @@
unsigned long d_time; /* used by d_revalidate */
struct dentry_operations *d_op;
struct super_block * d_sb; /* The root of the dentry tree */
- unsigned long d_vfs_flags;
void * d_fsdata; /* fs-specific data */
+ struct rcu_head d_rcu;
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
};

@@ -123,10 +127,23 @@
* s_nfsd_free_path semaphore will be down
*/
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
+#define DCACHE_UNHASHED 0x0010

extern spinlock_t dcache_lock;

/**
+ * d_unhashed - is dentry hashed
+ * @dentry: entry to check
+ *
+ * Returns true if the dentry passed is not currently hashed.
+ */
+
+static __inline__ int d_unhashed(struct dentry *dentry)
+{
+ return (dentry->d_vfs_flags & DCACHE_UNHASHED);
+}
+
+/**
* d_drop - drop a dentry
* @dentry: dentry to drop
*
@@ -143,14 +160,32 @@
* timeouts or autofs deletes).
*/

+static __inline__ void __d_drop(struct dentry * dentry)
+{
+ if (!d_unhashed(dentry)) {
+ dentry->d_vfs_flags |= DCACHE_UNHASHED;
+ wmb();
+ list_del(&dentry->d_hash);
+ wmb();
+ }
+}
+
static __inline__ void d_drop(struct dentry * dentry)
{
spin_lock(&dcache_lock);
- list_del(&dentry->d_hash);
- INIT_LIST_HEAD(&dentry->d_hash);
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
}

+static __inline__ void d_drop_locked(struct dentry * dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
static __inline__ int dname_external(struct dentry *d)
{
return d->d_name.name != d->d_iname;
@@ -243,27 +278,25 @@
static __inline__ struct dentry * dget(struct dentry *dentry)
{
if (dentry) {
- if (!atomic_read(&dentry->d_count))
- BUG();
+ spin_lock(&dentry->d_lock);
atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ spin_unlock(&dentry->d_lock);
}
return dentry;
}

-extern struct dentry * dget_locked(struct dentry *);
-
-/**
- * d_unhashed - is dentry hashed
- * @dentry: entry to check
- *
- * Returns true if the dentry passed is not currently hashed.
- */
-
-static __inline__ int d_unhashed(struct dentry *dentry)
+static __inline__ struct dentry * __dget(struct dentry *dentry)
{
- return list_empty(&dentry->d_hash);
+ if (dentry) {
+ atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ }
+ return dentry;
}

+extern struct dentry * dget_locked(struct dentry *);
+
extern void dput(struct dentry *);

static __inline__ int d_mountpoint(struct dentry *dentry)
--
Maneesh Soni
IBM Linux Technology Center,
IBM India Software Lab, Bangalore.
Phone: +91-80-5044999 email: [email protected]
http://lse.sourceforge.net/locking/rcupdate.html

2002-07-15 08:17:19

by Maneesh Soni

[permalink] [raw]
Subject: Re: [Lse-tech] [RFC] dcache scalability patch (2.4.17)

oops.. sorry.

This is the correct patch..


diff -urN linux-2.4.17-base/fs/autofs4/root.c linux-2.4.17-dc8/fs/autofs4/root.c
--- linux-2.4.17-base/fs/autofs4/root.c Tue Oct 24 10:27:38 2000
+++ linux-2.4.17-dc8/fs/autofs4/root.c Fri Jul 12 10:59:38 2002
@@ -403,7 +403,7 @@
spin_unlock(&dcache_lock);
return -ENOTEMPTY;
}
- list_del_init(&dentry->d_hash);
+ d_drop_locked(dentry);
spin_unlock(&dcache_lock);

dput(ino->dentry);
diff -urN linux-2.4.17-base/fs/dcache.c linux-2.4.17-dc8/fs/dcache.c
--- linux-2.4.17-base/fs/dcache.c Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/dcache.c Mon Jul 15 13:47:35 2002
@@ -23,6 +23,7 @@
#include <linux/smp_lock.h>
#include <linux/cache.h>
#include <linux/module.h>
+#include <linux/rcupdate.h>

#include <asm/uaccess.h>

@@ -55,14 +56,21 @@
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {0, 0, 45, 0,};

+static void d_callback(void *arg)
+{
+ struct dentry * dentry = (struct dentry *)arg;
+
+ if (dname_external(dentry))
+ kfree(dentry->d_name.name);
+ kmem_cache_free(dentry_cache, dentry);
+}
+
/* no dcache_lock, please */
static inline void d_free(struct dentry *dentry)
{
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
- if (dname_external(dentry))
- kfree(dentry->d_name.name);
- kmem_cache_free(dentry_cache, dentry);
+ call_rcu(&dentry->d_rcu, d_callback, dentry);
dentry_stat.nr_dentry--;
}

@@ -124,9 +132,13 @@
if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
return;

- /* dput on a free dentry? */
- if (!list_empty(&dentry->d_lru))
- BUG();
+ spin_lock(&dentry->d_lock);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ return;
+ }
+
/*
* AV: ->d_delete() is _NOT_ allowed to block now.
*/
@@ -135,18 +147,28 @@
goto unhash_it;
}
/* Unreachable? Get rid of it */
- if (list_empty(&dentry->d_hash))
+ if (d_unhashed(dentry))
goto kill_it;
- list_add(&dentry->d_lru, &dentry_unused);
- dentry_stat.nr_unused++;
+
+ if (list_empty(&dentry->d_lru)) {
+ dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
return;

unhash_it:
- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);

kill_it: {
struct dentry *parent;
+ spin_unlock(&dentry->d_lock);
+ if (!list_empty(&dentry->d_lru)) {
+ list_del(&dentry->d_lru);
+ dentry_stat.nr_unused--;
+ }
list_del(&dentry->d_child);
/* drops the lock, at that point nobody can reach this dentry */
dentry_iput(dentry);
@@ -177,7 +199,7 @@
* If it's already been dropped, return OK.
*/
spin_lock(&dcache_lock);
- if (list_empty(&dentry->d_hash)) {
+ if (d_unhashed(dentry)) {
spin_unlock(&dcache_lock);
return 0;
}
@@ -201,15 +223,18 @@
* we might still populate it if it was a
* working directory or similar).
*/
+ spin_lock(&dentry->d_lock);
if (atomic_read(&dentry->d_count) > 1) {
if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
return -EBUSY;
}
}
-
- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
+
return 0;
}

@@ -217,11 +242,14 @@

static inline struct dentry * __dget_locked(struct dentry *dentry)
{
+ spin_lock(&dentry->d_lock);
atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
if (atomic_read(&dentry->d_count) == 1) {
dentry_stat.nr_unused--;
list_del_init(&dentry->d_lru);
}
+ spin_unlock(&dentry->d_lock);
return dentry;
}

@@ -252,8 +280,8 @@
tmp = next;
next = tmp->next;
alias = list_entry(tmp, struct dentry, d_alias);
- if (!list_empty(&alias->d_hash)) {
- __dget_locked(alias);
+ if (!d_unhashed(alias)) {
+ dget(alias);
spin_unlock(&dcache_lock);
return alias;
}
@@ -263,7 +291,7 @@
}

/*
- * Try to kill dentries associated with this inode.
+ * Try to kill dentries associated with this inode.
* WARNING: you must own a reference to inode.
*/
void d_prune_aliases(struct inode *inode)
@@ -274,13 +302,16 @@
tmp = head;
while ((tmp = tmp->next) != head) {
struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+ spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
- __dget_locked(dentry);
+ __dget(dentry);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
- d_drop(dentry);
dput(dentry);
goto restart;
}
+ spin_unlock(&dentry->d_lock);
}
spin_unlock(&dcache_lock);
}
@@ -295,7 +326,8 @@
{
struct dentry * parent;

- list_del_init(&dentry->d_hash);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
list_del(&dentry->d_child);
dentry_iput(dentry);
parent = dentry->d_parent;
@@ -330,19 +362,20 @@
if (tmp == &dentry_unused)
break;
list_del_init(tmp);
+ dentry_stat.nr_unused--;
dentry = list_entry(tmp, struct dentry, d_lru);

+ spin_lock(&dentry->d_lock);
/* If the dentry was recently referenced, don't free it. */
if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
- list_add(&dentry->d_lru, &dentry_unused);
+ if (!atomic_read(&dentry->d_count)) {
+ list_add(&dentry->d_lru, &dentry_unused);
+ dentry_stat.nr_unused++;
+ }
+ spin_unlock(&dentry->d_lock);
continue;
}
- dentry_stat.nr_unused--;
-
- /* Unused dentry with a count? */
- if (atomic_read(&dentry->d_count))
- BUG();

prune_one_dentry(dentry);
if (!--count)
@@ -405,10 +438,13 @@
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
- if (atomic_read(&dentry->d_count))
- continue;
+ spin_lock(&dentry->d_lock);
dentry_stat.nr_unused--;
list_del_init(tmp);
+ if (atomic_read(&dentry->d_count)) {
+ spin_unlock(&dentry->d_lock);
+ continue;
+ }
prune_one_dentry(dentry);
goto repeat;
}
@@ -488,11 +524,13 @@
struct list_head *tmp = next;
struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
+ list_del_init(&dentry->d_lru);
+ spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
- list_del(&dentry->d_lru);
list_add(&dentry->d_lru, dentry_unused.prev);
found++;
}
+ spin_unlock(&dentry->d_lock);
/*
* Descend a level if the d_subdirs list is non-empty.
*/
@@ -606,8 +644,9 @@
str[name->len] = 0;

atomic_set(&dentry->d_count, 1);
- dentry->d_vfs_flags = 0;
+ dentry->d_vfs_flags = DCACHE_UNHASHED;
dentry->d_flags = 0;
+ dentry->d_lock = SPIN_LOCK_UNLOCKED;
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
@@ -708,8 +747,9 @@
const unsigned char *str = name->name;
struct list_head *head = d_hash(parent,hash);
struct list_head *tmp;
+ struct dentry * found = NULL;

- spin_lock(&dcache_lock);
+ /* rcu_read_lock(); for pre-emptible kernel */
tmp = head->next;
for (;;) {
struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -729,12 +769,15 @@
if (memcmp(dentry->d_name.name, str, len))
continue;
}
- __dget_locked(dentry);
- dentry->d_vfs_flags |= DCACHE_REFERENCED;
- spin_unlock(&dcache_lock);
- return dentry;
+ spin_lock(&dentry->d_lock);
+ if (!(dentry->d_vfs_flags & DCACHE_UNHASHED)) {
+ found = __dget(dentry);
+ }
+ spin_unlock(&dentry->d_lock);
+ /* rcu_read_unlock(); for pre-emptible kernel */
+ return found;
}
- spin_unlock(&dcache_lock);
+ /* rcu_read_unlock(); for pre-emptible kernel */
return NULL;
}

@@ -774,7 +817,7 @@
lhp = base = d_hash(dparent, dentry->d_name.hash);
while ((lhp = lhp->next) != base) {
if (dentry == list_entry(lhp, struct dentry, d_hash)) {
- __dget_locked(dentry);
+ dget(dentry);
spin_unlock(&dcache_lock);
return 1;
}
@@ -834,9 +877,12 @@
void d_rehash(struct dentry * entry)
{
struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
- if (!list_empty(&entry->d_hash)) BUG();
spin_lock(&dcache_lock);
+ spin_lock(&entry->d_lock);
+ if (!list_empty(&entry->d_hash) && !d_unhashed(entry)) BUG();
list_add(&entry->d_hash, list);
+ entry->d_vfs_flags &= ~DCACHE_UNHASHED;
+ spin_unlock(&entry->d_lock);
spin_unlock(&dcache_lock);
}

@@ -909,7 +955,7 @@
list_add(&dentry->d_hash, &target->d_hash);

/* Unhash the target: dput() will then get rid of it */
- list_del_init(&target->d_hash);
+ d_drop_locked(target);

list_del(&dentry->d_child);
list_del(&target->d_child);
@@ -951,7 +997,7 @@

*--end = '\0';
buflen--;
- if (!IS_ROOT(dentry) && list_empty(&dentry->d_hash)) {
+ if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
buflen -= 10;
end -= 10;
memcpy(end, " (deleted)", 10);
@@ -1034,7 +1080,7 @@
error = -ENOENT;
/* Has the current directory has been unlinked? */
spin_lock(&dcache_lock);
- if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
+ if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
unsigned long len;
char * cwd;

diff -urN linux-2.4.17-base/fs/intermezzo/journal.c linux-2.4.17-dc8/fs/intermezzo/journal.c
--- linux-2.4.17-base/fs/intermezzo/journal.c Fri Dec 21 23:11:55 2001
+++ linux-2.4.17-dc8/fs/intermezzo/journal.c Mon Jul 8 16:18:43 2002
@@ -186,7 +186,7 @@

*--end = '\0';
buflen--;
- if (dentry->d_parent != dentry && list_empty(&dentry->d_hash)) {
+ if (dentry->d_parent != dentry && d_unhashed(dentry)) {
buflen -= 10;
end -= 10;
memcpy(end, " (deleted)", 10);
diff -urN linux-2.4.17-base/fs/nfsd/nfsfh.c linux-2.4.17-dc8/fs/nfsd/nfsfh.c
--- linux-2.4.17-base/fs/nfsd/nfsfh.c Thu Oct 4 11:29:22 2001
+++ linux-2.4.17-dc8/fs/nfsd/nfsfh.c Mon Jul 8 16:18:43 2002
@@ -348,7 +348,7 @@
spin_lock(&dcache_lock);
list_for_each(lp, &child->d_inode->i_dentry) {
struct dentry *tmp = list_entry(lp,struct dentry, d_alias);
- if (!list_empty(&tmp->d_hash) &&
+ if (!d_unhashed(tmp) &&
tmp->d_parent == parent) {
child = dget_locked(tmp);
spin_unlock(&dcache_lock);
diff -urN linux-2.4.17-base/fs/readdir.c linux-2.4.17-dc8/fs/readdir.c
--- linux-2.4.17-base/fs/readdir.c Mon Aug 13 03:29:08 2001
+++ linux-2.4.17-dc8/fs/readdir.c Mon Jul 8 16:18:43 2002
@@ -79,7 +79,7 @@
while(1) {
struct dentry *de = list_entry(list, struct dentry, d_child);

- if (!list_empty(&de->d_hash) && de->d_inode) {
+ if (!d_unhashed(de) && de->d_inode) {
spin_unlock(&dcache_lock);
if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0)
break;
diff -urN linux-2.4.17-base/include/linux/dcache.h linux-2.4.17-dc8/include/linux/dcache.h
--- linux-2.4.17-base/include/linux/dcache.h Fri Nov 23 01:16:18 2001
+++ linux-2.4.17-dc8/include/linux/dcache.h Fri Jul 12 10:59:09 2002
@@ -5,6 +5,8 @@

#include <asm/atomic.h>
#include <linux/mount.h>
+#include <linux/rcupdate.h>
+#include <asm/system.h>

/*
* linux/include/linux/dcache.h
@@ -65,11 +67,13 @@

struct dentry {
atomic_t d_count;
+ spinlock_t d_lock; /* lock for d_count & d_vfs_flags */
+ unsigned long d_vfs_flags; /* moved here to be on same cacheline */
unsigned int d_flags;
struct inode * d_inode; /* Where the name belongs to - NULL is negative */
struct dentry * d_parent; /* parent directory */
struct list_head d_hash; /* lookup hash list */
- struct list_head d_lru; /* d_count = 0 LRU list */
+ struct list_head d_lru; /* LRU list */
struct list_head d_child; /* child of parent list */
struct list_head d_subdirs; /* our children */
struct list_head d_alias; /* inode alias list */
@@ -78,8 +82,8 @@
unsigned long d_time; /* used by d_revalidate */
struct dentry_operations *d_op;
struct super_block * d_sb; /* The root of the dentry tree */
- unsigned long d_vfs_flags;
void * d_fsdata; /* fs-specific data */
+ struct rcu_head d_rcu;
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
};

@@ -123,10 +127,23 @@
* s_nfsd_free_path semaphore will be down
*/
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
+#define DCACHE_UNHASHED 0x0010

extern spinlock_t dcache_lock;

/**
+ * d_unhashed - is dentry hashed
+ * @dentry: entry to check
+ *
+ * Returns true if the dentry passed is not currently hashed.
+ */
+
+static __inline__ int d_unhashed(struct dentry *dentry)
+{
+ return (dentry->d_vfs_flags & DCACHE_UNHASHED);
+}
+
+/**
* d_drop - drop a dentry
* @dentry: dentry to drop
*
@@ -143,14 +160,32 @@
* timeouts or autofs deletes).
*/

+static __inline__ void __d_drop(struct dentry * dentry)
+{
+ if (!d_unhashed(dentry)) {
+ dentry->d_vfs_flags |= DCACHE_UNHASHED;
+ wmb();
+ list_del(&dentry->d_hash);
+ wmb();
+ }
+}
+
static __inline__ void d_drop(struct dentry * dentry)
{
spin_lock(&dcache_lock);
- list_del(&dentry->d_hash);
- INIT_LIST_HEAD(&dentry->d_hash);
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
}

+static __inline__ void d_drop_locked(struct dentry * dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
static __inline__ int dname_external(struct dentry *d)
{
return d->d_name.name != d->d_iname;
@@ -243,27 +278,25 @@
static __inline__ struct dentry * dget(struct dentry *dentry)
{
if (dentry) {
- if (!atomic_read(&dentry->d_count))
- BUG();
+ spin_lock(&dentry->d_lock);
atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ spin_unlock(&dentry->d_lock);
}
return dentry;
}

-extern struct dentry * dget_locked(struct dentry *);
-
-/**
- * d_unhashed - is dentry hashed
- * @dentry: entry to check
- *
- * Returns true if the dentry passed is not currently hashed.
- */
-
-static __inline__ int d_unhashed(struct dentry *dentry)
+static __inline__ struct dentry * __dget(struct dentry *dentry)
{
- return list_empty(&dentry->d_hash);
+ if (dentry) {
+ atomic_inc(&dentry->d_count);
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ }
+ return dentry;
}

+extern struct dentry * dget_locked(struct dentry *);
+
extern void dput(struct dentry *);

static __inline__ int d_mountpoint(struct dentry *dentry)

--
Maneesh Soni
IBM Linux Technology Center,
IBM India Software Lab, Bangalore.
Phone: +91-80-5044999 email: [email protected]
http://lse.sourceforge.net/locking/rcupdate.html