2002-03-08 00:25:14

by Hanna Linder

[permalink] [raw]
Subject: [PATCH] 2.5.6-pre3 Fast Walk Dcache


Please consider this patch for inclusion in 2.5.6.

This patch is half the size of the previously submitted 2.5.5-dj2 version.
Thank you including the simple half (path_lookup) in 2.5.6-pre3 already.

What it does:

Changed path_lookup to hold the dcache_lock instead of incrementing the
d_count reference counter while walking the path as long as the desired
dentry's are found in the dcache. Dave Olien wrote permission_exec_lite.
These ideas came from Al Viro to decrease cacheline bouncing.

Testing:

It compiles and boots and runs as well or better as the clean 2.5.6-pre3
kernel. I have run it on a T21 laptop and an 8-way SMP system.

Performance Results:

TBD

Hanna Linder ([email protected])
IBM Linux Technology Center

--------------------

diff -Nru -X dontdiff linux-2.5.6-pre3/fs/dcache.c linux-2.5.6-pre3-fw/fs/dcache.c
--- linux-2.5.6-pre3/fs/dcache.c Thu Mar 7 12:53:48 2002
+++ linux-2.5.6-pre3-fw/fs/dcache.c Thu Mar 7 12:40:30 2002
@@ -704,13 +704,22 @@

struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
+ struct dentry * dentry;
+ spin_lock(&dcache_lock);
+ dentry = __d_lookup(parent,name);
+ spin_unlock(&dcache_lock);
+ return dentry;
+}
+
+struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
+{
+
unsigned int len = name->len;
unsigned int hash = name->hash;
const unsigned char *str = name->name;
struct list_head *head = d_hash(parent,hash);
struct list_head *tmp;

- spin_lock(&dcache_lock);
tmp = head->next;
for (;;) {
struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -732,10 +741,8 @@
}
__dget_locked(dentry);
dentry->d_vfs_flags |= DCACHE_REFERENCED;
- spin_unlock(&dcache_lock);
return dentry;
}
- spin_unlock(&dcache_lock);
return NULL;
}

diff -Nru -X dontdiff linux-2.5.6-pre3/fs/namei.c linux-2.5.6-pre3-fw/fs/namei.c
--- linux-2.5.6-pre3/fs/namei.c Thu Mar 7 12:53:50 2002
+++ linux-2.5.6-pre3-fw/fs/namei.c Thu Mar 7 15:27:19 2002
@@ -268,8 +268,41 @@
static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
{
struct dentry * dentry = d_lookup(parent, name);
+
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
+ }
+ }
+ return dentry;
+}

+/*for fastwalking*/
+static inline void undo_locked(struct nameidata *nd)
+{
+ if(nd->flags & LOOKUP_LOCKED){
+ dget(nd->dentry);
+ mntget(nd->mnt);
+ spin_unlock(&dcache_lock);
+ nd->flags &= ~LOOKUP_LOCKED;
+ }
+}
+
+/*
+ * For fast path lookup while holding the dcache_lock.
+ * SMP-safe
+ */
+static struct dentry * cached_lookup_nd(struct nameidata * nd, struct qstr * name, int flags)
+{
+ struct dentry * dentry = NULL;
+ if(!nd->flags & LOOKUP_LOCKED)
+ return cached_lookup(nd->dentry, name, flags);
+
+ dentry = __d_lookup(nd->dentry, name);
+
if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ undo_locked(nd);
if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
dput(dentry);
dentry = NULL;
@@ -279,6 +312,34 @@
}

/*
+ * Short-cut version of permission(), for calling by
+ * path_walk(), when dcache lock is held. Combines parts
+ * of permission() and vfs_permission(), and tests ONLY for
+ * MAY_EXEC permission.
+ *
+ * If appropriate, check DAC only. If not appropriate, or
+ * short-cut DAC fails, then call permission() to do more
+ * complete permission check.
+ */
+static inline int exec_permission_lite(struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+
+ if ((inode->i_op && inode->i_op->permission))
+ return -EACCES;
+
+ if (current->fsuid == inode->i_uid)
+ mode >>= 6;
+ else if (in_group_p(inode->i_gid))
+ mode >>= 3;
+
+ if (mode & MAY_EXEC)
+ return 0;
+
+ return -EACCES;
+}
+
+/*
* This is called when everything else fails, and we actually have
* to go to the low-level filesystem to find out what we should do..
*
@@ -472,7 +533,9 @@
struct qstr this;
unsigned int c;

- err = permission(inode, MAY_EXEC);
+ err = exec_permission_lite(inode);
+ if(err)
+ err = permission(inode, MAY_EXEC);
dentry = ERR_PTR(err);
if (err)
break;
@@ -507,6 +570,7 @@
case 2:
if (this.name[1] != '.')
break;
+ undo_locked(nd);
follow_dotdot(nd);
inode = nd->dentry->d_inode;
/* fallthrough */
@@ -523,16 +587,20 @@
break;
}
/* This does the actual lookups.. */
- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
+ dentry = cached_lookup_nd(nd, &this, LOOKUP_CONTINUE);
if (!dentry) {
+ undo_locked(nd);
dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
}
/* Check mountpoints.. */
- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
- ;
+ if(d_mountpoint(dentry)){
+ undo_locked(nd);
+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
+ ;
+ }

err = -ENOENT;
inode = dentry->d_inode;
@@ -543,6 +611,7 @@
goto out_dput;

if (inode->i_op->follow_link) {
+ undo_locked(nd);
err = do_follow_link(dentry, nd);
dput(dentry);
if (err)
@@ -555,7 +624,8 @@
if (!inode->i_op)
break;
} else {
- dput(nd->dentry);
+ if (!nd->flags & LOOKUP_LOCKED)
+ dput(nd->dentry);
nd->dentry = dentry;
}
err = -ENOTDIR;
@@ -575,6 +645,7 @@
case 2:
if (this.name[1] != '.')
break;
+ undo_locked(nd);
follow_dotdot(nd);
inode = nd->dentry->d_inode;
/* fallthrough */
@@ -586,7 +657,8 @@
if (err < 0)
break;
}
- dentry = cached_lookup(nd->dentry, &this, 0);
+ dentry = cached_lookup_nd(nd, &this, 0);
+ undo_locked(nd);
if (!dentry) {
dentry = real_lookup(nd->dentry, &this, 0);
err = PTR_ERR(dentry);
@@ -626,11 +698,14 @@
else if (this.len == 2 && this.name[1] == '.')
nd->last_type = LAST_DOTDOT;
return_base:
+ undo_locked(nd);
return 0;
out_dput:
+ undo_locked(nd);
dput(dentry);
break;
}
+ undo_locked(nd);
path_release(nd);
return_err:
return err;
@@ -734,6 +809,36 @@
nd->dentry = dget(current->fs->pwd);
read_unlock(&current->fs->lock);
return 1;
+}
+
+int path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
+{
+ nd->last_type = LAST_ROOT; /* if there are only slashes... */
+ nd->flags = flags;
+ if (*name=='/'){
+ read_lock(&current->fs->lock);
+ if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+ nd->mnt = mntget(current->fs->altrootmnt);
+ nd->dentry = dget(current->fs->altroot);
+ read_unlock(&current->fs->lock);
+ if (__emul_lookup_dentry(name,nd))
+ return 0;
+ read_lock(&current->fs->lock);
+ }
+ spin_lock(&dcache_lock); /*to avoid cacheline bouncing with d_count*/
+ nd->mnt = current->fs->rootmnt;
+ nd->dentry = current->fs->root;
+ read_unlock(&current->fs->lock);
+ }
+ else{
+ read_lock(&current->fs->lock);
+ spin_lock(&dcache_lock);
+ nd->mnt = current->fs->pwdmnt;
+ nd->dentry = current->fs->pwd;
+ read_unlock(&current->fs->lock);
+ }
+ nd->flags |= LOOKUP_LOCKED;
+ return (path_walk(name, nd));
}

/*
diff -Nru -X dontdiff linux-2.5.6-pre3/include/linux/dcache.h linux-2.5.6-pre3-fw/include/linux/dcache.h
--- linux-2.5.6-pre3/include/linux/dcache.h Thu Mar 7 12:53:52 2002
+++ linux-2.5.6-pre3-fw/include/linux/dcache.h Thu Mar 7 12:40:30 2002
@@ -220,6 +220,7 @@

/* appendix may either be NULL or be used for transname suffixes */
extern struct dentry * d_lookup(struct dentry *, struct qstr *);
+extern struct dentry * __d_lookup(struct dentry *, struct qstr *);

/* validate "insecure" dentry pointer */
extern int d_validate(struct dentry *, struct dentry *);
diff -Nru -X dontdiff linux-2.5.6-pre3/include/linux/fs.h linux-2.5.6-pre3-fw/include/linux/fs.h
--- linux-2.5.6-pre3/include/linux/fs.h Thu Mar 7 12:53:52 2002
+++ linux-2.5.6-pre3-fw/include/linux/fs.h Thu Mar 7 15:42:54 2002
@@ -1273,12 +1273,15 @@
* - require a directory
* - ending slashes ok even for nonexistent files
* - internal "there are more path compnents" flag
+ * - locked when lookup done with dcache_lock held
*/
#define LOOKUP_FOLLOW (1)
#define LOOKUP_DIRECTORY (2)
#define LOOKUP_CONTINUE (4)
#define LOOKUP_PARENT (16)
#define LOOKUP_NOALT (32)
+#define LOOKUP_LOCKED (64)
+
/*
* Type of the last component on LOOKUP_PARENT
*/
@@ -1309,13 +1312,7 @@
extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
-static inline int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
-{
- int error = 0;
- if (path_init(path, flags, nd))
- error = path_walk(path, nd);
- return error;
-}
+extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
extern void path_release(struct nameidata *);
extern int follow_down(struct vfsmount **, struct dentry **);
extern int follow_up(struct vfsmount **, struct dentry **);


2002-03-08 21:28:56

by Paul Menage

[permalink] [raw]
Subject: Re: [PATCH] 2.5.6-pre3 Fast Walk Dcache


In article <[email protected]>,
you write:
>
>Changed path_lookup to hold the dcache_lock instead of incrementing the
>d_count reference counter while walking the path as long as the desired
>dentry's are found in the dcache. Dave Olien wrote permission_exec_lite.
>These ideas came from Al Viro to decrease cacheline bouncing.

Some points:

1) You're missing parentheses in cached_lookup_nd() and path_lookup():

if(!nd->flags & LOOKUP_LOCKED)
return cached_lookup(nd->dentry, name, flags);

...

if (!nd->flags & LOOKUP_LOCKED)
dput(nd->dentry);


! binds closer than binary &, so the tests will never be true (and gcc
probably optimises the entire tests/calls away).

2) Since cached_lookup_nd() calls __d_lookup() and hence
__dget_locked(), it's not clear how you actually avoid incrementing the
d_count values of the dentries, other than the root/cwd dentries. Can
you explain the logic in a little more detail?

e.g. if you do path_lookup("/usr/bin", 0, nd), this translates into
(substituting names for dentries for readability ...)

path_walk("/usr/bin", nd)
cached_lookup("/", "usr", LOOKUP_CONTINUE)
__d_lookup("/", "usr")
__dget_locked("/usr")
atomic_inc(&"/usr"->d_count)

3) If you replace walk_init_root() and path_lookup() with something
like the following, you can pull the ugliness of walk_init_root() out
of path_lookup(). Basically, make walk_init_root() recognise
LOOKUP_LOCKED and take the dcache_lock rather than grabbing refcounts.
walk_init_root() drops the LOOKUP_LOCKED flag if necessary while
calling __emul_lookup_dentry() to avoid additional complexity. If
walk_init_root() returns 0, then the dcache lock wasn't taken,
regardless of whether the nd.flags had LOOKUP_LOCKED set.

static inline int
walk_init_root(const char *name, struct nameidata *nd)
{
unsigned int flags = nd->flags;
read_lock(&current->fs->lock);
if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {

if(flags & LOOKUP_LOCKED)
nd->flags &= ~LOOKUP_LOCKED;

nd->mnt = mntget(current->fs->altrootmnt);
nd->dentry = dget(current->fs->altroot);
read_unlock(&current->fs->lock);
if (__emul_lookup_dentry(name,nd))
return 0;

if(flags & LOOKUP_LOCKED)
nd->flags = flags;

read_lock(&current->fs->lock);
}
nd->mnt = current->fs->rootmnt;
nd->dentry = current->fs->root;
if(flags & LOOKUP_LOCKED) {
read_lock(&dcache_lock);
} else {
mntget(nd->mnt);
dget(nd->dentry);
}
read_unlock(&current->fs->lock);
return 1;
}

...

int path_lookup(const char *name, unsigned int flags, struct nameidata
*nd)
{
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags | LOOKUP_LOCKED;
if (*name=='/'){
if(!walk_init_root(name, nd))
return 0;
} else{
read_lock(&current->fs->lock);
spin_lock(&dcache_lock);
nd->mnt = current->fs->pwdmnt;
nd->dentry = current->fs->pwd;
read_unlock(&current->fs->lock);
}
return (path_walk(name, nd));
}

Paul

2002-03-09 01:29:45

by Hanna Linder

[permalink] [raw]
Subject: Re: [PATCH] 2.5.6-pre3 Fast Walk Dcache


--On Friday, March 08, 2002 13:28:23 -0800 Paul Menage <[email protected]> wrote:

>
> 1) You're missing parentheses in cached_lookup_nd() and path_lookup():
>
Oops. Operator precedence strikes again. They are in
cached_lookup_nd() and link_path_walk(). I have made
the changes.

> 2) Since cached_lookup_nd() calls __d_lookup() and hence
> __dget_locked(), it's not clear how you actually avoid incrementing the
> d_count values of the dentries, other than the root/cwd dentries. Can
> you explain the logic in a little more detail?

The frequency with which root/cwd dentries are in any given path
probably cause the majority of the cacheline bouncing of d_count.
So the logic there is clear. However, you make a good point and
I'm looking at this.

> 3) If you replace walk_init_root() and path_lookup() with something
> like the following, you can pull the ugliness of walk_init_root() out
> of path_lookup(). Basically, make walk_init_root() recognise
> LOOKUP_LOCKED and take the dcache_lock rather than grabbing refcounts.
> walk_init_root() drops the LOOKUP_LOCKED flag if necessary while
> calling __emul_lookup_dentry() to avoid additional complexity. If
> walk_init_root() returns 0, then the dcache lock wasn't taken,
> regardless of whether the nd.flags had LOOKUP_LOCKED set.
>
> static inline int
> walk_init_root(const char *name, struct nameidata *nd)
> {
> unsigned int flags = nd->flags;
> read_lock(&current->fs->lock);
> if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
>
> if(flags & LOOKUP_LOCKED)
> nd->flags &= ~LOOKUP_LOCKED;
>
> nd->mnt = mntget(current->fs->altrootmnt);
> nd->dentry = dget(current->fs->altroot);

The first reaction I have is that it breaks the consistancy
between a flag and what the flag represents. Having the dcache_lock
held without the LOOKUP_LOCKED flag in this part of the code might
cause deadlocks or lead to hard-to-maintain code.

I appreciate you taking the time to provide such thoughtful and
deatailed comments and I will look at the whole patch again with
these comments in mind. Next week expect a new and improved
version!

Thanks.

Hanna





2002-03-09 02:04:00

by Paul Menage

[permalink] [raw]
Subject: Re: [PATCH] 2.5.6 Fast Walk Dcache (improved)


> The frequency with which root/cwd dentries are in any given path
> probably cause the majority of the cacheline bouncing of d_count.
> So the logic there is clear. However, you make a good point and
> I'm looking at this.

See the revised patch below for an implementation of this. Things like
/usr, /bin, /etc seem likely to have very hot activity on d_count
currently, so it makes sense to cut that out if practical.

> The first reaction I have is that it breaks the consistancy
> between a flag and what the flag represents. Having the dcache_lock
> held without the LOOKUP_LOCKED flag in this part of the code might
> cause deadlocks or lead to hard-to-maintain code.

It's the other way around - it's possible to be in walk_init_root() with
(nd->flags & LOOKUP_LOCKED) but not holding the dcache lock. But that
applies only within the walk_init_root() function itself - any further
function calls from within walk_init_root() can rely on the invariant
that LOOKUP_LOCKED implies dcache_lock held, since walk_init_root()
takes care not to call any other functions while LOOKUP_LOCKED is set.

I've reworked the patch somewhat to give the following features:

- don't take any reference counts at all while we hold the dcache_lock

- only set the DCACHE_REFERENCED bit in d_vfs_flags if we're actually
changing it, hopefully removing another cache line bounce (although at
the cost of a possible branch misprediction - I'm not sure which is
worse). Also, it might make sense to move d_vfs_flags to a different
cache line - it's currently surrounded by heavily referenced static
stuff, which is going to suffer cache traffic due to changes in
the DCACHE_REFERENCED flag.

- hide the altroot stuff within walk_init_root() as mentioned above

- track stats of fast walks (dentries where we skip the dget() against
slow walks (dentries where we are forced to do a dget()) in
/proc/sys/fs/dentry_state. Note that this in its current state provides
a big cacheline bounce and so slightly defeats the point of the patch,
but once all the details are sorted out it can be relegated to debugging
code. I'm getting the vast majority of transitions being fast rather
than slow - 10 to 1 following a boot and some filesystem activity.

# cat /proc/sys/fs/dentry-state
9374 8642 45 0 54170 5601
# cat /proc/sys/fs/dentry-state
9374 8642 45 0 55915 5676
# cat /proc/sys/fs/dentry-state
9374 8642 45 0 55979 5679

In the case above, the only slow transitions are caused by looking up
/proc/sys/fd/dentry-state itself, due to /proc being on a separate
mountpoint and having its own permission() function. In principle I
think it ought to be possible to do fast transitions over mount points,
although it'll probably require some special casing in __follow_down().

Al, I'd appreciate your comments on these changes to Hanna's patch.


fs/dcache.c | 43 +++++++++-------
fs/namei.c | 130 ++++++++++++++++++++++++++++++++++++++++++++-----
include/linux/dcache.h | 4 +
include/linux/fs.h | 11 +---
4 files changed, 152 insertions(+), 36 deletions(-)

diff -daur linux-2.5.6/fs/dcache.c linux-2.5.6.dcache/fs/dcache.c
--- linux-2.5.6/fs/dcache.c Fri Mar 8 10:34:58 2002
+++ linux-2.5.6.dcache/fs/dcache.c Fri Mar 8 17:07:40 2002
@@ -691,18 +691,7 @@
return dentry_hashtable + (hash & D_HASHMASK);
}

-/**
- * d_lookup - search for a dentry
- * @parent: parent dentry
- * @name: qstr of name we wish to find
- *
- * Searches the children of the parent dentry for the name in question. If
- * the dentry is found its reference count is incremented and the dentry
- * is returned. The caller must use d_put to free the entry when it has
- * finished using it. %NULL is returned on failure.
- */
-
-struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
@@ -710,7 +699,6 @@
struct list_head *head = d_hash(parent,hash);
struct list_head *tmp;

- spin_lock(&dcache_lock);
tmp = head->next;
for (;;) {
struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -730,16 +718,37 @@
if (memcmp(dentry->d_name.name, str, len))
continue;
}
- __dget_locked(dentry);
- dentry->d_vfs_flags |= DCACHE_REFERENCED;
- spin_unlock(&dcache_lock);
+ if(!(dentry->d_vfs_flags & DCACHE_REFERENCED)) {
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ }
return dentry;
}
- spin_unlock(&dcache_lock);
return NULL;
}

/**
+ * d_lookup - search for a dentry
+ * @parent: parent dentry
+ * @name: qstr of name we wish to find
+ *
+ * Searches the children of the parent dentry for the name in question. If
+ * the dentry is found its reference count is incremented and the dentry
+ * is returned. The caller must use d_put to free the entry when it has
+ * finished using it. %NULL is returned on failure.
+ */
+
+struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+{
+ struct dentry * dentry;
+ spin_lock(&dcache_lock);
+ dentry = __d_lookup(parent,name);
+ if(dentry)
+ __dget_locked(dentry);
+ spin_unlock(&dcache_lock);
+ return dentry;
+}
+
+/**
* d_validate - verify dentry provided from insecure source
* @dentry: The dentry alleged to be valid child of @dparent
* @dparent: The parent dentry (known to be valid)
diff -daur linux-2.5.6/fs/namei.c linux-2.5.6.dcache/fs/namei.c
--- linux-2.5.6/fs/namei.c Fri Mar 8 10:34:58 2002
+++ linux-2.5.6.dcache/fs/namei.c Fri Mar 8 17:16:30 2002
@@ -265,11 +265,37 @@
* Internal lookup() using the new generic dcache.
* SMP-safe
*/
-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
+
+/*for fastwalking*/
+static void __undo_locked(struct nameidata *nd, struct dentry *dentry) {
+ dget_locked(nd->dentry);
+ if(dentry)
+ dget_locked(dentry);
+ mntget(nd->mnt);
+ spin_unlock(&dcache_lock);
+ nd->flags &= ~LOOKUP_LOCKED;
+}
+
+static inline void undo_locked(struct nameidata *nd, struct dentry *dentry)
{
- struct dentry * dentry = d_lookup(parent, name);
+ if(nd->flags & LOOKUP_LOCKED)
+ __undo_locked(nd, dentry);
+}
+
+/*
+ * For fast path lookup while holding the dcache_lock.
+ * SMP-safe
+ */
+static struct dentry * cached_lookup(struct nameidata * nd, struct qstr * name, int flags)
+{
+ struct dentry * dentry = NULL;
+ if(nd->flags & LOOKUP_LOCKED)
+ dentry = __d_lookup(nd->dentry, name);
+ else
+ dentry = d_lookup(nd->dentry, name);

if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ undo_locked(nd, dentry);
if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
dput(dentry);
dentry = NULL;
@@ -279,6 +305,34 @@
}

/*
+ * Short-cut version of permission(), for calling by
+ * path_walk(), when dcache lock is held. Combines parts
+ * of permission() and vfs_permission(), and tests ONLY for
+ * MAY_EXEC permission.
+ *
+ * If appropriate, check DAC only. If not appropriate, or
+ * short-cut DAC fails, then call permission() to do more
+ * complete permission check.
+ */
+static inline int exec_permission(struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+
+ if ((inode->i_op && inode->i_op->permission))
+ return -EACCES;
+
+ if (current->fsuid == inode->i_uid)
+ mode >>= 6;
+ else if (in_group_p(inode->i_gid))
+ mode >>= 3;
+
+ if (mode & MAY_EXEC)
+ return 0;
+
+ return -EACCES;
+}
+
+/*
* This is called when everything else fails, and we actually have
* to go to the low-level filesystem to find out what we should do..
*
@@ -472,7 +526,11 @@
struct qstr this;
unsigned int c;

- err = permission(inode, MAY_EXEC);
+ err = exec_permission_lite(inode);
+ if(err) {
+ undo_locked(nd, NULL);
+ err = permission(inode, MAY_EXEC);
+ }
dentry = ERR_PTR(err);
if (err)
break;
@@ -507,6 +565,7 @@
case 2:
if (this.name[1] != '.')
break;
+ undo_locked(nd, NULL);
follow_dotdot(nd);
inode = nd->dentry->d_inode;
/* fallthrough */
@@ -523,16 +582,20 @@
break;
}
/* This does the actual lookups.. */
- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
+ dentry = cached_lookup(nd, &this, LOOKUP_CONTINUE);
if (!dentry) {
+ undo_locked(nd, NULL);
dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
}
/* Check mountpoints.. */
- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
- ;
+ if(d_mountpoint(dentry)){
+ undo_locked(nd, dentry);
+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
+ ;
+ }

err = -ENOENT;
inode = dentry->d_inode;
@@ -543,6 +606,7 @@
goto out_dput;

if (inode->i_op->follow_link) {
+ undo_locked(nd, dentry);
err = do_follow_link(dentry, nd);
dput(dentry);
if (err)
@@ -555,7 +619,12 @@
if (!inode->i_op)
break;
} else {
- dput(nd->dentry);
+ if (!(nd->flags & LOOKUP_LOCKED)) {
+ dentry_stat.slowwalks ++;
+ dput(nd->dentry);
+ } else {
+ dentry_stat.fastwalks ++;
+ }
nd->dentry = dentry;
}
err = -ENOTDIR;
@@ -575,6 +644,7 @@
case 2:
if (this.name[1] != '.')
break;
+ undo_locked(nd, NULL);
follow_dotdot(nd);
inode = nd->dentry->d_inode;
/* fallthrough */
@@ -586,7 +656,8 @@
if (err < 0)
break;
}
- dentry = cached_lookup(nd->dentry, &this, 0);
+ dentry = cached_lookup(nd, &this, 0);
+ undo_locked(nd, dentry);
if (!dentry) {
dentry = real_lookup(nd->dentry, &this, 0);
err = PTR_ERR(dentry);
@@ -626,11 +697,14 @@
else if (this.len == 2 && this.name[1] == '.')
nd->last_type = LAST_DOTDOT;
return_base:
+ undo_locked(nd, NULL);
return 0;
out_dput:
+ undo_locked(nd, dentry);
dput(dentry);
break;
}
+ undo_locked(nd, dentry);
path_release(nd);
return_err:
return err;
@@ -707,17 +781,30 @@
static inline int
walk_init_root(const char *name, struct nameidata *nd)
{
+ unsigned int flags = nd->flags;
read_lock(&current->fs->lock);
if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+
+ nd->flags &= ~LOOKUP_LOCKED;
+
nd->mnt = mntget(current->fs->altrootmnt);
nd->dentry = dget(current->fs->altroot);
read_unlock(&current->fs->lock);
if (__emul_lookup_dentry(name,nd))
return 0;
+
+ nd->flags = flags;
+
read_lock(&current->fs->lock);
}
- nd->mnt = mntget(current->fs->rootmnt);
- nd->dentry = dget(current->fs->root);
+ nd->mnt = current->fs->rootmnt;
+ nd->dentry = current->fs->root;
+ if(flags & LOOKUP_LOCKED) {
+ read_lock(&dcache_lock);
+ } else {
+ mntget(nd->mnt);
+ dget(nd->dentry);
+ }
read_unlock(&current->fs->lock);
return 1;
}
@@ -736,6 +823,23 @@
return 1;
}

+int path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
+{
+ nd->last_type = LAST_ROOT; /* if there are only slashes... */
+ nd->flags = flags | LOOKUP_LOCKED;
+ if (*name=='/'){
+ if(!walk_init_root(name, nd))
+ return 0;
+ } else{
+ read_lock(&current->fs->lock);
+ spin_lock(&dcache_lock);
+ nd->mnt = current->fs->pwdmnt;
+ nd->dentry = current->fs->pwd;
+ read_unlock(&current->fs->lock);
+ }
+ return (path_walk(name, nd));
+}
+
/*
* Restricted form of lookup. Doesn't follow links, single-component only,
* needs parent already locked. Doesn't follow mounts.
@@ -744,6 +848,7 @@
struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
{
struct dentry * dentry;
+ struct nameidata nd;
struct inode *inode;
int err;

@@ -753,6 +858,9 @@
if (err)
goto out;

+ nd.dentry = base;
+ nd.flags = 0;
+
/*
* See if the low-level filesystem might want
* to use its own hash..
@@ -764,7 +872,7 @@
goto out;
}

- dentry = cached_lookup(base, name, 0);
+ dentry = cached_lookup(&nd, name, 0);
if (!dentry) {
struct dentry *new = d_alloc(base, name);
dentry = ERR_PTR(-ENOMEM);
diff -daur linux-2.5.6/include/linux/dcache.h linux-2.5.6.dcache/include/linux/dcache.h
--- linux-2.5.6/include/linux/dcache.h Fri Mar 8 10:34:59 2002
+++ linux-2.5.6.dcache/include/linux/dcache.h Fri Mar 8 17:07:09 2002
@@ -33,7 +33,8 @@
int nr_unused;
int age_limit; /* age in seconds */
int want_pages; /* pages requested by system */
- int dummy[2];
+ int fastwalks;
+ int slowwalks;
};
extern struct dentry_stat_t dentry_stat;

@@ -220,6 +221,7 @@

/* appendix may either be NULL or be used for transname suffixes */
extern struct dentry * d_lookup(struct dentry *, struct qstr *);
+extern struct dentry * __d_lookup(struct dentry *, struct qstr *);

/* validate "insecure" dentry pointer */
extern int d_validate(struct dentry *, struct dentry *);
diff -daur linux-2.5.6/include/linux/fs.h linux-2.5.6.dcache/include/linux/fs.h
--- linux-2.5.6/include/linux/fs.h Fri Mar 8 10:34:59 2002
+++ linux-2.5.6.dcache/include/linux/fs.h Fri Mar 8 16:33:42 2002
@@ -1273,12 +1273,15 @@
* - require a directory
* - ending slashes ok even for nonexistent files
* - internal "there are more path compnents" flag
+ * - locked when lookup done with dcache_lock held
*/
#define LOOKUP_FOLLOW (1)
#define LOOKUP_DIRECTORY (2)
#define LOOKUP_CONTINUE (4)
#define LOOKUP_PARENT (16)
#define LOOKUP_NOALT (32)
+#define LOOKUP_LOCKED (64)
+
/*
* Type of the last component on LOOKUP_PARENT
*/
@@ -1309,13 +1312,7 @@
extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
-static inline int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
-{
- int error = 0;
- if (path_init(path, flags, nd))
- error = path_walk(path, nd);
- return error;
-}
+extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
extern void path_release(struct nameidata *);
extern int follow_down(struct vfsmount **, struct dentry **);
extern int follow_up(struct vfsmount **, struct dentry **);


2002-03-09 07:19:05

by Paul Menage

[permalink] [raw]
Subject: Re: [PATCH] 2.5.6 Fast Walk Dcache (improved)

>I've reworked the patch somewhat to give the following features:
>

Oops - that was a slightly non-functional version of the patch, as
exec_permission_lite() had somehow got renamed to exec_permission().

Here's the correct one.

diff -daur linux-2.5.6/fs/dcache.c linux-2.5.6.dcache/fs/dcache.c
--- linux-2.5.6/fs/dcache.c Fri Mar 8 10:34:58 2002
+++ linux-2.5.6.dcache/fs/dcache.c Fri Mar 8 17:07:40 2002
@@ -691,18 +691,7 @@
return dentry_hashtable + (hash & D_HASHMASK);
}

-/**
- * d_lookup - search for a dentry
- * @parent: parent dentry
- * @name: qstr of name we wish to find
- *
- * Searches the children of the parent dentry for the name in question. If
- * the dentry is found its reference count is incremented and the dentry
- * is returned. The caller must use d_put to free the entry when it has
- * finished using it. %NULL is returned on failure.
- */
-
-struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
@@ -710,7 +699,6 @@
struct list_head *head = d_hash(parent,hash);
struct list_head *tmp;

- spin_lock(&dcache_lock);
tmp = head->next;
for (;;) {
struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -730,16 +718,37 @@
if (memcmp(dentry->d_name.name, str, len))
continue;
}
- __dget_locked(dentry);
- dentry->d_vfs_flags |= DCACHE_REFERENCED;
- spin_unlock(&dcache_lock);
+ if(!(dentry->d_vfs_flags & DCACHE_REFERENCED)) {
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ }
return dentry;
}
- spin_unlock(&dcache_lock);
return NULL;
}

/**
+ * d_lookup - search for a dentry
+ * @parent: parent dentry
+ * @name: qstr of name we wish to find
+ *
+ * Searches the children of the parent dentry for the name in question. If
+ * the dentry is found its reference count is incremented and the dentry
+ * is returned. The caller must use d_put to free the entry when it has
+ * finished using it. %NULL is returned on failure.
+ */
+
+struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+{
+ struct dentry * dentry;
+ spin_lock(&dcache_lock);
+ dentry = __d_lookup(parent,name);
+ if(dentry)
+ __dget_locked(dentry);
+ spin_unlock(&dcache_lock);
+ return dentry;
+}
+
+/**
* d_validate - verify dentry provided from insecure source
* @dentry: The dentry alleged to be valid child of @dparent
* @dparent: The parent dentry (known to be valid)
diff -daur linux-2.5.6/fs/namei.c linux-2.5.6.dcache/fs/namei.c
--- linux-2.5.6/fs/namei.c Fri Mar 8 10:34:58 2002
+++ linux-2.5.6.dcache/fs/namei.c Fri Mar 8 17:16:30 2002
@@ -265,11 +265,37 @@
* Internal lookup() using the new generic dcache.
* SMP-safe
*/
-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
+
+/*for fastwalking*/
+static void __undo_locked(struct nameidata *nd, struct dentry *dentry) {
+ dget_locked(nd->dentry);
+ if(dentry)
+ dget_locked(dentry);
+ mntget(nd->mnt);
+ spin_unlock(&dcache_lock);
+ nd->flags &= ~LOOKUP_LOCKED;
+}
+
+static inline void undo_locked(struct nameidata *nd, struct dentry *dentry)
{
- struct dentry * dentry = d_lookup(parent, name);
+ if(nd->flags & LOOKUP_LOCKED)
+ __undo_locked(nd, dentry);
+}
+
+/*
+ * For fast path lookup while holding the dcache_lock.
+ * SMP-safe
+ */
+static struct dentry * cached_lookup(struct nameidata * nd, struct qstr * name, int flags)
+{
+ struct dentry * dentry = NULL;
+ if(nd->flags & LOOKUP_LOCKED)
+ dentry = __d_lookup(nd->dentry, name);
+ else
+ dentry = d_lookup(nd->dentry, name);

if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ undo_locked(nd, dentry);
if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
dput(dentry);
dentry = NULL;
@@ -279,6 +305,34 @@
}

/*
+ * Short-cut version of permission(), for calling by
+ * path_walk(), when dcache lock is held. Combines parts
+ * of permission() and vfs_permission(), and tests ONLY for
+ * MAY_EXEC permission.
+ *
+ * If appropriate, check DAC only. If not appropriate, or
+ * short-cut DAC fails, then call permission() to do more
+ * complete permission check.
+ */
+static inline int exec_permission_lite(struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+
+ if ((inode->i_op && inode->i_op->permission))
+ return -EACCES;
+
+ if (current->fsuid == inode->i_uid)
+ mode >>= 6;
+ else if (in_group_p(inode->i_gid))
+ mode >>= 3;
+
+ if (mode & MAY_EXEC)
+ return 0;
+
+ return -EACCES;
+}
+
+/*
* This is called when everything else fails, and we actually have
* to go to the low-level filesystem to find out what we should do..
*
@@ -472,7 +526,11 @@
struct qstr this;
unsigned int c;

- err = permission(inode, MAY_EXEC);
+ err = exec_permission_lite(inode);
+ if(err) {
+ undo_locked(nd, NULL);
+ err = permission(inode, MAY_EXEC);
+ }
dentry = ERR_PTR(err);
if (err)
break;
@@ -507,6 +565,7 @@
case 2:
if (this.name[1] != '.')
break;
+ undo_locked(nd, NULL);
follow_dotdot(nd);
inode = nd->dentry->d_inode;
/* fallthrough */
@@ -523,16 +582,20 @@
break;
}
/* This does the actual lookups.. */
- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
+ dentry = cached_lookup(nd, &this, LOOKUP_CONTINUE);
if (!dentry) {
+ undo_locked(nd, NULL);
dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
}
/* Check mountpoints.. */
- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
- ;
+ if(d_mountpoint(dentry)){
+ undo_locked(nd, dentry);
+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
+ ;
+ }

err = -ENOENT;
inode = dentry->d_inode;
@@ -543,6 +606,7 @@
goto out_dput;

if (inode->i_op->follow_link) {
+ undo_locked(nd, dentry);
err = do_follow_link(dentry, nd);
dput(dentry);
if (err)
@@ -555,7 +619,12 @@
if (!inode->i_op)
break;
} else {
- dput(nd->dentry);
+ if (!(nd->flags & LOOKUP_LOCKED)) {
+ dentry_stat.slowwalks ++;
+ dput(nd->dentry);
+ } else {
+ dentry_stat.fastwalks ++;
+ }
nd->dentry = dentry;
}
err = -ENOTDIR;
@@ -575,6 +644,7 @@
case 2:
if (this.name[1] != '.')
break;
+ undo_locked(nd, NULL);
follow_dotdot(nd);
inode = nd->dentry->d_inode;
/* fallthrough */
@@ -586,7 +656,8 @@
if (err < 0)
break;
}
- dentry = cached_lookup(nd->dentry, &this, 0);
+ dentry = cached_lookup(nd, &this, 0);
+ undo_locked(nd, dentry);
if (!dentry) {
dentry = real_lookup(nd->dentry, &this, 0);
err = PTR_ERR(dentry);
@@ -626,11 +697,14 @@
else if (this.len == 2 && this.name[1] == '.')
nd->last_type = LAST_DOTDOT;
return_base:
+ undo_locked(nd, NULL);
return 0;
out_dput:
+ undo_locked(nd, dentry);
dput(dentry);
break;
}
+ undo_locked(nd, dentry);
path_release(nd);
return_err:
return err;
@@ -707,17 +781,30 @@
static inline int
walk_init_root(const char *name, struct nameidata *nd)
{
+ unsigned int flags = nd->flags;
read_lock(&current->fs->lock);
if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+
+ nd->flags &= ~LOOKUP_LOCKED;
+
nd->mnt = mntget(current->fs->altrootmnt);
nd->dentry = dget(current->fs->altroot);
read_unlock(&current->fs->lock);
if (__emul_lookup_dentry(name,nd))
return 0;
+
+ nd->flags = flags;
+
read_lock(&current->fs->lock);
}
- nd->mnt = mntget(current->fs->rootmnt);
- nd->dentry = dget(current->fs->root);
+ nd->mnt = current->fs->rootmnt;
+ nd->dentry = current->fs->root;
+ if(flags & LOOKUP_LOCKED) {
+ read_lock(&dcache_lock);
+ } else {
+ mntget(nd->mnt);
+ dget(nd->dentry);
+ }
read_unlock(&current->fs->lock);
return 1;
}
@@ -736,6 +823,23 @@
return 1;
}

+int path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
+{
+ nd->last_type = LAST_ROOT; /* if there are only slashes... */
+ nd->flags = flags | LOOKUP_LOCKED;
+ if (*name=='/'){
+ if(!walk_init_root(name, nd))
+ return 0;
+ } else{
+ read_lock(&current->fs->lock);
+ spin_lock(&dcache_lock);
+ nd->mnt = current->fs->pwdmnt;
+ nd->dentry = current->fs->pwd;
+ read_unlock(&current->fs->lock);
+ }
+ return (path_walk(name, nd));
+}
+
/*
* Restricted form of lookup. Doesn't follow links, single-component only,
* needs parent already locked. Doesn't follow mounts.
@@ -744,6 +848,7 @@
struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
{
struct dentry * dentry;
+ struct nameidata nd;
struct inode *inode;
int err;

@@ -753,6 +858,9 @@
if (err)
goto out;

+ nd.dentry = base;
+ nd.flags = 0;
+
/*
* See if the low-level filesystem might want
* to use its own hash..
@@ -764,7 +872,7 @@
goto out;
}

- dentry = cached_lookup(base, name, 0);
+ dentry = cached_lookup(&nd, name, 0);
if (!dentry) {
struct dentry *new = d_alloc(base, name);
dentry = ERR_PTR(-ENOMEM);
diff -daur linux-2.5.6/include/linux/dcache.h linux-2.5.6.dcache/include/linux/dcache.h
--- linux-2.5.6/include/linux/dcache.h Fri Mar 8 10:34:59 2002
+++ linux-2.5.6.dcache/include/linux/dcache.h Fri Mar 8 17:07:09 2002
@@ -33,7 +33,8 @@
int nr_unused;
int age_limit; /* age in seconds */
int want_pages; /* pages requested by system */
- int dummy[2];
+ int fastwalks;
+ int slowwalks;
};
extern struct dentry_stat_t dentry_stat;

@@ -220,6 +221,7 @@

/* appendix may either be NULL or be used for transname suffixes */
extern struct dentry * d_lookup(struct dentry *, struct qstr *);
+extern struct dentry * __d_lookup(struct dentry *, struct qstr *);

/* validate "insecure" dentry pointer */
extern int d_validate(struct dentry *, struct dentry *);
diff -daur linux-2.5.6/include/linux/fs.h linux-2.5.6.dcache/include/linux/fs.h
--- linux-2.5.6/include/linux/fs.h Fri Mar 8 10:34:59 2002
+++ linux-2.5.6.dcache/include/linux/fs.h Fri Mar 8 16:33:42 2002
@@ -1273,12 +1273,15 @@
* - require a directory
* - ending slashes ok even for nonexistent files
* - internal "there are more path compnents" flag
+ * - locked when lookup done with dcache_lock held
*/
#define LOOKUP_FOLLOW (1)
#define LOOKUP_DIRECTORY (2)
#define LOOKUP_CONTINUE (4)
#define LOOKUP_PARENT (16)
#define LOOKUP_NOALT (32)
+#define LOOKUP_LOCKED (64)
+
/*
* Type of the last component on LOOKUP_PARENT
*/
@@ -1309,13 +1312,7 @@
extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
-static inline int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
-{
- int error = 0;
- if (path_init(path, flags, nd))
- error = path_walk(path, nd);
- return error;
-}
+extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
extern void path_release(struct nameidata *);
extern int follow_down(struct vfsmount **, struct dentry **);
extern int follow_up(struct vfsmount **, struct dentry **);


2002-03-10 19:49:55

by Hanna Linder

[permalink] [raw]
Subject: Re: [PATCH] 2.5.6 Fast Walk Dcache (improved)


--On Friday, March 08, 2002 6:59 PM -0800 Paul Menage <[email protected]> wrote:

>> I've reworked the patch somewhat to give the following features:
>>
>
> Oops - that was a slightly non-functional version of the patch, as
> exec_permission_lite() had somehow got renamed to exec_permission().
>
> Here's the correct one.
>
> diff -daur linux-2.5.6/fs/dcache.c linux-2.5.6.dcache/fs/dcache.c

Hi Paul,

There seems to be a problem while booting with this patch applied
on an 8-way SMP (.config available). Here is where the boot process stops:

VFS: Mounted root (ext2 filesystem) readonly.
Freeing unused kernel memory: 232k freed
INIT: version 2.78 booting
Welcome to Red Hat Linux
Press 'I' to enter interactive startup.
Mounting proc filesystem: [ OK ]
Configuring kernel parameters: [ OK ]
modprobe: modprobe: Can't open dependencies file /lib/modules/2.5.6/modules.dep (No such file or directory)
Setting clock (localtime): Sun Mar 10 11:10:27 PST 2002 [ OK ]
Loading default keymap (us): [ OK ]

Hanna


2002-03-11 07:13:39

by Paul Menage

[permalink] [raw]
Subject: Re: [PATCH] 2.5.6 Fast Walk Dcache (improved)

>
> There seems to be a problem while booting with this patch applied
>on an 8-way SMP (.config available). Here is where the boot process stops:

I tested the SMP kernel (default config), but only on a UP box. Do you
get that problem on smaller systems, or just the 8-way box?

>Setting clock (localtime): Sun Mar 10 11:10:27 PST 2002 [ OK ]
>Loading default keymap (us): [ OK ]
>

I guess that doing swapon is the next step. But you'd think that if it
was going to die during swapon, the display would read

Activating swap partitions:

rather than nothing after the keymap line.

Paul

2002-03-12 07:17:20

by Paul Menage

[permalink] [raw]
Subject: Re: [PATCH] 2.5.6 Fast Walk Dcache (improved)

> There seems to be a problem while booting with this patch applied
>on an 8-way SMP (.config available). Here is where the boot process stops:

Oops, a "read_lock(&dcache_lock)" crept into walk_init_root(). It
didn't cause any problems in an SMP kernel on a UP box, but reliably
locked up on a 4-way box. (Presumably due to corruption at the first
sign of contention).

New patch below (happily boots on a 4-way system):

diff -aur linux-2.5.6/fs/dcache.c linux-2.5.6.dcache2/fs/dcache.c
--- linux-2.5.6/fs/dcache.c Fri Mar 8 10:34:58 2002
+++ linux-2.5.6.dcache2/fs/dcache.c Sun Mar 10 22:47:45 2002
@@ -691,18 +691,7 @@
return dentry_hashtable + (hash & D_HASHMASK);
}

-/**
- * d_lookup - search for a dentry
- * @parent: parent dentry
- * @name: qstr of name we wish to find
- *
- * Searches the children of the parent dentry for the name in question. If
- * the dentry is found its reference count is incremented and the dentry
- * is returned. The caller must use d_put to free the entry when it has
- * finished using it. %NULL is returned on failure.
- */
-
-struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
@@ -710,7 +699,6 @@
struct list_head *head = d_hash(parent,hash);
struct list_head *tmp;

- spin_lock(&dcache_lock);
tmp = head->next;
for (;;) {
struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
@@ -730,16 +718,37 @@
if (memcmp(dentry->d_name.name, str, len))
continue;
}
- __dget_locked(dentry);
- dentry->d_vfs_flags |= DCACHE_REFERENCED;
- spin_unlock(&dcache_lock);
+ if(!(dentry->d_vfs_flags & DCACHE_REFERENCED)) {
+ dentry->d_vfs_flags |= DCACHE_REFERENCED;
+ }
return dentry;
}
- spin_unlock(&dcache_lock);
return NULL;
}

/**
+ * d_lookup - search for a dentry
+ * @parent: parent dentry
+ * @name: qstr of name we wish to find
+ *
+ * Searches the children of the parent dentry for the name in question. If
+ * the dentry is found its reference count is incremented and the dentry
+ * is returned. The caller must use d_put to free the entry when it has
+ * finished using it. %NULL is returned on failure.
+ */
+
+struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+{
+ struct dentry * dentry;
+ spin_lock(&dcache_lock);
+ dentry = __d_lookup(parent,name);
+ if(dentry)
+ __dget_locked(dentry);
+ spin_unlock(&dcache_lock);
+ return dentry;
+}
+
+/**
* d_validate - verify dentry provided from insecure source
* @dentry: The dentry alleged to be valid child of @dparent
* @dparent: The parent dentry (known to be valid)
diff -aur linux-2.5.6/fs/namei.c linux-2.5.6.dcache2/fs/namei.c
--- linux-2.5.6/fs/namei.c Fri Mar 8 10:34:58 2002
+++ linux-2.5.6.dcache2/fs/namei.c Mon Mar 11 22:54:59 2002
@@ -265,11 +265,37 @@
* Internal lookup() using the new generic dcache.
* SMP-safe
*/
-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
+
+/*for fastwalking*/
+static void __undo_locked(struct nameidata *nd, struct dentry *dentry) {
+ dget_locked(nd->dentry);
+ if(dentry)
+ dget_locked(dentry);
+ mntget(nd->mnt);
+ spin_unlock(&dcache_lock);
+ nd->flags &= ~LOOKUP_LOCKED;
+}
+
+static inline void undo_locked(struct nameidata *nd, struct dentry *dentry)
{
- struct dentry * dentry = d_lookup(parent, name);
+ if(nd->flags & LOOKUP_LOCKED)
+ __undo_locked(nd, dentry);
+}
+
+/*
+ * For fast path lookup while holding the dcache_lock.
+ * SMP-safe
+ */
+static struct dentry * cached_lookup(struct nameidata * nd, struct qstr * name, int flags)
+{
+ struct dentry * dentry = NULL;
+ if(nd->flags & LOOKUP_LOCKED)
+ dentry = __d_lookup(nd->dentry, name);
+ else
+ dentry = d_lookup(nd->dentry, name);

if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ undo_locked(nd, dentry);
if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
dput(dentry);
dentry = NULL;
@@ -279,6 +305,34 @@
}

/*
+ * Short-cut version of permission(), for calling by
+ * path_walk(), when dcache lock is held. Combines parts
+ * of permission() and vfs_permission(), and tests ONLY for
+ * MAY_EXEC permission.
+ *
+ * If appropriate, check DAC only. If not appropriate, or
+ * short-cut DAC fails, then call permission() to do more
+ * complete permission check.
+ */
+static inline int exec_permission_lite(struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+
+ if ((inode->i_op && inode->i_op->permission))
+ return -EACCES;
+
+ if (current->fsuid == inode->i_uid)
+ mode >>= 6;
+ else if (in_group_p(inode->i_gid))
+ mode >>= 3;
+
+ if (mode & MAY_EXEC)
+ return 0;
+
+ return -EACCES;
+}
+
+/*
* This is called when everything else fails, and we actually have
* to go to the low-level filesystem to find out what we should do..
*
@@ -472,7 +526,11 @@
struct qstr this;
unsigned int c;

- err = permission(inode, MAY_EXEC);
+ err = exec_permission_lite(inode);
+ if(err) {
+ undo_locked(nd, NULL);
+ err = permission(inode, MAY_EXEC);
+ }
dentry = ERR_PTR(err);
if (err)
break;
@@ -507,6 +565,7 @@
case 2:
if (this.name[1] != '.')
break;
+ undo_locked(nd, NULL);
follow_dotdot(nd);
inode = nd->dentry->d_inode;
/* fallthrough */
@@ -523,16 +582,20 @@
break;
}
/* This does the actual lookups.. */
- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
+ dentry = cached_lookup(nd, &this, LOOKUP_CONTINUE);
if (!dentry) {
+ undo_locked(nd, NULL);
dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
}
/* Check mountpoints.. */
- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
- ;
+ if(d_mountpoint(dentry)){
+ undo_locked(nd, dentry);
+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
+ ;
+ }

err = -ENOENT;
inode = dentry->d_inode;
@@ -543,6 +606,7 @@
goto out_dput;

if (inode->i_op->follow_link) {
+ undo_locked(nd, dentry);
err = do_follow_link(dentry, nd);
dput(dentry);
if (err)
@@ -555,7 +619,12 @@
if (!inode->i_op)
break;
} else {
- dput(nd->dentry);
+ if (!(nd->flags & LOOKUP_LOCKED)) {
+ dentry_stat.slowwalks ++;
+ dput(nd->dentry);
+ } else {
+ dentry_stat.fastwalks ++;
+ }
nd->dentry = dentry;
}
err = -ENOTDIR;
@@ -575,6 +644,7 @@
case 2:
if (this.name[1] != '.')
break;
+ undo_locked(nd, NULL);
follow_dotdot(nd);
inode = nd->dentry->d_inode;
/* fallthrough */
@@ -586,7 +656,8 @@
if (err < 0)
break;
}
- dentry = cached_lookup(nd->dentry, &this, 0);
+ dentry = cached_lookup(nd, &this, 0);
+ undo_locked(nd, dentry);
if (!dentry) {
dentry = real_lookup(nd->dentry, &this, 0);
err = PTR_ERR(dentry);
@@ -626,11 +697,14 @@
else if (this.len == 2 && this.name[1] == '.')
nd->last_type = LAST_DOTDOT;
return_base:
+ undo_locked(nd, NULL);
return 0;
out_dput:
+ undo_locked(nd, dentry);
dput(dentry);
break;
}
+ undo_locked(nd, dentry);
path_release(nd);
return_err:
return err;
@@ -707,17 +781,30 @@
static inline int
walk_init_root(const char *name, struct nameidata *nd)
{
+ unsigned int flags = nd->flags;
read_lock(&current->fs->lock);
if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+
+ nd->flags &= ~LOOKUP_LOCKED;
+
nd->mnt = mntget(current->fs->altrootmnt);
nd->dentry = dget(current->fs->altroot);
read_unlock(&current->fs->lock);
if (__emul_lookup_dentry(name,nd))
return 0;
+
+ nd->flags = flags;
+
read_lock(&current->fs->lock);
}
- nd->mnt = mntget(current->fs->rootmnt);
- nd->dentry = dget(current->fs->root);
+ nd->mnt = current->fs->rootmnt;
+ nd->dentry = current->fs->root;
+ if(flags & LOOKUP_LOCKED) {
+ spin_lock(&dcache_lock);
+ } else {
+ mntget(nd->mnt);
+ dget(nd->dentry);
+ }
read_unlock(&current->fs->lock);
return 1;
}
@@ -736,6 +823,23 @@
return 1;
}

+int path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
+{
+ nd->last_type = LAST_ROOT; /* if there are only slashes... */
+ nd->flags = flags | LOOKUP_LOCKED;
+ if (*name=='/'){
+ if(!walk_init_root(name, nd))
+ return 0;
+ } else{
+ read_lock(&current->fs->lock);
+ spin_lock(&dcache_lock);
+ nd->mnt = current->fs->pwdmnt;
+ nd->dentry = current->fs->pwd;
+ read_unlock(&current->fs->lock);
+ }
+ return (path_walk(name, nd));
+}
+
/*
* Restricted form of lookup. Doesn't follow links, single-component only,
* needs parent already locked. Doesn't follow mounts.
@@ -744,6 +848,7 @@
struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
{
struct dentry * dentry;
+ struct nameidata nd;
struct inode *inode;
int err;

@@ -753,6 +858,9 @@
if (err)
goto out;

+ nd.dentry = base;
+ nd.flags = 0;
+
/*
* See if the low-level filesystem might want
* to use its own hash..
@@ -764,7 +872,7 @@
goto out;
}

- dentry = cached_lookup(base, name, 0);
+ dentry = cached_lookup(&nd, name, 0);
if (!dentry) {
struct dentry *new = d_alloc(base, name);
dentry = ERR_PTR(-ENOMEM);
diff -aur linux-2.5.6/include/linux/dcache.h linux-2.5.6.dcache2/include/linux/dcache.h
--- linux-2.5.6/include/linux/dcache.h Fri Mar 8 10:34:59 2002
+++ linux-2.5.6.dcache2/include/linux/dcache.h Sun Mar 10 22:50:05 2002
@@ -33,7 +33,8 @@
int nr_unused;
int age_limit; /* age in seconds */
int want_pages; /* pages requested by system */
- int dummy[2];
+ int fastwalks;
+ int slowwalks;
};
extern struct dentry_stat_t dentry_stat;

@@ -220,6 +221,7 @@

/* appendix may either be NULL or be used for transname suffixes */
extern struct dentry * d_lookup(struct dentry *, struct qstr *);
+extern struct dentry * __d_lookup(struct dentry *, struct qstr *);

/* validate "insecure" dentry pointer */
extern int d_validate(struct dentry *, struct dentry *);
diff -aur linux-2.5.6/include/linux/fs.h linux-2.5.6.dcache2/include/linux/fs.h
--- linux-2.5.6/include/linux/fs.h Fri Mar 8 10:34:59 2002
+++ linux-2.5.6.dcache2/include/linux/fs.h Mon Mar 11 18:04:35 2002
@@ -1273,12 +1273,15 @@
* - require a directory
* - ending slashes ok even for nonexistent files
* - internal "there are more path compnents" flag
+ * - locked when lookup done with dcache_lock held
*/
#define LOOKUP_FOLLOW (1)
#define LOOKUP_DIRECTORY (2)
#define LOOKUP_CONTINUE (4)
#define LOOKUP_PARENT (16)
#define LOOKUP_NOALT (32)
+#define LOOKUP_LOCKED (64)
+
/*
* Type of the last component on LOOKUP_PARENT
*/
@@ -1309,13 +1312,7 @@
extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
-static inline int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
-{
- int error = 0;
- if (path_init(path, flags, nd))
- error = path_walk(path, nd);
- return error;
-}
+extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
extern void path_release(struct nameidata *);
extern int follow_down(struct vfsmount **, struct dentry **);
extern int follow_up(struct vfsmount **, struct dentry **);