2009-10-15 06:05:24

by Nick Piggin

[permalink] [raw]
Subject: [patch 2/6] fs: no games with DCACHE_UNHASHED

(this is in -mm)

Filesystems outside the regular namespace do not have to clear DCACHE_UNHASHED
in order to have a working /proc/$pid/fd/XXX. Nothing in proc prevents the
fd link from being used if its dentry is not in the hash.

Also, it does not get put into the dcache hash if DCACHE_UNHASHED is clear;
that depends on the filesystem calling d_add or d_rehash.

So delete the misleading comments and needless code.

Acked-by: Miklos Szeredi <[email protected]>
Signed-off-by: Nick Piggin <[email protected]>
---
fs/anon_inodes.c | 16 ----------------
fs/pipe.c | 18 ------------------
net/socket.c | 19 -------------------
3 files changed, 53 deletions(-)

Index: linux-2.6/fs/pipe.c
===================================================================
--- linux-2.6.orig/fs/pipe.c
+++ linux-2.6/fs/pipe.c
@@ -887,17 +887,6 @@ void free_pipe_info(struct inode *inode)
}

static struct vfsmount *pipe_mnt __read_mostly;
-static int pipefs_delete_dentry(struct dentry *dentry)
-{
- /*
- * At creation time, we pretended this dentry was hashed
- * (by clearing DCACHE_UNHASHED bit in d_flags)
- * At delete time, we restore the truth : not hashed.
- * (so that dput() can proceed correctly)
- */
- dentry->d_flags |= DCACHE_UNHASHED;
- return 0;
-}

/*
* pipefs_dname() is called from d_path().
@@ -909,7 +898,6 @@ static char *pipefs_dname(struct dentry
}

static const struct dentry_operations pipefs_dentry_operations = {
- .d_delete = pipefs_delete_dentry,
.d_dname = pipefs_dname,
};

@@ -969,12 +957,6 @@ struct file *create_write_pipe(int flags
goto err_inode;

dentry->d_op = &pipefs_dentry_operations;
- /*
- * We dont want to publish this dentry into global dentry hash table.
- * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
- * This permits a working /proc/$pid/fd/XXX on pipes
- */
- dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(dentry, inode);

err = -ENFILE;
Index: linux-2.6/net/socket.c
===================================================================
--- linux-2.6.orig/net/socket.c
+++ linux-2.6/net/socket.c
@@ -306,18 +306,6 @@ static struct file_system_type sock_fs_t
.kill_sb = kill_anon_super,
};

-static int sockfs_delete_dentry(struct dentry *dentry)
-{
- /*
- * At creation time, we pretended this dentry was hashed
- * (by clearing DCACHE_UNHASHED bit in d_flags)
- * At delete time, we restore the truth : not hashed.
- * (so that dput() can proceed correctly)
- */
- dentry->d_flags |= DCACHE_UNHASHED;
- return 0;
-}
-
/*
* sockfs_dname() is called from d_path().
*/
@@ -328,7 +316,6 @@ static char *sockfs_dname(struct dentry
}

static const struct dentry_operations sockfs_dentry_operations = {
- .d_delete = sockfs_delete_dentry,
.d_dname = sockfs_dname,
};

@@ -377,12 +364,6 @@ static int sock_attach_fd(struct socket
return -ENOMEM;

dentry->d_op = &sockfs_dentry_operations;
- /*
- * We dont want to push this dentry into global dentry hash table.
- * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
- * This permits a working /proc/$pid/fd/XXX on sockets
- */
- dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(dentry, SOCK_INODE(sock));

sock->file = file;
Index: linux-2.6/fs/anon_inodes.c
===================================================================
--- linux-2.6.orig/fs/anon_inodes.c
+++ linux-2.6/fs/anon_inodes.c
@@ -35,24 +35,11 @@ static int anon_inodefs_get_sb(struct fi
mnt);
}

-static int anon_inodefs_delete_dentry(struct dentry *dentry)
-{
- /*
- * We faked vfs to believe the dentry was hashed when we created it.
- * Now we restore the flag so that dput() will work correctly.
- */
- dentry->d_flags |= DCACHE_UNHASHED;
- return 1;
-}
-
static struct file_system_type anon_inode_fs_type = {
.name = "anon_inodefs",
.get_sb = anon_inodefs_get_sb,
.kill_sb = kill_anon_super,
};
-static const struct dentry_operations anon_inodefs_dentry_operations = {
- .d_delete = anon_inodefs_delete_dentry,
-};

/*
* nop .set_page_dirty method so that people can use .page_mkwrite on
@@ -117,9 +104,6 @@ struct file *anon_inode_getfile(const ch
*/
atomic_inc(&anon_inode_inode->i_count);

- dentry->d_op = &anon_inodefs_dentry_operations;
- /* Do not publish this dentry inside the global dentry hash table */
- dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(dentry, anon_inode_inode);

error = -ENFILE;


2009-10-15 06:31:46

by David Miller

[permalink] [raw]
Subject: Re: [patch 2/6] fs: no games with DCACHE_UNHASHED

From: [email protected]
Date: Thu, 15 Oct 2009 15:40:28 +1100

> (this is in -mm)
>
> Filesystems outside the regular namespace do not have to clear DCACHE_UNHASHED
> in order to have a working /proc/$pid/fd/XXX. Nothing in proc prevents the
> fd link from being used if its dentry is not in the hash.
>
> Also, it does not get put into the dcache hash if DCACHE_UNHASHED is clear;
> that depends on the filesystem calling d_add or d_rehash.
>
> So delete the misleading comments and needless code.
>
> Acked-by: Miklos Szeredi <[email protected]>
> Signed-off-by: Nick Piggin <[email protected]>

For networking bits:

Acked-by: David S. Miller <[email protected]>

2009-10-15 07:45:23

by Eric Dumazet

[permalink] [raw]
Subject: Re: [patch 2/6] fs: no games with DCACHE_UNHASHED

[email protected] a ?crit :
> (this is in -mm)
>
> Filesystems outside the regular namespace do not have to clear DCACHE_UNHASHED
> in order to have a working /proc/$pid/fd/XXX. Nothing in proc prevents the
> fd link from being used if its dentry is not in the hash.
>
> Also, it does not get put into the dcache hash if DCACHE_UNHASHED is clear;
> that depends on the filesystem calling d_add or d_rehash.
>
> So delete the misleading comments and needless code.
>

This was added in commit 304e61e6fbadec586dfe002b535f169a04248e49

[PATCH] net: don't insert socket dentries into dentry_hashtable

We currently insert socket dentries into the global dentry hashtable. This
is suboptimal because there is currently no way these entries can be used
for a lookup(). (/proc/xxx/fd/xxx uses a different mechanism). Inserting
them in dentry hashtable slows dcache lookups.

To let __dpath() still work correctly (ie not adding a " (deleted)") after
dentry name, we do :

- Right after d_alloc(), pretend they are hashed by clearing the
DCACHE_UNHASHED bit.

- Call d_instantiate() instead of d_add() : dentry is not inserted in
hash table.

__dpath() & friends work as intended during dentry lifetime.

- At dismantle time, once dput() must clear the dentry, setting again
DCACHE_UNHASHED bit inside the custom d_delete() function provided by
socket code, so that dput() can just kill_it.

Signed-off-by: Eric Dumazet <[email protected]>
Cc: Al Viro <[email protected]>
Acked-by: "David S. Miller" <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>



Back in 2006, we had to perform this hack in order to not leak '(deleted)' in __d_path()

if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
(prepend(&end, &buflen, " (deleted)", 10) != 0))
goto Elong;

In current kernel this part became :

if (d_unlinked(dentry) &&
(prepend(&end, &buflen, " (deleted)", 10) != 0))
goto Elong;


So your cleanup seems good, thanks !

Acked-by: Eric Dumazet <[email protected]>

2009-10-15 08:14:24

by Nick Piggin

[permalink] [raw]
Subject: Re: [patch 2/6] fs: no games with DCACHE_UNHASHED

On Thu, Oct 15, 2009 at 09:44:35AM +0200, Eric Dumazet wrote:
> [email protected] a ?crit :
> > (this is in -mm)
> >
> > Filesystems outside the regular namespace do not have to clear DCACHE_UNHASHED
> > in order to have a working /proc/$pid/fd/XXX. Nothing in proc prevents the
> > fd link from being used if its dentry is not in the hash.
> >
> > Also, it does not get put into the dcache hash if DCACHE_UNHASHED is clear;
> > that depends on the filesystem calling d_add or d_rehash.
> >
> > So delete the misleading comments and needless code.
> >
>
> This was added in commit 304e61e6fbadec586dfe002b535f169a04248e49
>
> [PATCH] net: don't insert socket dentries into dentry_hashtable
>
> We currently insert socket dentries into the global dentry hashtable. This
> is suboptimal because there is currently no way these entries can be used
> for a lookup(). (/proc/xxx/fd/xxx uses a different mechanism). Inserting
> them in dentry hashtable slows dcache lookups.
>
> To let __dpath() still work correctly (ie not adding a " (deleted)") after
> dentry name, we do :
>
> - Right after d_alloc(), pretend they are hashed by clearing the
> DCACHE_UNHASHED bit.
>
> - Call d_instantiate() instead of d_add() : dentry is not inserted in
> hash table.
>
> __dpath() & friends work as intended during dentry lifetime.
>
> - At dismantle time, once dput() must clear the dentry, setting again
> DCACHE_UNHASHED bit inside the custom d_delete() function provided by
> socket code, so that dput() can just kill_it.
>
> Signed-off-by: Eric Dumazet <[email protected]>
> Cc: Al Viro <[email protected]>
> Acked-by: "David S. Miller" <[email protected]>
> Signed-off-by: Andrew Morton <[email protected]>
> Signed-off-by: Linus Torvalds <[email protected]>
>
>
>
> Back in 2006, we had to perform this hack in order to not leak '(deleted)' in __d_path()
>
> if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
> (prepend(&end, &buflen, " (deleted)", 10) != 0))
> goto Elong;
>
> In current kernel this part became :
>
> if (d_unlinked(dentry) &&
> (prepend(&end, &buflen, " (deleted)", 10) != 0))
> goto Elong;
>
>
> So your cleanup seems good, thanks !
>
> Acked-by: Eric Dumazet <[email protected]>

Ahh, hmm d_unlinked() is exactly the same code. OK, so I think this shows
why I'm an idiot. I didn't think about the __d_path output. Though the
comments are still misleading, in my defence (and my test programs did not
use any anoninode fds)...

Now both sockets and pipes define a d_dname so they are OK, but anon_inodes
does not. I think they should probably be made to just provide a d_dname
anyway so we can have the familiar format of "pseudofs:[ino]" rather than
"[pseudofs]" that we have now.

That should make this patch work for anon_inodes.c as well.

2009-10-15 08:29:43

by Nick Piggin

[permalink] [raw]
Subject: Re: [patch 2/6] fs: no games with DCACHE_UNHASHED

On Thu, Oct 15, 2009 at 10:13:43AM +0200, Nick Piggin wrote:
> Now both sockets and pipes define a d_dname so they are OK, but anon_inodes
> does not. I think they should probably be made to just provide a d_dname
> anyway so we can have the familiar format of "pseudofs:[ino]" rather than
> "[pseudofs]" that we have now.
>
> That should make this patch work for anon_inodes.c as well.

So what if we were to do this first? Are there any other reasons I've
missed? (btw. this changes the format the link if that is a problem.
Probably if we're going to do this change, we should change things like
[timerfd] to just timerfd while we're there to improve consistency
further.
--
Pipe and socket pseudo filesystems report their file descriptor link
paths as pipe:[ino] and socket:[ino]. anon_inodefs allows subsystems
to specify a name, but it does not report an associated inode number.

Implement this with anon_inodefs_dname in the same way pipefs and sockfs
are.

---
fs/anon_inodes.c | 10 ++++++++++
1 file changed, 10 insertions(+)

Index: linux-2.6/fs/anon_inodes.c
===================================================================
--- linux-2.6.orig/fs/anon_inodes.c
+++ linux-2.6/fs/anon_inodes.c
@@ -45,6 +45,15 @@ static int anon_inodefs_delete_dentry(st
return 1;
}

+/*
+ * anon_inodefs_dname() is called from d_path().
+ */
+static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
+ dentry->d_name.name, dentry->d_inode->i_ino);
+}
+
static struct file_system_type anon_inode_fs_type = {
.name = "anon_inodefs",
.get_sb = anon_inodefs_get_sb,
@@ -52,6 +61,7 @@ static struct file_system_type anon_inod
};
static const struct dentry_operations anon_inodefs_dentry_operations = {
.d_delete = anon_inodefs_delete_dentry,
+ .d_dname = anon_inodefs_dname,
};

/*

2009-10-15 09:14:33

by Eric Dumazet

[permalink] [raw]
Subject: Re: [patch 2/6] fs: no games with DCACHE_UNHASHED

Nick Piggin a ?crit :

> So what if we were to do this first? Are there any other reasons I've
> missed? (btw. this changes the format the link if that is a problem.
> Probably if we're going to do this change, we should change things like
> [timerfd] to just timerfd while we're there to improve consistency
> further.
> --
> Pipe and socket pseudo filesystems report their file descriptor link
> paths as pipe:[ino] and socket:[ino]. anon_inodefs allows subsystems
> to specify a name, but it does not report an associated inode number.
>
> Implement this with anon_inodefs_dname in the same way pipefs and sockfs
> are.
>
> ---
> fs/anon_inodes.c | 10 ++++++++++
> 1 file changed, 10 insertions(+)
>
> Index: linux-2.6/fs/anon_inodes.c
> ===================================================================
> --- linux-2.6.orig/fs/anon_inodes.c
> +++ linux-2.6/fs/anon_inodes.c
> @@ -45,6 +45,15 @@ static int anon_inodefs_delete_dentry(st
> return 1;
> }
>
> +/*
> + * anon_inodefs_dname() is called from d_path().
> + */
> +static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
> +{
> + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
> + dentry->d_name.name, dentry->d_inode->i_ino);
> +}
> +
> static struct file_system_type anon_inode_fs_type = {
> .name = "anon_inodefs",
> .get_sb = anon_inodefs_get_sb,
> @@ -52,6 +61,7 @@ static struct file_system_type anon_inod
> };
> static const struct dentry_operations anon_inodefs_dentry_operations = {
> .d_delete = anon_inodefs_delete_dentry,
> + .d_dname = anon_inodefs_dname,
> };
>
> /*

Yes, that would be fine, thanks Nick.

Acked-by: Eric Dumazet <[email protected]>

2009-10-15 13:21:24

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [patch 2/6] fs: no games with DCACHE_UNHASHED

On Thu, Oct 15, 2009 at 10:29:03AM +0200, Nick Piggin wrote:
> On Thu, Oct 15, 2009 at 10:13:43AM +0200, Nick Piggin wrote:
> > Now both sockets and pipes define a d_dname so they are OK, but anon_inodes
> > does not. I think they should probably be made to just provide a d_dname
> > anyway so we can have the familiar format of "pseudofs:[ino]" rather than
> > "[pseudofs]" that we have now.
> >
> > That should make this patch work for anon_inodes.c as well.
>
> So what if we were to do this first? Are there any other reasons I've
> missed? (btw. this changes the format the link if that is a problem.
> Probably if we're going to do this change, we should change things like
> [timerfd] to just timerfd while we're there to improve consistency
> further.
> --
> Pipe and socket pseudo filesystems report their file descriptor link
> paths as pipe:[ino] and socket:[ino]. anon_inodefs allows subsystems
> to specify a name, but it does not report an associated inode number.

I think that's because the inode number is always 0 -- there's just one
anonymous inode.

> Implement this with anon_inodefs_dname in the same way pipefs and sockfs
> are.
>
> ---
> fs/anon_inodes.c | 10 ++++++++++
> 1 file changed, 10 insertions(+)
>
> Index: linux-2.6/fs/anon_inodes.c
> ===================================================================
> --- linux-2.6.orig/fs/anon_inodes.c
> +++ linux-2.6/fs/anon_inodes.c
> @@ -45,6 +45,15 @@ static int anon_inodefs_delete_dentry(st
> return 1;
> }
>
> +/*
> + * anon_inodefs_dname() is called from d_path().
> + */
> +static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
> +{
> + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
> + dentry->d_name.name, dentry->d_inode->i_ino);
> +}
> +
> static struct file_system_type anon_inode_fs_type = {
> .name = "anon_inodefs",
> .get_sb = anon_inodefs_get_sb,
> @@ -52,6 +61,7 @@ static struct file_system_type anon_inod
> };
> static const struct dentry_operations anon_inodefs_dentry_operations = {
> .d_delete = anon_inodefs_delete_dentry,
> + .d_dname = anon_inodefs_dname,
> };
>
> /*
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

--
Matthew Wilcox Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."

2009-10-15 14:42:16

by Nick Piggin

[permalink] [raw]
Subject: Re: [patch 2/6] fs: no games with DCACHE_UNHASHED

On Thu, Oct 15, 2009 at 07:20:45AM -0600, Matthew Wilcox wrote:
> On Thu, Oct 15, 2009 at 10:29:03AM +0200, Nick Piggin wrote:
> > On Thu, Oct 15, 2009 at 10:13:43AM +0200, Nick Piggin wrote:
> > > Now both sockets and pipes define a d_dname so they are OK, but anon_inodes
> > > does not. I think they should probably be made to just provide a d_dname
> > > anyway so we can have the familiar format of "pseudofs:[ino]" rather than
> > > "[pseudofs]" that we have now.
> > >
> > > That should make this patch work for anon_inodes.c as well.
> >
> > So what if we were to do this first? Are there any other reasons I've
> > missed? (btw. this changes the format the link if that is a problem.
> > Probably if we're going to do this change, we should change things like
> > [timerfd] to just timerfd while we're there to improve consistency
> > further.
> > --
> > Pipe and socket pseudo filesystems report their file descriptor link
> > paths as pipe:[ino] and socket:[ino]. anon_inodefs allows subsystems
> > to specify a name, but it does not report an associated inode number.
>
> I think that's because the inode number is always 0 -- there's just one
> anonymous inode.

OIC. Hmm, well we could give them a unique number I guess. But anyway
for now, let's just make anon_inodefs_dname just print dentry->d_name.name.

>
> > Implement this with anon_inodefs_dname in the same way pipefs and sockfs
> > are.
> >
> > ---
> > fs/anon_inodes.c | 10 ++++++++++
> > 1 file changed, 10 insertions(+)
> >
> > Index: linux-2.6/fs/anon_inodes.c
> > ===================================================================
> > --- linux-2.6.orig/fs/anon_inodes.c
> > +++ linux-2.6/fs/anon_inodes.c
> > @@ -45,6 +45,15 @@ static int anon_inodefs_delete_dentry(st
> > return 1;
> > }
> >
> > +/*
> > + * anon_inodefs_dname() is called from d_path().
> > + */
> > +static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
> > +{
> > + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
> > + dentry->d_name.name, dentry->d_inode->i_ino);
> > +}
> > +
> > static struct file_system_type anon_inode_fs_type = {
> > .name = "anon_inodefs",
> > .get_sb = anon_inodefs_get_sb,
> > @@ -52,6 +61,7 @@ static struct file_system_type anon_inod
> > };
> > static const struct dentry_operations anon_inodefs_dentry_operations = {
> > .d_delete = anon_inodefs_delete_dentry,
> > + .d_dname = anon_inodefs_dname,
> > };
> >
> > /*
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> --
> Matthew Wilcox Intel Open Source Technology Centre
> "Bill, look, we understand that you're interested in selling us this
> operating system, but compare it to ours. We can't possibly take such
> a retrograde step."