2012-08-23 10:49:29

by Cyrill Gorcunov

[permalink] [raw]
Subject: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2

This patch converts /proc/pid/fdinfo/ handling routines to seq-file which
is needed to extend seq operations and plug in auxiliary fdinfo provides
from subsystems like eventfd/eventpoll/fsnotify.

Note the proc_fd_link no longer call for proc_fd_info, simply because
proc_fd_info is converted to seq_fdinfo_open (which is seq-file open()
prototype).

v2 (by Al Viro):
- Don't use helper function with optional arguments, thus proc_fd_info get deprecated
- Use proc_fdinfo structure with seq_file embedded, thus we can use container_of helper
- Use fput to free reference to the file we've grabbed

Signed-off-by: Cyrill Gorcunov <[email protected]>
CC: Pavel Emelyanov <[email protected]>
CC: Al Viro <[email protected]>
CC: Alexey Dobriyan <[email protected]>
CC: Andrew Morton <[email protected]>
CC: James Bottomley <[email protected]>
CC: "Aneesh Kumar K.V" <[email protected]>
CC: Alexey Dobriyan <[email protected]>
CC: Matthew Helsley <[email protected]>
CC: "J. Bruce Fields" <[email protected]>
CC: "Aneesh Kumar K.V" <[email protected]>
---
fs/proc/fd.c | 145 ++++++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 99 insertions(+), 46 deletions(-)

Index: linux-2.6.git/fs/proc/fd.c
===================================================================
--- linux-2.6.git.orig/fs/proc/fd.c
+++ linux-2.6.git/fs/proc/fd.c
@@ -6,61 +6,105 @@
#include <linux/namei.h>
#include <linux/pid.h>
#include <linux/security.h>
+#include <linux/file.h>
+#include <linux/seq_file.h>

#include <linux/proc_fs.h>

#include "internal.h"
#include "fd.h"

-#define PROC_FDINFO_MAX 64
+struct proc_fdinfo {
+ struct seq_file m;
+ struct file *fd_file;
+ int f_flags;
+};
+
+static int seq_show(struct seq_file *m, void *v)
+{
+ struct proc_fdinfo *fdinfo = container_of(m, struct proc_fdinfo, m);
+ seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
+ (long long)fdinfo->fd_file->f_pos, fdinfo->f_flags);
+ return 0;
+}

-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
{
- struct task_struct *task = get_proc_task(inode);
+ struct proc_fdinfo *fdinfo = NULL;
struct files_struct *files = NULL;
- int fd = proc_fd(inode);
- struct file *file;
+ struct task_struct *task;
+ struct file *fd_file;
+ int f_flags, ret;

+ ret = -ENOENT;
+
+ task = get_proc_task(inode);
if (task) {
files = get_files_struct(task);
put_task_struct(task);
}
+
if (files) {
- /*
- * We are not taking a ref to the file structure, so we must
- * hold ->file_lock.
- */
+ int fd = proc_fd(inode);
+
spin_lock(&files->file_lock);
- file = fcheck_files(files, fd);
- if (file) {
- unsigned int f_flags;
- struct fdtable *fdt;
+ fd_file = fcheck_files(files, fd);
+ if (fd_file) {
+ struct fdtable *fdt = files_fdtable(files);

- fdt = files_fdtable(files);
- f_flags = file->f_flags & ~O_CLOEXEC;
+ f_flags = fd_file->f_flags & ~O_CLOEXEC;
if (close_on_exec(fd, fdt))
f_flags |= O_CLOEXEC;

- if (path) {
- *path = file->f_path;
- path_get(&file->f_path);
- }
- if (info)
- snprintf(info, PROC_FDINFO_MAX,
- "pos:\t%lli\n"
- "flags:\t0%o\n",
- (long long) file->f_pos,
- f_flags);
- spin_unlock(&files->file_lock);
- put_files_struct(files);
- return 0;
+ get_file(fd_file);
+ ret = 0;
}
spin_unlock(&files->file_lock);
+
put_files_struct(files);
}
- return -ENOENT;
+
+ if (ret)
+ return ret;
+
+ ret = -ENOMEM;
+ fdinfo = kmalloc(sizeof(*fdinfo), GFP_KERNEL);
+ if (!fdinfo)
+ goto err_put;
+
+ fdinfo->fd_file = fd_file;
+ fdinfo->f_flags = f_flags;
+ file->private_data = &fdinfo->m;
+
+ ret = single_open(file, seq_show, NULL);
+ if (ret)
+ goto err_free;
+
+ return 0;
+
+err_free:
+ kfree(fdinfo);
+err_put:
+ fput(fd_file);
+ return ret;
}

+static int seq_fdinfo_release(struct inode *inode, struct file *file)
+{
+ struct proc_fdinfo *fdinfo =
+ container_of((struct seq_file *)file->private_data,
+ struct proc_fdinfo, m);
+ fput(fdinfo->fd_file);
+ return single_release(inode, file);
+}
+
+static const struct file_operations proc_fdinfo_file_operations = {
+ .open = seq_fdinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_fdinfo_release,
+};
+
static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
{
struct files_struct *files;
@@ -130,7 +174,32 @@ static const struct dentry_operations ti

static int proc_fd_link(struct dentry *dentry, struct path *path)
{
- return proc_fd_info(dentry->d_inode, path, NULL);
+ struct files_struct *files = NULL;
+ struct task_struct *task;
+ int ret = -ENOENT;
+
+ task = get_proc_task(dentry->d_inode);
+ if (task) {
+ files = get_files_struct(task);
+ put_task_struct(task);
+ }
+
+ if (files) {
+ int fd = proc_fd(dentry->d_inode);
+ struct file *fd_file;
+
+ spin_lock(&files->file_lock);
+ fd_file = fcheck_files(files, fd);
+ if (fd_file) {
+ *path = fd_file->f_path;
+ path_get(&fd_file->f_path);
+ ret = 0;
+ }
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+ }
+
+ return ret;
}

static struct dentry *
@@ -245,22 +314,6 @@ out_no_task:
return retval;
}

-static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
- size_t len, loff_t *ppos)
-{
- char tmp[PROC_FDINFO_MAX];
- int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
- if (!err)
- err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
- return err;
-}
-
-static const struct file_operations proc_fdinfo_file_operations = {
- .open = nonseekable_open,
- .read = proc_fdinfo_read,
- .llseek = no_llseek,
-};
-
static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
{
return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);


2012-08-26 02:47:04

by Al Viro

[permalink] [raw]
Subject: Re: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2

On Thu, Aug 23, 2012 at 02:43:25PM +0400, Cyrill Gorcunov wrote:
> This patch converts /proc/pid/fdinfo/ handling routines to seq-file which
> is needed to extend seq operations and plug in auxiliary fdinfo provides
> from subsystems like eventfd/eventpoll/fsnotify.
>
> Note the proc_fd_link no longer call for proc_fd_info, simply because
> proc_fd_info is converted to seq_fdinfo_open (which is seq-file open()
> prototype).

Actually, now that I've looked at it a bit more... You've just introduced
an ABI change here. Look:

echo foo > /tmp/a
exec 8</tmp/a # fd 8 reads from /tmp/a
read i <&8 # read line from it
exec 9</proc/self/fdinfo/8 # fd 9 is /proc/self/fdinfo/8
exec 8</tmp/a # close fd 8 and reopen it to the same /tmp/a
cat <&9 # now read from fd 9

With the mainline it will print
pos: 0
flags: 0100000

With that commit you will get
pos: 4
flags: 0100000

since the file you've opened refers to what used to be at fd 8 at the
moment of open(2), not read(2). It may or may not be harmless, but it
definitely is a userland ABI change. And that way it's actually an
extra PITA for yourself - think what /proc/self/fdinfo/9 should contain
now! That's right, you've got hidden state there and would need to
print it to be able to reconstruct the state on restart. Only it doesn't
end just there - what if you've taken that one step further and got the
struct file stashed in there at open(2) time also of the same kind?

IMO doing that at open() time is just a headache for no good reason -
resolving descriptor to struct file * at read() time as we do now
is much saner. Better do that in your ->show(), since you are using
a single-shot iterator anyway...

2012-08-26 08:13:47

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2

On Sun, Aug 26, 2012 at 03:46:53AM +0100, Al Viro wrote:
> On Thu, Aug 23, 2012 at 02:43:25PM +0400, Cyrill Gorcunov wrote:
> > This patch converts /proc/pid/fdinfo/ handling routines to seq-file which
> > is needed to extend seq operations and plug in auxiliary fdinfo provides
> > from subsystems like eventfd/eventpoll/fsnotify.
> >
> > Note the proc_fd_link no longer call for proc_fd_info, simply because
> > proc_fd_info is converted to seq_fdinfo_open (which is seq-file open()
> > prototype).
>
> Actually, now that I've looked at it a bit more... You've just introduced
> an ABI change here. Look:

Crap, ineed. Thanks, Al! I'll fix it up, sorry.

...

> IMO doing that at open() time is just a headache for no good reason -
> resolving descriptor to struct file * at read() time as we do now
> is much saner. Better do that in your ->show(), since you are using
> a single-shot iterator anyway...

Oh, thanks for the hint, Al! I'll rework and send an updated version.

Cyrill

2012-08-26 14:28:27

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2

On Sun, Aug 26, 2012 at 03:46:53AM +0100, Al Viro wrote:
>
> IMO doing that at open() time is just a headache for no good reason -
> resolving descriptor to struct file * at read() time as we do now
> is much saner. Better do that in your ->show(), since you are using
> a single-shot iterator anyway...

Al, the updated version is below. I suppose I can grab proc inode then
and lookup for file position and flags at show method. (I remember
what you've said about O_CLOEXEC bit, but I'll address this in
another patch).
---
From: Cyrill Gorcunov <[email protected]>
Subject: procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v3

This patch converts /proc/pid/fdinfo/ handling routines to seq-file which
is needed to extend seq operations and plug in auxiliary fdinfo provides
from subsystems like eventfd/eventpoll/fsnotify.

Note the proc_fd_link no longer call for proc_fd_info, simply because
proc_fd_info is converted to seq_fdinfo_open (which is seq-file open()
prototype).

v2 (by Al Viro):
- Don't use helper function with optional arguments, thus proc_fd_info get deprecated
- Use proc_fdinfo structure with seq_file embedded, thus we can use container_of helper
- Use fput to free reference to the file we've grabbed

v3 (by Al Viro):
- Don't grab struct file at read(2) time since it breaks ABI, instead all
manipulations should be done at ->show() call.

Signed-off-by: Cyrill Gorcunov <[email protected]>
CC: Pavel Emelyanov <[email protected]>
CC: Al Viro <[email protected]>
CC: Alexey Dobriyan <[email protected]>
CC: Andrew Morton <[email protected]>
CC: James Bottomley <[email protected]>
CC: "Aneesh Kumar K.V" <[email protected]>
CC: Alexey Dobriyan <[email protected]>
CC: Matthew Helsley <[email protected]>
CC: "J. Bruce Fields" <[email protected]>
CC: "Aneesh Kumar K.V" <[email protected]>
---
fs/proc/fd.c | 142 +++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 95 insertions(+), 47 deletions(-)

Index: linux-2.6.git/fs/proc/fd.c
===================================================================
--- linux-2.6.git.orig/fs/proc/fd.c
+++ linux-2.6.git/fs/proc/fd.c
@@ -6,61 +6,100 @@
#include <linux/namei.h>
#include <linux/pid.h>
#include <linux/security.h>
+#include <linux/file.h>
+#include <linux/seq_file.h>

#include <linux/proc_fs.h>

#include "internal.h"
#include "fd.h"

-#define PROC_FDINFO_MAX 64
+struct proc_fdinfo {
+ struct seq_file m;
+ struct inode *inode;
+};

-static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+static int seq_show(struct seq_file *m, void *v)
{
- struct task_struct *task = get_proc_task(inode);
+ struct proc_fdinfo *fdinfo = container_of(m, struct proc_fdinfo, m);
struct files_struct *files = NULL;
- int fd = proc_fd(inode);
- struct file *file;
+ int f_flags = 0, ret = -ENOENT;
+ struct file *file = NULL;
+ struct task_struct *task;
+
+ task = get_proc_task(fdinfo->inode);
+ if (!task)
+ return -ENOENT;
+
+ files = get_files_struct(task);
+ put_task_struct(task);

- if (task) {
- files = get_files_struct(task);
- put_task_struct(task);
- }
if (files) {
- /*
- * We are not taking a ref to the file structure, so we must
- * hold ->file_lock.
- */
+ int fd = proc_fd(fdinfo->inode);
+
spin_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (file) {
- unsigned int f_flags;
- struct fdtable *fdt;
+ struct fdtable *fdt = files_fdtable(files);

- fdt = files_fdtable(files);
f_flags = file->f_flags & ~O_CLOEXEC;
if (close_on_exec(fd, fdt))
f_flags |= O_CLOEXEC;

- if (path) {
- *path = file->f_path;
- path_get(&file->f_path);
- }
- if (info)
- snprintf(info, PROC_FDINFO_MAX,
- "pos:\t%lli\n"
- "flags:\t0%o\n",
- (long long) file->f_pos,
- f_flags);
- spin_unlock(&files->file_lock);
- put_files_struct(files);
- return 0;
+ get_file(file);
+ ret = 0;
}
spin_unlock(&files->file_lock);
put_files_struct(files);
}
- return -ENOENT;
+
+ if (!ret) {
+ seq_printf(m, "pos:\t%lli\nflags:\t0%o\n",
+ (long long)file->f_pos, f_flags);
+ fput(file);
+ }
+
+ return ret;
+}
+
+static int seq_fdinfo_open(struct inode *inode, struct file *file)
+{
+ struct proc_fdinfo *fdinfo;
+ int ret = 0;
+
+ fdinfo = kmalloc(sizeof(*fdinfo), GFP_KERNEL);
+ if (!fdinfo)
+ return -ENOMEM;
+
+ ihold(inode);
+ fdinfo->inode = inode;
+
+ file->private_data = &fdinfo->m;
+ ret = single_open(file, seq_show, NULL);
+ if (ret) {
+ iput(inode);
+ kfree(fdinfo);
+ }
+
+ return ret;
+}
+
+static int seq_fdinfo_release(struct inode *inode, struct file *file)
+{
+ struct proc_fdinfo *fdinfo =
+ container_of((struct seq_file *)file->private_data,
+ struct proc_fdinfo, m);
+ iput(fdinfo->inode);
+ return single_release(inode, file);
}

+static const struct file_operations proc_fdinfo_file_operations = {
+ .open = seq_fdinfo_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_fdinfo_release,
+};
+
static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
{
struct files_struct *files;
@@ -130,7 +169,32 @@ static const struct dentry_operations ti

static int proc_fd_link(struct dentry *dentry, struct path *path)
{
- return proc_fd_info(dentry->d_inode, path, NULL);
+ struct files_struct *files = NULL;
+ struct task_struct *task;
+ int ret = -ENOENT;
+
+ task = get_proc_task(dentry->d_inode);
+ if (task) {
+ files = get_files_struct(task);
+ put_task_struct(task);
+ }
+
+ if (files) {
+ int fd = proc_fd(dentry->d_inode);
+ struct file *fd_file;
+
+ spin_lock(&files->file_lock);
+ fd_file = fcheck_files(files, fd);
+ if (fd_file) {
+ *path = fd_file->f_path;
+ path_get(&fd_file->f_path);
+ ret = 0;
+ }
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+ }
+
+ return ret;
}

static struct dentry *
@@ -245,22 +309,6 @@ out_no_task:
return retval;
}

-static ssize_t proc_fdinfo_read(struct file *file, char __user *buf,
- size_t len, loff_t *ppos)
-{
- char tmp[PROC_FDINFO_MAX];
- int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp);
- if (!err)
- err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp));
- return err;
-}
-
-static const struct file_operations proc_fdinfo_file_operations = {
- .open = nonseekable_open,
- .read = proc_fdinfo_read,
- .llseek = no_llseek,
-};
-
static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
{
return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);

2012-08-26 15:05:27

by Al Viro

[permalink] [raw]
Subject: Re: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2

On Sun, Aug 26, 2012 at 06:28:20PM +0400, Cyrill Gorcunov wrote:
> On Sun, Aug 26, 2012 at 03:46:53AM +0100, Al Viro wrote:
> >
> > IMO doing that at open() time is just a headache for no good reason -
> > resolving descriptor to struct file * at read() time as we do now
> > is much saner. Better do that in your ->show(), since you are using
> > a single-shot iterator anyway...
>
> Al, the updated version is below. I suppose I can grab proc inode then
> and lookup for file position and flags at show method. (I remember
> what you've said about O_CLOEXEC bit, but I'll address this in
> another patch).

Applied, with a couple of changes:
* there's no need for those games with ihold/iput - opened file pins
its inode down just fine, TYVM.
* struct fd_info is pointless in that form - the last argument
of single_open() will end up in seq_file ->private, so let's just pass
the inode there and use m->private in ->show().

I'll push that into vfs.git#master (along with the previous patch) in a few
hours.

O_CLOEXEC is taken care of in my tree. FWIW, I'm consolidating descriptor
handling in general into fs/file.c (and I'm seriously tempted to rename
that sucker to something like fs/descriptors.c or fs/fdtable.c); some of
that stuff is already in #master. I'm probably going to move some of the
code from your fs/proc/fd.c there as well...

2012-08-26 15:11:00

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [patch 2/9] procfs: Convert /proc/pid/fdinfo/ handling routines to seq-file v2

On Sun, Aug 26, 2012 at 04:05:20PM +0100, Al Viro wrote:
>
> Applied, with a couple of changes:
> * there's no need for those games with ihold/iput - opened file pins
> its inode down just fine, TYVM.

Ah, yeah, thanks!

> * struct fd_info is pointless in that form - the last argument
> of single_open() will end up in seq_file ->private, so let's just pass
> the inode there and use m->private in ->show().

Yes. Thanks!

> I'll push that into vfs.git#master (along with the previous patch) in a few hours.
>
> O_CLOEXEC is taken care of in my tree. FWIW, I'm consolidating descriptor
> handling in general into fs/file.c (and I'm seriously tempted to rename
> that sucker to something like fs/descriptors.c or fs/fdtable.c); some of
> that stuff is already in #master. I'm probably going to move some of the
> code from your fs/proc/fd.c there as well...

OK, so I'll fetch your tree and rebase the rest of series on top then,
to continue fdinfo handling. Sounds good?