Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756919AbXKUJhr (ORCPT ); Wed, 21 Nov 2007 04:37:47 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1750914AbXKUJhh (ORCPT ); Wed, 21 Nov 2007 04:37:37 -0500 Received: from sacred.ru ([62.205.161.221]:58585 "EHLO sacred.ru" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750719AbXKUJhe (ORCPT ); Wed, 21 Nov 2007 04:37:34 -0500 Message-ID: <4743FC2B.9010105@openvz.org> Date: Wed, 21 Nov 2007 12:36:43 +0300 From: Pavel Emelyanov User-Agent: Thunderbird 2.0.0.9 (X11/20071031) MIME-Version: 1.0 To: "Eric W. Biederman" CC: "Rafael J. Wysocki" , Pavel Machek , kernel list , netdev Subject: Re: 2.6.24-rc3: find complains about /proc/net References: <20071119191000.GA1560@elf.ucw.cz> <200711192304.25087.rjw@sisk.pl> <4743026B.2020907@openvz.org> In-Reply-To: Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit X-Greylist: Sender succeeded SMTP AUTH authentication, not delayed by milter-greylist-3.0 (sacred.ru [62.205.161.221]); Wed, 21 Nov 2007 12:36:45 +0300 (MSK) Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 12049 Lines: 385 Eric W. Biederman wrote: > Below is a preliminary patch. It solves the directory issue but it doesn't > play well with proc_mnt and proc_flush_task. It works by simply caching the > network namespace when we mount proc so we don't have to be fancy and dynamic. Nice... Where should we apply this patch? > Something for the discussion anyway. > > I will start sorting out what makes sense tomorrow. > > Eric > > >>From f359fde2469ba8be2123a465e788a83c7e6831e9 Mon Sep 17 00:00:00 2001 > From: Eric W. Biederman > Date: Tue, 20 Nov 2007 19:36:05 -0700 > Subject: [PATCH] proc: Fix /proc/net directory listings. > > Having proc dynamically display the contents of /proc/net is > hard. 
So make life simpler by capturing the network namespace > when we mount proc and only displaying that network namespace. > > --- > fs/proc/base.c | 8 ++-- > fs/proc/generic.c | 4 ++- > fs/proc/internal.h | 13 +++++++ > fs/proc/proc_net.c | 89 ++++------------------------------------------- > fs/proc/root.c | 50 ++++++++++++++++++-------- > include/linux/proc_fs.h | 4 ++ > 6 files changed, 66 insertions(+), 102 deletions(-) > > diff --git a/fs/proc/base.c b/fs/proc/base.c > index aeaf0d0..9d4f06a 100644 > --- a/fs/proc/base.c > +++ b/fs/proc/base.c > @@ -2395,7 +2395,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct > if (tgid == ~0U) > goto out; > > - ns = dentry->d_sb->s_fs_info; > + ns = proc_sbi(dentry->d_sb)->pid_ns; > rcu_read_lock(); > task = find_task_by_pid_ns(tgid, ns); > if (task) > @@ -2476,7 +2476,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) > goto out; > } > > - ns = filp->f_dentry->d_sb->s_fs_info; > + ns = proc_sbi(filp->f_dentry->d_sb)->pid_ns; > tgid = filp->f_pos - TGID_OFFSET; > for (task = next_tgid(tgid, ns); > task; > @@ -2615,7 +2615,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry > if (tid == ~0U) > goto out; > > - ns = dentry->d_sb->s_fs_info; > + ns = proc_sbi(dentry->d_sb)->pid_ns; > rcu_read_lock(); > task = find_task_by_pid_ns(tid, ns); > if (task) > @@ -2758,7 +2758,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi > /* f_version caches the tgid value that the last readdir call couldn't > * return. lseek aka telldir automagically resets f_version to 0. 
> */ > - ns = filp->f_dentry->d_sb->s_fs_info; > + ns = proc_sbi(filp->f_dentry->d_sb)->pid_ns; > tid = (int)filp->f_version; > filp->f_version = 0; > for (task = first_tid(leader, tid, pos - 2, ns); > diff --git a/fs/proc/generic.c b/fs/proc/generic.c > index 1bdb624..b58f0ec 100644 > --- a/fs/proc/generic.c > +++ b/fs/proc/generic.c > @@ -398,7 +398,9 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam > continue; > if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { > unsigned int ino = de->low_ino; > - > + > + if (de->shadow_proc) > + de = de->shadow_proc(dentry->d_sb, de); > de_get(de); > spin_unlock(&proc_subdir_lock); > error = -EINVAL; > diff --git a/fs/proc/internal.h b/fs/proc/internal.h > index 1820eb2..a26f115 100644 > --- a/fs/proc/internal.h > +++ b/fs/proc/internal.h > @@ -11,6 +11,18 @@ > > #include > > +struct pid_namespace; > +struct net; > +struct proc_sb_info { > + struct pid_namespace *pid_ns; > + struct net *net_ns; > +}; > + > +static inline struct proc_sb_info *proc_sbi(struct super_block *sb) > +{ > + return sb->s_fs_info; > +} > + > #ifdef CONFIG_PROC_SYSCTL > extern int proc_sys_init(void); > #else > @@ -78,3 +90,4 @@ static inline int proc_fd(struct inode *inode) > { > return PROC_I(inode)->fd; > } > + > diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c > index 131f9c6..8a82e29 100644 > --- a/fs/proc/proc_net.c > +++ b/fs/proc/proc_net.c > @@ -50,89 +50,15 @@ struct net *get_proc_net(const struct inode *inode) > } > EXPORT_SYMBOL_GPL(get_proc_net); > > -static struct proc_dir_entry *proc_net_shadow; > +static struct proc_dir_entry *shadow_pde; > > -static struct dentry *proc_net_shadow_dentry(struct dentry *parent, > - struct proc_dir_entry *de) > +static struct proc_dir_entry *proc_net_shadow(struct super_block *sb, > + struct proc_dir_entry *de) > { > - struct dentry *shadow = NULL; > - struct inode *inode; > - if (!de) > - goto out; > - de_get(de); > - inode = 
proc_get_inode(parent->d_inode->i_sb, de->low_ino, de); > - if (!inode) > - goto out_de_put; > - shadow = d_alloc_name(parent, de->name); > - if (!shadow) > - goto out_iput; > - shadow->d_op = parent->d_op; /* proc_dentry_operations */ > - d_instantiate(shadow, inode); > -out: > - return shadow; > -out_iput: > - iput(inode); > -out_de_put: > - de_put(de); > - goto out; > -} > - > -static void *proc_net_follow_link(struct dentry *parent, struct nameidata *nd) > -{ > - struct net *net = current->nsproxy->net_ns; > - struct dentry *shadow; > - shadow = proc_net_shadow_dentry(parent, net->proc_net); > - if (!shadow) > - return ERR_PTR(-ENOENT); > - > - dput(nd->dentry); > - /* My dentry count is 1 and that should be enough as the > - * shadow dentry is thrown away immediately. > - */ > - nd->dentry = shadow; > - return NULL; > + struct proc_sb_info *sbi = proc_sbi(sb); > + return sbi->net_ns->proc_net; > } > > -static struct dentry *proc_net_lookup(struct inode *dir, struct dentry *dentry, > - struct nameidata *nd) > -{ > - struct net *net = current->nsproxy->net_ns; > - struct dentry *shadow; > - > - shadow = proc_net_shadow_dentry(nd->dentry, net->proc_net); > - if (!shadow) > - return ERR_PTR(-ENOENT); > - > - dput(nd->dentry); > - nd->dentry = shadow; > - > - return shadow->d_inode->i_op->lookup(shadow->d_inode, dentry, nd); > -} > - > -static int proc_net_setattr(struct dentry *dentry, struct iattr *iattr) > -{ > - struct net *net = current->nsproxy->net_ns; > - struct dentry *shadow; > - int ret; > - > - shadow = proc_net_shadow_dentry(dentry->d_parent, net->proc_net); > - if (!shadow) > - return -ENOENT; > - ret = shadow->d_inode->i_op->setattr(shadow, iattr); > - dput(shadow); > - return ret; > -} > - > -static const struct file_operations proc_net_dir_operations = { > - .read = generic_read_dir, > -}; > - > -static struct inode_operations proc_net_dir_inode_operations = { > - .follow_link = proc_net_follow_link, > - .lookup = proc_net_lookup, > - .setattr = 
proc_net_setattr, > -}; > - > static __net_init int proc_net_ns_init(struct net *net) > { > struct proc_dir_entry *root, *netd, *net_statd; > @@ -185,9 +111,8 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = { > > int __init proc_net_init(void) > { > - proc_net_shadow = proc_mkdir("net", NULL); > - proc_net_shadow->proc_iops = &proc_net_dir_inode_operations; > - proc_net_shadow->proc_fops = &proc_net_dir_operations; > + shadow_pde = proc_mkdir("net", NULL); > + shadow_pde->shadow_proc = proc_net_shadow; > > return register_pernet_subsys(&proc_net_ns_ops); > } > diff --git a/fs/proc/root.c b/fs/proc/root.c > index ec9cb3b..e60ac83 100644 > --- a/fs/proc/root.c > +++ b/fs/proc/root.c > @@ -19,6 +19,7 @@ > #include > #include > #include > +#include > > #include "internal.h" > > @@ -26,15 +27,23 @@ struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; > > static int proc_test_super(struct super_block *sb, void *data) > { > - return sb->s_fs_info == data; > + struct proc_sb_info *sbi = proc_sbi(sb), *info = data; > + return (sbi->pid_ns == info->pid_ns) && > + (sbi->net_ns == info->net_ns); > } > > static int proc_set_super(struct super_block *sb, void *data) > { > - struct pid_namespace *ns; > - > - ns = (struct pid_namespace *)data; > - sb->s_fs_info = get_pid_ns(ns); > + > + struct proc_sb_info *new, *info = data; > + > + new = kzalloc(sizeof(*new), GFP_KERNEL); > + if (!new) > + return -ENOMEM; > + *new = *info; > + get_pid_ns(new->pid_ns); > + get_net(new->net_ns); > + sb->s_fs_info = new; > return set_anon_super(sb, NULL); > } > > @@ -43,7 +52,7 @@ static int proc_get_sb(struct file_system_type *fs_type, > { > int err; > struct super_block *sb; > - struct pid_namespace *ns; > + struct proc_sb_info info, *sbi; > struct proc_inode *ei; > > if (proc_mnt) { > @@ -57,12 +66,14 @@ static int proc_get_sb(struct file_system_type *fs_type, > ei->pid = find_get_pid(1); > } > > + info.pid_ns = current->nsproxy->pid_ns; > + info.net_ns = 
current->nsproxy->net_ns; > if (flags & MS_KERNMOUNT) > - ns = (struct pid_namespace *)data; > + sbi = data; > else > - ns = current->nsproxy->pid_ns; > + sbi = &info; > > - sb = sget(fs_type, proc_test_super, proc_set_super, ns); > + sb = sget(fs_type, proc_test_super, proc_set_super, sbi); > if (IS_ERR(sb)) > return PTR_ERR(sb); > > @@ -78,12 +89,13 @@ static int proc_get_sb(struct file_system_type *fs_type, > ei = PROC_I(sb->s_root->d_inode); > if (!ei->pid) { > rcu_read_lock(); > - ei->pid = get_pid(find_pid_ns(1, ns)); > + ei->pid = get_pid(find_pid_ns(1, sbi->pid_ns)); > rcu_read_unlock(); > } > > sb->s_flags |= MS_ACTIVE; > - ns->proc_mnt = mnt; > + if (!sbi->pid_ns->proc_mnt) > + sbi->pid_ns->proc_mnt = mnt; > } > > return simple_set_mnt(mnt, sb); > @@ -91,11 +103,13 @@ static int proc_get_sb(struct file_system_type *fs_type, > > static void proc_kill_sb(struct super_block *sb) > { > - struct pid_namespace *ns; > + struct proc_sb_info *sbi; > > - ns = (struct pid_namespace *)sb->s_fs_info; > + sbi = proc_sbi(sb); > kill_anon_super(sb); > - put_pid_ns(ns); > + put_pid_ns(sbi->pid_ns); > + put_net(sbi->net_ns); > + kfree(sbi); > } > > static struct file_system_type proc_fs_type = { > @@ -106,13 +120,16 @@ static struct file_system_type proc_fs_type = { > > void __init proc_root_init(void) > { > + struct proc_sb_info info; > int err = proc_init_inodecache(); > if (err) > return; > err = register_filesystem(&proc_fs_type); > if (err) > return; > - proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); > + info.pid_ns = &init_pid_ns; > + info.net_ns = current->nsproxy->net_ns; > + proc_mnt = kern_mount_data(&proc_fs_type, &info); > err = PTR_ERR(proc_mnt); > if (IS_ERR(proc_mnt)) { > unregister_filesystem(&proc_fs_type); > @@ -214,8 +231,11 @@ struct proc_dir_entry proc_root = { > > int pid_ns_prepare_proc(struct pid_namespace *ns) > { > + struct proc_sb_info info; > struct vfsmount *mnt; > > + info.pid_ns = ns; > + info.net_ns = current->nsproxy->net_ns; > 
mnt = kern_mount_data(&proc_fs_type, ns); > if (IS_ERR(mnt)) > return PTR_ERR(mnt); > diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h > index 2b3c1d8..c22c558 100644 > --- a/include/linux/proc_fs.h > +++ b/include/linux/proc_fs.h > @@ -48,6 +48,9 @@ typedef int (read_proc_t)(char *page, char **start, off_t off, > typedef int (write_proc_t)(struct file *file, const char __user *buffer, > unsigned long count, void *data); > typedef int (get_info_t)(char *, char **, off_t, int); > +struct proc_dir_entry; > +typedef struct proc_dir_entry *(shadow_proc_t)(struct super_block *sb, > + struct proc_dir_entry *pde); > > struct proc_dir_entry { > unsigned int low_ino; > @@ -79,6 +82,7 @@ struct proc_dir_entry { > int pde_users; /* number of callers into module in progress */ > spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ > struct completion *pde_unload_completion; > + shadow_proc_t *shadow_proc; > }; > > struct kcore_list { - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/