Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756307AbYFBTZY (ORCPT ); Mon, 2 Jun 2008 15:25:24 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1752853AbYFBTZJ (ORCPT ); Mon, 2 Jun 2008 15:25:09 -0400 Received: from e34.co.us.ibm.com ([32.97.110.152]:53636 "EHLO e34.co.us.ibm.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752453AbYFBTZG (ORCPT ); Mon, 2 Jun 2008 15:25:06 -0400 Date: Mon, 2 Jun 2008 14:24:59 -0500 From: "Serge E. Hallyn" To: Benjamin Thery Cc: Andrew Morton , Greg Kroah-Hartman , Eric Biederman , Serge Hallyn , linux-kernel@vger.kernel.org, Tejun Heo , Al Viro , Daniel Lezcano Subject: Re: [PATCH 10/10] sysfs: user namespaces: fix bug with clone(CLONE_NEWUSER) with fairsched Message-ID: <20080602192459.GA18509@us.ibm.com> References: <20080602134438.224352910@theryb.frec.bull.fr> <20080602134439.953880460@theryb.frec.bull.fr> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <20080602134439.953880460@theryb.frec.bull.fr> User-Agent: Mutt/1.5.17+20080114 (2008-01-14) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7257 Lines: 234 Quoting Benjamin Thery (benjamin.thery@bull.net): > Mark the /sys/kernel/uids directory to be tagged so that processes in > different user namespaces can remount /sys and see their own uid > listings. > > Without this patch, having CONFIG_FAIR_SCHED=y makes user namespaces > unusable, because when you > clone(CLONE_NEWUSER) > it will auto-create the root userid and try to create > /sys/kernel/uids/0. Since that already exists from the parent user > namespace, the create fails, and the clone misleadingly ends up > returning -ENOMEM. > > This patch fixes the issue by allowing each user namespace to remount > /sys, and having /sys filter the /sys/kernel/uid/ entries by user > namespace. 
> > Signed-off-by: Serge Hallyn > Signed-off-by: Benjamin Thery Thanks for picking this up, Benjamin. Eric, please look this one over. I think I removed everything that shouldn't be here from the last version, and at this point only do what I need to to access the user_ns where we need it, and tag the appropriate /sys/ files. thanks, -serge > --- > fs/sysfs/mount.c | 24 ++++++++++++++++++++++++ > include/linux/sched.h | 1 + > include/linux/sysfs.h | 9 +++++++++ > include/linux/user_namespace.h | 1 + > kernel/user.c | 21 +++++++++++++++++++++ > kernel/user_namespace.c | 3 ++- > 6 files changed, 58 insertions(+), 1 deletion(-) > > Index: linux-mm/fs/sysfs/mount.c > =================================================================== > --- linux-mm.orig/fs/sysfs/mount.c > +++ linux-mm/fs/sysfs/mount.c > @@ -81,6 +81,7 @@ static int sysfs_fill_super(struct super > sb->s_root = root; > sb->s_fs_info = info; > info->tag.net_ns = hold_net(current->nsproxy->net_ns); > + info->tag.user_ns = current->nsproxy->user_ns; > return 0; > > out_err: > @@ -100,6 +101,8 @@ static int sysfs_test_super(struct super > > if (task->nsproxy->net_ns != info->tag.net_ns) > found = 0; > + if (task->nsproxy->user_ns != info->tag.user_ns) > + found = 0; > > return found; > } > @@ -214,6 +217,27 @@ static struct pernet_operations sysfs_ne > }; > #endif > > +#ifdef CONFIG_USER_NS > +void sysfs_userns_exit(struct user_namespace *user_ns) > +{ > + /* Allow the user namespace to go away while sysfs is still mounted. 
*/ > + struct super_block *sb; > + printk(KERN_NOTICE "sysfs: user namespace exiting\n"); > + mutex_lock(&sysfs_rename_mutex); > + sysfs_grab_supers(); > + mutex_lock(&sysfs_mutex); > + list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) { > + struct sysfs_super_info *info = sysfs_info(sb); > + if (info->tag.user_ns != user_ns) > + continue; > + info->tag.user_ns = NULL; > + } > + mutex_unlock(&sysfs_mutex); > + sysfs_release_supers(); > + mutex_unlock(&sysfs_rename_mutex); > +} > +#endif > + > int __init sysfs_init(void) > { > int err = -ENOMEM; > Index: linux-mm/include/linux/sched.h > =================================================================== > --- linux-mm.orig/include/linux/sched.h > +++ linux-mm/include/linux/sched.h > @@ -600,6 +600,7 @@ struct user_struct { > /* Hash table maintenance information */ > struct hlist_node uidhash_node; > uid_t uid; > + struct user_namespace *user_ns; > > #ifdef CONFIG_USER_SCHED > struct task_group *tg; > Index: linux-mm/include/linux/sysfs.h > =================================================================== > --- linux-mm.orig/include/linux/sysfs.h > +++ linux-mm/include/linux/sysfs.h > @@ -20,6 +20,7 @@ > struct kobject; > struct module; > struct net; > +struct user_namespace; > > /* FIXME > * The *owner field is no longer used, but leave around > @@ -81,6 +82,7 @@ struct sysfs_ops { > > struct sysfs_tag_info { > struct net *net_ns; > + struct user_namespace *user_ns; > }; > > struct sysfs_tagged_dir_operations { > @@ -138,6 +140,9 @@ int sysfs_enable_tagging(struct kobject > > extern int __must_check sysfs_init(void); > > +struct user_namespace; > +void sysfs_userns_exit(struct user_namespace *user_ns); > + > #else /* CONFIG_SYSFS */ > > static inline int sysfs_schedule_callback(struct kobject *kobj, > @@ -254,6 +259,10 @@ static inline int __must_check sysfs_ini > return 0; > } > > +static inline void sysfs_userns_exit(struct user_namespace *user_ns) > +{ > +} > + > static inline void 
sysfs_printk_last_file(void) > { > } > Index: linux-mm/include/linux/user_namespace.h > =================================================================== > --- linux-mm.orig/include/linux/user_namespace.h > +++ linux-mm/include/linux/user_namespace.h > @@ -12,6 +12,7 @@ > struct user_namespace { > struct kref kref; > struct hlist_head uidhash_table[UIDHASH_SZ]; > + struct kset *kset; > struct user_struct *root_user; > }; > > Index: linux-mm/kernel/user.c > =================================================================== > --- linux-mm.orig/kernel/user.c > +++ linux-mm/kernel/user.c > @@ -53,6 +53,7 @@ struct user_struct root_user = { > .files = ATOMIC_INIT(0), > .sigpending = ATOMIC_INIT(0), > .locked_shm = 0, > + .user_ns = &init_user_ns, > #ifdef CONFIG_USER_SCHED > .tg = &init_task_group, > #endif > @@ -236,6 +237,23 @@ static void uids_release(struct kobject > return; > } > > +static const void *userns_sb_tag(struct sysfs_tag_info *info) > +{ > + return info->user_ns; > +} > + > +static const void *userns_kobject_tag(struct kobject *kobj) > +{ > + struct user_struct *up; > + up = container_of(kobj, struct user_struct, kobj); > + return up->user_ns; > +} > + > +static struct sysfs_tagged_dir_operations userns_tagged_dir_operations = { > + .sb_tag = userns_sb_tag, > + .kobject_tag = userns_kobject_tag, > +}; > + > static struct kobj_type uids_ktype = { > .sysfs_ops = &kobj_sysfs_ops, > .default_attrs = uids_attributes, > @@ -272,6 +290,8 @@ int __init uids_sysfs_init(void) > if (!uids_kset) > return -ENOMEM; > > + sysfs_enable_tagging(&uids_kset->kobj, &userns_tagged_dir_operations); > + > return uids_user_create(&root_user); > } > > @@ -404,6 +424,7 @@ struct user_struct *alloc_uid(struct use > goto out_unlock; > > new->uid = uid; > + new->user_ns = ns; > atomic_set(&new->__count, 1); > > if (sched_create_user(new) < 0) > Index: linux-mm/kernel/user_namespace.c > =================================================================== > --- 
linux-mm.orig/kernel/user_namespace.c > +++ linux-mm/kernel/user_namespace.c > @@ -22,7 +22,7 @@ static struct user_namespace *clone_user > struct user_struct *new_user; > int n; > > - ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); > + ns = kzalloc(sizeof(struct user_namespace), GFP_KERNEL); > if (!ns) > return ERR_PTR(-ENOMEM); > > @@ -71,6 +71,7 @@ void free_user_ns(struct kref *kref) > struct user_namespace *ns; > > ns = container_of(kref, struct user_namespace, kref); > + sysfs_userns_exit(ns); > release_uids(ns); > kfree(ns); > } > > -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/