2016-03-09 11:28:26

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH RESEND v2 16/18] fuse: Support fuse filesystems outside of init_user_ns

On Mon, Jan 04, 2016 at 12:03:55PM -0600, Seth Forshee wrote:
> In order to support mounts from namespaces other than
> init_user_ns, fuse must translate uids and gids to/from the
> userns of the process servicing requests on /dev/fuse. This
> patch does that, with a couple of restrictions on the namespace:
>
> - The userns for the fuse connection is fixed to the namespace
> from which /dev/fuse is opened.
>
> - The namespace must be the same as s_user_ns.
>
> These restrictions simplify the implementation by avoiding the
> need to pass around userns references and by allowing fuse to
> rely on the checks in inode_change_ok for ownership changes.
> Either restriction could be relaxed in the future if needed.
>
> For cuse the namespace used for the connection is also simply
> current_user_ns() at the time /dev/cuse is opened.
>
> Signed-off-by: Seth Forshee <[email protected]>
> ---
> fs/fuse/cuse.c | 3 ++-
> fs/fuse/dev.c | 13 ++++++++-----
> fs/fuse/dir.c | 14 +++++++-------
> fs/fuse/fuse_i.h | 6 +++++-
> fs/fuse/inode.c | 35 +++++++++++++++++++++++------------
> 5 files changed, 45 insertions(+), 26 deletions(-)
>
> diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
> index eae2c11268bc..a10aca57bfe4 100644
> --- a/fs/fuse/cuse.c
> +++ b/fs/fuse/cuse.c
> @@ -48,6 +48,7 @@
> #include <linux/stat.h>
> #include <linux/module.h>
> #include <linux/uio.h>
> +#include <linux/user_namespace.h>
>
> #include "fuse_i.h"
>
> @@ -498,7 +499,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
> if (!cc)
> return -ENOMEM;
>
> - fuse_conn_init(&cc->fc);
> + fuse_conn_init(&cc->fc, current_user_ns());
>
> fud = fuse_dev_alloc(&cc->fc);
> if (!fud) {
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index a4f6f30d6d86..11b4cb0a0e2f 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -127,8 +127,8 @@ static void __fuse_put_request(struct fuse_req *req)
>
> static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req)
> {
> - req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
> - req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
> + req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
> + req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
> req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
> }
>
> @@ -186,7 +186,8 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
> __set_bit(FR_WAITING, &req->flags);
> if (for_background)
> __set_bit(FR_BACKGROUND, &req->flags);
> - if (req->in.h.pid == 0) {
> + if (req->in.h.pid == 0 || req->in.h.uid == (uid_t)-1 ||
> + req->in.h.gid == (gid_t)-1) {
> fuse_put_request(fc, req);
> return ERR_PTR(-EOVERFLOW);
> }
> @@ -1248,7 +1249,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
> struct fuse_in *in;
> unsigned reqsize;
>
> - if (task_active_pid_ns(current) != fc->pid_ns)
> + if (task_active_pid_ns(current) != fc->pid_ns ||
> + current_user_ns() != fc->user_ns)
> return -EIO;
>
> restart:
> @@ -1880,7 +1882,8 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
> struct fuse_req *req;
> struct fuse_out_header oh;
>
> - if (task_active_pid_ns(current) != fc->pid_ns)
> + if (task_active_pid_ns(current) != fc->pid_ns ||
> + current_user_ns() != fc->user_ns)
> return -EIO;
>
> if (nbytes < sizeof(struct fuse_out_header))
> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> index 5e2e08712d3b..8fd9fe4dcd43 100644
> --- a/fs/fuse/dir.c
> +++ b/fs/fuse/dir.c
> @@ -841,8 +841,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
> stat->ino = attr->ino;
> stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
> stat->nlink = attr->nlink;
> - stat->uid = make_kuid(&init_user_ns, attr->uid);
> - stat->gid = make_kgid(&init_user_ns, attr->gid);
> + stat->uid = inode->i_uid;
> + stat->gid = inode->i_gid;

This breaks the attr_version logic in fuse_change_attributes().

So just use make_k[ug]id() here as well.

> stat->rdev = inode->i_rdev;
> stat->atime.tv_sec = attr->atime;
> stat->atime.tv_nsec = attr->atimensec;
> @@ -1455,17 +1455,17 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
> return true;
> }
>
> -static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
> - bool trust_local_cmtime)
> +static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
> + struct fuse_setattr_in *arg, bool trust_local_cmtime)
> {
> unsigned ivalid = iattr->ia_valid;
>
> if (ivalid & ATTR_MODE)
> arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
> if (ivalid & ATTR_UID)
> - arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
> + arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
> if (ivalid & ATTR_GID)
> - arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
> + arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
> if (ivalid & ATTR_SIZE)
> arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
> if (ivalid & ATTR_ATIME) {
> @@ -1625,7 +1625,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
>
> memset(&inarg, 0, sizeof(inarg));
> memset(&outarg, 0, sizeof(outarg));
> - iattr_to_fattr(attr, &inarg, trust_local_cmtime);
> + iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
> if (file) {
> struct fuse_file *ff = file->private_data;
> inarg.valid |= FATTR_FH;
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 143b595197b6..5897805405ba 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -23,6 +23,7 @@
> #include <linux/poll.h>
> #include <linux/workqueue.h>
> #include <linux/pid_namespace.h>
> +#include <linux/user_namespace.h>
>
> /** Max number of pages that can be used in a single read request */
> #define FUSE_MAX_PAGES_PER_REQ 32
> @@ -460,6 +461,9 @@ struct fuse_conn {
> /** The pid namespace for this mount */
> struct pid_namespace *pid_ns;
>
> + /** The user namespace for this mount */
> + struct user_namespace *user_ns;
> +
> /** The fuse mount flags for this mount */
> unsigned flags;
>
> @@ -855,7 +859,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
> /**
> * Initialize fuse_conn
> */
> -void fuse_conn_init(struct fuse_conn *fc);
> +void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns);
>
> /**
> * Release reference to fuse_conn
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 2f31874ea9db..b7bdfdac3521 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -167,8 +167,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
> inode->i_ino = fuse_squash_ino(attr->ino);
> inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
> set_nlink(inode, attr->nlink);
> - inode->i_uid = make_kuid(&init_user_ns, attr->uid);
> - inode->i_gid = make_kgid(&init_user_ns, attr->gid);
> + inode->i_uid = make_kuid(fc->user_ns, attr->uid);
> + inode->i_gid = make_kgid(fc->user_ns, attr->gid);
> inode->i_blocks = attr->blocks;
> inode->i_atime.tv_sec = attr->atime;
> inode->i_atime.tv_nsec = attr->atimensec;
> @@ -467,12 +467,15 @@ static int fuse_match_uint(substring_t *s, unsigned int *res)
> return err;
> }
>
> -static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
> +static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
> + struct user_namespace *user_ns)
> {
> char *p;
> memset(d, 0, sizeof(struct fuse_mount_data));
> d->max_read = ~0;
> d->blksize = FUSE_DEFAULT_BLKSIZE;
> + d->user_id = make_kuid(user_ns, 0);
> + d->group_id = make_kgid(user_ns, 0);

It is true that if "user_id=" or "group_id" options were omitted we used the
zero uid/gid values. However, this isn't actually used by anybody AFAIK, and
generalizing it for userns doesn't seem to make much sense.

So I suggest we that we instead return an error if mounting from a userns AND
neither "allow_other" nor both "user_id" and "group_id" are specified.


>
> while ((p = strsep(&opt, ",")) != NULL) {
> int token;
> @@ -503,7 +506,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
> case OPT_USER_ID:
> if (fuse_match_uint(&args[0], &uv))
> return 0;
> - d->user_id = make_kuid(current_user_ns(), uv);
> + d->user_id = make_kuid(user_ns, uv);
> if (!uid_valid(d->user_id))
> return 0;
> d->user_id_present = 1;
> @@ -512,7 +515,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
> case OPT_GROUP_ID:
> if (fuse_match_uint(&args[0], &uv))
> return 0;
> - d->group_id = make_kgid(current_user_ns(), uv);
> + d->group_id = make_kgid(user_ns, uv);
> if (!gid_valid(d->group_id))
> return 0;
> d->group_id_present = 1;
> @@ -555,8 +558,10 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
> struct super_block *sb = root->d_sb;
> struct fuse_conn *fc = get_fuse_conn_super(sb);
>
> - seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
> - seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
> + seq_printf(m, ",user_id=%u",
> + from_kuid_munged(fc->user_ns, fc->user_id));
> + seq_printf(m, ",group_id=%u",
> + from_kgid_munged(fc->user_ns, fc->group_id));
> if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
> seq_puts(m, ",default_permissions");
> if (fc->flags & FUSE_ALLOW_OTHER)
> @@ -587,7 +592,7 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq)
> fpq->connected = 1;
> }
>
> -void fuse_conn_init(struct fuse_conn *fc)
> +void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
> {
> memset(fc, 0, sizeof(*fc));
> spin_lock_init(&fc->lock);
> @@ -611,6 +616,7 @@ void fuse_conn_init(struct fuse_conn *fc)
> fc->attr_version = 1;
> get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
> fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
> + fc->user_ns = get_user_ns(user_ns);
> }
> EXPORT_SYMBOL_GPL(fuse_conn_init);
>
> @@ -620,6 +626,7 @@ void fuse_conn_put(struct fuse_conn *fc)
> if (fc->destroy_req)
> fuse_request_free(fc->destroy_req);
> put_pid_ns(fc->pid_ns);
> + put_user_ns(fc->user_ns);
> fc->release(fc);
> }
> }
> @@ -1046,7 +1053,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
>
> sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
>
> - if (!parse_fuse_opt(data, &d, is_bdev))
> + if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
> goto err;
>
> if (is_bdev) {
> @@ -1070,8 +1077,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
> if (!file)
> goto err;
>
> - if ((file->f_op != &fuse_dev_operations) ||
> - (file->f_cred->user_ns != &init_user_ns))
> + /*
> + * Require mount to happen from the same user namespace which
> + * opened /dev/fuse to prevent potential attacks.
> + */
> + if (file->f_op != &fuse_dev_operations ||
> + file->f_cred->user_ns != sb->s_user_ns)
> goto err_fput;
>
> fc = kmalloc(sizeof(*fc), GFP_KERNEL);
> @@ -1079,7 +1090,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
> if (!fc)
> goto err_fput;
>
> - fuse_conn_init(fc);
> + fuse_conn_init(fc, sb->s_user_ns);
> fc->release = fuse_free_conn;
>
> fud = fuse_dev_alloc(fc);
> --
> 1.9.1
>


2016-03-09 14:18:52

by Seth Forshee

[permalink] [raw]
Subject: Re: [PATCH RESEND v2 16/18] fuse: Support fuse filesystems outside of init_user_ns

On Wed, Mar 09, 2016 at 12:29:23PM +0100, Miklos Szeredi wrote:
> On Mon, Jan 04, 2016 at 12:03:55PM -0600, Seth Forshee wrote:
> > In order to support mounts from namespaces other than
> > init_user_ns, fuse must translate uids and gids to/from the
> > userns of the process servicing requests on /dev/fuse. This
> > patch does that, with a couple of restrictions on the namespace:
> >
> > - The userns for the fuse connection is fixed to the namespace
> > from which /dev/fuse is opened.
> >
> > - The namespace must be the same as s_user_ns.
> >
> > These restrictions simplify the implementation by avoiding the
> > need to pass around userns references and by allowing fuse to
> > rely on the checks in inode_change_ok for ownership changes.
> > Either restriction could be relaxed in the future if needed.
> >
> > For cuse the namespace used for the connection is also simply
> > current_user_ns() at the time /dev/cuse is opened.
> >
> > Signed-off-by: Seth Forshee <[email protected]>
> > ---
> > fs/fuse/cuse.c | 3 ++-
> > fs/fuse/dev.c | 13 ++++++++-----
> > fs/fuse/dir.c | 14 +++++++-------
> > fs/fuse/fuse_i.h | 6 +++++-
> > fs/fuse/inode.c | 35 +++++++++++++++++++++++------------
> > 5 files changed, 45 insertions(+), 26 deletions(-)
> >
> > diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
> > index eae2c11268bc..a10aca57bfe4 100644
> > --- a/fs/fuse/cuse.c
> > +++ b/fs/fuse/cuse.c
> > @@ -48,6 +48,7 @@
> > #include <linux/stat.h>
> > #include <linux/module.h>
> > #include <linux/uio.h>
> > +#include <linux/user_namespace.h>
> >
> > #include "fuse_i.h"
> >
> > @@ -498,7 +499,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
> > if (!cc)
> > return -ENOMEM;
> >
> > - fuse_conn_init(&cc->fc);
> > + fuse_conn_init(&cc->fc, current_user_ns());
> >
> > fud = fuse_dev_alloc(&cc->fc);
> > if (!fud) {
> > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> > index a4f6f30d6d86..11b4cb0a0e2f 100644
> > --- a/fs/fuse/dev.c
> > +++ b/fs/fuse/dev.c
> > @@ -127,8 +127,8 @@ static void __fuse_put_request(struct fuse_req *req)
> >
> > static void fuse_req_init_context(struct fuse_conn *fc, struct fuse_req *req)
> > {
> > - req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
> > - req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
> > + req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
> > + req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
> > req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
> > }
> >
> > @@ -186,7 +186,8 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
> > __set_bit(FR_WAITING, &req->flags);
> > if (for_background)
> > __set_bit(FR_BACKGROUND, &req->flags);
> > - if (req->in.h.pid == 0) {
> > + if (req->in.h.pid == 0 || req->in.h.uid == (uid_t)-1 ||
> > + req->in.h.gid == (gid_t)-1) {
> > fuse_put_request(fc, req);
> > return ERR_PTR(-EOVERFLOW);
> > }
> > @@ -1248,7 +1249,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
> > struct fuse_in *in;
> > unsigned reqsize;
> >
> > - if (task_active_pid_ns(current) != fc->pid_ns)
> > + if (task_active_pid_ns(current) != fc->pid_ns ||
> > + current_user_ns() != fc->user_ns)
> > return -EIO;
> >
> > restart:
> > @@ -1880,7 +1882,8 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
> > struct fuse_req *req;
> > struct fuse_out_header oh;
> >
> > - if (task_active_pid_ns(current) != fc->pid_ns)
> > + if (task_active_pid_ns(current) != fc->pid_ns ||
> > + current_user_ns() != fc->user_ns)
> > return -EIO;
> >
> > if (nbytes < sizeof(struct fuse_out_header))
> > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
> > index 5e2e08712d3b..8fd9fe4dcd43 100644
> > --- a/fs/fuse/dir.c
> > +++ b/fs/fuse/dir.c
> > @@ -841,8 +841,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
> > stat->ino = attr->ino;
> > stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
> > stat->nlink = attr->nlink;
> > - stat->uid = make_kuid(&init_user_ns, attr->uid);
> > - stat->gid = make_kgid(&init_user_ns, attr->gid);
> > + stat->uid = inode->i_uid;
> > + stat->gid = inode->i_gid;
>
> This breaks the attr_version logic in fuse_change_attributes().
>
> So just use make_k[ug]id() here as well.

Okay.

> > stat->rdev = inode->i_rdev;
> > stat->atime.tv_sec = attr->atime;
> > stat->atime.tv_nsec = attr->atimensec;
> > @@ -1455,17 +1455,17 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
> > return true;
> > }
> >
> > -static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg,
> > - bool trust_local_cmtime)
> > +static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
> > + struct fuse_setattr_in *arg, bool trust_local_cmtime)
> > {
> > unsigned ivalid = iattr->ia_valid;
> >
> > if (ivalid & ATTR_MODE)
> > arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode;
> > if (ivalid & ATTR_UID)
> > - arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid);
> > + arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
> > if (ivalid & ATTR_GID)
> > - arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid);
> > + arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
> > if (ivalid & ATTR_SIZE)
> > arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size;
> > if (ivalid & ATTR_ATIME) {
> > @@ -1625,7 +1625,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
> >
> > memset(&inarg, 0, sizeof(inarg));
> > memset(&outarg, 0, sizeof(outarg));
> > - iattr_to_fattr(attr, &inarg, trust_local_cmtime);
> > + iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
> > if (file) {
> > struct fuse_file *ff = file->private_data;
> > inarg.valid |= FATTR_FH;
> > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> > index 143b595197b6..5897805405ba 100644
> > --- a/fs/fuse/fuse_i.h
> > +++ b/fs/fuse/fuse_i.h
> > @@ -23,6 +23,7 @@
> > #include <linux/poll.h>
> > #include <linux/workqueue.h>
> > #include <linux/pid_namespace.h>
> > +#include <linux/user_namespace.h>
> >
> > /** Max number of pages that can be used in a single read request */
> > #define FUSE_MAX_PAGES_PER_REQ 32
> > @@ -460,6 +461,9 @@ struct fuse_conn {
> > /** The pid namespace for this mount */
> > struct pid_namespace *pid_ns;
> >
> > + /** The user namespace for this mount */
> > + struct user_namespace *user_ns;
> > +
> > /** The fuse mount flags for this mount */
> > unsigned flags;
> >
> > @@ -855,7 +859,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
> > /**
> > * Initialize fuse_conn
> > */
> > -void fuse_conn_init(struct fuse_conn *fc);
> > +void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns);
> >
> > /**
> > * Release reference to fuse_conn
> > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> > index 2f31874ea9db..b7bdfdac3521 100644
> > --- a/fs/fuse/inode.c
> > +++ b/fs/fuse/inode.c
> > @@ -167,8 +167,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
> > inode->i_ino = fuse_squash_ino(attr->ino);
> > inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
> > set_nlink(inode, attr->nlink);
> > - inode->i_uid = make_kuid(&init_user_ns, attr->uid);
> > - inode->i_gid = make_kgid(&init_user_ns, attr->gid);
> > + inode->i_uid = make_kuid(fc->user_ns, attr->uid);
> > + inode->i_gid = make_kgid(fc->user_ns, attr->gid);
> > inode->i_blocks = attr->blocks;
> > inode->i_atime.tv_sec = attr->atime;
> > inode->i_atime.tv_nsec = attr->atimensec;
> > @@ -467,12 +467,15 @@ static int fuse_match_uint(substring_t *s, unsigned int *res)
> > return err;
> > }
> >
> > -static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
> > +static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
> > + struct user_namespace *user_ns)
> > {
> > char *p;
> > memset(d, 0, sizeof(struct fuse_mount_data));
> > d->max_read = ~0;
> > d->blksize = FUSE_DEFAULT_BLKSIZE;
> > + d->user_id = make_kuid(user_ns, 0);
> > + d->group_id = make_kgid(user_ns, 0);
>
> It is true that if "user_id=" or "group_id" options were omitted we used the
> zero uid/gid values. However, this isn't actually used by anybody AFAIK, and
> generalizing it for userns doesn't seem to make much sense.
>
> So I suggest we that we instead return an error if mounting from a userns AND
> neither "allow_other" nor both "user_id" and "group_id" are specified.

But those are also used for ownership of the connection files in
fusectl. In an allow_other mount shouldn't those files by owned by
namespace root and not global root?

> > while ((p = strsep(&opt, ",")) != NULL) {
> > int token;
> > @@ -503,7 +506,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
> > case OPT_USER_ID:
> > if (fuse_match_uint(&args[0], &uv))
> > return 0;
> > - d->user_id = make_kuid(current_user_ns(), uv);
> > + d->user_id = make_kuid(user_ns, uv);
> > if (!uid_valid(d->user_id))
> > return 0;
> > d->user_id_present = 1;
> > @@ -512,7 +515,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
> > case OPT_GROUP_ID:
> > if (fuse_match_uint(&args[0], &uv))
> > return 0;
> > - d->group_id = make_kgid(current_user_ns(), uv);
> > + d->group_id = make_kgid(user_ns, uv);
> > if (!gid_valid(d->group_id))
> > return 0;
> > d->group_id_present = 1;
> > @@ -555,8 +558,10 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
> > struct super_block *sb = root->d_sb;
> > struct fuse_conn *fc = get_fuse_conn_super(sb);
> >
> > - seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
> > - seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
> > + seq_printf(m, ",user_id=%u",
> > + from_kuid_munged(fc->user_ns, fc->user_id));
> > + seq_printf(m, ",group_id=%u",
> > + from_kgid_munged(fc->user_ns, fc->group_id));
> > if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
> > seq_puts(m, ",default_permissions");
> > if (fc->flags & FUSE_ALLOW_OTHER)
> > @@ -587,7 +592,7 @@ static void fuse_pqueue_init(struct fuse_pqueue *fpq)
> > fpq->connected = 1;
> > }
> >
> > -void fuse_conn_init(struct fuse_conn *fc)
> > +void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
> > {
> > memset(fc, 0, sizeof(*fc));
> > spin_lock_init(&fc->lock);
> > @@ -611,6 +616,7 @@ void fuse_conn_init(struct fuse_conn *fc)
> > fc->attr_version = 1;
> > get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
> > fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
> > + fc->user_ns = get_user_ns(user_ns);
> > }
> > EXPORT_SYMBOL_GPL(fuse_conn_init);
> >
> > @@ -620,6 +626,7 @@ void fuse_conn_put(struct fuse_conn *fc)
> > if (fc->destroy_req)
> > fuse_request_free(fc->destroy_req);
> > put_pid_ns(fc->pid_ns);
> > + put_user_ns(fc->user_ns);
> > fc->release(fc);
> > }
> > }
> > @@ -1046,7 +1053,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
> >
> > sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION);
> >
> > - if (!parse_fuse_opt(data, &d, is_bdev))
> > + if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
> > goto err;
> >
> > if (is_bdev) {
> > @@ -1070,8 +1077,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
> > if (!file)
> > goto err;
> >
> > - if ((file->f_op != &fuse_dev_operations) ||
> > - (file->f_cred->user_ns != &init_user_ns))
> > + /*
> > + * Require mount to happen from the same user namespace which
> > + * opened /dev/fuse to prevent potential attacks.
> > + */
> > + if (file->f_op != &fuse_dev_operations ||
> > + file->f_cred->user_ns != sb->s_user_ns)
> > goto err_fput;
> >
> > fc = kmalloc(sizeof(*fc), GFP_KERNEL);
> > @@ -1079,7 +1090,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
> > if (!fc)
> > goto err_fput;
> >
> > - fuse_conn_init(fc);
> > + fuse_conn_init(fc, sb->s_user_ns);
> > fc->release = fuse_free_conn;
> >
> > fud = fuse_dev_alloc(fc);
> > --
> > 1.9.1
> >

2016-03-09 14:49:39

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH RESEND v2 16/18] fuse: Support fuse filesystems outside of init_user_ns

On Wed, Mar 9, 2016 at 3:18 PM, Seth Forshee <[email protected]> wrote:
> On Wed, Mar 09, 2016 at 12:29:23PM +0100, Miklos Szeredi wrote:
>> On Mon, Jan 04, 2016 at 12:03:55PM -0600, Seth Forshee wrote:

>> > -static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
>> > +static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
>> > + struct user_namespace *user_ns)
>> > {
>> > char *p;
>> > memset(d, 0, sizeof(struct fuse_mount_data));
>> > d->max_read = ~0;
>> > d->blksize = FUSE_DEFAULT_BLKSIZE;
>> > + d->user_id = make_kuid(user_ns, 0);
>> > + d->group_id = make_kgid(user_ns, 0);
>>
>> It is true that if "user_id=" or "group_id" options were omitted we used the
>> zero uid/gid values. However, this isn't actually used by anybody AFAIK, and
>> generalizing it for userns doesn't seem to make much sense.
>>
>> So I suggest we that we instead return an error if mounting from a userns AND
>> neither "allow_other" nor both "user_id" and "group_id" are specified.
>
> But those are also used for ownership of the connection files in
> fusectl. In an allow_other mount shouldn't those files by owned by
> namespace root and not global root?

Yes.

Can't we use current_cred()->uid/gid? Or fsuid/fsgid maybe?

When we have true unprivileged mounts, the user_id/group_id options
become redundant anyway and we can just use the current credentials.

Thanks,
Miklos

2016-03-09 15:25:19

by Seth Forshee

[permalink] [raw]
Subject: Re: [PATCH RESEND v2 16/18] fuse: Support fuse filesystems outside of init_user_ns

On Wed, Mar 09, 2016 at 03:48:22PM +0100, Miklos Szeredi wrote:
> On Wed, Mar 9, 2016 at 3:18 PM, Seth Forshee <[email protected]> wrote:
> > On Wed, Mar 09, 2016 at 12:29:23PM +0100, Miklos Szeredi wrote:
> >> On Mon, Jan 04, 2016 at 12:03:55PM -0600, Seth Forshee wrote:
>
> >> > -static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
> >> > +static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
> >> > + struct user_namespace *user_ns)
> >> > {
> >> > char *p;
> >> > memset(d, 0, sizeof(struct fuse_mount_data));
> >> > d->max_read = ~0;
> >> > d->blksize = FUSE_DEFAULT_BLKSIZE;
> >> > + d->user_id = make_kuid(user_ns, 0);
> >> > + d->group_id = make_kgid(user_ns, 0);
> >>
> >> It is true that if "user_id=" or "group_id" options were omitted we used the
> >> zero uid/gid values. However, this isn't actually used by anybody AFAIK, and
> >> generalizing it for userns doesn't seem to make much sense.
> >>
> >> So I suggest we that we instead return an error if mounting from a userns AND
> >> neither "allow_other" nor both "user_id" and "group_id" are specified.
> >
> > But those are also used for ownership of the connection files in
> > fusectl. In an allow_other mount shouldn't those files by owned by
> > namespace root and not global root?
>
> Yes.
>
> Can't we use current_cred()->uid/gid? Or fsuid/fsgid maybe?

That would be a departure from the current behavior in the !allow_other
case for unprivileged users. Since those mounts are done by an suid
helper all of those ids would be root in the userns, wouldn't they?

> When we have true unprivileged mounts, the user_id/group_id options
> become redundant anyway and we can just use the current credentials.

True, but we don't yet have that.

Thanks,
Seth

2016-03-09 15:51:50

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH RESEND v2 16/18] fuse: Support fuse filesystems outside of init_user_ns

On Wed, Mar 9, 2016 at 4:25 PM, Seth Forshee <[email protected]> wrote:
> On Wed, Mar 09, 2016 at 03:48:22PM +0100, Miklos Szeredi wrote:

>> Can't we use current_cred()->uid/gid? Or fsuid/fsgid maybe?
>
> That would be a departure from the current behavior in the !allow_other
> case for unprivileged users. Since those mounts are done by an suid
> helper all of those ids would be root in the userns, wouldn't they?

Well, actually this is what the helper does:

sprintf(d, "fd=%i,rootmode=%o,user_id=%u,group_id=%u",
fd, rootmode, getuid(), getgid());

So it just uses the current uid/gid. Apparently no reason to do this
in userland, we could just as well set these in the kernel. Except
for possible backward compatibility problems for things not using the
helper.

BUT if the mount is unprivileged or it's a userns mount, or anything
previously not possible, then we are not constrained by the backward
compatibility issues, and can go with the saner solution.

Does that not make sense?

>> When we have true unprivileged mounts, the user_id/group_id options
>> become redundant anyway and we can just use the current credentials.
>
> True, but we don't yet have that.

What's missing?

Thanks,
Miklos

2016-03-09 17:07:54

by Seth Forshee

[permalink] [raw]
Subject: Re: [PATCH RESEND v2 16/18] fuse: Support fuse filesystems outside of init_user_ns

On Wed, Mar 09, 2016 at 04:51:42PM +0100, Miklos Szeredi wrote:
> On Wed, Mar 9, 2016 at 4:25 PM, Seth Forshee <[email protected]> wrote:
> > On Wed, Mar 09, 2016 at 03:48:22PM +0100, Miklos Szeredi wrote:
>
> >> Can't we use current_cred()->uid/gid? Or fsuid/fsgid maybe?
> >
> > That would be a departure from the current behavior in the !allow_other
> > case for unprivileged users. Since those mounts are done by an suid
> > helper all of those ids would be root in the userns, wouldn't they?
>
> Well, actually this is what the helper does:
>
> sprintf(d, "fd=%i,rootmode=%o,user_id=%u,group_id=%u",
> fd, rootmode, getuid(), getgid());

Sorry, I was thinking of euid. So this may not be a problem.

> So it just uses the current uid/gid. Apparently no reason to do this
> in userland, we could just as well set these in the kernel. Except
> for possible backward compatibility problems for things not using the
> helper.
>
> BUT if the mount is unprivileged or it's a userns mount, or anything
> previously not possible, then we are not constrained by the backward
> compatibility issues, and can go with the saner solution.
>
> Does that not make sense?

But we generally do want backwards compatibility, and we want userspace
software to be able to expect the same behavior whether or not it's
running in a user namespaced container. Obviously we can't always have
things 100% identical, but we shouldn't break things unless we really
need to.

However it may be that this isn't actually going to break assumptions of
existing software like I had feared. My preference is still to not
change any userspace-visible behaviors since we never know what software
might have made assumptions based on those behaviors. But if you're
confident that it won't break anything I'm willing to give it a try.

> >> When we have true unprivileged mounts, the user_id/group_id options
> >> become redundant anyway and we can just use the current credentials.
> >
> > True, but we don't yet have that.
>
> What's missing?

A user must still be privileged to mount, even if only towards their own
user and mount namespaces. Maybe that's what you meant though and I just
misunderstood.

Thanks,
Seth

2016-03-14 20:58:48

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH RESEND v2 16/18] fuse: Support fuse filesystems outside of init_user_ns

On Wed, Mar 9, 2016 at 6:07 PM, Seth Forshee <[email protected]> wrote:
> On Wed, Mar 09, 2016 at 04:51:42PM +0100, Miklos Szeredi wrote:
>> On Wed, Mar 9, 2016 at 4:25 PM, Seth Forshee <[email protected]> wrote:
>> > On Wed, Mar 09, 2016 at 03:48:22PM +0100, Miklos Szeredi wrote:
>>
>> >> Can't we use current_cred()->uid/gid? Or fsuid/fsgid maybe?
>> >
>> > That would be a departure from the current behavior in the !allow_other
>> > case for unprivileged users. Since those mounts are done by an suid
>> > helper all of those ids would be root in the userns, wouldn't they?
>>
>> Well, actually this is what the helper does:
>>
>> sprintf(d, "fd=%i,rootmode=%o,user_id=%u,group_id=%u",
>> fd, rootmode, getuid(), getgid());
>
> Sorry, I was thinking of euid. So this may not be a problem.
>
>> So it just uses the current uid/gid. Apparently no reason to do this
>> in userland, we could just as well set these in the kernel. Except
>> for possible backward compatibility problems for things not using the
>> helper.
>>
>> BUT if the mount is unprivileged or it's a userns mount, or anything
>> previously not possible, then we are not constrained by the backward
>> compatibility issues, and can go with the saner solution.
>>
>> Does that not make sense?
>
> But we generally do want backwards compatibility, and we want userspace
> software to be able to expect the same behavior whether or not it's
> running in a user namespaced container. Obviously we can't always have
> things 100% identical, but we shouldn't break things unless we really
> need to.
>
> However it may be that this isn't actually going to break assumptions of
> existing software like I had feared. My preference is still to not
> change any userspace-visible behaviors since we never know what software
> might have made assumptions based on those behaviors. But if you're
> confident that it won't break anything I'm willing to give it a try.

I'm quite confident it won't make a difference.

Thanks,
Miklos

2016-03-25 20:31:55

by Seth Forshee

[permalink] [raw]
Subject: Re: [PATCH RESEND v2 16/18] fuse: Support fuse filesystems outside of init_user_ns

On Mon, Mar 14, 2016 at 09:58:43PM +0100, Miklos Szeredi wrote:
> On Wed, Mar 9, 2016 at 6:07 PM, Seth Forshee <[email protected]> wrote:
> > On Wed, Mar 09, 2016 at 04:51:42PM +0100, Miklos Szeredi wrote:
> >> On Wed, Mar 9, 2016 at 4:25 PM, Seth Forshee <[email protected]> wrote:
> >> > On Wed, Mar 09, 2016 at 03:48:22PM +0100, Miklos Szeredi wrote:
> >>
> >> >> Can't we use current_cred()->uid/gid? Or fsuid/fsgid maybe?
> >> >
> >> > That would be a departure from the current behavior in the !allow_other
> >> > case for unprivileged users. Since those mounts are done by an suid
> >> > helper all of those ids would be root in the userns, wouldn't they?
> >>
> >> Well, actually this is what the helper does:
> >>
> >> sprintf(d, "fd=%i,rootmode=%o,user_id=%u,group_id=%u",
> >> fd, rootmode, getuid(), getgid());
> >
> > Sorry, I was thinking of euid. So this may not be a problem.
> >
> >> So it just uses the current uid/gid. Apparently no reason to do this
> >> in userland, we could just as well set these in the kernel. Except
> >> for possible backward compatibility problems for things not using the
> >> helper.
> >>
> >> BUT if the mount is unprivileged or it's a userns mount, or anything
> >> previously not possible, then we are not constrained by the backward
> >> compatibility issues, and can go with the saner solution.
> >>
> >> Does that not make sense?
> >
> > But we generally do want backwards compatibility, and we want userspace
> > software to be able to expect the same behavior whether or not it's
> > running in a user namespaced container. Obviously we can't always have
> > things 100% identical, but we shouldn't break things unless we really
> > need to.
> >
> > However it may be that this isn't actually going to break assumptions of
> > existing software like I had feared. My preference is still to not
> > change any userspace-visible behaviors since we never know what software
> > might have made assumptions based on those behaviors. But if you're
> > confident that it won't break anything I'm willing to give it a try.
>
> I'm quite confident it won't make a difference.

I was just about to go make these changes and discovered that the
user_id and group_id options are already mandatory, due to this check at
the bottom of parse_fuse_opt():

if (!d->fd_present || !d->rootmode_present ||
!d->user_id_present || !d->group_id_present)
return 0;

So I'll simply drop those two lines which supply default values for
these options.

Thanks,
Seth