2019-03-19 16:24:22

by David Howells

[permalink] [raw]
Subject: [RFC PATCH 0/4] fuse: Convert to fs_context


Hi Miklós,

Here's a set of patches that convert fuse to use mount API:

(1) Provide a replacement for mount_bdev() that takes an fs_context to
specify the parameters.

I also put a block device pointer and block device file mode into the
fs_context struct for use in the sget_fc() test and set functions.

(2) Improve handling of fd-type parameters.

(3) Convert fuse to implement the mount API interface.

(4) Move as much of the subtype parameter handling into the fuse driver as
possible.

These are on top of:

http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/log/?h=mount-api-viro

Thanks,
David
---
David Howells (4):
vfs: Create fs_context-aware mount_bdev() replacement
vfs: Make fs_parse() handle fs_param_is_fd-type params better
fuse: Convert to mount API
fuse: Move the subtype parameter into fuse


fs/fs_context.c | 14 --
fs/fs_parser.c | 15 ++
fs/fuse/inode.c | 289 +++++++++++++++++++++++++-------------------
fs/super.c | 111 ++++++++++++++++-
include/linux/fs_context.h | 7 +
5 files changed, 291 insertions(+), 145 deletions(-)



2019-03-19 16:24:34

by David Howells

[permalink] [raw]
Subject: [RFC PATCH 1/4] vfs: Create fs_context-aware mount_bdev() replacement

Create a function, vfs_get_block_super(), that is fs_context-aware and a
replacement for mount_bdev(). It caches the block device pointer and file
open mode in the fs_context struct so that this information can be passed
into sget_fc()'s test and set functions.

Signed-off-by: David Howells <[email protected]>
---

fs/fs_context.c | 2 +
fs/super.c | 106 ++++++++++++++++++++++++++++++++++++++++++++
include/linux/fs_context.h | 6 ++
3 files changed, 114 insertions(+)

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 87e3546b9a52..ea027762c0b2 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -425,6 +425,8 @@ void put_fs_context(struct fs_context *fc)

if (fc->need_free && fc->ops && fc->ops->free)
fc->ops->free(fc);
+ if (fc->bdev)
+ blkdev_put(fc->bdev, fc->bdev_mode);

security_free_mnt_opts(&fc->security);
put_net(fc->net_ns);
diff --git a/fs/super.c b/fs/super.c
index f27ee08fb26f..85851adb0f19 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1211,6 +1211,112 @@ int vfs_get_super(struct fs_context *fc,
EXPORT_SYMBOL(vfs_get_super);

#ifdef CONFIG_BLOCK
+static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
+{
+ s->s_bdev = fc->bdev;
+ s->s_dev = s->s_bdev->bd_dev;
+ s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
+ fc->bdev = NULL;
+ return 0;
+}
+
+static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
+{
+ return s->s_bdev == fc->bdev;
+}
+
+/**
+ * vfs_get_block_super - Get a superblock based on a single block device
+ * @fc: The filesystem context holding the parameters
+ * @keying: How to distinguish superblocks
+ * @fill_super: Helper to initialise a new superblock
+ */
+int vfs_get_block_super(struct fs_context *fc,
+ int (*fill_super)(struct super_block *,
+ struct fs_context *))
+{
+ struct block_device *bdev;
+ struct super_block *s;
+ int error = 0;
+
+ fc->bdev_mode = FMODE_READ | FMODE_EXCL;
+ if (!(fc->sb_flags & SB_RDONLY))
+ fc->bdev_mode |= FMODE_WRITE;
+
+ if (!fc->source)
+ return invalf(fc, "No source specified");
+
+ bdev = blkdev_get_by_path(fc->source, fc->bdev_mode, fc->fs_type);
+ if (IS_ERR(bdev)) {
+ errorf(fc, "%s: Can't open blockdev", fc->source);
+ return PTR_ERR(bdev);
+ }
+
+ /* Once the superblock is inserted into the list by sget_fc(), s_umount
+ * will protect the lockfs code from trying to start a snapshot while
+ * we are mounting
+ */
+ mutex_lock(&bdev->bd_fsfreeze_mutex);
+ if (bdev->bd_fsfreeze_count > 0) {
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
+ error = -EBUSY;
+ goto error_bdev;
+ }
+
+ fc->bdev = bdev;
+ fc->sb_flags |= SB_NOSEC;
+ s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ if (IS_ERR(s)) {
+ error = PTR_ERR(s);
+ goto error_bdev;
+ }
+
+ if (s->s_root) {
+ /* Don't summarily change the RO/RW state. */
+ if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
+ warnf(fc, "%pg: Can't mount, would change RO state", bdev);
+ error = -EBUSY;
+ goto error_sb;
+ }
+
+ /* s_umount nests inside bd_mutex during __invalidate_device().
+ * blkdev_put() acquires bd_mutex and can't be called under
+ * s_umount. Drop s_umount temporarily. This is safe as we're
+ * holding an active reference.
+ */
+ up_write(&s->s_umount);
+ blkdev_put(bdev, fc->bdev_mode);
+ down_write(&s->s_umount);
+ } else {
+ s->s_mode = fc->bdev_mode;
+ snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
+ sb_set_blocksize(s, block_size(bdev));
+ error = fill_super(s, fc);
+ if (error)
+ goto error_sb;
+
+ s->s_flags |= SB_ACTIVE;
+ bdev->bd_super = s;
+ }
+
+ BUG_ON(fc->root);
+ fc->root = dget(s->s_root);
+ return 0;
+
+error_sb:
+ deactivate_locked_super(s);
+error_bdev:
+ if (fc->bdev) {
+ blkdev_put(fc->bdev, fc->bdev_mode);
+ fc->bdev = NULL;
+ }
+ return error;
+}
+EXPORT_SYMBOL(vfs_get_block_super);
+
+
static int set_bdev_super(struct super_block *s, void *data)
{
s->s_bdev = data;
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index a779022d06f5..cb49b92f02af 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -76,6 +76,7 @@ struct fs_context {
const struct fs_context_operations *ops;
struct file_system_type *fs_type;
void *fs_private; /* The filesystem's context */
+ struct block_device *bdev; /* The backing blockdev (if applicable) */
struct dentry *root; /* The root and superblock */
struct user_namespace *user_ns; /* The user namespace for this mount */
struct net *net_ns; /* The network namespace for this mount */
@@ -84,6 +85,7 @@ struct fs_context {
const char *subtype; /* The subtype to set on the superblock */
void *security; /* Linux S&M options */
void *s_fs_info; /* Proposed s_fs_info */
+ fmode_t bdev_mode; /* File open mode for bdev */
unsigned int sb_flags; /* Proposed superblock flags (SB_*) */
unsigned int sb_flags_mask; /* Superblock flags that were changed */
unsigned int s_iflags; /* OR'd with sb->s_iflags */
@@ -141,6 +143,10 @@ extern int vfs_get_super(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));

+extern int vfs_get_block_super(struct fs_context *fc,
+ int (*fill_super)(struct super_block *sb,
+ struct fs_context *fc));
+
extern const struct file_operations fscontext_fops;

#ifdef CONFIG_PRINTK


2019-03-19 16:24:50

by David Howells

[permalink] [raw]
Subject: [RFC PATCH 2/4] vfs: Make fs_parse() handle fs_param_is_fd-type params better

Make fs_parse() handle fs_param_is_fd-type parameters that are passed a
string by converting it to an integer (in addition to handling direct fd
specification).

Also range check the integer.

Signed-off-by: David Howells <[email protected]>
---

fs/fs_parser.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index be86703ec371..bd59e1725c50 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -208,9 +208,20 @@ int fs_parse(struct fs_context *fc,
goto okay;

case fs_param_is_fd: {
- if (param->type != fs_value_is_file)
+ switch (param->type) {
+ case fs_value_is_string:
+ ret = kstrtouint(param->string, 0, &result->uint_32);
+ break;
+ case fs_value_is_file:
+ result->uint_32 = param->dirfd;
+ ret = 0;
+ default:
goto bad_value;
- goto okay;
+ }
+
+ if (result->uint_32 > INT_MAX)
+ goto bad_value;
+ goto maybe_okay;
}

case fs_param_is_blockdev:


2019-03-19 16:25:02

by David Howells

[permalink] [raw]
Subject: [RFC PATCH 3/4] fuse: Convert to mount API


---

fs/fuse/inode.c | 272 ++++++++++++++++++++++++++++++-------------------------
1 file changed, 147 insertions(+), 125 deletions(-)

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ec5d9953dfb6..3aebff5b902a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -15,7 +15,8 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
-#include <linux/parser.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
@@ -59,7 +60,12 @@ MODULE_PARM_DESC(max_user_congthresh,
/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

-struct fuse_mount_data {
+#ifdef CONFIG_BLOCK
+static struct file_system_type fuseblk_fs_type;
+#endif
+
+struct fuse_fs_context {
+ bool is_bdev;
int fd;
unsigned rootmode;
kuid_t user_id;
@@ -448,6 +454,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
}

enum {
+ OPT_SOURCE,
OPT_FD,
OPT_ROOTMODE,
OPT_USER_ID,
@@ -459,111 +466,97 @@ enum {
OPT_ERR
};

-static const match_table_t tokens = {
- {OPT_FD, "fd=%u"},
- {OPT_ROOTMODE, "rootmode=%o"},
- {OPT_USER_ID, "user_id=%u"},
- {OPT_GROUP_ID, "group_id=%u"},
- {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
- {OPT_ALLOW_OTHER, "allow_other"},
- {OPT_MAX_READ, "max_read=%u"},
- {OPT_BLKSIZE, "blksize=%u"},
- {OPT_ERR, NULL}
+static const struct fs_parameter_spec fuse_param_specs[] = {
+ fsparam_string ("source", OPT_SOURCE),
+ fsparam_fd ("fd", OPT_FD),
+ fsparam_u32oct ("rootmode", OPT_ROOTMODE),
+ fsparam_u32 ("user_id", OPT_USER_ID),
+ fsparam_u32 ("group_id", OPT_GROUP_ID),
+ fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS),
+ fsparam_flag ("allow_other", OPT_ALLOW_OTHER),
+ fsparam_u32 ("max_read", OPT_MAX_READ),
+ fsparam_u32 ("blksize", OPT_BLKSIZE),
+ {}
};

-static int fuse_match_uint(substring_t *s, unsigned int *res)
+static const struct fs_parameter_description fuse_fs_parameters = {
+ .name = "fuse",
+ .specs = fuse_param_specs,
+};
+
+static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- int err = -ENOMEM;
- char *buf = match_strdup(s);
- if (buf) {
- err = kstrtouint(buf, 10, res);
- kfree(buf);
+ struct fs_parse_result result;
+ struct fuse_fs_context *ctx = fc->fs_private;
+ int opt;
+
+ opt = fs_parse(fc, &fuse_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case OPT_SOURCE:
+ kfree(fc->source);
+ fc->source = param->string;
+ param->string = NULL;
+ break;
+
+ case OPT_FD:
+ ctx->fd = result.uint_32;
+ ctx->fd_present = 1;
+ break;
+
+ case OPT_ROOTMODE:
+ if (!fuse_valid_type(result.uint_32))
+ return invalf(fc, "fuse: Invalid rootmode");
+ ctx->rootmode = result.uint_32;
+ ctx->rootmode_present = 1;
+ break;
+
+ case OPT_USER_ID:
+ ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
+ if (!uid_valid(ctx->user_id))
+ return invalf(fc, "fuse: Invalid user_id");
+ ctx->user_id_present = 1;
+ break;
+
+ case OPT_GROUP_ID:
+ ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
+ if (!gid_valid(ctx->group_id))
+ return invalf(fc, "fuse: Invalid group_id");
+ ctx->group_id_present = 1;
+ break;
+
+ case OPT_DEFAULT_PERMISSIONS:
+ ctx->default_permissions = 1;
+ break;
+
+ case OPT_ALLOW_OTHER:
+ ctx->allow_other = 1;
+ break;
+
+ case OPT_MAX_READ:
+ ctx->max_read = result.uint_32;
+ break;
+
+ case OPT_BLKSIZE:
+ if (!ctx->is_bdev)
+ return invalf(fc, "fuse: blksize only supported for fuseblk");
+ ctx->blksize = result.uint_32;
+ break;
+
+ default:
+ return -EINVAL;
}
- return err;
+
+ return 0;
}

-static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
- struct user_namespace *user_ns)
+static void fuse_free_fc(struct fs_context *fc)
{
- char *p;
- memset(d, 0, sizeof(struct fuse_mount_data));
- d->max_read = ~0;
- d->blksize = FUSE_DEFAULT_BLKSIZE;
-
- while ((p = strsep(&opt, ",")) != NULL) {
- int token;
- int value;
- unsigned uv;
- substring_t args[MAX_OPT_ARGS];
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case OPT_FD:
- if (match_int(&args[0], &value))
- return 0;
- d->fd = value;
- d->fd_present = 1;
- break;
-
- case OPT_ROOTMODE:
- if (match_octal(&args[0], &value))
- return 0;
- if (!fuse_valid_type(value))
- return 0;
- d->rootmode = value;
- d->rootmode_present = 1;
- break;
-
- case OPT_USER_ID:
- if (fuse_match_uint(&args[0], &uv))
- return 0;
- d->user_id = make_kuid(user_ns, uv);
- if (!uid_valid(d->user_id))
- return 0;
- d->user_id_present = 1;
- break;
-
- case OPT_GROUP_ID:
- if (fuse_match_uint(&args[0], &uv))
- return 0;
- d->group_id = make_kgid(user_ns, uv);
- if (!gid_valid(d->group_id))
- return 0;
- d->group_id_present = 1;
- break;
-
- case OPT_DEFAULT_PERMISSIONS:
- d->default_permissions = 1;
- break;
-
- case OPT_ALLOW_OTHER:
- d->allow_other = 1;
- break;
-
- case OPT_MAX_READ:
- if (match_int(&args[0], &value))
- return 0;
- d->max_read = value;
- break;
-
- case OPT_BLKSIZE:
- if (!is_bdev || match_int(&args[0], &value))
- return 0;
- d->blksize = value;
- break;
-
- default:
- return 0;
- }
- }
-
- if (!d->fd_present || !d->rootmode_present ||
- !d->user_id_present || !d->group_id_present)
- return 0;
+ struct fuse_fs_context *ctx = fc->fs_private;

- return 1;
+ kfree(ctx);
}

static int fuse_show_options(struct seq_file *m, struct dentry *root)
@@ -1078,12 +1071,12 @@ void fuse_dev_free(struct fuse_dev *fud)
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

-static int fuse_fill_super(struct super_block *sb, void *data, int silent)
+static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
+ struct fuse_fs_context *ctx = fsc->fs_private;
struct fuse_dev *fud;
struct fuse_conn *fc;
struct inode *root;
- struct fuse_mount_data d;
struct file *file;
struct dentry *root_dentry;
struct fuse_req *init_req;
@@ -1096,13 +1089,10 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)

sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);

- if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
- goto err;
-
if (is_bdev) {
#ifdef CONFIG_BLOCK
err = -EINVAL;
- if (!sb_set_blocksize(sb, d.blksize))
+ if (!sb_set_blocksize(sb, ctx->blksize))
goto err;
#endif
} else {
@@ -1119,7 +1109,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_user_ns != &init_user_ns)
sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;

- file = fget(d.fd);
+ file = fget(ctx->fd);
err = -EINVAL;
if (!file)
goto err;
@@ -1162,17 +1152,17 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fc->dont_mask = 1;
sb->s_flags |= SB_POSIXACL;

- fc->default_permissions = d.default_permissions;
- fc->allow_other = d.allow_other;
- fc->user_id = d.user_id;
- fc->group_id = d.group_id;
- fc->max_read = max_t(unsigned, 4096, d.max_read);
+ fc->default_permissions = ctx->default_permissions;
+ fc->allow_other = ctx->allow_other;
+ fc->user_id = ctx->user_id;
+ fc->group_id = ctx->group_id;
+ fc->max_read = max_t(unsigned, 4096, ctx->max_read);

/* Used by get_root_inode() */
sb->s_fs_info = fc;

err = -ENOMEM;
- root = fuse_get_root_inode(sb, d.rootmode);
+ root = fuse_get_root_inode(sb, ctx->rootmode);
sb->s_d_op = &fuse_root_dentry_operations;
root_dentry = d_make_root(root);
if (!root_dentry)
@@ -1232,11 +1222,48 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
return err;
}

-static struct dentry *fuse_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *raw_data)
+static int fuse_get_tree(struct fs_context *fc)
+{
+ struct fuse_fs_context *ctx = fc->fs_private;
+
+ if (!ctx->fd_present || !ctx->rootmode_present ||
+ !ctx->user_id_present || !ctx->group_id_present)
+ return invalf(fc, "fuse: Missing mount parameter(s)");
+
+ if (ctx->is_bdev)
+ return vfs_get_block_super(fc, fuse_fill_super);
+
+ return vfs_get_super(fc, vfs_get_independent_super, fuse_fill_super);
+}
+
+static const struct fs_context_operations fuse_context_ops = {
+ .free = fuse_free_fc,
+ .parse_param = fuse_parse_param,
+ .get_tree = fuse_get_tree,
+};
+
+/*
+ * Set up the filesystem mount context.
+ */
+static int fuse_init_fs_context(struct fs_context *fc)
{
- return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
+ struct fuse_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->max_read = ~0;
+ ctx->blksize = FUSE_DEFAULT_BLKSIZE;
+
+#ifdef CONFIG_BLOCK
+ if (fc->fs_type == &fuseblk_fs_type)
+ ctx->is_bdev = true;
+#endif
+
+ fc->fs_private = ctx;
+ fc->ops = &fuse_context_ops;
+ return 0;
}

static void fuse_sb_destroy(struct super_block *sb)
@@ -1265,19 +1292,13 @@ static struct file_system_type fuse_fs_type = {
.owner = THIS_MODULE,
.name = "fuse",
.fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
- .mount = fuse_mount,
+ .init_fs_context = fuse_init_fs_context,
+ .parameters = &fuse_fs_parameters,
.kill_sb = fuse_kill_sb_anon,
};
MODULE_ALIAS_FS("fuse");

#ifdef CONFIG_BLOCK
-static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
- int flags, const char *dev_name,
- void *raw_data)
-{
- return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
-}
-
static void fuse_kill_sb_blk(struct super_block *sb)
{
fuse_sb_destroy(sb);
@@ -1287,7 +1308,8 @@ static void fuse_kill_sb_blk(struct super_block *sb)
static struct file_system_type fuseblk_fs_type = {
.owner = THIS_MODULE,
.name = "fuseblk",
- .mount = fuse_mount_blk,
+ .init_fs_context = fuse_init_fs_context,
+ .parameters = &fuse_fs_parameters,
.kill_sb = fuse_kill_sb_blk,
.fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};


2019-03-19 16:25:23

by David Howells

[permalink] [raw]
Subject: [RFC PATCH 4/4] fuse: Move the subtype parameter into fuse

Move as much as possible of the mount subtype apparatus into the fuse
driver. The bits that are left involve determining whether it's permitted
to split the filesystem type string passed in to mount(2). Consequently,
this means that we cannot get rid of the FS_HAS_SUBTYPE flag unless we
define that a type string with a dot in in always indicates a subtype
specification.

Signed-off-by: David Howells <[email protected]>
---

fs/fs_context.c | 12 ------------
fs/fuse/inode.c | 21 +++++++++++++++++++--
fs/super.c | 5 -----
include/linux/fs_context.h | 1 -
4 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/fs/fs_context.c b/fs/fs_context.c
index ea027762c0b2..3325b435cb6f 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -432,7 +432,6 @@ void put_fs_context(struct fs_context *fc)
put_net(fc->net_ns);
put_user_ns(fc->user_ns);
put_cred(fc->cred);
- kfree(fc->subtype);
put_filesystem(fc->fs_type);
kfree(fc->source);
kfree(fc);
@@ -498,17 +497,6 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
return 0;
}

- if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) &&
- strcmp(param->key, "subtype") == 0) {
- if (param->type != fs_value_is_string)
- return invalf(fc, "VFS: Legacy: Non-string subtype");
- if (fc->subtype)
- return invalf(fc, "VFS: Legacy: Multiple subtype");
- fc->subtype = param->string;
- param->string = NULL;
- return 0;
- }
-
if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3aebff5b902a..d830b20252f7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -65,6 +65,7 @@ static struct file_system_type fuseblk_fs_type;
#endif

struct fuse_fs_context {
+ const char *subtype;
bool is_bdev;
int fd;
unsigned rootmode;
@@ -455,6 +456,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)

enum {
OPT_SOURCE,
+ OPT_SUBTYPE,
OPT_FD,
OPT_ROOTMODE,
OPT_USER_ID,
@@ -468,6 +470,7 @@ enum {

static const struct fs_parameter_spec fuse_param_specs[] = {
fsparam_string ("source", OPT_SOURCE),
+ fsparam_string ("subtype", OPT_SUBTYPE),
fsparam_fd ("fd", OPT_FD),
fsparam_u32oct ("rootmode", OPT_ROOTMODE),
fsparam_u32 ("user_id", OPT_USER_ID),
@@ -496,11 +499,19 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)

switch (opt) {
case OPT_SOURCE:
- kfree(fc->source);
+ if (fc->source)
+ return invalf(fc, "fuse: Multiple sources specified");
fc->source = param->string;
param->string = NULL;
break;

+ case OPT_SUBTYPE:
+ if (ctx->subtype)
+ return invalf(fc, "fuse: Multiple subtypes specified");
+ ctx->subtype = param->string;
+ param->string = NULL;
+ return 0;
+
case OPT_FD:
ctx->fd = result.uint_32;
ctx->fd_present = 1;
@@ -556,7 +567,10 @@ static void fuse_free_fc(struct fs_context *fc)
{
struct fuse_fs_context *ctx = fc->fs_private;

- kfree(ctx);
+ if (ctx) {
+ kfree(ctx->subtype);
+ kfree(ctx);
+ }
}

static int fuse_show_options(struct seq_file *m, struct dentry *root)
@@ -1099,6 +1113,9 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
}
+
+ sb->s_subtype = ctx->subtype;
+ ctx->subtype = NULL;
sb->s_magic = FUSE_SUPER_MAGIC;
sb->s_op = &fuse_super_operations;
sb->s_xattr = fuse_xattr_handlers;
diff --git a/fs/super.c b/fs/super.c
index 85851adb0f19..6d8dbf309241 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1536,11 +1536,6 @@ int vfs_get_tree(struct fs_context *fc)
sb = fc->root->d_sb;
WARN_ON(!sb->s_bdi);

- if (fc->subtype && !sb->s_subtype) {
- sb->s_subtype = fc->subtype;
- fc->subtype = NULL;
- }
-
/*
* Write barrier is for super_cache_count(). We place it before setting
* SB_BORN as the data dependency between the two functions is the
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index cb49b92f02af..524963c0cedb 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -82,7 +82,6 @@ struct fs_context {
struct net *net_ns; /* The network namespace for this mount */
const struct cred *cred; /* The mounter's credentials */
const char *source; /* The source name (eg. dev path) */
- const char *subtype; /* The subtype to set on the superblock */
void *security; /* Linux S&M options */
void *s_fs_info; /* Proposed s_fs_info */
fmode_t bdev_mode; /* File open mode for bdev */


2019-03-26 21:03:28

by Andrew Price

[permalink] [raw]
Subject: Re: [RFC PATCH 1/4] vfs: Create fs_context-aware mount_bdev() replacement

Hi David,

I've been testing gfs2 on top of this patch and it seems...

On 19/03/2019 16:23, David Howells wrote:
> Create a function, vfs_get_block_super(), that is fs_context-aware and a
> replacement for mount_bdev(). It caches the block device pointer and file
> open mode in the fs_context struct so that this information can be passed
> into sget_fc()'s test and set functions.
>
> Signed-off-by: David Howells <[email protected]>
> ---
>
> fs/fs_context.c | 2 +
> fs/super.c | 106 ++++++++++++++++++++++++++++++++++++++++++++
> include/linux/fs_context.h | 6 ++
> 3 files changed, 114 insertions(+)
>
> diff --git a/fs/fs_context.c b/fs/fs_context.c
> index 87e3546b9a52..ea027762c0b2 100644
> --- a/fs/fs_context.c
> +++ b/fs/fs_context.c
> @@ -425,6 +425,8 @@ void put_fs_context(struct fs_context *fc)
>
> if (fc->need_free && fc->ops && fc->ops->free)
> fc->ops->free(fc);
> + if (fc->bdev)
> + blkdev_put(fc->bdev, fc->bdev_mode);

doing this means...

>
> security_free_mnt_opts(&fc->security);
> put_net(fc->net_ns);
> diff --git a/fs/super.c b/fs/super.c
> index f27ee08fb26f..85851adb0f19 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -1211,6 +1211,112 @@ int vfs_get_super(struct fs_context *fc,
> EXPORT_SYMBOL(vfs_get_super);
>
> #ifdef CONFIG_BLOCK
> +static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
> +{
> + s->s_bdev = fc->bdev;
> + s->s_dev = s->s_bdev->bd_dev;
> + s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
> + fc->bdev = NULL;
> + return 0;
> +}
> +
> +static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
> +{
> + return s->s_bdev == fc->bdev;
> +}
> +
> +/**
> + * vfs_get_block_super - Get a superblock based on a single block device
> + * @fc: The filesystem context holding the parameters
> + * @keying: How to distinguish superblocks
> + * @fill_super: Helper to initialise a new superblock
> + */
> +int vfs_get_block_super(struct fs_context *fc,
> + int (*fill_super)(struct super_block *,
> + struct fs_context *))
> +{
> + struct block_device *bdev;
> + struct super_block *s;
> + int error = 0;
> +
> + fc->bdev_mode = FMODE_READ | FMODE_EXCL;
> + if (!(fc->sb_flags & SB_RDONLY))
> + fc->bdev_mode |= FMODE_WRITE;
> +
> + if (!fc->source)
> + return invalf(fc, "No source specified");
> +
> + bdev = blkdev_get_by_path(fc->source, fc->bdev_mode, fc->fs_type);
> + if (IS_ERR(bdev)) {
> + errorf(fc, "%s: Can't open blockdev", fc->source);
> + return PTR_ERR(bdev);
> + }
> +
> + /* Once the superblock is inserted into the list by sget_fc(), s_umount
> + * will protect the lockfs code from trying to start a snapshot while
> + * we are mounting
> + */
> + mutex_lock(&bdev->bd_fsfreeze_mutex);
> + if (bdev->bd_fsfreeze_count > 0) {
> + mutex_unlock(&bdev->bd_fsfreeze_mutex);
> + warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
> + error = -EBUSY;
> + goto error_bdev;
> + }
> +
> + fc->bdev = bdev;
> + fc->sb_flags |= SB_NOSEC;
> + s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
> + mutex_unlock(&bdev->bd_fsfreeze_mutex);
> + if (IS_ERR(s)) {
> + error = PTR_ERR(s);
> + goto error_bdev;
> + }
> +
> + if (s->s_root) {
> + /* Don't summarily change the RO/RW state. */
> + if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
> + warnf(fc, "%pg: Can't mount, would change RO state", bdev);
> + error = -EBUSY;
> + goto error_sb;
> + }
> +
> + /* s_umount nests inside bd_mutex during __invalidate_device().
> + * blkdev_put() acquires bd_mutex and can't be called under
> + * s_umount. Drop s_umount temporarily. This is safe as we're
> + * holding an active reference.
> + */
> + up_write(&s->s_umount);
> + blkdev_put(bdev, fc->bdev_mode);
> + down_write(&s->s_umount);

fc->bdev should be NULLed here (or, on the way out of sget_fc() might be
more appropriate) otherwise we get a double-blkdev_put() leading to NULL
pointer derefs later. This happens when I mount a device twice and then
unmount them, or mount it 3 times.

> + } else {
> + s->s_mode = fc->bdev_mode;
> + snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
> + sb_set_blocksize(s, block_size(bdev));
> + error = fill_super(s, fc);
> + if (error)
> + goto error_sb;
> +
> + s->s_flags |= SB_ACTIVE;
> + bdev->bd_super = s;
> + }
> +
> + BUG_ON(fc->root);

Maybe BUG_ON(fc->bdev); too?

Cheers,
Andy

2019-03-27 11:24:34

by David Howells

[permalink] [raw]
Subject: Re: [RFC PATCH 1/4] vfs: Create fs_context-aware mount_bdev() replacement

Andrew Price <[email protected]> wrote:

> > + up_write(&s->s_umount);
> > + blkdev_put(bdev, fc->bdev_mode);
> > + down_write(&s->s_umount);
>
> fc->bdev should be NULLed here (or, on the way out of sget_fc() might be more
> appropriate) otherwise we get a double-blkdev_put() leading to NULL pointer
> derefs later. This happens when I mount a device twice and then unmount them,
> or mount it 3 times.

How about the attached changes? Note that they conflict a bit with the mtd
changes from where I took the destructor piece.

David
---
commit 161602f963ae5a05bdb834461f7a4896286fbde0
Author: David Howells <[email protected]>
Date: Wed Mar 27 11:12:57 2019 +0000

bdev handling fix

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 9e22fe6aafe7..fc8fda4b9fe9 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -425,10 +425,8 @@ void put_fs_context(struct fs_context *fc)

if (fc->need_free && fc->ops && fc->ops->free)
fc->ops->free(fc);
-#ifdef CONFIG_BLOCK
- if (fc->bdev)
- blkdev_put(fc->bdev, fc->bdev_mode);
-#endif
+ if (fc->dev_destructor)
+ fc->dev_destructor(fc);

security_free_mnt_opts(&fc->security);
put_net(fc->net_ns);
diff --git a/fs/super.c b/fs/super.c
index 85851adb0f19..e9e678d2c37c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1211,8 +1211,17 @@ int vfs_get_super(struct fs_context *fc,
EXPORT_SYMBOL(vfs_get_super);

#ifdef CONFIG_BLOCK
+static void fc_bdev_destructor(struct fs_context *fc)
+{
+ if (fc->bdev) {
+ blkdev_put(fc->bdev, fc->bdev_mode);
+ fc->bdev = NULL;
+ }
+}
+
static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
{
+ s->s_mode = fc->bdev_mode;
s->s_bdev = fc->bdev;
s->s_dev = s->s_bdev->bd_dev;
s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
@@ -1252,6 +1261,9 @@ int vfs_get_block_super(struct fs_context *fc,
return PTR_ERR(bdev);
}

+ fc->dev_destructor = fc_bdev_destructor;
+ fc->bdev = bdev;
+
/* Once the superblock is inserted into the list by sget_fc(), s_umount
* will protect the lockfs code from trying to start a snapshot while
* we are mounting
@@ -1260,18 +1272,14 @@ int vfs_get_block_super(struct fs_context *fc,
if (bdev->bd_fsfreeze_count > 0) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
- error = -EBUSY;
- goto error_bdev;
+ return -EBUSY;
}

- fc->bdev = bdev;
fc->sb_flags |= SB_NOSEC;
s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
- if (IS_ERR(s)) {
- error = PTR_ERR(s);
- goto error_bdev;
- }
+ if (IS_ERR(s))
+ return PTR_ERR(s);

if (s->s_root) {
/* Don't summarily change the RO/RW state. */
@@ -1281,16 +1289,10 @@ int vfs_get_block_super(struct fs_context *fc,
goto error_sb;
}

- /* s_umount nests inside bd_mutex during __invalidate_device().
- * blkdev_put() acquires bd_mutex and can't be called under
- * s_umount. Drop s_umount temporarily. This is safe as we're
- * holding an active reference.
+ /* Leave fc->bdev to fc_bdev_destructor() to clean up to avoid
+ * locking conflicts.
*/
- up_write(&s->s_umount);
- blkdev_put(bdev, fc->bdev_mode);
- down_write(&s->s_umount);
} else {
- s->s_mode = fc->bdev_mode;
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, fc);
@@ -1307,11 +1309,7 @@ int vfs_get_block_super(struct fs_context *fc,

error_sb:
deactivate_locked_super(s);
-error_bdev:
- if (fc->bdev) {
- blkdev_put(fc->bdev, fc->bdev_mode);
- fc->bdev = NULL;
- }
+ /* Leave fc->bdev to fc_bdev_destructor() to clean up */
return error;
}
EXPORT_SYMBOL(vfs_get_block_super);
@@ -1521,8 +1519,13 @@ int vfs_get_tree(struct fs_context *fc)
* on the superblock.
*/
error = fc->ops->get_tree(fc);
- if (error < 0)
+ if (error < 0) {
+ if (fc->dev_destructor) {
+ fc->dev_destructor(fc);
+ fc->dev_destructor = NULL;
+ }
return error;
+ }

if (!fc->root) {
pr_err("Filesystem %s get_tree() didn't set fc->root\n",
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index cb49b92f02af..9af788a3ef8e 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -76,7 +76,9 @@ struct fs_context {
const struct fs_context_operations *ops;
struct file_system_type *fs_type;
void *fs_private; /* The filesystem's context */
- struct block_device *bdev; /* The backing blockdev (if applicable) */
+ union {
+ struct block_device *bdev; /* The backing blockdev (if applicable) */
+ };
struct dentry *root; /* The root and superblock */
struct user_namespace *user_ns; /* The user namespace for this mount */
struct net *net_ns; /* The network namespace for this mount */
@@ -93,6 +95,7 @@ struct fs_context {
enum fs_context_purpose purpose:8;
bool need_free:1; /* Need to call ops->free() */
bool global:1; /* Goes into &init_user_ns */
+ void (*dev_destructor)(struct fs_context *fc); /* For block or mtd */
};

struct fs_context_operations {

2019-03-27 13:52:14

by Andrew Price

[permalink] [raw]
Subject: Re: [RFC PATCH 1/4] vfs: Create fs_context-aware mount_bdev() replacement

On 27/03/2019 11:23, David Howells wrote:
> Andrew Price <[email protected]> wrote:
>
>>> + up_write(&s->s_umount);
>>> + blkdev_put(bdev, fc->bdev_mode);
>>> + down_write(&s->s_umount);
>>
>> fc->bdev should be NULLed here (or, on the way out of sget_fc() might be more
>> appropriate) otherwise we get a double-blkdev_put() leading to NULL pointer
>> derefs later. This happens when I mount a device twice and then unmount them,
>> or mount it 3 times.
>
> How about the attached changes? Note that they conflict a bit with the mtd
> changes from where I took the destructor piece.
>
> David

It fixes my bug and I see nothing unexpected from an xfstests -g quick
run with my gfs2 patch on top. Looks good.

Thanks,
Andy

> ---
> commit 161602f963ae5a05bdb834461f7a4896286fbde0
> Author: David Howells <[email protected]>
> Date: Wed Mar 27 11:12:57 2019 +0000
>
> bdev handling fix
>
> diff --git a/fs/fs_context.c b/fs/fs_context.c
> index 9e22fe6aafe7..fc8fda4b9fe9 100644
> --- a/fs/fs_context.c
> +++ b/fs/fs_context.c
> @@ -425,10 +425,8 @@ void put_fs_context(struct fs_context *fc)
>
> if (fc->need_free && fc->ops && fc->ops->free)
> fc->ops->free(fc);
> -#ifdef CONFIG_BLOCK
> - if (fc->bdev)
> - blkdev_put(fc->bdev, fc->bdev_mode);
> -#endif
> + if (fc->dev_destructor)
> + fc->dev_destructor(fc);
>
> security_free_mnt_opts(&fc->security);
> put_net(fc->net_ns);
> diff --git a/fs/super.c b/fs/super.c
> index 85851adb0f19..e9e678d2c37c 100644
> --- a/fs/super.c
> +++ b/fs/super.c
> @@ -1211,8 +1211,17 @@ int vfs_get_super(struct fs_context *fc,
> EXPORT_SYMBOL(vfs_get_super);
>
> #ifdef CONFIG_BLOCK
> +static void fc_bdev_destructor(struct fs_context *fc)
> +{
> + if (fc->bdev) {
> + blkdev_put(fc->bdev, fc->bdev_mode);
> + fc->bdev = NULL;
> + }
> +}
> +
> static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
> {
> + s->s_mode = fc->bdev_mode;
> s->s_bdev = fc->bdev;
> s->s_dev = s->s_bdev->bd_dev;
> s->s_bdi = bdi_get(s->s_bdev->bd_bdi);
> @@ -1252,6 +1261,9 @@ int vfs_get_block_super(struct fs_context *fc,
> return PTR_ERR(bdev);
> }
>
> + fc->dev_destructor = fc_bdev_destructor;
> + fc->bdev = bdev;
> +
> /* Once the superblock is inserted into the list by sget_fc(), s_umount
> * will protect the lockfs code from trying to start a snapshot while
> * we are mounting
> @@ -1260,18 +1272,14 @@ int vfs_get_block_super(struct fs_context *fc,
> if (bdev->bd_fsfreeze_count > 0) {
> mutex_unlock(&bdev->bd_fsfreeze_mutex);
> warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
> - error = -EBUSY;
> - goto error_bdev;
> + return -EBUSY;
> }
>
> - fc->bdev = bdev;
> fc->sb_flags |= SB_NOSEC;
> s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
> mutex_unlock(&bdev->bd_fsfreeze_mutex);
> - if (IS_ERR(s)) {
> - error = PTR_ERR(s);
> - goto error_bdev;
> - }
> + if (IS_ERR(s))
> + return PTR_ERR(s);
>
> if (s->s_root) {
> /* Don't summarily change the RO/RW state. */
> @@ -1281,16 +1289,10 @@ int vfs_get_block_super(struct fs_context *fc,
> goto error_sb;
> }
>
> - /* s_umount nests inside bd_mutex during __invalidate_device().
> - * blkdev_put() acquires bd_mutex and can't be called under
> - * s_umount. Drop s_umount temporarily. This is safe as we're
> - * holding an active reference.
> + /* Leave fc->bdev to fc_bdev_destructor() to clean up to avoid
> + * locking conflicts.
> */
> - up_write(&s->s_umount);
> - blkdev_put(bdev, fc->bdev_mode);
> - down_write(&s->s_umount);
> } else {
> - s->s_mode = fc->bdev_mode;
> snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
> sb_set_blocksize(s, block_size(bdev));
> error = fill_super(s, fc);
> @@ -1307,11 +1309,7 @@ int vfs_get_block_super(struct fs_context *fc,
>
> error_sb:
> deactivate_locked_super(s);
> -error_bdev:
> - if (fc->bdev) {
> - blkdev_put(fc->bdev, fc->bdev_mode);
> - fc->bdev = NULL;
> - }
> + /* Leave fc->bdev to fc_bdev_destructor() to clean up */
> return error;
> }
> EXPORT_SYMBOL(vfs_get_block_super);
> @@ -1521,8 +1519,13 @@ int vfs_get_tree(struct fs_context *fc)
> * on the superblock.
> */
> error = fc->ops->get_tree(fc);
> - if (error < 0)
> + if (error < 0) {
> + if (fc->dev_destructor) {
> + fc->dev_destructor(fc);
> + fc->dev_destructor = NULL;
> + }
> return error;
> + }
>
> if (!fc->root) {
> pr_err("Filesystem %s get_tree() didn't set fc->root\n",
> diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
> index cb49b92f02af..9af788a3ef8e 100644
> --- a/include/linux/fs_context.h
> +++ b/include/linux/fs_context.h
> @@ -76,7 +76,9 @@ struct fs_context {
> const struct fs_context_operations *ops;
> struct file_system_type *fs_type;
> void *fs_private; /* The filesystem's context */
> - struct block_device *bdev; /* The backing blockdev (if applicable) */
> + union {
> + struct block_device *bdev; /* The backing blockdev (if applicable) */
> + };
> struct dentry *root; /* The root and superblock */
> struct user_namespace *user_ns; /* The user namespace for this mount */
> struct net *net_ns; /* The network namespace for this mount */
> @@ -93,6 +95,7 @@ struct fs_context {
> enum fs_context_purpose purpose:8;
> bool need_free:1; /* Need to call ops->free() */
> bool global:1; /* Goes into &init_user_ns */
> + void (*dev_destructor)(struct fs_context *fc); /* For block or mtd */
> };
>
> struct fs_context_operations {
>