2018-07-08 21:12:35

by Eric Biggers

[permalink] [raw]
Subject: [PATCH vfs/for-next 00/18] fs_context fixes

Hi David and Al, here are some fixes for the fs_context patches.

Feel free to fold these into the original patches if you want.

Patches 13-18 are cleanups only.

Eric Biggers (18):
sysfs: check return value of kernfs_get_tree()
fs_context: fix shrinker leak in sget_fc()
fs_context: fix detecting full log buffer
fs_context: fix fs_context leak in simple_pin_fs()
fs_context: fix mount option blacklist
fs_context: fix memory leak with 's' (source) command
fs_context: fix double free of legacy_fs_context data
fsmount: pass up error code from dentry_open()
fsmount: fix handling FSMOUNT_CLOEXEC
fsmount: fix bypassing SB_MANDLOCK permission check
fspick: fix path leak
fspick: add missing permission check
fsmount: removed unused variable 'inode'
fsopen,fspick: factor out log allocation
fsopen,fspick: rename fsopen_create_fd() to fscontext_create_fd()
fs_context: de-obfuscate control flow in fscontext_read()
fs_context: de-obfuscate command validation
fs_context: fix fscontext_write() comment

fs/fs_context.c | 47 ++++++++++++++++++--------------
fs/fsopen.c | 71 +++++++++++++++++++++++++-----------------------
fs/libfs.c | 4 ++-
fs/namespace.c | 20 ++++++++------
fs/super.c | 2 +-
fs/sysfs/mount.c | 3 ++
6 files changed, 81 insertions(+), 66 deletions(-)

--
2.18.0



2018-07-08 21:06:47

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 16/18] fs_context: de-obfuscate control flow in fscontext_read()

From: Eric Biggers <[email protected]>

Signed-off-by: Eric Biggers <[email protected]>
---
fs/fsopen.c | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index 1fbd8b0ca194b..dd38f6b65aace 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -161,20 +161,18 @@ static ssize_t fscontext_read(struct file *file,
if (ret < 0)
return ret;

- ret = -ENODATA;
- if (log->head != log->tail) {
- index = log->tail & (logsize - 1);
- p = log->buffer[index];
- need_free = log->need_free & (1 << index);
- log->buffer[index] = NULL;
- log->need_free &= ~(1 << index);
- log->tail++;
- ret = 0;
+ if (log->head == log->tail) {
+ mutex_unlock(&fc->uapi_mutex);
+ return -ENODATA;
}

+ index = log->tail & (logsize - 1);
+ p = log->buffer[index];
+ need_free = log->need_free & (1 << index);
+ log->buffer[index] = NULL;
+ log->need_free &= ~(1 << index);
+ log->tail++;
mutex_unlock(&fc->uapi_mutex);
- if (ret < 0)
- return ret;

ret = -EMSGSIZE;
n = strlen(p);
--
2.18.0


2018-07-08 21:07:31

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 17/18] fs_context: de-obfuscate command validation

From: Eric Biggers <[email protected]>

Signed-off-by: Eric Biggers <[email protected]>
---
fs/fsopen.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index dd38f6b65aace..34d7292bb398e 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -50,10 +50,10 @@ static ssize_t fscontext_write(struct file *file,
case 'x':
break;
default:
- goto err_bad_cmd;
+ return -EINVAL;
}
if (opt[1] != ' ')
- goto err_bad_cmd;
+ return -EINVAL;

data = memdup_user_nul(_buf + 2, len - 2);
if (IS_ERR(data))
@@ -136,8 +136,7 @@ static ssize_t fscontext_write(struct file *file,
err_free:
kfree(data);
return ret;
-err_bad_cmd:
- return -EINVAL;
+
wrong_phase:
ret = -EBUSY;
goto err_unlock;
--
2.18.0


2018-07-08 21:07:38

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 15/18] fsopen,fspick: rename fsopen_create_fd() to fscontext_create_fd()

From: Eric Biggers <[email protected]>

It's used for both fsopen() and fspick(), not just fsopen().

Signed-off-by: Eric Biggers <[email protected]>
---
fs/fsopen.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index 02edb4705fac2..1fbd8b0ca194b 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -212,7 +212,7 @@ const struct file_operations fscontext_fs_fops = {
/*
* Attach a filesystem context to a file and an fd.
*/
-static int fsopen_create_fd(struct fs_context *fc, unsigned int o_flags)
+static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags)
{
int fd;

@@ -273,7 +273,7 @@ SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
if (ret < 0)
goto err_fc;

- return fsopen_create_fd(fc, flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0);
+ return fscontext_create_fd(fc, flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0);

err_fc:
put_fs_context(fc);
@@ -328,7 +328,7 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
goto err_fc;

path_put(&target);
- return fsopen_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0);
+ return fscontext_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0);

err_fc:
put_fs_context(fc);
--
2.18.0


2018-07-08 21:08:41

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 18/18] fs_context: fix fscontext_write() comment

From: Eric Biggers <[email protected]>

The 'r' command doesn't exist, and 'd' was apparently renamed to 's'.

Signed-off-by: Eric Biggers <[email protected]>
---
fs/fsopen.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index 34d7292bb398e..6947fed9df3b2 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -23,12 +23,11 @@
* here. For the moment, we assume a single option or command per write. Each
* line written is of the form
*
- * <option_type><space><stuff...>
+ * <command_type><space><stuff...>
*
- * d /dev/sda1 -- Device name
+ * s /dev/sda1 -- Source device
* o noatime -- Option without value
* o cell=grand.central.org -- Option with value
- * r / -- Dir within device to mount
* x create -- Create a superblock
* x reconfigure -- Reconfigure a superblock
*/
--
2.18.0


2018-07-08 21:08:56

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 12/18] fspick: add missing permission check

From: Eric Biggers <[email protected]>

Fixes: 99f8421020ac ("vfs: Implement fspick() to select a superblock for reconfiguration")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/fsopen.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index 3e439299ddf79..b3a22848f8eec 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -282,6 +282,9 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags
unsigned int lookup_flags;
int ret;

+ if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ return -EPERM;
+
if ((flags & ~(FSPICK_CLOEXEC |
FSPICK_SYMLINK_NOFOLLOW |
FSPICK_NO_AUTOMOUNT |
--
2.18.0


2018-07-08 21:09:07

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 07/18] fs_context: fix double free of legacy_fs_context data

From: Eric Biggers <[email protected]>

sys_fsmount() calls fc->ops->free() to free the data, zeroes
->fs_private, then proceeds to reuse the context. But legacy_fs_context
doesn't use ->fs_private, so we need to handle zeroing it too; otherwise
there's a double free of legacy_fs_context::{legacy_data,secdata}.

Fixes: 8a2e54b8af88 ("vfs: Implement a filesystem superblock creation/configuration context")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/fs_context.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 7a8d1ed34ae71..206256be9777e 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -504,6 +504,9 @@ static void legacy_fs_context_free(struct fs_context *fc)
kfree(ctx->legacy_data);
break;
}
+ /* This doesn't use fs_private, so need to manually zero for fsmount */
+ BUILD_BUG_ON(offsetof(struct legacy_fs_context, fc) != 0);
+ memset(fc + 1, 0, sizeof(*ctx) - sizeof(*fc));
}

/*
--
2.18.0


2018-07-08 21:09:12

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 14/18] fsopen,fspick: factor out log allocation

From: Eric Biggers <[email protected]>

Signed-off-by: Eric Biggers <[email protected]>
---
fs/fsopen.c | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index b3a22848f8eec..02edb4705fac2 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -223,6 +223,16 @@ static int fsopen_create_fd(struct fs_context *fc, unsigned int o_flags)
return fd;
}

+static int fscontext_alloc_log(struct fs_context *fc)
+{
+ fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL);
+ if (!fc->log)
+ return -ENOMEM;
+ refcount_set(&fc->log->usage, 1);
+ fc->log->owner = fc->fs_type->owner;
+ return 0;
+}
+
/*
* Open a filesystem by name so that it can be configured for mounting.
*
@@ -257,14 +267,12 @@ SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
if (IS_ERR(fc))
return PTR_ERR(fc);

- ret = -ENOMEM;
- fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL);
- if (!fc->log)
+ fc->phase = FS_CONTEXT_CREATE_PARAMS;
+
+ ret = fscontext_alloc_log(fc);
+ if (ret < 0)
goto err_fc;
- refcount_set(&fc->log->usage, 1);
- fc->log->owner = fs_type->owner;

- fc->phase = FS_CONTEXT_CREATE_PARAMS;
return fsopen_create_fd(fc, flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0);

err_fc:
@@ -315,12 +323,9 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags

fc->phase = FS_CONTEXT_RECONF_PARAMS;

- ret = -ENOMEM;
- fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL);
- if (!fc->log)
+ ret = fscontext_alloc_log(fc);
+ if (ret < 0)
goto err_fc;
- refcount_set(&fc->log->usage, 1);
- fc->log->owner = fc->fs_type->owner;

path_put(&target);
return fsopen_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0);
--
2.18.0


2018-07-08 21:09:13

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 08/18] fsmount: pass up error code from dentry_open()

From: Eric Biggers <[email protected]>

Fixes: 0c65353ab9f5 ("vfs: Implement fsmount() to effect a pre-configured mount")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/namespace.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 6f0701a03a2b3..9cde133d0a9c4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3333,8 +3333,10 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, unsigned int, ms_flags
* it, not just simply put it.
*/
file = dentry_open(&newmount, O_PATH, fc->cred);
- if (IS_ERR(file))
+ if (IS_ERR(file)) {
+ ret = PTR_ERR(file);
goto err_path;
+ }
file->f_mode |= FMODE_NEED_UNMOUNT;

ret = get_unused_fd_flags(flags & FSMOUNT_CLOEXEC);
--
2.18.0


2018-07-08 21:09:41

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 13/18] fsmount: removed unused variable 'inode'

From: Eric Biggers <[email protected]>

Signed-off-by: Eric Biggers <[email protected]>
---
fs/namespace.c | 2 --
1 file changed, 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 7f0191bb5db46..0624af4806c4a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3230,7 +3230,6 @@ EXPORT_SYMBOL_GPL(kern_mount);
SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, unsigned int, ms_flags)
{
struct fs_context *fc;
- struct inode *inode;
struct file *file;
struct path newmount;
struct fd f;
@@ -3289,7 +3288,6 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, unsigned int, ms_flags
goto err_fsfd;
}

- inode = file_inode(f.file);
ret = mutex_lock_interruptible(&fc->uapi_mutex);
if (ret < 0)
goto err_fsfd;
--
2.18.0


2018-07-08 21:10:21

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 11/18] fspick: fix path leak

From: Eric Biggers <[email protected]>

Fixes: 99f8421020ac ("vfs: Implement fspick() to select a superblock for reconfiguration")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/fsopen.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index 8d6fa4ba8fb55..3e439299ddf79 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -301,7 +301,7 @@ SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags

ret = -EOPNOTSUPP;
if (!target.dentry->d_sb->s_op->reconfigure)
- goto err;
+ goto err_path;

fc = vfs_new_fs_context(target.dentry->d_sb->s_type, target.dentry,
0, FS_CONTEXT_FOR_RECONFIGURE);
--
2.18.0


2018-07-08 21:10:23

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 10/18] fsmount: fix bypassing SB_MANDLOCK permission check

From: Eric Biggers <[email protected]>

fc->sb_flags can be modified up until fc->uapi_mutex is taken, so the
permission check for SB_MANDLOCK needs to happen under the mutex.

Also move the may_mount() check as early as possible.

Fixes: 0c65353ab9f5 ("vfs: Implement fsmount() to effect a pre-configured mount")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/namespace.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 8ac9e8fb31c9f..7f0191bb5db46 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3237,6 +3237,9 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, unsigned int, ms_flags
unsigned int mnt_flags = 0;
long ret;

+ if (!may_mount())
+ return -EPERM;
+
if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
return -EINVAL;

@@ -3275,11 +3278,6 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, unsigned int, ms_flags

fc = f.file->private_data;

- ret = -EPERM;
- if (!may_mount() ||
- ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock()))
- goto err_fsfd;
-
/* There must be a valid superblock or we can't mount it */
ret = -EINVAL;
if (!fc->root)
@@ -3300,6 +3298,10 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, unsigned int, ms_flags
if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
goto err_unlock;

+ ret = -EPERM;
+ if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
+ goto err_unlock;
+
newmount.mnt = vfs_create_mount(fc, mnt_flags);
if (IS_ERR(newmount.mnt)) {
ret = PTR_ERR(newmount.mnt);
--
2.18.0


2018-07-08 21:10:31

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 06/18] fs_context: fix memory leak with 's' (source) command

From: Eric Biggers <[email protected]>

Fixes: 5f417428a312 ("vfs: Implement fsopen() to prepare for a mount")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/fsopen.c | 1 -
1 file changed, 1 deletion(-)

diff --git a/fs/fsopen.c b/fs/fsopen.c
index 5d346ac78d504..8d6fa4ba8fb55 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -98,7 +98,6 @@ static ssize_t fscontext_write(struct file *file,
ret = vfs_set_fs_source(fc, data, len - 2);
if (ret < 0)
goto err_unlock;
- data = NULL;
break;

case 'o':
--
2.18.0


2018-07-08 21:10:50

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 03/18] fs_context: fix detecting full log buffer

From: Eric Biggers <[email protected]>

When 'head' and 'tail' wrap around, 'log->head - log->tail' will be
something like '4 - 252 = -248', and comparing that directly to the
array size is wrong. Fix by casting to 'u8'.

Fixes: 09aeca629fb3 ("vfs: Implement logging through fs_context")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/fs_context.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 97e8c1dc4e3b1..a0e22f4c6b64a 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -418,7 +418,9 @@ void logfc(struct fs_context *fc, const char *fmt, ...)
freeable = 0;
store_string:
index = log->head & (logsize - 1);
- if ((int)log->head - (int)log->tail == 8) {
+ BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) ||
+ sizeof(log->tail) != sizeof(u8));
+ if ((u8)(log->head - log->tail) == logsize) {
/* The buffer is full, discard the oldest message */
if (log->need_free & (1 << index))
kfree(log->buffer[index]);
--
2.18.0


2018-07-08 21:11:04

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 09/18] fsmount: fix handling FSMOUNT_CLOEXEC

From: Eric Biggers <[email protected]>

Fixes: 0c65353ab9f5 ("vfs: Implement fsmount() to effect a pre-configured mount")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/namespace.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 9cde133d0a9c4..8ac9e8fb31c9f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3339,7 +3339,7 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, unsigned int, ms_flags
}
file->f_mode |= FMODE_NEED_UNMOUNT;

- ret = get_unused_fd_flags(flags & FSMOUNT_CLOEXEC);
+ ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
if (ret >= 0)
fd_install(ret, file);
else
--
2.18.0


2018-07-08 21:11:11

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 02/18] fs_context: fix shrinker leak in sget_fc()

From: Eric Biggers <[email protected]>

alloc_super() now preallocates the shrinker, so sget_fc() must only
register the pre-allocated shrinker, not allocate one again.

Fixes: 8a2e54b8af88 ("vfs: Implement a filesystem superblock creation/configuration context")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/super.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/super.c b/fs/super.c
index e6052a72f3558..a992dd0f27670 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -559,7 +559,7 @@ struct super_block *sget_fc(struct fs_context *fc,
hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(s->s_type);
- register_shrinker(&s->s_shrink);
+ register_shrinker_prepared(&s->s_shrink);
return s;
}
EXPORT_SYMBOL(sget_fc);
--
2.18.0


2018-07-08 21:11:25

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 01/18] sysfs: check return value of kernfs_get_tree()

From: Eric Biggers <[email protected]>

Reported-by: [email protected]
Fixes: a5195193b1e5 ("kernfs, sysfs, cgroup, intel_rdt: Support fs_context")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/sysfs/mount.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 0a016dd49300a..1e1c0ccc6a367 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -27,6 +27,9 @@ static int sysfs_get_tree(struct fs_context *fc)
int ret;

ret = kernfs_get_tree(fc);
+ if (ret)
+ return ret;
+
if (kfc->new_sb_created)
fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
return 0;
--
2.18.0


2018-07-08 21:11:31

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 05/18] fs_context: fix mount option blacklist

From: Eric Biggers <[email protected]>

The blacklist didn't actually do anything, since match_token() always
returned 0.

Fixes: 8a2e54b8af88 ("vfs: Implement a filesystem superblock creation/configuration context")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/fs_context.c | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/fs/fs_context.c b/fs/fs_context.c
index a0e22f4c6b64a..7a8d1ed34ae71 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -65,26 +65,26 @@ static const match_table_t common_clear_sb_flag = {
};

static const match_table_t forbidden_sb_flag = {
- { 0, "bind" },
- { 0, "move" },
- { 0, "private" },
- { 0, "remount" },
- { 0, "shared" },
- { 0, "slave" },
- { 0, "unbindable" },
- { 0, "rec" },
- { 0, "noatime" },
- { 0, "relatime" },
- { 0, "norelatime" },
- { 0, "strictatime" },
- { 0, "nostrictatime" },
- { 0, "nodiratime" },
- { 0, "dev" },
- { 0, "nodev" },
- { 0, "exec" },
- { 0, "noexec" },
- { 0, "suid" },
- { 0, "nosuid" },
+ { 1, "bind" },
+ { 1, "move" },
+ { 1, "private" },
+ { 1, "remount" },
+ { 1, "shared" },
+ { 1, "slave" },
+ { 1, "unbindable" },
+ { 1, "rec" },
+ { 1, "noatime" },
+ { 1, "relatime" },
+ { 1, "norelatime" },
+ { 1, "strictatime" },
+ { 1, "nostrictatime" },
+ { 1, "nodiratime" },
+ { 1, "dev" },
+ { 1, "nodev" },
+ { 1, "exec" },
+ { 1, "noexec" },
+ { 1, "suid" },
+ { 1, "nosuid" },
{ },
};

--
2.18.0


2018-07-08 21:25:49

by Eric Biggers

[permalink] [raw]
Subject: [PATCH 04/18] fs_context: fix fs_context leak in simple_pin_fs()

From: Eric Biggers <[email protected]>

Fixes: 8a2e54b8af88 ("vfs: Implement a filesystem superblock creation/configuration context")
Signed-off-by: Eric Biggers <[email protected]>
---
fs/libfs.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index 823f0510e43da..d9a5d883dc3f5 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -588,8 +588,10 @@ int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *c
return PTR_ERR(fc);

ret = vfs_get_tree(fc);
- if (ret < 0)
+ if (ret < 0) {
+ put_fs_context(fc);
return ret;
+ }

mnt = vfs_create_mount(fc, 0);
put_fs_context(fc);
--
2.18.0


2018-07-08 23:48:47

by Eric Biggers

[permalink] [raw]
Subject: Re: [PATCH vfs/for-next 00/18] fs_context fixes

On Sun, Jul 08, 2018 at 02:01:36PM -0700, Eric Biggers wrote:
> Hi David and Al, here are some fixes for the fs_context patches.
>
> Feel free to fold these into the original patches if you want.
>
> Patches 13-18 are cleanups only.
>

Also, mount(..., MS_REMOUNT|MS_BIND, ...) now validates the mount options
string, which breaks systemd unit files with ProtectControlGroups=yes (e.g.
systemd-networkd.service) when systemd does the following to change a cgroup
(v1) mount to read-only:

mount(NULL, "/run/systemd/unit-root/sys/fs/cgroup/systemd", NULL, MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND, NULL)

... when the kernel has CONFIG_CGROUPS=y but no cgroup subsystems enabled, since
in that case the error "cgroup1: Need name or subsystem set" is hit when the
mount options string is empty.

Probably it doesn't make sense to validate the mount options string at all in
the MS_REMOUNT|MS_BIND case, though maybe you had something else in mind.

- Eric

2018-07-09 09:33:48

by David Howells

[permalink] [raw]
Subject: Re: [PATCH 03/18] fs_context: fix detecting full log buffer

Eric Biggers <[email protected]> wrote:

> When 'head' and 'tail' wrap around, 'log->head - log->tail' will be
> something like '4 - 252 = -248', and comparing that directly to the
> array size is wrong. Fix by casting to 'u8'.

I think a better fix is to use CIRC_CNT() or CIRC_SPACE().

David

2018-07-09 09:36:58

by David Howells

[permalink] [raw]
Subject: Re: [PATCH 03/18] fs_context: fix detecting full log buffer

David Howells <[email protected]> wrote:

> > When 'head' and 'tail' wrap around, 'log->head - log->tail' will be
> > something like '4 - 252 = -248', and comparing that directly to the
> > array size is wrong. Fix by casting to 'u8'.
>
> I think a better fix is to use CIRC_CNT() or CIRC_SPACE().

Or it would be - if CIRC_CNT() and CIRC_SPACE() didn't leave a hole in the
buffer - but that's unnecessary if the head and tail counters can hold 2x the
ring size or more.

David

2018-07-09 12:36:39

by David Howells

[permalink] [raw]
Subject: Re: [PATCH 07/18] fs_context: fix double free of legacy_fs_context data

Eric Biggers <[email protected]> wrote:

> sys_fsmount() calls fc->ops->free() to free the data, zeroes
> ->fs_private, then proceeds to reuse the context. But legacy_fs_context
> doesn't use ->fs_private, so we need to handle zeroing it too; otherwise
> there's a double free of legacy_fs_context::{legacy_data,secdata}.

I think the attached is better. I stopped embedding the fs_context in the
xxx_fs_context to make certain things simpler, but I missed the legacy
wrapper.

David
---
diff --git a/fs/fs_context.c b/fs/fs_context.c
index f91facc769f7..ab93a0b73dc6 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -34,7 +34,6 @@ enum legacy_fs_param {
};

struct legacy_fs_context {
- struct fs_context fc;
char *legacy_data; /* Data page for legacy filesystems */
char *secdata;
size_t data_size;
@@ -239,12 +238,21 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
enum fs_context_purpose purpose)
{
struct fs_context *fc;
- int ret;
+ int ret = -ENOMEM;

- fc = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
+ fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
if (!fc)
return ERR_PTR(-ENOMEM);

+ if (!fs_type->init_fs_context) {
+ fc->fs_private = kzalloc(sizeof(struct legacy_fs_context),
+ GFP_KERNEL);
+ if (!fc->fs_private)
+ goto err_fc;
+
+ fc->ops = &legacy_fs_context_ops;
+ }
+
fc->purpose = purpose;
fc->sb_flags = sb_flags;
fc->fs_type = get_filesystem(fs_type);
@@ -277,8 +285,6 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
ret = fc->fs_type->init_fs_context(fc, reference);
if (ret < 0)
goto err_fc;
- } else {
- fc->ops = &legacy_fs_context_ops;
}

/* Do the security check last because ->init_fs_context may change the
@@ -395,7 +401,7 @@ EXPORT_SYMBOL(put_fs_context);
*/
static void legacy_fs_context_free(struct fs_context *fc)
{
- struct legacy_fs_context *ctx = container_of(fc, struct legacy_fs_context, fc);
+ struct legacy_fs_context *ctx = fc->fs_private;

free_secdata(ctx->secdata);
switch (ctx->param_type) {
@@ -408,6 +414,8 @@ static void legacy_fs_context_free(struct fs_context *fc)
kfree(ctx->legacy_data);
break;
}
+
+ kfree(ctx);
}

/*
@@ -415,20 +423,28 @@ static void legacy_fs_context_free(struct fs_context *fc)
*/
static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
{
- struct legacy_fs_context *ctx = container_of(fc, struct legacy_fs_context, fc);
- struct legacy_fs_context *src_ctx = container_of(src_fc, struct legacy_fs_context, fc);
+ struct legacy_fs_context *ctx;
+ struct legacy_fs_context *src_ctx = src_fc->fs_private;
+
+ ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;

switch (ctx->param_type) {
case LEGACY_FS_MONOLITHIC_PARAMS:
case LEGACY_FS_INDIVIDUAL_PARAMS:
ctx->legacy_data = kmemdup(src_ctx->legacy_data,
src_ctx->data_size, GFP_KERNEL);
- if (!ctx->legacy_data)
+ if (!ctx->legacy_data) {
+ kfree(ctx);
return -ENOMEM;
+ }
/* Fall through */
default:
break;
}
+
+ fc->fs_private = ctx;
return 0;
}

@@ -438,7 +454,7 @@ static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_f
*/
static int legacy_parse_option(struct fs_context *fc, char *opt, size_t len)
{
- struct legacy_fs_context *ctx = container_of(fc, struct legacy_fs_context, fc);
+ struct legacy_fs_context *ctx = fc->fs_private;
unsigned int size = ctx->data_size;

if (ctx->param_type != LEGACY_FS_UNSET_PARAMS &&
@@ -471,7 +487,7 @@ static int legacy_parse_option(struct fs_context *fc, char *opt, size_t len)
*/
static int legacy_parse_monolithic(struct fs_context *fc, void *data, size_t data_size)
{
- struct legacy_fs_context *ctx = container_of(fc, struct legacy_fs_context, fc);
+ struct legacy_fs_context *ctx = fc->fs_private;

if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) {
pr_warn("VFS: Can't mix monolithic and individual options\n");
@@ -507,7 +523,7 @@ static int legacy_parse_monolithic(struct fs_context *fc, void *data, size_t dat
*/
static int legacy_validate(struct fs_context *fc)
{
- struct legacy_fs_context *ctx = container_of(fc, struct legacy_fs_context, fc);
+ struct legacy_fs_context *ctx = fc->fs_private;

switch (ctx->param_type) {
case LEGACY_FS_UNSET_PARAMS:
@@ -520,7 +536,7 @@ static int legacy_validate(struct fs_context *fc)
break;
}

- if (ctx->fc.fs_type->fs_flags & FS_BINARY_MOUNTDATA)
+ if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
return 0;

ctx->secdata = alloc_secdata();
@@ -557,13 +573,13 @@ static int legacy_set_subtype(struct fs_context *fc)
*/
static int legacy_get_tree(struct fs_context *fc)
{
- struct legacy_fs_context *ctx = container_of(fc, struct legacy_fs_context, fc);
+ struct legacy_fs_context *ctx = fc->fs_private;
struct super_block *sb;
struct dentry *root;
int ret;

- root = ctx->fc.fs_type->mount(ctx->fc.fs_type, ctx->fc.sb_flags,
- ctx->fc.source, ctx->legacy_data,
+ root = fc->fs_type->mount(fc->fs_type, fc->sb_flags,
+ fc->source, ctx->legacy_data,
ctx->data_size);
if (IS_ERR(root))
return PTR_ERR(root);
@@ -571,14 +587,14 @@ static int legacy_get_tree(struct fs_context *fc)
sb = root->d_sb;
BUG_ON(!sb);

- if ((ctx->fc.fs_type->fs_flags & FS_HAS_SUBTYPE) &&
+ if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) &&
!fc->subtype) {
ret = legacy_set_subtype(fc);
if (ret < 0)
goto err_sb;
}

- ctx->fc.root = root;
+ fc->root = root;
return 0;

err_sb:

2018-07-09 15:33:12

by David Howells

[permalink] [raw]
Subject: Re: [PATCH vfs/for-next 00/18] fs_context fixes

Eric Biggers <[email protected]> wrote:

Okay, I've folded in all your changes to my mount-context branch, apart from
patch 07 (the legacy wrapper double free fix) which I think needed doing
differently.

> Also, mount(..., MS_REMOUNT|MS_BIND, ...) now validates the mount options
> string, which breaks systemd unit files with ProtectControlGroups=yes (e.g.
> systemd-networkd.service) when systemd does the following to change a cgroup
> (v1) mount to read-only:
>
> mount(NULL, "/run/systemd/unit-root/sys/fs/cgroup/systemd", NULL, MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND, NULL)
>
> ... when the kernel has CONFIG_CGROUPS=y but no cgroup subsystems enabled, since
> in that case the error "cgroup1: Need name or subsystem set" is hit when the
> mount options string is empty.
>
> Probably it doesn't make sense to validate the mount options string at all in
> the MS_REMOUNT|MS_BIND case, though maybe you had something else in mind.

How about the attached? I need to separate it out anyway if I want to
implement a mount_setattr() syscall.

David
---
commit f24ee400978ad41e48e679f1d422277fc353521c
Author: David Howells <[email protected]>
Date: Mon Jul 9 16:24:27 2018 +0100

vfs: Separate changing mount flags full remount

Separate just the changing of mount flags (MS_REMOUNT|MS_BIND) from full
remount because the mount data now gets parsed with the new fs_context
stuff prior to doing a remount - and this causes the syscall under some
circumstances.

diff --git a/fs/namespace.c b/fs/namespace.c
index 58c1ace07bb6..39e9744beab9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -275,13 +275,13 @@ static struct mount *alloc_vfsmnt(const char *name)
* mnt_want/drop_write() will _keep_ the filesystem
* r/w.
*/
-int __mnt_is_readonly(struct vfsmount *mnt)
+bool __mnt_is_readonly(struct vfsmount *mnt)
{
if (mnt->mnt_flags & MNT_READONLY)
- return 1;
+ return true;
if (sb_rdonly(mnt->mnt_sb))
- return 1;
- return 0;
+ return true;
+ return false;
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

@@ -596,11 +596,12 @@ static int mnt_make_readonly(struct mount *mnt)
return ret;
}

-static void __mnt_unmake_readonly(struct mount *mnt)
+static int __mnt_unmake_readonly(struct mount *mnt)
{
lock_mount_hash();
mnt->mnt.mnt_flags &= ~MNT_READONLY;
unlock_mount_hash();
+ return 0;
}

int sb_prepare_remount_readonly(struct super_block *sb)
@@ -2307,21 +2308,85 @@ SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
return error;
}

-static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
+/*
+ * Don't allow locked mount flags to be cleared.
+ *
+ * No locks need to be held here while testing the various MNT_LOCK
+ * flags because those flags can never be cleared once they are set.
+ */
+static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
{
- int error = 0;
- int readonly_request = 0;
+ unsigned int fl = mnt->mnt.mnt_flags;

- if (ms_flags & MS_RDONLY)
+ if ((fl & MNT_LOCK_READONLY) &&
+ !(mnt_flags & MNT_READONLY))
+ return false;
+
+ if ((fl & MNT_LOCK_NODEV) &&
+ !(mnt_flags & MNT_NODEV))
+ return false;
+
+ if ((fl & MNT_LOCK_NOSUID) &&
+ !(mnt_flags & MNT_NOSUID))
+ return false;
+
+ if ((fl & MNT_LOCK_NOEXEC) &&
+ !(mnt_flags & MNT_NOEXEC))
+ return false;
+
+ if ((fl & MNT_LOCK_ATIME) &&
+ ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
+ return false;
+
+ return true;
+}
+
+static int change_mount_ro_state(struct vfsmount *mnt, unsigned int mnt_flags)
+{
+ bool readonly_request = true;
+
+ if (mnt_flags & MNT_READONLY)
readonly_request = 1;
if (readonly_request == __mnt_is_readonly(mnt))
return 0;

- if (readonly_request)
- error = mnt_make_readonly(real_mount(mnt));
- else
- __mnt_unmake_readonly(real_mount(mnt));
- return error;
+ if (!readonly_request)
+ return mnt_make_readonly(real_mount(mnt));
+
+ return __mnt_unmake_readonly(real_mount(mnt));
+}
+
+/*
+ * Handle reconfiguration of the mountpoint only without alteration of the
+ * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND
+ * to mount(2).
+ */
+static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
+{
+ struct super_block *sb = path->mnt->mnt_sb;
+ struct mount *mnt = real_mount(path->mnt);
+ int ret;
+
+ if (!check_mnt(mnt))
+ return -EINVAL;
+
+ if (path->dentry != path->mnt->mnt_root)
+ return -EINVAL;
+
+ if (!can_change_locked_flags(mnt, mnt_flags))
+ return -EPERM;
+
+ down_write(&sb->s_umount);
+ ret = change_mount_ro_state(path->mnt, mnt_flags);
+ if (ret == 0) {
+ lock_mount_hash();
+ mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
+ mnt->mnt.mnt_flags = mnt_flags;
+ touch_mnt_namespace(mnt->mnt_ns);
+ unlock_mount_hash();
+ }
+ up_write(&sb->s_umount);
+ return ret;
}

/*
@@ -2358,32 +2423,8 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;

- /* Don't allow changing of locked mnt flags.
- *
- * No locks need to be held here while testing the various
- * MNT_LOCK flags because those flags can never be cleared
- * once they are set.
- */
- if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
- !(mnt_flags & MNT_READONLY)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
- !(mnt_flags & MNT_NODEV)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
- !(mnt_flags & MNT_NOSUID)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
- !(mnt_flags & MNT_NOEXEC)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
- ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
+ if (!can_change_locked_flags(mnt, mnt_flags))
return -EPERM;
- }

if (type->init_fs_context) {
fc = vfs_sb_reconfig(path, sb_flags);
@@ -2410,9 +2451,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
}

down_write(&sb->s_umount);
- if (ms_flags & MS_BIND)
- err = change_mount_flags(path->mnt, ms_flags);
- else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
+ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
err = -EPERM;
else
err = do_remount_sb(sb, sb_flags, data, data_size, 0, fc);
@@ -2961,7 +3000,9 @@ long do_mount(const char *dev_name, const char __user *dir_name,
SB_LAZYTIME |
SB_I_VERSION);

- if (flags & MS_REMOUNT)
+ if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
+ retval = do_reconfigure_mnt(&path, mnt_flags);
+ else if (flags & MS_REMOUNT)
retval = do_remount(&path, flags, sb_flags, mnt_flags,
data_page, data_size);
else if (flags & MS_BIND)
diff --git a/include/linux/mount.h b/include/linux/mount.h
index ee5af77afc06..41b6b080ffd0 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -82,7 +82,7 @@ extern void mnt_drop_write_file(struct file *file);
extern void mntput(struct vfsmount *mnt);
extern struct vfsmount *mntget(struct vfsmount *mnt);
extern struct vfsmount *mnt_clone_internal(const struct path *path);
-extern int __mnt_is_readonly(struct vfsmount *mnt);
+extern bool __mnt_is_readonly(struct vfsmount *mnt);
extern bool mnt_may_suid(struct vfsmount *mnt);

struct path;

2018-07-09 15:58:07

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH vfs/for-next 00/18] fs_context fixes

On Mon, Jul 09, 2018 at 04:31:59PM +0100, David Howells wrote:

> -int __mnt_is_readonly(struct vfsmount *mnt)
> +bool __mnt_is_readonly(struct vfsmount *mnt)
> {
> if (mnt->mnt_flags & MNT_READONLY)
> - return 1;
> + return true;
> if (sb_rdonly(mnt->mnt_sb))
> - return 1;
> - return 0;
> + return true;
> + return false;

Egads... *If* you go for bool here, why not
return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
and be done with that?

2018-07-09 16:30:35

by David Howells

[permalink] [raw]
Subject: Re: [PATCH vfs/for-next 00/18] fs_context fixes

Al Viro <[email protected]> wrote:

> Egads... *If* you go for bool here, why not
> return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
> and be done with that?

Fair point; I'll do that when I get back later.

David

2018-07-09 21:58:47

by David Howells

[permalink] [raw]
Subject: Re: [PATCH vfs/for-next 00/18] fs_context fixes

Okay, how about the attached?

David
---
commit 0f067bdbfeca03264a50461db21339dbcd97e8f3
Author: David Howells <[email protected]>
Date: Mon Jul 9 16:24:27 2018 +0100

vfs: Separate changing mount flags full remount

Separate just the changing of mount flags (MS_REMOUNT|MS_BIND) from full
remount because the mount data now gets parsed with the new fs_context
stuff prior to doing a remount - and this causes the syscall under some
circumstances.

diff --git a/fs/namespace.c b/fs/namespace.c
index 58c1ace07bb6..a6fbfba8e448 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -275,13 +275,9 @@ static struct mount *alloc_vfsmnt(const char *name)
* mnt_want/drop_write() will _keep_ the filesystem
* r/w.
*/
-int __mnt_is_readonly(struct vfsmount *mnt)
+bool __mnt_is_readonly(struct vfsmount *mnt)
{
- if (mnt->mnt_flags & MNT_READONLY)
- return 1;
- if (sb_rdonly(mnt->mnt_sb))
- return 1;
- return 0;
+ return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

@@ -596,11 +592,12 @@ static int mnt_make_readonly(struct mount *mnt)
return ret;
}

-static void __mnt_unmake_readonly(struct mount *mnt)
+static int __mnt_unmake_readonly(struct mount *mnt)
{
lock_mount_hash();
mnt->mnt.mnt_flags &= ~MNT_READONLY;
unlock_mount_hash();
+ return 0;
}

int sb_prepare_remount_readonly(struct super_block *sb)
@@ -2307,21 +2304,91 @@ SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
return error;
}

-static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
+/*
+ * Don't allow locked mount flags to be cleared.
+ *
+ * No locks need to be held here while testing the various MNT_LOCK
+ * flags because those flags can never be cleared once they are set.
+ */
+static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
{
- int error = 0;
- int readonly_request = 0;
+ unsigned int fl = mnt->mnt.mnt_flags;

- if (ms_flags & MS_RDONLY)
- readonly_request = 1;
- if (readonly_request == __mnt_is_readonly(mnt))
+ if ((fl & MNT_LOCK_READONLY) &&
+ !(mnt_flags & MNT_READONLY))
+ return false;
+
+ if ((fl & MNT_LOCK_NODEV) &&
+ !(mnt_flags & MNT_NODEV))
+ return false;
+
+ if ((fl & MNT_LOCK_NOSUID) &&
+ !(mnt_flags & MNT_NOSUID))
+ return false;
+
+ if ((fl & MNT_LOCK_NOEXEC) &&
+ !(mnt_flags & MNT_NOEXEC))
+ return false;
+
+ if ((fl & MNT_LOCK_ATIME) &&
+ ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
+ return false;
+
+ return true;
+}
+
+static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
+{
+ bool readonly_request = (mnt_flags & MNT_READONLY);
+
+ if (readonly_request == __mnt_is_readonly(&mnt->mnt))
return 0;

if (readonly_request)
- error = mnt_make_readonly(real_mount(mnt));
- else
- __mnt_unmake_readonly(real_mount(mnt));
- return error;
+ return mnt_make_readonly(mnt);
+
+ return __mnt_unmake_readonly(mnt);
+}
+
+/*
+ * Update the user-settable attributes on a mount. The caller must hold
+ * sb->s_umount for writing.
+ */
+static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
+{
+ lock_mount_hash();
+ mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
+ mnt->mnt.mnt_flags = mnt_flags;
+ touch_mnt_namespace(mnt->mnt_ns);
+ unlock_mount_hash();
+}
+
+/*
+ * Handle reconfiguration of the mountpoint only without alteration of the
+ * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND
+ * to mount(2).
+ */
+static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
+{
+ struct super_block *sb = path->mnt->mnt_sb;
+ struct mount *mnt = real_mount(path->mnt);
+ int ret;
+
+ if (!check_mnt(mnt))
+ return -EINVAL;
+
+ if (path->dentry != mnt->mnt.mnt_root)
+ return -EINVAL;
+
+ if (!can_change_locked_flags(mnt, mnt_flags))
+ return -EPERM;
+
+ down_write(&sb->s_umount);
+ ret = change_mount_ro_state(mnt, mnt_flags);
+ if (ret == 0)
+ set_mount_attributes(mnt, mnt_flags);
+ up_write(&sb->s_umount);
+ return ret;
}

/*
@@ -2358,32 +2425,8 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;

- /* Don't allow changing of locked mnt flags.
- *
- * No locks need to be held here while testing the various
- * MNT_LOCK flags because those flags can never be cleared
- * once they are set.
- */
- if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
- !(mnt_flags & MNT_READONLY)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
- !(mnt_flags & MNT_NODEV)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
- !(mnt_flags & MNT_NOSUID)) {
- return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
- !(mnt_flags & MNT_NOEXEC)) {
+ if (!can_change_locked_flags(mnt, mnt_flags))
return -EPERM;
- }
- if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
- ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
- return -EPERM;
- }

if (type->init_fs_context) {
fc = vfs_sb_reconfig(path, sb_flags);
@@ -2410,18 +2453,11 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
}

down_write(&sb->s_umount);
- if (ms_flags & MS_BIND)
- err = change_mount_flags(path->mnt, ms_flags);
- else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
- err = -EPERM;
- else
+ err = -EPERM;
+ if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
err = do_remount_sb(sb, sb_flags, data, data_size, 0, fc);
- if (!err) {
- lock_mount_hash();
- mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
- mnt->mnt.mnt_flags = mnt_flags;
- touch_mnt_namespace(mnt->mnt_ns);
- unlock_mount_hash();
+ if (!err)
+ set_mount_attributes(mnt, mnt_flags);
}
up_write(&sb->s_umount);
err_fc:
@@ -2961,7 +2997,9 @@ long do_mount(const char *dev_name, const char __user *dir_name,
SB_LAZYTIME |
SB_I_VERSION);

- if (flags & MS_REMOUNT)
+ if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
+ retval = do_reconfigure_mnt(&path, mnt_flags);
+ else if (flags & MS_REMOUNT)
retval = do_remount(&path, flags, sb_flags, mnt_flags,
data_page, data_size);
else if (flags & MS_BIND)
diff --git a/include/linux/mount.h b/include/linux/mount.h
index ee5af77afc06..41b6b080ffd0 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -82,7 +82,7 @@ extern void mnt_drop_write_file(struct file *file);
extern void mntput(struct vfsmount *mnt);
extern struct vfsmount *mntget(struct vfsmount *mnt);
extern struct vfsmount *mnt_clone_internal(const struct path *path);
-extern int __mnt_is_readonly(struct vfsmount *mnt);
+extern bool __mnt_is_readonly(struct vfsmount *mnt);
extern bool mnt_may_suid(struct vfsmount *mnt);

struct path;

2018-07-10 01:19:14

by Eric Biggers

[permalink] [raw]
Subject: Re: [PATCH 07/18] fs_context: fix double free of legacy_fs_context data

On Mon, Jul 09, 2018 at 01:31:09PM +0100, David Howells wrote:
> Eric Biggers <[email protected]> wrote:
>
> > sys_fsmount() calls fc->ops->free() to free the data, zeroes
> > ->fs_private, then proceeds to reuse the context. But legacy_fs_context
> > doesn't use ->fs_private, so we need to handle zeroing it too; otherwise
> > there's a double free of legacy_fs_context::{legacy_data,secdata}.
>
> I think the attached is better. I stopped embedding the fs_context in the
> xxx_fs_context to make certain things simpler, but I missed the legacy
> wrapper.
>
> David
> ---
> diff --git a/fs/fs_context.c b/fs/fs_context.c
> index f91facc769f7..ab93a0b73dc6 100644
> --- a/fs/fs_context.c
> +++ b/fs/fs_context.c
> @@ -34,7 +34,6 @@ enum legacy_fs_param {
> };
>
> struct legacy_fs_context {
> - struct fs_context fc;
> char *legacy_data; /* Data page for legacy filesystems */
> char *secdata;
> size_t data_size;
> @@ -239,12 +238,21 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
> enum fs_context_purpose purpose)
> {
> struct fs_context *fc;
> - int ret;
> + int ret = -ENOMEM;
>
> - fc = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
> + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
> if (!fc)
> return ERR_PTR(-ENOMEM);
>
> + if (!fs_type->init_fs_context) {
> + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context),
> + GFP_KERNEL);
> + if (!fc->fs_private)
> + goto err_fc;
> +
> + fc->ops = &legacy_fs_context_ops;
> + }
> +

Why isn't this done in the same place that ->init_fs_context() would otherwise
be called? It logically does the same thing, right?

> fc->purpose = purpose;
> fc->sb_flags = sb_flags;
> fc->fs_type = get_filesystem(fs_type);
> @@ -277,8 +285,6 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
> ret = fc->fs_type->init_fs_context(fc, reference);
> if (ret < 0)
> goto err_fc;
> - } else {
> - fc->ops = &legacy_fs_context_ops;
> }
>
> /* Do the security check last because ->init_fs_context may change the
> @@ -395,7 +401,7 @@ EXPORT_SYMBOL(put_fs_context);
> */
> static void legacy_fs_context_free(struct fs_context *fc)
> {
> - struct legacy_fs_context *ctx = container_of(fc, struct legacy_fs_context, fc);
> + struct legacy_fs_context *ctx = fc->fs_private;
>
> free_secdata(ctx->secdata);
> switch (ctx->param_type) {
> @@ -408,6 +414,8 @@ static void legacy_fs_context_free(struct fs_context *fc)
> kfree(ctx->legacy_data);
> break;
> }
> +
> + kfree(ctx);
> }

Okay, but now there's a NULL pointer dereference because fc->ops->free() can be
called with NULL fc->fs_private. Probably fc->ops->free() shouldn't be called
in that case.

int main()
{
int fd = syscall(__NR_fsopen, "tmpfs", 0);
write(fd, "x create", 8);
syscall(__NR_fsmount, fd, 0, 0);
}

BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
PGD 0 P4D 0
Oops: 0000 [#1] SMP
CPU: 1 PID: 186 Comm: fsopen Not tainted 4.18.0-rc1-00001-g0f067bdbfeca0 #29
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014
RIP: 0010:legacy_fs_context_free+0xc/0x40 fs/fs_context.c:500
Code: 02 75 08 48 c7 42 08 01 00 00 00 31 c0 c3 c7 42 18 01 00 00 00 31 c0 c3 66 0f 1f 44 00 00 55 48 89 e5 53 48 8b 9f 90 00 00 00 <8b> 4b 18 83 f9 04 77 0c b8 01 00 00 00 48 d3 e0 a8 13 75 08 48 8b
RSP: 0018:ffffc9000079bd88 EFLAGS: 00010282
RAX: ffffffff8118fbe0 RBX: 0000000000000000 RCX: 0000000000000001
RDX: ffff88007c82c0f4 RSI: 0000000000000001 RDI: ffff88007be77700
RBP: ffffc9000079bd90 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff88007c82c000
R13: 0000000000060003 R14: ffff88007d34d020 R15: ffff88007ab8aea8
FS: 00007fee62b79740(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000018 CR3: 0000000001c0f000 CR4: 00000000003406e0
Call Trace:
put_fs_context+0x4c/0x180 fs/fs_context.c:479
fscontext_release+0x20/0x30 fs/fsopen.c:196
__fput+0xbb/0x210 fs/file_table.c:210
____fput+0x9/0x10 fs/file_table.c:246
task_work_run+0x86/0xc0 kernel/task_work.c:113
exit_task_work include/linux/task_work.h:22 [inline]
do_exit+0x27a/0xa30 kernel/exit.c:865
do_group_exit+0x3c/0xc0 kernel/exit.c:968
__do_sys_exit_group kernel/exit.c:979 [inline]
__se_sys_exit_group kernel/exit.c:977 [inline]
__x64_sys_exit_group+0x13/0x20 kernel/exit.c:977
do_syscall_64+0x4a/0x180 arch/x86/entry/common.c:290
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7fee6224eee8
Code: Bad RIP value.
RSP: 002b:00007ffc3efc0cd8 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fee6224eee8
RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000
RBP: 00007fee625386d8 R08: 00000000000000e7 R09: ffffffffffffff80
R10: 00007fee62745100 R11: 0000000000000246 R12: 00007fee625386d8
R13: 00007fee6253dbe0 R14: 0000000000000000 R15: 0000000000000000
CR2: 0000000000000018
---[ end trace 8ac26865cb821d07 ]---

2018-07-10 01:26:50

by Eric Biggers

[permalink] [raw]
Subject: Re: [PATCH 07/18] fs_context: fix double free of legacy_fs_context data

On Mon, Jul 09, 2018 at 06:17:41PM -0700, Eric Biggers wrote:
> On Mon, Jul 09, 2018 at 01:31:09PM +0100, David Howells wrote:
> > Eric Biggers <[email protected]> wrote:
> >
> > > sys_fsmount() calls fc->ops->free() to free the data, zeroes
> > > ->fs_private, then proceeds to reuse the context. But legacy_fs_context
> > > doesn't use ->fs_private, so we need to handle zeroing it too; otherwise
> > > there's a double free of legacy_fs_context::{legacy_data,secdata}.
> >
> > I think the attached is better. I stopped embedding the fs_context in the
> > xxx_fs_context to make certain things simpler, but I missed the legacy
> > wrapper.
> >
> > David
> > ---
> > diff --git a/fs/fs_context.c b/fs/fs_context.c
> > index f91facc769f7..ab93a0b73dc6 100644
> > --- a/fs/fs_context.c
> > +++ b/fs/fs_context.c
> > @@ -34,7 +34,6 @@ enum legacy_fs_param {
> > };
> >
> > struct legacy_fs_context {
> > - struct fs_context fc;
> > char *legacy_data; /* Data page for legacy filesystems */
> > char *secdata;
> > size_t data_size;
> > @@ -239,12 +238,21 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
> > enum fs_context_purpose purpose)
> > {
> > struct fs_context *fc;
> > - int ret;
> > + int ret = -ENOMEM;
> >
> > - fc = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
> > + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
> > if (!fc)
> > return ERR_PTR(-ENOMEM);
> >
> > + if (!fs_type->init_fs_context) {
> > + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context),
> > + GFP_KERNEL);
> > + if (!fc->fs_private)
> > + goto err_fc;
> > +
> > + fc->ops = &legacy_fs_context_ops;
> > + }
> > +
>
> Why isn't this done in the same place that ->init_fs_context() would otherwise
> be called? It logically does the same thing, right?

Case in point: if allocating ->fs_private fails here, you'll get a NULL pointer
dereference during put_fs_context() not only from the NULL ->fs_private in
legacy_fs_context_free(), but also from put_filesystem() since ->fs_type hasn't
been set yet.

- Eric

2018-07-10 08:06:05

by David Howells

[permalink] [raw]
Subject: Re: [PATCH 07/18] fs_context: fix double free of legacy_fs_context data

Eric Biggers <[email protected]> wrote:

> Why isn't this done in the same place that ->init_fs_context() would otherwise
> be called? It logically does the same thing, right?

Fair point. How about the attached incremental change? It breaks the legacy
context initialisation out into its own init function and just sets that as
the default if the fs doesn't supply its own.

It also makes the freeing conditional.

David
---
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 8af0542ab8b6..f388ab29d37d 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -42,6 +42,7 @@ struct legacy_fs_context {
enum legacy_fs_param param_type;
};

+static int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry);
static const struct fs_context_operations legacy_fs_context_ops;

static const match_table_t common_set_sb_flag = {
@@ -239,6 +240,7 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
unsigned int sb_flags,
enum fs_context_purpose purpose)
{
+ int (*init_fs_context)(struct fs_context *, struct dentry *);
struct fs_context *fc;
int ret = -ENOMEM;

@@ -246,15 +248,6 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,
if (!fc)
return ERR_PTR(-ENOMEM);

- if (!fs_type->init_fs_context) {
- fc->fs_private = kzalloc(sizeof(struct legacy_fs_context),
- GFP_KERNEL);
- if (!fc->fs_private)
- goto err_fc;
-
- fc->ops = &legacy_fs_context_ops;
- }
-
fc->purpose = purpose;
fc->sb_flags = sb_flags;
fc->fs_type = get_filesystem(fs_type);
@@ -285,11 +278,13 @@ struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type,


/* TODO: Make all filesystems support this unconditionally */
- if (fc->fs_type->init_fs_context) {
- ret = fc->fs_type->init_fs_context(fc, reference);
- if (ret < 0)
- goto err_fc;
- }
+ init_fs_context = fc->fs_type->init_fs_context;
+ if (!init_fs_context)
+ init_fs_context = legacy_init_fs_context;
+
+ ret = (*init_fs_context)(fc, reference);
+ if (ret < 0)
+ goto err_fc;

/* Do the security check last because ->init_fs_context may change the
* namespace subscriptions.
@@ -499,19 +494,21 @@ static void legacy_fs_context_free(struct fs_context *fc)
{
struct legacy_fs_context *ctx = fc->fs_private;

- free_secdata(ctx->secdata);
- switch (ctx->param_type) {
- case LEGACY_FS_UNSET_PARAMS:
- case LEGACY_FS_NO_PARAMS:
- break;
- case LEGACY_FS_MAGIC_PARAMS:
- break; /* ctx->data is a weird pointer */
- default:
- kfree(ctx->legacy_data);
- break;
- }
+ if (ctx) {
+ free_secdata(ctx->secdata);
+ switch (ctx->param_type) {
+ case LEGACY_FS_UNSET_PARAMS:
+ case LEGACY_FS_NO_PARAMS:
+ break;
+ case LEGACY_FS_MAGIC_PARAMS:
+ break; /* ctx->data is a weird pointer */
+ default:
+ kfree(ctx->legacy_data);
+ break;
+ }

- kfree(ctx);
+ kfree(ctx);
+ }
}

/*
@@ -707,3 +704,18 @@ static const struct fs_context_operations legacy_fs_context_ops = {
.validate = legacy_validate,
.get_tree = legacy_get_tree,
};
+
+/*
+ * Initialise a legacy context for a filesystem that doesn't support
+ * fs_context.
+ */
+static int legacy_init_fs_context(struct fs_context *fc, struct dentry *dentry)
+{
+
+ fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
+ if (!fc->fs_private)
+ return -ENOMEM;
+
+ fc->ops = &legacy_fs_context_ops;
+ return 0;
+}