2020-08-20 14:05:58

by Alexey Gladkov

[permalink] [raw]
Subject: [PATCH v3 0/2] proc: Relax check of mount visibility

If only the dynamic part of procfs is mounted (subset=pid), then there is no
need to check if procfs is fully visible to the user in the new user namespace.

Changelog
---------

v3:
* Add 'const' to struct cred *mounter_cred (fix kernel test robot warning).

v2:
* Cache the mounter's credentials and make access to the net directories
contingent on the permissions of the mounter of procfs.

Alexey Gladkov (2):
proc: Relax check of mount visibility
Show /proc/self/net only for CAP_NET_ADMIN

fs/namespace.c | 27 ++++++++++++++++-----------
fs/proc/proc_net.c | 8 ++++++++
fs/proc/root.c | 21 +++++++++++++++------
include/linux/fs.h | 1 +
include/linux/proc_fs.h | 1 +
5 files changed, 41 insertions(+), 17 deletions(-)

--
2.25.4


2020-08-20 14:05:58

by Alexey Gladkov

[permalink] [raw]
Subject: [PATCH v3 1/2] proc: Relax check of mount visibility

Allow procfs to be mounted with the subset=pid option even if the entire
procfs is not fully accessible to the user.

Signed-off-by: Alexey Gladkov <[email protected]>
---
fs/namespace.c | 27 ++++++++++++++++-----------
fs/proc/root.c | 16 +++++++++-------
include/linux/fs.h | 1 +
3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 4a0f600a3328..ab9d607921da 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3949,18 +3949,23 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
continue;

- /* This mount is not fully visible if there are any
- * locked child mounts that cover anything except for
- * empty directories.
+ /* If this filesystem is completely dynamic, then it
+ * makes no sense to check for any child mounts.
*/
- list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
- struct inode *inode = child->mnt_mountpoint->d_inode;
- /* Only worry about locked mounts */
- if (!(child->mnt.mnt_flags & MNT_LOCKED))
- continue;
- /* Is the directory permanetly empty? */
- if (!is_empty_dir_inode(inode))
- goto next;
+ if (!(sb->s_iflags & SB_I_DYNAMIC)) {
+ /* This mount is not fully visible if there are any
+ * locked child mounts that cover anything except for
+ * empty directories.
+ */
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ struct inode *inode = child->mnt_mountpoint->d_inode;
+ /* Only worry about locked mounts */
+ if (!(child->mnt.mnt_flags & MNT_LOCKED))
+ continue;
+ /* Is the directory permanetly empty? */
+ if (!is_empty_dir_inode(inode))
+ goto next;
+ }
}
/* Preserve the locked attributes */
*new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5e444d4f9717..c6bf74de1906 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -145,18 +145,21 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
return 0;
}

-static void proc_apply_options(struct proc_fs_info *fs_info,
+static void proc_apply_options(struct super_block *s,
struct fs_context *fc,
struct user_namespace *user_ns)
{
struct proc_fs_context *ctx = fc->fs_private;
+ struct proc_fs_info *fs_info = proc_sb_info(s);

if (ctx->mask & (1 << Opt_gid))
fs_info->pid_gid = make_kgid(user_ns, ctx->gid);
if (ctx->mask & (1 << Opt_hidepid))
fs_info->hide_pid = ctx->hidepid;
- if (ctx->mask & (1 << Opt_subset))
+ if (ctx->mask & (1 << Opt_subset)) {
fs_info->pidonly = ctx->pidonly;
+ s->s_iflags |= SB_I_DYNAMIC;
+ }
}

static int proc_fill_super(struct super_block *s, struct fs_context *fc)
@@ -170,9 +173,6 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
if (!fs_info)
return -ENOMEM;

- fs_info->pid_ns = get_pid_ns(ctx->pid_ns);
- proc_apply_options(fs_info, fc, current_user_ns());
-
/* User space would break if executables or devices appear on proc */
s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
@@ -183,6 +183,9 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
s->s_time_gran = 1;
s->s_fs_info = fs_info;

+ fs_info->pid_ns = get_pid_ns(ctx->pid_ns);
+ proc_apply_options(s, fc, current_user_ns());
+
/*
* procfs isn't actually a stacking filesystem; however, there is
* too much magic going on inside it to permit stacking things on
@@ -216,11 +219,10 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
static int proc_reconfigure(struct fs_context *fc)
{
struct super_block *sb = fc->root->d_sb;
- struct proc_fs_info *fs_info = proc_sb_info(sb);

sync_filesystem(sb);

- proc_apply_options(fs_info, fc, current_user_ns());
+ proc_apply_options(sb, fc, current_user_ns());
return 0;
}

diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5abba86107d..aff5ed9e8f82 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1413,6 +1413,7 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */
#define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020
#define SB_I_UNTRUSTED_MOUNTER 0x00000040
+#define SB_I_DYNAMIC 0x00000080

#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */

--
2.25.4

2020-08-20 14:08:08

by Alexey Gladkov

[permalink] [raw]
Subject: [PATCH v3 2/2] Show /proc/self/net only for CAP_NET_ADMIN

Cache the mounter's credentials and make access to the net directories
contingent on the permissions of the mounter of proc.

When proc is mounted with the subset=pid option, show /proc/self/net only
if the mounter has CAP_NET_ADMIN.

Signed-off-by: Alexey Gladkov <[email protected]>
---
fs/proc/proc_net.c | 8 ++++++++
fs/proc/root.c | 7 +++++++
include/linux/proc_fs.h | 1 +
3 files changed, 16 insertions(+)

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index dba63b2429f0..c43fc5c907db 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -26,6 +26,7 @@
#include <linux/uidgid.h>
#include <net/net_namespace.h>
#include <linux/seq_file.h>
+#include <linux/security.h>

#include "internal.h"

@@ -275,6 +276,7 @@ static struct net *get_proc_task_net(struct inode *dir)
struct task_struct *task;
struct nsproxy *ns;
struct net *net = NULL;
+ struct proc_fs_info *fs_info = proc_sb_info(dir->i_sb);

rcu_read_lock();
task = pid_task(proc_pid(dir), PIDTYPE_PID);
@@ -287,6 +289,12 @@ static struct net *get_proc_task_net(struct inode *dir)
}
rcu_read_unlock();

+ if (net && (fs_info->pidonly == PROC_PIDONLY_ON) &&
+ security_capable(fs_info->mounter_cred, net->user_ns, CAP_NET_ADMIN, CAP_OPT_NONE) < 0) {
+ put_net(net);
+ net = NULL;
+ }
+
return net;
}

diff --git a/fs/proc/root.c b/fs/proc/root.c
index c6bf74de1906..eeeda375cf85 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -184,6 +184,8 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
s->s_fs_info = fs_info;

fs_info->pid_ns = get_pid_ns(ctx->pid_ns);
+ fs_info->mounter_cred = get_cred(fc->cred);
+
proc_apply_options(s, fc, current_user_ns());

/*
@@ -219,9 +221,13 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
static int proc_reconfigure(struct fs_context *fc)
{
struct super_block *sb = fc->root->d_sb;
+ struct proc_fs_info *fs_info = proc_sb_info(sb);

sync_filesystem(sb);

+ put_cred(fs_info->mounter_cred);
+ fs_info->mounter_cred = get_cred(fc->cred);
+
proc_apply_options(sb, fc, current_user_ns());
return 0;
}
@@ -276,6 +282,7 @@ static void proc_kill_sb(struct super_block *sb)

kill_anon_super(sb);
put_pid_ns(fs_info->pid_ns);
+ put_cred(fs_info->mounter_cred);
kfree(fs_info);
}

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index d1eed1b43651..ce00560789f6 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -63,6 +63,7 @@ struct proc_fs_info {
kgid_t pid_gid;
enum proc_hidepid hide_pid;
enum proc_pidonly pidonly;
+ const struct cred *mounter_cred;
};

static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)
--
2.25.4