2014-02-21 02:58:17

by Zefan Li

[permalink] [raw]
Subject: [PATCH v2] sysfs: fix namespace refcnt leak

As mount() and kill_sb() are not a one-to-one match, we shouldn't get
ns refcnt unconditionally in sysfs_mount(), and instead we should
get the refcnt only when kernfs_mount() allocated a new superblock.

v2:
- Changed the name of the new argument, suggested by Tejun.
- Made the argument optional, suggested by Tejun.

Signed-off-by: Li Zefan <[email protected]>
---

This fix is for 3.14.

---
fs/kernfs/mount.c | 8 +++++++-
fs/sysfs/mount.c | 5 +++--
include/linux/kernfs.h | 9 +++++----
3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 405279b..47f4efd 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -132,6 +132,7 @@ const void *kernfs_super_ns(struct super_block *sb)
* @flags: mount flags specified for the mount
* @root: kernfs_root of the hierarchy being mounted
* @ns: optional namespace tag of the mount
+ * @new_sb_created: tell the caller if we allocated a new superblock
*
* This is to be called from each kernfs user's file_system_type->mount()
* implementation, which should pass through the specified @fs_type and
@@ -141,7 +142,8 @@ const void *kernfs_super_ns(struct super_block *sb)
* The return value can be passed to the vfs layer verbatim.
*/
struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
- struct kernfs_root *root, const void *ns)
+ struct kernfs_root *root, const void *ns,
+ bool *new_sb_created)
{
struct super_block *sb;
struct kernfs_super_info *info;
@@ -159,6 +161,10 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
kfree(info);
if (IS_ERR(sb))
return ERR_CAST(sb);
+
+ if (new_sb_created)
+ *new_sb_created = !sb->s_root;
+
if (!sb->s_root) {
error = kernfs_fill_super(sb);
if (error) {
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 5c7fdd9..f5bea79 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -27,6 +27,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
{
struct dentry *root;
void *ns;
+ bool new_sb;

if (!(flags & MS_KERNMOUNT)) {
if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
@@ -37,8 +38,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
}

ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
- root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns);
- if (IS_ERR(root))
+ root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns, &new_sb);
+ if (IS_ERR(root) || !new_sb)
kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
return root;
}
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 649497a..ea3f5d2 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -279,7 +279,8 @@ void kernfs_notify(struct kernfs_node *kn);

const void *kernfs_super_ns(struct super_block *sb);
struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
- struct kernfs_root *root, const void *ns);
+ struct kernfs_root *root, const void *ns,
+ bool *new_sb_created);
void kernfs_kill_sb(struct super_block *sb);

void kernfs_init(void);
@@ -372,7 +373,7 @@ static inline const void *kernfs_super_ns(struct super_block *sb)

static inline struct dentry *
kernfs_mount_ns(struct file_system_type *fs_type, int flags,
- struct kernfs_root *root, const void *ns)
+ struct kernfs_root *root, const void *ns, bool *new_sb_created)
{ return ERR_PTR(-ENOSYS); }

static inline void kernfs_kill_sb(struct super_block *sb) { }
@@ -430,9 +431,9 @@ static inline int kernfs_rename(struct kernfs_node *kn,

static inline struct dentry *
kernfs_mount(struct file_system_type *fs_type, int flags,
- struct kernfs_root *root)
+ struct kernfs_root *root, bool *new_sb_created)
{
- return kernfs_mount_ns(fs_type, flags, root, NULL);
+ return kernfs_mount_ns(fs_type, flags, root, NULL, new_sb_created);
}

#endif /* __LINUX_KERNFS_H */
--
1.8.0.2


2014-02-21 10:12:54

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH v2] sysfs: fix namespace refcnt leak

On Fri, Feb 21, 2014 at 10:55:55AM +0800, Li Zefan wrote:
> As mount() and kill_sb() are not a one-to-one match, we shouldn't get
> ns refcnt unconditionally in sysfs_mount(), and instead we should
> get the refcnt only when kernfs_mount() allocated a new superblock.
>
> v2:
> - Changed the name of the new argument, suggested by Tejun.
> - Made the argument optional, suggested by Tejun.
>
> Signed-off-by: Li Zefan <[email protected]>
...
> @@ -132,6 +132,7 @@ const void *kernfs_super_ns(struct super_block *sb)
> * @flags: mount flags specified for the mount
> * @root: kernfs_root of the hierarchy being mounted
> * @ns: optional namespace tag of the mount
> + * @new_sb_created: tell the caller if we allocated a new superblock
> *
> * This is to be called from each kernfs user's file_system_type->mount()
> * implementation, which should pass through the specified @fs_type and
> @@ -141,7 +142,8 @@ const void *kernfs_super_ns(struct super_block *sb)
> * The return value can be passed to the vfs layer verbatim.
> */
> struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
> - struct kernfs_root *root, const void *ns)
> + struct kernfs_root *root, const void *ns,
> + bool *new_sb_created)

Oops, just one more thing. Let's please put @new_sb_created before
@ns; otherwise, kernfs_mount_ns() and kernfs_mount() become really
confusing as we end up omitting the second-to-last param for the
latter instead of the last.

Other than that,

Reviewed-by: Tejun Heo <[email protected]>

Thanks.

--
tejun