The following series is a resend of a set of patches that were
originally targeted for inclusion in 2.6.30-rc1. They build upon
Al Viro's "Cache root in nameidata" patches, and add the ability to
create a private mount namespace that is not visible to user
processes.
As stated before, the goal is to allow reuse of vfs_path_lookup() in
order to allow the nfs client to walk down NFSv4 mount paths, and locate
the directory that needs to be mounted.
There have been no further comments on these patches since I mailed the
last revision, so I'm hoping that Al and Christoph will ack them and
that Linus will apply...
Cheers
Trond
---
Trond Myklebust (5):
NFS: Correct the NFS mount path when following a referral
NFS: Fix nfs_path() to always return a '/' at the beginning of the path
NFSv4: Replace nfs4_path_walk() with VFS path lookup in a private namespace
VFS: Add VFS helper functions for setting up private namespaces
VFS: Uninline the function put_mnt_ns()
fs/namespace.c | 53 +++++++++--
fs/nfs/namespace.c | 5 +
fs/nfs/super.c | 202 +++++++++++++++++++++++++++++++++++++----
include/linux/mnt_namespace.h | 10 --
4 files changed, 231 insertions(+), 39 deletions(-)
--
Signature
As noted in the previous patch, the NFSv4 client mount code currently
has several limitations. If the mount path contains symlinks, or
referrals, or even if it just contains a '..', then the client code in
nfs4_path_walk() will fail with an error.
This patch replaces the nfs4_path_walk()-based lookup with a helper
function that sets up a private namespace to represent the namespace on the
server, then uses the ordinary VFS and NFS path lookup code to walk down the
mount path in that namespace.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/super.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 157 insertions(+), 21 deletions(-)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b798ed1..8da7e59 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -42,6 +42,8 @@
#include <linux/smp_lock.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
+#include <linux/mnt_namespace.h>
+#include <linux/namei.h>
#include <linux/nfs_idmap.h>
#include <linux/vfs.h>
#include <linux/inet.h>
@@ -270,10 +272,14 @@ static const struct super_operations nfs_sops = {
#ifdef CONFIG_NFS_V4
static int nfs4_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_remote_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
static int nfs4_referral_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
static void nfs4_kill_super(struct super_block *sb);
static struct file_system_type nfs4_fs_type = {
@@ -284,6 +290,14 @@ static struct file_system_type nfs4_fs_type = {
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
+static struct file_system_type nfs4_remote_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .get_sb = nfs4_remote_get_sb,
+ .kill_sb = nfs4_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
struct file_system_type nfs4_xdev_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
@@ -292,6 +306,14 @@ struct file_system_type nfs4_xdev_fs_type = {
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
+static struct file_system_type nfs4_remote_referral_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .get_sb = nfs4_remote_referral_get_sb,
+ .kill_sb = nfs4_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
struct file_system_type nfs4_referral_fs_type = {
.owner = THIS_MODULE,
.name = "nfs4",
@@ -2422,12 +2444,12 @@ out_no_client_address:
}
/*
- * Get the superblock for an NFS4 mountpoint
+ * Get the superblock for the NFS4 root partition
*/
-static int nfs4_get_sb(struct file_system_type *fs_type,
+static int nfs4_remote_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
{
- struct nfs_parsed_mount_data *data;
+ struct nfs_parsed_mount_data *data = raw_data;
struct super_block *s;
struct nfs_server *server;
struct nfs_fh *mntfh;
@@ -2438,18 +2460,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
};
int error = -ENOMEM;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
if (data == NULL || mntfh == NULL)
goto out_free_fh;
security_init_mnt_opts(&data->lsm_opts);
- /* Validate the mount data */
- error = nfs4_validate_mount_data(raw_data, data, dev_name);
- if (error < 0)
- goto out;
-
/* Get a volume representation */
server = nfs4_create_server(data, mntfh);
if (IS_ERR(server)) {
@@ -2462,7 +2478,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
compare_super = NULL;
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
+ s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata);
if (IS_ERR(s)) {
error = PTR_ERR(s);
goto out_free;
@@ -2499,14 +2515,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
error = 0;
out:
- kfree(data->client_address);
- kfree(data->nfs_server.export_path);
- kfree(data->nfs_server.hostname);
- kfree(data->fscache_uniq);
security_free_mnt_opts(&data->lsm_opts);
out_free_fh:
kfree(mntfh);
- kfree(data);
return error;
out_free:
@@ -2520,6 +2531,102 @@ error_splat_super:
goto out;
}
+static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
+ int flags, void *data, const char *hostname)
+{
+ struct vfsmount *root_mnt;
+ char *root_devname;
+ size_t len;
+
+ len = strlen(hostname) + 3;
+ root_devname = kmalloc(len, GFP_KERNEL);
+ if (root_devname == NULL)
+ return ERR_PTR(-ENOMEM);
+ snprintf(root_devname, len, "%s:/", hostname);
+ root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
+ kfree(root_devname);
+ return root_mnt;
+}
+
+static int nfs_follow_remote_path(struct vfsmount *root_mnt,
+ const char *export_path, struct vfsmount *mnt_target)
+{
+ struct mnt_namespace *ns_private;
+ struct nameidata nd;
+ struct super_block *s;
+ int ret;
+
+ ns_private = create_mnt_ns(root_mnt);
+ ret = PTR_ERR(ns_private);
+ if (IS_ERR(ns_private))
+ goto out_mntput;
+
+ ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
+ export_path, LOOKUP_FOLLOW, &nd);
+
+ put_mnt_ns(ns_private);
+
+ if (ret != 0)
+ goto out_err;
+
+ s = nd.path.mnt->mnt_sb;
+ atomic_inc(&s->s_active);
+ mnt_target->mnt_sb = s;
+ mnt_target->mnt_root = dget(nd.path.dentry);
+
+ path_put(&nd.path);
+ down_write(&s->s_umount);
+ return 0;
+out_mntput:
+ mntput(root_mnt);
+out_err:
+ return ret;
+}
+
+/*
+ * Get the superblock for an NFS4 mountpoint
+ */
+static int nfs4_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+{
+ struct nfs_parsed_mount_data *data;
+ char *export_path;
+ struct vfsmount *root_mnt;
+ int error = -ENOMEM;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (data == NULL)
+ goto out_free_data;
+
+ /* Validate the mount data */
+ error = nfs4_validate_mount_data(raw_data, data, dev_name);
+ if (error < 0)
+ goto out;
+
+ export_path = data->nfs_server.export_path;
+ data->nfs_server.export_path = "/";
+ root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
+ data->nfs_server.hostname);
+ data->nfs_server.export_path = export_path;
+
+ error = PTR_ERR(root_mnt);
+ if (IS_ERR(root_mnt))
+ goto out;
+
+ error = nfs_follow_remote_path(root_mnt, export_path, mnt);
+
+out:
+ kfree(data->client_address);
+ kfree(data->nfs_server.export_path);
+ kfree(data->nfs_server.hostname);
+ kfree(data->fscache_uniq);
+out_free_data:
+ kfree(data);
+ dprintk("<-- nfs4_get_sb() = %d%s\n", error,
+ error != 0 ? " [error]" : "");
+ return error;
+}
+
static void nfs4_kill_super(struct super_block *sb)
{
struct nfs_server *server = NFS_SB(sb);
@@ -2615,12 +2722,9 @@ error_splat_super:
return error;
}
-/*
- * Create an NFS4 server record on referral traversal
- */
-static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data,
- struct vfsmount *mnt)
+static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data,
+ struct vfsmount *mnt)
{
struct nfs_clone_mount *data = raw_data;
struct super_block *s;
@@ -2699,4 +2803,36 @@ error_splat_super:
return error;
}
+/*
+ * Create an NFS4 server record on referral traversal
+ */
+static int nfs4_referral_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data,
+ struct vfsmount *mnt)
+{
+ struct nfs_clone_mount *data = raw_data;
+ char *export_path;
+ struct vfsmount *root_mnt;
+ int error;
+
+ dprintk("--> nfs4_referral_get_sb()\n");
+
+ export_path = data->mnt_path;
+ data->mnt_path = "/";
+
+ root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type,
+ flags, data, data->hostname);
+ data->mnt_path = export_path;
+
+ error = PTR_ERR(root_mnt);
+ if (IS_ERR(root_mnt))
+ goto out;
+
+ error = nfs_follow_remote_path(root_mnt, export_path, mnt);
+out:
+ dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error,
+ error != 0 ? " [error]" : "");
+ return error;
+}
+
#endif /* CONFIG_NFS_V4 */
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/super.c | 24 ++++++++++++++++++++++++
1 files changed, 24 insertions(+), 0 deletions(-)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8da7e59..daecbad 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2548,6 +2548,27 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
return root_mnt;
}
+static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
+{
+ char *page = (char *) __get_free_page(GFP_KERNEL);
+ char *devname, *tmp;
+
+ if (page == NULL)
+ return;
+ devname = nfs_path(path->mnt->mnt_devname,
+ path->mnt->mnt_root, path->dentry,
+ page, PAGE_SIZE);
+ if (devname == NULL)
+ goto out_freepage;
+ tmp = kstrdup(devname, GFP_KERNEL);
+ if (tmp == NULL)
+ goto out_freepage;
+ kfree(mnt->mnt_devname);
+ mnt->mnt_devname = tmp;
+out_freepage:
+ free_page((unsigned long)page);
+}
+
static int nfs_follow_remote_path(struct vfsmount *root_mnt,
const char *export_path, struct vfsmount *mnt_target)
{
@@ -2574,6 +2595,9 @@ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
mnt_target->mnt_sb = s;
mnt_target->mnt_root = dget(nd.path.dentry);
+ /* Correct the device pathname */
+ nfs_fix_devname(&nd.path, mnt_target);
+
path_put(&nd.path);
down_write(&s->s_umount);
return 0;
Uninline put_mnt_ns() in order to allow modules to use it without having to export vfsmount_lock.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/namespace.c | 8 ++++++--
include/linux/mnt_namespace.h | 9 +--------
2 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 2dd333b..2465c05 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2246,10 +2246,14 @@ void __init mnt_init(void)
init_mount_tree();
}
-void __put_mnt_ns(struct mnt_namespace *ns)
+void put_mnt_ns(struct mnt_namespace *ns)
{
- struct vfsmount *root = ns->root;
+ struct vfsmount *root;
LIST_HEAD(umount_list);
+
+ if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock))
+ return;
+ root = ns->root;
ns->root = NULL;
spin_unlock(&vfsmount_lock);
down_write(&namespace_sem);
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 3a05929..299d11a 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -26,14 +26,7 @@ struct fs_struct;
extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
struct fs_struct *);
-extern void __put_mnt_ns(struct mnt_namespace *ns);
-
-static inline void put_mnt_ns(struct mnt_namespace *ns)
-{
- if (atomic_dec_and_lock(&ns->count, &vfsmount_lock))
- /* releases vfsmount_lock */
- __put_mnt_ns(ns);
-}
+extern void put_mnt_ns(struct mnt_namespace *ns);
static inline void exit_mnt_ns(struct task_struct *p)
{
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/namespace.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index f01caec..40c7667 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -65,6 +65,11 @@ char *nfs_path(const char *base,
dentry = dentry->d_parent;
}
spin_unlock(&dcache_lock);
+ if (*end != '/') {
+ if (--buflen < 0)
+ goto Elong;
+ *--end = '/';
+ }
namelen = strlen(base);
/* Strip off excess slashes in base string */
while (namelen > 0 && base[namelen - 1] == '/')
The purpose of this patch is to improve the remote mount path lookup
support for distributed filesystems such as the NFSv4 client.
When given a mount command of the form "mount server:/foo/bar /mnt", the
NFSv4 client is required to look up the filehandle for "server:/", and
then look up each component of the remote mount path "foo/bar" in order
to find the directory that is actually going to be mounted on /mnt.
Following that remote mount path may involve following symlinks,
crossing server-side mount points and even following referrals to
filesystem volumes on other servers.
Since the standard VFS path lookup code already supports walking paths
that contain all these features (using in-kernel automounts for
following referrals) we would like to be able to reuse that rather than
duplicate the full path traversal functionality in the NFSv4 client code.
This patch therefore defines a VFS helper function create_mnt_ns(), that
sets up a temporary filesystem namespace and attaches a root filesystem to
it. It exports the create_mnt_ns() and put_mnt_ns() functions for use by
filesystem modules.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/namespace.c | 45 ++++++++++++++++++++++++++++++++++-------
include/linux/mnt_namespace.h | 1 +
2 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 2465c05..9b766b0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1937,6 +1937,21 @@ dput_out:
return retval;
}
+static struct mnt_namespace *alloc_mnt_ns(void)
+{
+ struct mnt_namespace *new_ns;
+
+ new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+ if (!new_ns)
+ return ERR_PTR(-ENOMEM);
+ atomic_set(&new_ns->count, 1);
+ new_ns->root = NULL;
+ INIT_LIST_HEAD(&new_ns->list);
+ init_waitqueue_head(&new_ns->poll);
+ new_ns->event = 0;
+ return new_ns;
+}
+
/*
* Allocate a new namespace structure and populate it with contents
* copied from the namespace of the passed in task structure.
@@ -1948,14 +1963,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
struct vfsmount *p, *q;
- new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
- if (!new_ns)
- return ERR_PTR(-ENOMEM);
-
- atomic_set(&new_ns->count, 1);
- INIT_LIST_HEAD(&new_ns->list);
- init_waitqueue_head(&new_ns->poll);
- new_ns->event = 0;
+ new_ns = alloc_mnt_ns();
+ if (IS_ERR(new_ns))
+ return new_ns;
down_write(&namespace_sem);
/* First pass: copy the tree topology */
@@ -2019,6 +2029,24 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
return new_ns;
}
+/**
+ * create_mnt_ns - creates a private namespace and adds a root filesystem
+ * @mnt: pointer to the new root filesystem mountpoint
+ */
+struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+{
+ struct mnt_namespace *new_ns;
+
+ new_ns = alloc_mnt_ns();
+ if (!IS_ERR(new_ns)) {
+ mnt->mnt_ns = new_ns;
+ new_ns->root = mnt;
+ list_add(&new_ns->list, &new_ns->root->mnt_list);
+ }
+ return new_ns;
+}
+EXPORT_SYMBOL(create_mnt_ns);
+
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
char __user *, type, unsigned long, flags, void __user *, data)
{
@@ -2264,3 +2292,4 @@ void put_mnt_ns(struct mnt_namespace *ns)
release_mounts(&umount_list);
kfree(ns);
}
+EXPORT_SYMBOL(put_mnt_ns);
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 299d11a..3beb259 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -24,6 +24,7 @@ struct proc_mounts {
struct fs_struct;
+extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt);
extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
struct fs_struct *);
extern void put_mnt_ns(struct mnt_namespace *ns);
Al? Where are you?
I'd like to get acks on Trond's series, and I'd also like to get an ack
(or nak) on the (partly related) possible nd.root path reference
leak.
Trond - the series looks fine to me, and I guess I'll apply it even
without Al's acks, but I'd give it a day or two still..
Linus
On Mon, 22 Jun 2009, Trond Myklebust wrote:
>
> The following series is a resend of a set of patches that were
> originally targetted for inclusion in 2.6.30-rc1. They build upon
> Al Viro's "Cache root in nameidata" patches, and add the ability to
> create a private mount namespace that is not visible to user
> processes.
> As stated before, the goal is to allow reuse of vfs_path_lookup() in
> order to allow the nfs client to walk down NFSv4 mount paths, and locate
> the directory that needs to be mounted.
>
> There have been no further comments on these patches since I mailed the
> last revision, so I'm hoping that Al and Christoph will ack them and
> that Linus will apply...
>
> Cheers
> Trond
>
> ---
>
> Trond Myklebust (5):
> NFS: Correct the NFS mount path when following a referral
> NFS: Fix nfs_path() to always return a '/' at the beginning of the path
> NFSv4: Replace nfs4_path_walk() with VFS path lookup in a private namespace
> VFS: Add VFS helper functions for setting up private namespaces
> VFS: Uninline the function put_mnt_ns()
>
>
> fs/namespace.c | 53 +++++++++--
> fs/nfs/namespace.c | 5 +
> fs/nfs/super.c | 202 +++++++++++++++++++++++++++++++++++++----
> include/linux/mnt_namespace.h | 10 --
> 4 files changed, 231 insertions(+), 39 deletions(-)
>
> --
> Signature
>
Quoting Trond Myklebust ([email protected]):
> The purpose of this patch is to improve the remote mount path lookup
> support for distributed filesystems such as the NFSv4 client.
>
> When given a mount command of the form "mount server:/foo/bar /mnt", the
> NFSv4 client is required to look up the filehandle for "server:/", and
> then look up each component of the remote mount path "foo/bar" in order
> to find the directory that is actually going to be mounted on /mnt.
> Following that remote mount path may involve following symlinks,
> crossing server-side mount points and even following referrals to
> filesystem volumes on other servers.
>
> Since the standard VFS path lookup code already supports walking paths
> that contain all these features (using in-kernel automounts for
> following referrals) we would like to be able to reuse that rather than
> duplicate the full path traversal functionality in the NFSv4 client code.
>
> This patch therefore defines a VFS helper function create_mnt_ns(), that
> sets up a temporary filesystem namespace and attaches a root filesystem to
> it. It exports the create_mnt_ns() and put_mnt_ns() function for use by
> filesystem modules.
>
> Signed-off-by: Trond Myklebust <[email protected]>
This looks good, thanks. Though I see no reason not to also switch over
init_mount_tree() to the new helper.
(Seems plausible that c/r code would use this as well)
Reviewed-by: Serge Hallyn <[email protected]>
thanks,
-serge
> ---
>
> fs/namespace.c | 45 ++++++++++++++++++++++++++++++++++-------
> include/linux/mnt_namespace.h | 1 +
> 2 files changed, 38 insertions(+), 8 deletions(-)
>
>
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 2465c05..9b766b0 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -1937,6 +1937,21 @@ dput_out:
> return retval;
> }
>
> +static struct mnt_namespace *alloc_mnt_ns(void)
> +{
> + struct mnt_namespace *new_ns;
> +
> + new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
> + if (!new_ns)
> + return ERR_PTR(-ENOMEM);
> + atomic_set(&new_ns->count, 1);
> + new_ns->root = NULL;
> + INIT_LIST_HEAD(&new_ns->list);
> + init_waitqueue_head(&new_ns->poll);
> + new_ns->event = 0;
> + return new_ns;
> +}
> +
> /*
> * Allocate a new namespace structure and populate it with contents
> * copied from the namespace of the passed in task structure.
> @@ -1948,14 +1963,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
> struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
> struct vfsmount *p, *q;
>
> - new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
> - if (!new_ns)
> - return ERR_PTR(-ENOMEM);
> -
> - atomic_set(&new_ns->count, 1);
> - INIT_LIST_HEAD(&new_ns->list);
> - init_waitqueue_head(&new_ns->poll);
> - new_ns->event = 0;
> + new_ns = alloc_mnt_ns();
> + if (IS_ERR(new_ns))
> + return new_ns;
>
> down_write(&namespace_sem);
> /* First pass: copy the tree topology */
> @@ -2019,6 +2029,24 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
> return new_ns;
> }
>
> +/**
> + * create_mnt_ns - creates a private namespace and adds a root filesystem
> + * @mnt: pointer to the new root filesystem mountpoint
> + */
> +struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
> +{
> + struct mnt_namespace *new_ns;
> +
> + new_ns = alloc_mnt_ns();
> + if (!IS_ERR(new_ns)) {
> + mnt->mnt_ns = new_ns;
> + new_ns->root = mnt;
> + list_add(&new_ns->list, &new_ns->root->mnt_list);
> + }
> + return new_ns;
> +}
> +EXPORT_SYMBOL(create_mnt_ns);
> +
> SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
> char __user *, type, unsigned long, flags, void __user *, data)
> {
> @@ -2264,3 +2292,4 @@ void put_mnt_ns(struct mnt_namespace *ns)
> release_mounts(&umount_list);
> kfree(ns);
> }
> +EXPORT_SYMBOL(put_mnt_ns);
> diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
> index 299d11a..3beb259 100644
> --- a/include/linux/mnt_namespace.h
> +++ b/include/linux/mnt_namespace.h
> @@ -24,6 +24,7 @@ struct proc_mounts {
>
> struct fs_struct;
>
> +extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt);
> extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
> struct fs_struct *);
> extern void put_mnt_ns(struct mnt_namespace *ns);
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
Quoting Trond Myklebust ([email protected]):
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
>
> fs/nfs/super.c | 24 ++++++++++++++++++++++++
> 1 files changed, 24 insertions(+), 0 deletions(-)
>
>
> diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> index 8da7e59..daecbad 100644
> --- a/fs/nfs/super.c
> +++ b/fs/nfs/super.c
> @@ -2548,6 +2548,27 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
> return root_mnt;
> }
>
> +static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
> +{
> + char *page = (char *) __get_free_page(GFP_KERNEL);
> + char *devname, *tmp;
> +
> + if (page == NULL)
> + return;
> + devname = nfs_path(path->mnt->mnt_devname,
> + path->mnt->mnt_root, path->dentry,
> + page, PAGE_SIZE);
> + if (devname == NULL)
> + goto out_freepage;
> + tmp = kstrdup(devname, GFP_KERNEL);
> + if (tmp == NULL)
> + goto out_freepage;
> + kfree(mnt->mnt_devname);
> + mnt->mnt_devname = tmp;
(looking through patch 3 a bit) is this expected to be safe because all
callers will send in a mnt which was privately mounted as nfs root_mnt through
vfs_kern_mount? So that at this point no one else can have a ref to
mnt?
If that isn't the intent, then this seems problematic... (If it is, it
seems worth commenting both so that every reader doesn't feel compelled
to verify, and so that no new callers will naively violate that
expectation)
thanks,
-serge
> +out_freepage:
> + free_page((unsigned long)page);
> +}
> +
> static int nfs_follow_remote_path(struct vfsmount *root_mnt,
> const char *export_path, struct vfsmount *mnt_target)
> {
> @@ -2574,6 +2595,9 @@ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
> mnt_target->mnt_sb = s;
> mnt_target->mnt_root = dget(nd.path.dentry);
>
> + /* Correct the device pathname */
> + nfs_fix_devname(&nd.path, mnt_target);
> +
> path_put(&nd.path);
> down_write(&s->s_umount);
> return 0;
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2009-06-23 at 15:42 -0500, Serge E. Hallyn wrote:
> Quoting Trond Myklebust ([email protected]):
> > Signed-off-by: Trond Myklebust <[email protected]>
> > ---
> >
> > fs/nfs/super.c | 24 ++++++++++++++++++++++++
> > 1 files changed, 24 insertions(+), 0 deletions(-)
> >
> >
> > diff --git a/fs/nfs/super.c b/fs/nfs/super.c
> > index 8da7e59..daecbad 100644
> > --- a/fs/nfs/super.c
> > +++ b/fs/nfs/super.c
> > @@ -2548,6 +2548,27 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
> > return root_mnt;
> > }
> >
> > +static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
> > +{
> > + char *page = (char *) __get_free_page(GFP_KERNEL);
> > + char *devname, *tmp;
> > +
> > + if (page == NULL)
> > + return;
> > + devname = nfs_path(path->mnt->mnt_devname,
> > + path->mnt->mnt_root, path->dentry,
> > + page, PAGE_SIZE);
> > + if (devname == NULL)
> > + goto out_freepage;
> > + tmp = kstrdup(devname, GFP_KERNEL);
> > + if (tmp == NULL)
> > + goto out_freepage;
> > + kfree(mnt->mnt_devname);
> > + mnt->mnt_devname = tmp;
>
> (looking through patch 3 a bit) is this expected to be safe because all
> callers will send in a mnt which was privately mounted as nfs root_mnt through
> vfs_kern_mount? So that at this point noone else can have a ref to
> mnt?
>
> If that isn't the intent, then this seems problematic... (If it is, it
> seems worth commenting both so that every reader doesn't feel compelled
> to verify, and so that no new callers will naively violate that
> expectation)
The call to nfs_fix_devname() is only applied to the 'mnt_target'
vfsmount, which is the one that was passed down directly from
do_kern_mount() to the ->get_sb() method. It is entirely unreferenced by
any other process since we haven't yet called 'do_add_mount()' to
publish it.
Cheers
Trond
> thanks,
> -serge
>
> > +out_freepage:
> > + free_page((unsigned long)page);
> > +}
> > +
> > static int nfs_follow_remote_path(struct vfsmount *root_mnt,
> > const char *export_path, struct vfsmount *mnt_target)
> > {
> > @@ -2574,6 +2595,9 @@ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
> > mnt_target->mnt_sb = s;
> > mnt_target->mnt_root = dget(nd.path.dentry);
> >
> > + /* Correct the device pathname */
> > + nfs_fix_devname(&nd.path, mnt_target);
> > +
> > path_put(&nd.path);
> > down_write(&s->s_umount);
> > return 0;
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Trond Myklebust
Linux NFS client maintainer
NetApp
[email protected]
http://www.netapp.com
On Tue, 2009-06-23 at 15:13 -0500, Serge E. Hallyn wrote:
> Quoting Trond Myklebust ([email protected]):
> > The purpose of this patch is to improve the remote mount path lookup
> > support for distributed filesystems such as the NFSv4 client.
> >
> > When given a mount command of the form "mount server:/foo/bar /mnt", the
> > NFSv4 client is required to look up the filehandle for "server:/", and
> > then look up each component of the remote mount path "foo/bar" in order
> > to find the directory that is actually going to be mounted on /mnt.
> > Following that remote mount path may involve following symlinks,
> > crossing server-side mount points and even following referrals to
> > filesystem volumes on other servers.
> >
> > Since the standard VFS path lookup code already supports walking paths
> > that contain all these features (using in-kernel automounts for
> > following referrals) we would like to be able to reuse that rather than
> > duplicate the full path traversal functionality in the NFSv4 client code.
> >
> > This patch therefore defines a VFS helper function create_mnt_ns(), that
> > sets up a temporary filesystem namespace and attaches a root filesystem to
> > it. It exports the create_mnt_ns() and put_mnt_ns() function for use by
> > filesystem modules.
> >
> > Signed-off-by: Trond Myklebust <[email protected]>
>
> This looks good, thanks. Though I see no reason not to also switch over
> init_mount_tree() to the new helper.
>
> (Seems plausible that c/r code would use this as well)
>
> Reviewed-by: Serge Hallyn <[email protected]>
>
> thanks,
> -serge
Thanks for the review! I missed the code duplication in
init_mount_tree(). Something like the following?
Cheers
Trond
--------------------------------------------------------------------
From: Trond Myklebust <[email protected]>
VFS: Switch init_mount_tree() to use the new create_mnt_ns() helper
Eliminates some duplicated code...
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/namespace.c | 11 ++---------
1 files changed, 2 insertions(+), 9 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index a7bea8c..4a86b85 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2222,16 +2222,9 @@ static void __init init_mount_tree(void)
mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
if (IS_ERR(mnt))
panic("Can't create rootfs");
- ns = kmalloc(sizeof(*ns), GFP_KERNEL);
- if (!ns)
+ ns = create_mnt_ns(mnt);
+ if (IS_ERR(ns))
panic("Can't allocate initial namespace");
- atomic_set(&ns->count, 1);
- INIT_LIST_HEAD(&ns->list);
- init_waitqueue_head(&ns->poll);
- ns->event = 0;
- list_add(&mnt->mnt_list, &ns->list);
- ns->root = mnt;
- mnt->mnt_ns = ns;
init_task.nsproxy->mnt_ns = ns;
get_mnt_ns(ns);
--
Trond Myklebust
Linux NFS client maintainer
NetApp
[email protected]
http://www.netapp.com
On Mon, Jun 22, 2009 at 12:40:33PM -0700, Linus Torvalds wrote:
>
>
> Al? Where are you?
Back, digging through the mailbox.
On Tue, 23 Jun 2009, Al Viro wrote:
> On Mon, Jun 22, 2009 at 12:40:33PM -0700, Linus Torvalds wrote:
> >
> >
> > Al? Where are you?
>
> Back, digging through the mailbox.
Ok, I applied Trond's private namespace patchset, since I was planning on
doing -rc1 today (now reconsidering due to the bulk of stuff I've done
today, it might be tomorrow).
But I'm still interested in fixes and particularly in the path ref leak
issue.
Oh, and Trond just sent out a cleanup patch that looked fine too.
Linus
Quoting Trond Myklebust ([email protected]):
> On Tue, 2009-06-23 at 15:13 -0500, Serge E. Hallyn wrote:
> > Quoting Trond Myklebust ([email protected]):
> > > The purpose of this patch is to improve the remote mount path lookup
> > > support for distributed filesystems such as the NFSv4 client.
> > >
> > > When given a mount command of the form "mount server:/foo/bar /mnt", the
> > > NFSv4 client is required to look up the filehandle for "server:/", and
> > > then look up each component of the remote mount path "foo/bar" in order
> > > to find the directory that is actually going to be mounted on /mnt.
> > > Following that remote mount path may involve following symlinks,
> > > crossing server-side mount points and even following referrals to
> > > filesystem volumes on other servers.
> > >
> > > Since the standard VFS path lookup code already supports walking paths
> > > that contain all these features (using in-kernel automounts for
> > > following referrals) we would like to be able to reuse that rather than
> > > duplicate the full path traversal functionality in the NFSv4 client code.
> > >
> > > This patch therefore defines a VFS helper function create_mnt_ns(), that
> > > sets up a temporary filesystem namespace and attaches a root filesystem to
> > > it. It exports the create_mnt_ns() and put_mnt_ns() functions for use by
> > > filesystem modules.
> > >
> > > Signed-off-by: Trond Myklebust <[email protected]>
> >
> > This looks good, thanks. Though I see no reason not to also switch over
> > init_mount_tree() to the new helper.
> >
> > (Seems plausible that c/r code would use this as well)
> >
> > Reviewed-by: Serge Hallyn <[email protected]>
> >
> > thanks,
> > -serge
>
> Thanks for the review! I missed the code duplication in
> init_mount_tree(). Something like the following?
Yup.
(it keeps bugging me that the order of args to list_add() gets reversed
as a result, but clearly with both starting out empty it doesn't
matter..)
thanks,
-serge
> Cheers
> Trond
> --------------------------------------------------------------------
> From: Trond Myklebust <[email protected]>
> VFS: Switch init_mount_tree() to use the new create_mnt_ns() helper
>
> Eliminates some duplicated code...
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
>
> fs/namespace.c | 11 ++---------
> 1 files changed, 2 insertions(+), 9 deletions(-)
>
>
> diff --git a/fs/namespace.c b/fs/namespace.c
> index a7bea8c..4a86b85 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -2222,16 +2222,9 @@ static void __init init_mount_tree(void)
> mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
> if (IS_ERR(mnt))
> panic("Can't create rootfs");
> - ns = kmalloc(sizeof(*ns), GFP_KERNEL);
> - if (!ns)
> + ns = create_mnt_ns(mnt);
> + if (IS_ERR(ns))
> panic("Can't allocate initial namespace");
> - atomic_set(&ns->count, 1);
> - INIT_LIST_HEAD(&ns->list);
> - init_waitqueue_head(&ns->poll);
> - ns->event = 0;
> - list_add(&mnt->mnt_list, &ns->list);
> - ns->root = mnt;
> - mnt->mnt_ns = ns;
>
> init_task.nsproxy->mnt_ns = ns;
> get_mnt_ns(ns);
>
>
> --
> Trond Myklebust
> Linux NFS client maintainer
>
> NetApp
> [email protected]
> http://www.netapp.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jun 23, 2009 at 02:40:10PM -0700, Linus Torvalds wrote:
>
>
> On Tue, 23 Jun 2009, Al Viro wrote:
> > On Mon, Jun 22, 2009 at 12:40:33PM -0700, Linus Torvalds wrote:
> > >
> > >
> > > Al? Where are you?
> >
> > Back, digging through the mailbox.
>
> Ok, I applied Trond's private namespace patchset, since I was planning on
> doing -rc1 today (now reconsidering due to the bulk of stuff I've done
> today, it might be tomorrow).
>
> But I'm still interested in fixes and particularly in the path ref leak
> issue.
>
> Oh, and Trond just sent out a cleanup patch that looked fine too.
OK, I've finally crawled through the pile of mail. Will have the trees
(VFS and audit) out later tonight...
On Tue, 23 Jun 2009, Serge E. Hallyn wrote:
>
> (it keeps bugging me that the order of args to list_add() gets reversed
> as a result, but clearly with both starting out empty it doesn't
> matter..)
.. and the new one is more logical. "list_add()" really adds the first
entry to the list pointed to by the second one. It _works_ the other way
too in this case, as you point out, but now that you mention it, it really
looks like the pre-patch code is "wrong".
Linus
On Wed, Jun 24, 2009 at 12:22:58AM +0100, Al Viro wrote:
> OK, I've finally crawled through the pile of mail. Will have the trees
> (VFS and audit) out later tonight...
OK, here's the tame stuff; there's still more left, but that'll have to
wait. My apologies for disappearing in the middle of the merge window ;-/
Please pull from
git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6.git/ for-linus
Shortlog:
Al Viro (16):
Make allocation of anon devices cheaper
... and the same for vfsmount id/mount group id
add caching of ACLs in struct inode
switch ext2 to inode->i_acl
switch ext3 to inode->i_acl
switch ext4 to inode->i_acl
switch jfs to inode->i_acl
switch jffs2 to inode->i_acl
switch btrfs to inode->i_acl
switch nilfs2 to inode->i_acl
reiserfs: minimal fix for ACL caching
switch reiserfs to usual conventions for caching ACLs
switch reiserfs to inode->i_acl
switch shmem to inode->i_acl
helpers for acl caching + switch to those
switch xfs to generic acl caching helpers
Ankit Jain (1):
fs: Add new pre-allocation ioctls to vfs for compatibility with legacy xfs ioctls
Christoph Hellwig (3):
reiserfs: remove stray unlock_super in reiserfs_resize
update Documentation/filesystems/Locking
cleanup __writeback_single_inode
H. Peter Anvin (1):
devpts: remove module-related code
J. R. Okajima (1):
vfs: fix nd->root leak in do_filp_open()
Trond Myklebust (1):
VFS: Switch init_mount_tree() to use the new create_mnt_ns() helper
Diffstat:
Documentation/filesystems/Locking | 41 ++++++++-------
fs/btrfs/acl.c | 44 +++-------------
fs/btrfs/btrfs_inode.h | 4 --
fs/btrfs/ctree.h | 2 -
fs/btrfs/inode.c | 16 +-----
fs/compat_ioctl.c | 48 ++++++++++++++++++
fs/devpts/inode.c | 10 ----
fs/ext2/acl.c | 81 ++++++-----------------------
fs/ext2/acl.h | 4 --
fs/ext2/ext2.h | 4 --
fs/ext2/inode.c | 4 --
fs/ext2/super.c | 16 ------
fs/ext3/acl.c | 85 +++++++------------------------
fs/ext3/acl.h | 4 --
fs/ext3/inode.c | 4 --
fs/ext3/super.c | 16 ------
fs/ext4/acl.c | 67 +++---------------------
fs/ext4/acl.h | 4 --
fs/ext4/ext4.h | 4 --
fs/ext4/inode.c | 4 --
fs/ext4/super.c | 16 ------
fs/fs-writeback.c | 100 ++++++++++++++++++------------------
fs/inode.c | 10 ++++
fs/ioctl.c | 35 +++++++++++++
fs/jffs2/acl.c | 88 ++++++--------------------------
fs/jffs2/acl.h | 4 --
fs/jffs2/jffs2_fs_i.h | 4 --
fs/jffs2/os-linux.h | 4 --
fs/jffs2/readinode.c | 1 -
fs/jfs/acl.c | 42 ++++++----------
fs/jfs/jfs_incore.h | 6 --
fs/jfs/super.c | 16 ------
fs/jfs/xattr.c | 10 +---
fs/namei.c | 11 ++++-
fs/namespace.c | 37 +++++++++-----
fs/nilfs2/inode.c | 8 ---
fs/nilfs2/nilfs.h | 4 --
fs/nilfs2/super.c | 10 ----
fs/open.c | 58 +++++++++++-----------
fs/reiserfs/inode.c | 4 --
fs/reiserfs/resize.c | 1 -
fs/reiserfs/super.c | 24 ---------
fs/reiserfs/xattr_acl.c | 58 ++++-----------------
fs/super.c | 9 +++-
fs/ubifs/xattr.c | 2 +-
fs/xfs/linux-2.6/xfs_acl.c | 73 +++-----------------------
fs/xfs/xfs_acl.h | 4 --
fs/xfs/xfs_iget.c | 2 -
fs/xfs/xfs_inode.h | 5 --
include/linux/ext3_fs_i.h | 4 --
include/linux/falloc.h | 21 ++++++++
include/linux/fs.h | 13 +++++
include/linux/posix_acl.h | 64 +++++++++++++++++++++++
include/linux/reiserfs_acl.h | 17 ------
include/linux/reiserfs_fs_i.h | 4 --
include/linux/shmem_fs.h | 8 ---
mm/shmem.c | 9 ++--
mm/shmem_acl.c | 29 ++---------
58 files changed, 455 insertions(+), 822 deletions(-)