2003-06-20 16:04:18

by David Howells

[permalink] [raw]
Subject: [PATCH] VFS automounter support v3


Hi Linus, Al,

Okay, it turns out I don't really need a special operation to deal with
automount points... as HPA was pointing out, the same can be done by giving a
directory a follow_link() operation...

However, would you consent to accept this patch (or something similar)? It has
the actual automounting stuff taken out, leaving two parts:

(1) A convenience function (__do_add_mount) that a module can call to insert
a vfsmount into the mount topology at a point described by a struct
nameidata (as would be passed to follow_link).

This can be taken out if you insist on my bouncing the mount parameters
down to userspace so that it can issue a mount - provided something
approximating fmount() is also provided so that inter-namespace mounting
can be done under controlled circumstances.

(2) Automatic mount point expiry. This allows any mountpoint to be given a
timeout, such that when mntput() detects that the vfsmount is only used
by its parent, a work chitty will be enqueued to cause the containing
namespace to be vacuumed later for dead mounts.

I'd also like to make it so that any mount can be given a "timeout" argument,
but I'm not sure what the best way to do so is.

David


diff -uNr linux-2.5.72/fs/namespace.c linux-2.5.72-auto/fs/namespace.c
--- linux-2.5.72/fs/namespace.c 2003-06-17 15:01:51.000000000 +0100
+++ linux-2.5.72-auto/fs/namespace.c 2003-06-20 14:36:58.000000000 +0100
@@ -30,6 +30,8 @@
static int hash_mask, hash_bits;
static kmem_cache_t *mnt_cache;

+static void process_mount_expiry(void *_data);
+
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
unsigned long tmp = ((unsigned long) mnt / L1_CACHE_BYTES);
@@ -84,13 +86,9 @@
return p;
}

-static int check_mnt(struct vfsmount *mnt)
+static inline int check_mnt(struct vfsmount *mnt)
{
- spin_lock(&dcache_lock);
- while (mnt->mnt_parent != mnt)
- mnt = mnt->mnt_parent;
- spin_unlock(&dcache_lock);
- return mnt == current->namespace->root;
+ return mnt->mnt_namespace == current->namespace;
}

static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
@@ -142,6 +140,9 @@
mnt->mnt_root = dget(root);
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
+ mnt->mnt_namespace = old->mnt_namespace;
+ mnt->mnt_expiry_timeout = old->mnt_expiry_timeout;
+ mnt->mnt_expires_at = old->mnt_expires_at;
}
return mnt;
}
@@ -530,6 +531,7 @@
}

if (mnt) {
+ mnt->mnt_expiry_timeout = 0;
err = graft_tree(mnt, nd);
if (err) {
spin_lock(&dcache_lock);
@@ -622,6 +624,7 @@

detach_mnt(old_nd.mnt, &parent_nd);
attach_mnt(old_nd.mnt, nd);
+ old_nd.mnt->mnt_expiry_timeout = 0;
out2:
spin_unlock(&dcache_lock);
out1:
@@ -634,23 +637,11 @@
return err;
}

-static int do_add_mount(struct nameidata *nd, char *type, int flags,
- int mnt_flags, char *name, void *data)
+int __do_add_mount(struct vfsmount *newmnt, struct nameidata *nd)
{
- struct vfsmount *mnt;
int err;

- if (!type || !memchr(type, 0, PAGE_SIZE))
- return -EINVAL;
-
- /* we need capabilities... */
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- mnt = do_kern_mount(type, flags, name, data);
- err = PTR_ERR(mnt);
- if (IS_ERR(mnt))
- goto out;
+ newmnt->mnt_expires_at = get_seconds() + newmnt->mnt_expiry_timeout;

down_write(&current->namespace->sem);
/* Something was mounted here while we slept */
@@ -662,18 +653,131 @@

/* Refuse the same filesystem on the same mount point */
err = -EBUSY;
- if (nd->mnt->mnt_sb == mnt->mnt_sb && nd->mnt->mnt_root == nd->dentry)
+ if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
+ nd->mnt->mnt_root == nd->dentry)
goto unlock;

- mnt->mnt_flags = mnt_flags;
- err = graft_tree(mnt, nd);
+ err = graft_tree(newmnt, nd);
unlock:
up_write(&current->namespace->sem);
- mntput(mnt);
-out:
+ mntput(newmnt);
return err;
}

+EXPORT_SYMBOL_GPL(__do_add_mount);
+
+static int do_add_mount(struct nameidata *nd, char *type, int flags,
+ int mnt_flags, char *name, void *data)
+{
+ struct vfsmount *mnt;
+ int err;
+
+ if (!type || !memchr(type, 0, PAGE_SIZE))
+ return -EINVAL;
+
+ /* we need capabilities... */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ mnt = do_kern_mount(type, flags, name, data);
+ err = PTR_ERR(mnt);
+ if (IS_ERR(mnt))
+ return err;
+
+ return __do_add_mount(mnt, nd);
+}
+
+static inline void set_mount_expiry_timer(struct namespace *namespace,
+ unsigned long timeout)
+{
+ spin_lock(&dcache_lock);
+
+ if (atomic_read(&namespace->count) > 0) {
+ get_namespace(namespace);
+
+ if (!schedule_delayed_work(&namespace->mnt_expiry_work,
+ (timeout + 1) * HZ))
+ put_namespace(namespace);
+ }
+
+ spin_unlock(&dcache_lock);
+}
+
+void mnt_begin_expiry(struct vfsmount *mnt)
+{
+ mnt->mnt_expires_at = get_seconds() + mnt->mnt_expiry_timeout;
+
+ set_mount_expiry_timer(mnt->mnt_namespace, mnt->mnt_expiry_timeout);
+}
+
+EXPORT_SYMBOL_GPL(mnt_begin_expiry);
+
+static void process_mount_expiry(void *_data)
+{
+ struct namespace *namespace = _data;
+ struct list_head *_p, *_n, graveyard;
+ struct vfsmount *mnt;
+ time_t now;
+ int timeout = INT_MAX, tmp;
+
+ INIT_LIST_HEAD(&graveyard);
+
+ down_write(&namespace->sem);
+
+ now = get_seconds();
+
+ list_for_each_safe(_p, _n, &namespace->list) {
+ mnt = list_entry(_p, struct vfsmount, mnt_list);
+
+ if (mnt->mnt_expiry_timeout &&
+ atomic_read(&mnt->mnt_count) == 1) {
+ spin_lock(&dcache_lock);
+
+ if (atomic_read(&mnt->mnt_count) == 1) {
+ tmp = (int) mnt->mnt_expires_at - (int) now;
+ if (tmp <= 0) {
+ list_move_tail(&mnt->mnt_list,
+ &graveyard);
+ list_del_init(&mnt->mnt_child);
+ list_del_init(&mnt->mnt_hash);
+ mnt->mnt_mountpoint->d_mounted--;
+ } else if (tmp < timeout) {
+ timeout = tmp;
+ }
+ }
+
+ spin_unlock(&dcache_lock);
+ }
+ }
+
+ up_write(&namespace->sem);
+
+ while (!list_empty(&graveyard)) {
+ mnt = list_entry(graveyard.next, struct vfsmount, mnt_list);
+ list_del_init(&mnt->mnt_list);
+
+ dput(xchg(&mnt->mnt_mountpoint, mnt->mnt_root));
+ mntput(xchg(&mnt->mnt_parent, mnt));
+
+ if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
+ /* last instance - try to be smart */
+ lock_kernel();
+ DQUOT_OFF(mnt->mnt_sb);
+ acct_auto_close(mnt->mnt_sb);
+ unlock_kernel();
+ }
+
+ mntput(mnt);
+ }
+
+ if (timeout != INT_MAX) {
+ set_mount_expiry_timer(namespace, timeout);
+ }
+ else {
+ put_namespace(namespace);
+ }
+}
+
static int copy_mount_options (const void __user *data, unsigned long *where)
{
int i;
@@ -800,6 +904,9 @@
init_rwsem(&new_ns->sem);
new_ns->root = NULL;
INIT_LIST_HEAD(&new_ns->list);
+ INIT_WORK(&new_ns->mnt_expiry_work,
+ process_mount_expiry,
+ new_ns);

down_write(&tsk->namespace->sem);
/* First pass: copy the tree topology */
@@ -816,6 +923,8 @@
p = namespace->root;
q = new_ns->root;
while (p) {
+ q->mnt_namespace = new_ns;
+
if (p == fs->rootmnt) {
rootmnt = p;
fs->rootmnt = mntget(q);
@@ -844,6 +953,8 @@
if (altrootmnt)
mntput(altrootmnt);

+ set_mount_expiry_timer(namespace, 1);
+
put_namespace(namespace);
return 0;

@@ -1079,9 +1190,13 @@
panic("Can't allocate initial namespace");
atomic_set(&namespace->count, 1);
INIT_LIST_HEAD(&namespace->list);
+ INIT_WORK(&namespace->mnt_expiry_work,
+ process_mount_expiry,
+ namespace);
init_rwsem(&namespace->sem);
list_add(&mnt->mnt_list, &namespace->list);
namespace->root = mnt;
+ mnt->mnt_namespace = namespace;

init_task.namespace = namespace;
read_lock(&tasklist_lock);
diff -uNr linux-2.5.72/fs/super.c linux-2.5.72-auto/fs/super.c
--- linux-2.5.72/fs/super.c 2003-06-17 15:01:51.000000000 +0100
+++ linux-2.5.72-auto/fs/super.c 2003-06-17 15:06:42.000000000 +0100
@@ -21,6 +21,7 @@
*/

#include <linux/config.h>
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/acct.h>
@@ -683,6 +684,7 @@
mnt->mnt_root = dget(sb->s_root);
mnt->mnt_mountpoint = sb->s_root;
mnt->mnt_parent = mnt;
+ mnt->mnt_namespace = current->namespace;
up_write(&sb->s_umount);
put_filesystem(type);
return mnt;
@@ -697,6 +699,8 @@
return (struct vfsmount *)sb;
}

+EXPORT_SYMBOL_GPL(do_kern_mount);
+
struct vfsmount *kern_mount(struct file_system_type *type)
{
return do_kern_mount(type->name, 0, type->name, NULL);
diff -uNr linux-2.5.72/include/linux/mount.h linux-2.5.72-auto/include/linux/mount.h
--- linux-2.5.72/include/linux/mount.h 2003-06-17 15:01:35.000000000 +0100
+++ linux-2.5.72-auto/include/linux/mount.h 2003-06-20 14:38:12.000000000 +0100
@@ -31,6 +31,9 @@
int mnt_flags;
char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
struct list_head mnt_list;
+ struct namespace *mnt_namespace; /* containing namespace */
+ time_t mnt_expires_at; /* time at which automount expires */
+ unsigned mnt_expiry_timeout; /* expiry timeout (in seconds) or 0 */
};

static inline struct vfsmount *mntget(struct vfsmount *mnt)
@@ -40,11 +43,15 @@
return mnt;
}

+extern void mnt_begin_expiry(struct vfsmount *mnt);
extern void __mntput(struct vfsmount *mnt);

static inline void mntput(struct vfsmount *mnt)
{
if (mnt) {
+ if (atomic_read(&mnt->mnt_count) == 2 &&
+ mnt->mnt_expiry_timeout)
+ mnt_begin_expiry(mnt);
if (atomic_dec_and_test(&mnt->mnt_count))
__mntput(mnt);
}
@@ -55,5 +62,7 @@
extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
const char *name, void *data);

+extern int __do_add_mount(struct vfsmount *newmnt, struct nameidata *nd);
+
#endif
#endif /* _LINUX_MOUNT_H */
diff -uNr linux-2.5.72/include/linux/namespace.h linux-2.5.72-auto/include/linux/namespace.h
--- linux-2.5.72/include/linux/namespace.h 2003-06-17 15:01:36.000000000 +0100
+++ linux-2.5.72-auto/include/linux/namespace.h 2003-06-17 15:06:42.000000000 +0100
@@ -10,6 +10,7 @@
struct vfsmount * root;
struct list_head list;
struct rw_semaphore sem;
+ struct work_struct mnt_expiry_work;
};

extern void umount_tree(struct vfsmount *);


2003-06-20 16:23:55

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] VFS automounter support v3

On Fri, Jun 20, 2003 at 05:17:16PM +0100, David Howells wrote:

> (2) Automatic mount point expiry. This allows any mountpoint to be given a
> timeout, such that when mntput() detects that the vfsmount is only used
> by its parent, a work chitty will be enqueued to cause the containing
> namespace to be vacuumed later for dead mounts.

Broken.
a) it doesn't scale. A single expirable mountpoint and we will be
walking a potentially very long list.
b) the logic is wrong - you are scheduling "let's go and expire
stuff" when ->mnt_count drops far enough; put something like /usr/share
on a separate fs and observe what happens to ->mnt_count. It will touch
the trigger value very often. Better yet, do that with /usr/include and
start a big build. You will have your expiry code triggered all the time,
even though the fs is in very active use.