Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S263056AbTFTQES (ORCPT ); Fri, 20 Jun 2003 12:04:18 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S263250AbTFTQES (ORCPT ); Fri, 20 Jun 2003 12:04:18 -0400 Received: from pub237.cambridge.redhat.com ([213.86.99.237]:41464 "EHLO warthog.warthog") by vger.kernel.org with ESMTP id S263056AbTFTQEJ (ORCPT ); Fri, 20 Jun 2003 12:04:09 -0400 From: David Howells To: Linus Torvalds , viro@parcelfarce.linux.theplanet.co.uk Cc: Kernel Mailing List , dhowells@redhat.com Subject: [PATCH] VFS autmounter support v3 User-Agent: EMH/1.14.1 SEMI/1.14.4 (Hosorogi) FLIM/1.14.4 (=?ISO-8859-4?Q?Kashiharajing=FE-mae?=) APEL/10.4 Emacs/21.2 (i386-redhat-linux-gnu) MULE/5.0 (SAKAKI) MIME-Version: 1.0 (generated by SEMI 1.14.4 - "Hosorogi") Content-Type: text/plain; charset=US-ASCII Date: Fri, 20 Jun 2003 17:17:16 +0100 Message-ID: <25482.1056125836@warthog.warthog> Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 10353 Lines: 370 Hi Linus, Al, Okay, it turns out I don't really need a special operation to deal with automount points... as HPA was pointing out, the same can be done by giving a directory a follow_link() operation... However, would you consent to accept this patch (or something similar)? It has the actual automounting stuff taken out, leaving two parts: (1) A convenience function (__do_add_mount) that a module can call to insert a vfsmount into the mount topology at a point described by a struct nameidata (as would be passed to follow_link). This can be taken out if you insist on my bouncing the mount parameters down to userspace so that it can issue a mount - provided something approximating fmount() is also provided so that inter-namespace mounting can be done under controlled circumstances. (2) Automatic mount point expiry. 
This allows any mountpoint to be given a timeout, such that when mntput() detects that the vfsmount is only used by its parent, a work chitty will be enqueued to cause the containing namespace to be vacuumed later for dead mounts. I'd also like to make it so that any mount can be given a "timeout" argument, but I'm not sure what the best way to do so is. David diff -uNr linux-2.5.72/fs/namespace.c linux-2.5.72-auto/fs/namespace.c --- linux-2.5.72/fs/namespace.c 2003-06-17 15:01:51.000000000 +0100 +++ linux-2.5.72-auto/fs/namespace.c 2003-06-20 14:36:58.000000000 +0100 @@ -30,6 +30,8 @@ static int hash_mask, hash_bits; static kmem_cache_t *mnt_cache; +static void process_mount_expiry(void *_data); + static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long) mnt / L1_CACHE_BYTES); @@ -84,13 +86,9 @@ return p; } -static int check_mnt(struct vfsmount *mnt) +static inline int check_mnt(struct vfsmount *mnt) { - spin_lock(&dcache_lock); - while (mnt->mnt_parent != mnt) - mnt = mnt->mnt_parent; - spin_unlock(&dcache_lock); - return mnt == current->namespace->root; + return mnt->mnt_namespace == current->namespace; } static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) @@ -142,6 +140,9 @@ mnt->mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; + mnt->mnt_namespace = old->mnt_namespace; + mnt->mnt_expiry_timeout = old->mnt_expiry_timeout; + mnt->mnt_expires_at = old->mnt_expires_at; } return mnt; } @@ -530,6 +531,7 @@ } if (mnt) { + mnt->mnt_expiry_timeout = 0; err = graft_tree(mnt, nd); if (err) { spin_lock(&dcache_lock); @@ -622,6 +624,7 @@ detach_mnt(old_nd.mnt, &parent_nd); attach_mnt(old_nd.mnt, nd); + old_nd.mnt->mnt_expiry_timeout = 0; out2: spin_unlock(&dcache_lock); out1: @@ -634,23 +637,11 @@ return err; } -static int do_add_mount(struct nameidata *nd, char *type, int flags, - int mnt_flags, char *name, void *data) +int __do_add_mount(struct vfsmount 
*newmnt, struct nameidata *nd) { - struct vfsmount *mnt; int err; - if (!type || !memchr(type, 0, PAGE_SIZE)) - return -EINVAL; - - /* we need capabilities... */ - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - mnt = do_kern_mount(type, flags, name, data); - err = PTR_ERR(mnt); - if (IS_ERR(mnt)) - goto out; + newmnt->mnt_expires_at = get_seconds() + newmnt->mnt_expiry_timeout; down_write(&current->namespace->sem); /* Something was mounted here while we slept */ @@ -662,18 +653,131 @@ /* Refuse the same filesystem on the same mount point */ err = -EBUSY; - if (nd->mnt->mnt_sb == mnt->mnt_sb && nd->mnt->mnt_root == nd->dentry) + if (nd->mnt->mnt_sb == newmnt->mnt_sb && + nd->mnt->mnt_root == nd->dentry) goto unlock; - mnt->mnt_flags = mnt_flags; - err = graft_tree(mnt, nd); + err = graft_tree(newmnt, nd); unlock: up_write(&current->namespace->sem); - mntput(mnt); -out: + mntput(newmnt); return err; } +EXPORT_SYMBOL_GPL(__do_add_mount); + +static int do_add_mount(struct nameidata *nd, char *type, int flags, + int mnt_flags, char *name, void *data) +{ + struct vfsmount *mnt; + int err; + + if (!type || !memchr(type, 0, PAGE_SIZE)) + return -EINVAL; + + /* we need capabilities... 
*/ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + mnt = do_kern_mount(type, flags, name, data); + err = PTR_ERR(mnt); + if (IS_ERR(mnt)) + return err; + + return __do_add_mount(mnt, nd); +} + +static inline void set_mount_expiry_timer(struct namespace *namespace, + unsigned long timeout) +{ + spin_lock(&dcache_lock); + + if (atomic_read(&namespace->count) > 0) { + get_namespace(namespace); + + if (!schedule_delayed_work(&namespace->mnt_expiry_work, + (timeout + 1) * HZ)) + put_namespace(namespace); + } + + spin_unlock(&dcache_lock); +} + +void mnt_begin_expiry(struct vfsmount *mnt) +{ + mnt->mnt_expires_at = get_seconds() + mnt->mnt_expiry_timeout; + + set_mount_expiry_timer(mnt->mnt_namespace, mnt->mnt_expiry_timeout); +} + +EXPORT_SYMBOL_GPL(mnt_begin_expiry); + +static void process_mount_expiry(void *_data) +{ + struct namespace *namespace = _data; + struct list_head *_p, *_n, graveyard; + struct vfsmount *mnt; + time_t now; + int timeout = INT_MAX, tmp; + + INIT_LIST_HEAD(&graveyard); + + down_write(&namespace->sem); + + now = get_seconds(); + + list_for_each_safe(_p, _n, &namespace->list) { + mnt = list_entry(_p, struct vfsmount, mnt_list); + + if (mnt->mnt_expiry_timeout && + atomic_read(&mnt->mnt_count) == 1) { + spin_lock(&dcache_lock); + + if (atomic_read(&mnt->mnt_count) == 1) { + tmp = (int) mnt->mnt_expires_at - (int) now; + if (tmp <= 0) { + list_move_tail(&mnt->mnt_list, + &graveyard); + list_del_init(&mnt->mnt_child); + list_del_init(&mnt->mnt_hash); + mnt->mnt_mountpoint->d_mounted--; + } else if (tmp < timeout) { + timeout = tmp; + } + } + + spin_unlock(&dcache_lock); + } + } + + up_write(&namespace->sem); + + while (!list_empty(&graveyard)) { + mnt = list_entry(graveyard.next, struct vfsmount, mnt_list); + list_del_init(&mnt->mnt_list); + + dput(xchg(&mnt->mnt_mountpoint, mnt->mnt_root)); + mntput(xchg(&mnt->mnt_parent, mnt)); + + if (atomic_read(&mnt->mnt_sb->s_active) == 1) { + /* last instance - try to be smart */ + lock_kernel(); + 
DQUOT_OFF(mnt->mnt_sb); + acct_auto_close(mnt->mnt_sb); + unlock_kernel(); + } + + mntput(mnt); + } + + if (timeout != INT_MAX) { + set_mount_expiry_timer(namespace, timeout); + } + else { + put_namespace(namespace); + } +} + static int copy_mount_options (const void __user *data, unsigned long *where) { int i; @@ -800,6 +904,9 @@ init_rwsem(&new_ns->sem); new_ns->root = NULL; INIT_LIST_HEAD(&new_ns->list); + INIT_WORK(&new_ns->mnt_expiry_work, + process_mount_expiry, + new_ns); down_write(&tsk->namespace->sem); /* First pass: copy the tree topology */ @@ -816,6 +923,8 @@ p = namespace->root; q = new_ns->root; while (p) { + q->mnt_namespace = new_ns; + if (p == fs->rootmnt) { rootmnt = p; fs->rootmnt = mntget(q); @@ -844,6 +953,8 @@ if (altrootmnt) mntput(altrootmnt); + set_mount_expiry_timer(namespace, 1); + put_namespace(namespace); return 0; @@ -1079,9 +1190,13 @@ panic("Can't allocate initial namespace"); atomic_set(&namespace->count, 1); INIT_LIST_HEAD(&namespace->list); + INIT_WORK(&namespace->mnt_expiry_work, + process_mount_expiry, + namespace); init_rwsem(&namespace->sem); list_add(&mnt->mnt_list, &namespace->list); namespace->root = mnt; + mnt->mnt_namespace = namespace; init_task.namespace = namespace; read_lock(&tasklist_lock); diff -uNr linux-2.5.72/fs/super.c linux-2.5.72-auto/fs/super.c --- linux-2.5.72/fs/super.c 2003-06-17 15:01:51.000000000 +0100 +++ linux-2.5.72-auto/fs/super.c 2003-06-17 15:06:42.000000000 +0100 @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -683,6 +684,7 @@ mnt->mnt_root = dget(sb->s_root); mnt->mnt_mountpoint = sb->s_root; mnt->mnt_parent = mnt; + mnt->mnt_namespace = current->namespace; up_write(&sb->s_umount); put_filesystem(type); return mnt; @@ -697,6 +699,8 @@ return (struct vfsmount *)sb; } +EXPORT_SYMBOL_GPL(do_kern_mount); + struct vfsmount *kern_mount(struct file_system_type *type) { return do_kern_mount(type->name, 0, type->name, NULL); diff -uNr linux-2.5.72/include/linux/mount.h 
linux-2.5.72-auto/include/linux/mount.h --- linux-2.5.72/include/linux/mount.h 2003-06-17 15:01:35.000000000 +0100 +++ linux-2.5.72-auto/include/linux/mount.h 2003-06-20 14:38:12.000000000 +0100 @@ -31,6 +31,9 @@ int mnt_flags; char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; + struct namespace *mnt_namespace; /* containing namespace */ + time_t mnt_expires_at; /* time at which automount expires */ + unsigned mnt_expiry_timeout; /* expiry timeout (in seconds) or 0 */ }; static inline struct vfsmount *mntget(struct vfsmount *mnt) @@ -40,11 +43,15 @@ return mnt; } +extern void mnt_begin_expiry(struct vfsmount *mnt); extern void __mntput(struct vfsmount *mnt); static inline void mntput(struct vfsmount *mnt) { if (mnt) { + if (atomic_read(&mnt->mnt_count) == 2 && + mnt->mnt_expiry_timeout) + mnt_begin_expiry(mnt); if (atomic_dec_and_test(&mnt->mnt_count)) __mntput(mnt); } @@ -55,5 +62,7 @@ extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); +extern int __do_add_mount(struct vfsmount *newmnt, struct nameidata *nd); + #endif #endif /* _LINUX_MOUNT_H */ diff -uNr linux-2.5.72/include/linux/namespace.h linux-2.5.72-auto/include/linux/namespace.h --- linux-2.5.72/include/linux/namespace.h 2003-06-17 15:01:36.000000000 +0100 +++ linux-2.5.72-auto/include/linux/namespace.h 2003-06-17 15:06:42.000000000 +0100 @@ -10,6 +10,7 @@ struct vfsmount * root; struct list_head list; struct rw_semaphore sem; + struct work_struct mnt_expiry_work; }; extern void umount_tree(struct vfsmount *); - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/