Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S264792AbTFQOou (ORCPT ); Tue, 17 Jun 2003 10:44:50 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S264784AbTFQOnv (ORCPT ); Tue, 17 Jun 2003 10:43:51 -0400 Received: from pub237.cambridge.redhat.com ([213.86.99.237]:29434 "EHLO warthog.warthog") by vger.kernel.org with ESMTP id S264761AbTFQOmL (ORCPT ); Tue, 17 Jun 2003 10:42:11 -0400 From: David Howells To: Linus Torvalds , viro@parcelfarce.linux.theplanet.co.uk cc: Kernel Mailing List , dhowells@redhat.com Subject: [PATCH] VFS autmounter support User-Agent: EMH/1.14.1 SEMI/1.14.4 (Hosorogi) FLIM/1.14.4 (=?ISO-8859-4?Q?Kashiharajing=FE-mae?=) APEL/10.4 Emacs/21.2 (i386-redhat-linux-gnu) MULE/5.0 (SAKAKI) MIME-Version: 1.0 (generated by SEMI 1.14.4 - "Hosorogi") Content-Type: text/plain; charset=US-ASCII Date: Tue, 17 Jun 2003 15:55:57 +0100 Message-ID: <6516.1055861757@warthog.warthog> Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 14436 Lines: 492 Hi Linus, Al, The attached patch adds automounting support and mountpoint expiry support to the VFS. This patch involves adding the following features: (1) A new dentry operation that (a) marks a dentry as being an automount point, and (b) gets called by the VFS to come up with a vfsmount structure which the VFS then stitches into the mount tree fabric at the appropriate place. (2) A new lookup flag that is used by sys_*stat() to prevent automounting of the path endpoint. This means "ls -l" in an automounter directory doesn't cause a mount storm, but will display all the mountpoints in that directory as subdirectories (either the underlying mountpoint dir or the root dir of the mounted fs if the mountpoint has been triggered already). (3) do_kern_mount() is now exported. (4) The vfsmount structure has acquired, amongst other things, a timeout field. 
If mntput() notices a vfsmount reach a usage count of 1, then the vfsmount expiry time is set and the namespace that contains the vfsmount has its expiration work item queued. (5) The namespace structure has acquired a work struct that is used to actually perform vfsmount expiry under process context. Here's a cut-down example, taken from the implementation I'm using in my AFS filesystem client: static struct dentry_operations afs_fs_mntpt_dentry_operations = { .d_revalidate = afs_d_revalidate, .d_delete = afs_d_delete, .d_automount = afs_mntpt_d_automount, }; struct vfsmount *afs_mntpt_d_automount(struct dentry *mntpt) { struct vfsmount *mnt; size_t size; char *buf, *devname = NULL; devname = (char *) get_zeroed_page(GFP_KERNEL); /* read the contents of the AFS special symlink */ size = afs_read_string(mntpt->d_inode, devname, PAGE_SIZE - 1); mnt = do_kern_mount("afs", 0, devname, NULL); mnt->mnt_expiry_timeout = 30; free_page((unsigned long)devname); return mnt; } Note that the inode attached to the underlying mountpoint can be used to determine _what_ should be mounted. I've some ideas as to how to implement a leaner autofs with it as well as using it for AFS. Any comments would be appreciated. 
David diff -uNr linux-2.5.72/fs/namei.c linux-2.5.72-auto/fs/namei.c --- linux-2.5.72/fs/namei.c 2003-06-17 15:01:51.000000000 +0100 +++ linux-2.5.72-auto/fs/namei.c 2003-06-17 15:06:42.000000000 +0100 @@ -434,23 +434,35 @@ return 1; } -static int follow_mount(struct vfsmount **mnt, struct dentry **dentry) +static int follow_mount(struct vfsmount **mnt, struct dentry **dentry, unsigned int flags) { int res = 0; - while (d_mountpoint(*dentry)) { - struct vfsmount *mounted; - spin_lock(&dcache_lock); - mounted = lookup_mnt(*mnt, *dentry); - if (!mounted) { + for (;;) { + + if (d_mountpoint(*dentry)) { + struct vfsmount *mounted; + spin_lock(&dcache_lock); + mounted = lookup_mnt(*mnt, *dentry); + if (!mounted) { + spin_unlock(&dcache_lock); + break; + } + *mnt = mntget(mounted); spin_unlock(&dcache_lock); + dput(*dentry); + mntput(mounted->mnt_parent); + *dentry = dget(mounted->mnt_root); + res = 1; + + } else if (d_automount_point(*dentry)) { + if (flags & LOOKUP_NOAUTOMOUNT) + break; + res = kern_automount(*mnt, *dentry); + if (res < 0) + break; + } else { break; } - *mnt = mntget(mounted); - spin_unlock(&dcache_lock); - dput(*dentry); - mntput(mounted->mnt_parent); - *dentry = dget(mounted->mnt_root); - res = 1; } return res; } @@ -510,7 +522,7 @@ mntput(*mnt); *mnt = parent; } - follow_mount(mnt, dentry); + follow_mount(mnt, dentry, 0); } struct path { @@ -643,7 +655,9 @@ if (err) break; /* Check mountpoints.. 
*/ - follow_mount(&next.mnt, &next.dentry); + err = follow_mount(&next.mnt, &next.dentry, 0); + if (err < 0) + goto out_dput; err = -ENOENT; inode = next.dentry->d_inode; @@ -703,7 +717,10 @@ err = do_lookup(nd, &this, &next, 0); if (err) break; - follow_mount(&next.mnt, &next.dentry); + err = follow_mount(&next.mnt, &next.dentry, nd->flags & LOOKUP_NOAUTOMOUNT); + if (err < 0) + goto out_dput; + inode = next.dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) && inode && inode->i_op && inode->i_op->follow_link) { diff -uNr linux-2.5.72/fs/namespace.c linux-2.5.72-auto/fs/namespace.c --- linux-2.5.72/fs/namespace.c 2003-06-17 15:01:51.000000000 +0100 +++ linux-2.5.72-auto/fs/namespace.c 2003-06-17 15:06:42.000000000 +0100 @@ -30,6 +30,8 @@ static int hash_mask, hash_bits; static kmem_cache_t *mnt_cache; +static void kern_automount_expiry_timeout(void *_data); + static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long) mnt / L1_CACHE_BYTES); @@ -142,6 +144,9 @@ mnt->mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; + mnt->mnt_namespace = old->mnt_namespace; + mnt->mnt_expiry_timeout = old->mnt_expiry_timeout; + mnt->mnt_expires_at = old->mnt_expires_at; } return mnt; } @@ -674,6 +679,135 @@ return err; } +int kern_automount(struct vfsmount *on_mnt, struct dentry *on_dentry) +{ + struct nameidata nd; + struct vfsmount *mnt; + int err; + + if (!on_dentry->d_inode || !S_ISDIR(on_dentry->d_inode->i_mode)) + return -ENOTDIR; + + mnt = on_dentry->d_op->d_automount(on_dentry); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + mnt->mnt_expires_at = get_seconds() + mnt->mnt_expiry_timeout; + + memset(&nd,0,sizeof(nd)); + nd.dentry = on_dentry; + nd.mnt = on_mnt; + + down_write(&current->namespace->sem); + /* Something was mounted here while we slept */ + while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry)) + ; + err = -EINVAL; + if (!check_mnt(nd.mnt)) + goto unlock; + + /* Refuse 
the same filesystem on the same mount point */ + err = -EBUSY; + if (nd.mnt->mnt_sb == mnt->mnt_sb && nd.mnt->mnt_root == nd.dentry) + goto unlock; + + err = graft_tree(mnt, &nd); +unlock: + up_write(&current->namespace->sem); + mntput(mnt); + return err; +} + +static inline void kern_automount_set_expiry_timer(struct namespace *namespace, + unsigned long timeout) +{ + spin_lock(&dcache_lock); + + if (atomic_read(&namespace->count) > 0) { + get_namespace(namespace); + + if (!schedule_delayed_work(&namespace->mnt_expiry_work, + (timeout + 1) * HZ)) + put_namespace(namespace); + } + + spin_unlock(&dcache_lock); +} + +void kern_automount_begin_expiry(struct vfsmount *mnt) +{ + mnt->mnt_expires_at = get_seconds() + mnt->mnt_expiry_timeout; + + kern_automount_set_expiry_timer(mnt->mnt_namespace, + mnt->mnt_expiry_timeout); +} + +static void kern_automount_expiry_timeout(void *_data) +{ + struct namespace *namespace = _data; + struct list_head *_p, *_n, graveyard; + struct vfsmount *mnt; + time_t now; + int timeout = INT_MAX, tmp; + + INIT_LIST_HEAD(&graveyard); + + down_write(&namespace->sem); + + now = get_seconds(); + + list_for_each_safe(_p, _n, &namespace->list) { + mnt = list_entry(_p, struct vfsmount, mnt_list); + + if (mnt->mnt_expiry_timeout && + atomic_read(&mnt->mnt_count) == 1) { + spin_lock(&dcache_lock); + + if (atomic_read(&mnt->mnt_count) == 1) { + tmp = (int) mnt->mnt_expires_at - (int) now; + if (tmp <= 0) { + list_move_tail(&mnt->mnt_list, + &graveyard); + list_del_init(&mnt->mnt_child); + list_del_init(&mnt->mnt_hash); + mnt->mnt_mountpoint->d_mounted--; + } else if (tmp < timeout) { + timeout = tmp; + } + } + + spin_unlock(&dcache_lock); + } + } + + up_write(&namespace->sem); + + while (!list_empty(&graveyard)) { + mnt = list_entry(graveyard.next, struct vfsmount, mnt_list); + list_del_init(&mnt->mnt_list); + + dput(xchg(&mnt->mnt_mountpoint, mnt->mnt_root)); + mntput(xchg(&mnt->mnt_parent, mnt)); + + if (atomic_read(&mnt->mnt_sb->s_active) == 1) { + /* 
last instance - try to be smart */ + lock_kernel(); + DQUOT_OFF(mnt->mnt_sb); + acct_auto_close(mnt->mnt_sb); + unlock_kernel(); + } + + mntput(mnt); + } + + if (timeout != INT_MAX) { + kern_automount_set_expiry_timer(namespace, timeout); + } + else { + put_namespace(namespace); + } +} + static int copy_mount_options (const void __user *data, unsigned long *where) { int i; @@ -800,6 +934,9 @@ init_rwsem(&new_ns->sem); new_ns->root = NULL; INIT_LIST_HEAD(&new_ns->list); + INIT_WORK(&new_ns->mnt_expiry_work, + kern_automount_expiry_timeout, + new_ns); down_write(&tsk->namespace->sem); /* First pass: copy the tree topology */ @@ -816,6 +953,8 @@ p = namespace->root; q = new_ns->root; while (p) { + q->mnt_namespace = new_ns; + if (p == fs->rootmnt) { rootmnt = p; fs->rootmnt = mntget(q); @@ -844,6 +983,8 @@ if (altrootmnt) mntput(altrootmnt); + kern_automount_set_expiry_timer(namespace, 1); + put_namespace(namespace); return 0; @@ -1079,6 +1220,9 @@ panic("Can't allocate initial namespace"); atomic_set(&namespace->count, 1); INIT_LIST_HEAD(&namespace->list); + INIT_WORK(&namespace->mnt_expiry_work, + kern_automount_expiry_timeout, + namespace); init_rwsem(&namespace->sem); list_add(&mnt->mnt_list, &namespace->list); namespace->root = mnt; diff -uNr linux-2.5.72/fs/stat.c linux-2.5.72-auto/fs/stat.c --- linux-2.5.72/fs/stat.c 2003-06-17 15:01:51.000000000 +0100 +++ linux-2.5.72-auto/fs/stat.c 2003-06-17 15:06:42.000000000 +0100 @@ -61,7 +61,7 @@ struct nameidata nd; int error; - error = user_path_walk(name, &nd); + error = user_path_walk_stat(name, &nd); if (!error) { error = vfs_getattr(nd.mnt, nd.dentry, stat); path_release(&nd); @@ -74,7 +74,7 @@ struct nameidata nd; int error; - error = user_path_walk_link(name, &nd); + error = user_path_walk_link_stat(name, &nd); if (!error) { error = vfs_getattr(nd.mnt, nd.dentry, stat); path_release(&nd); diff -uNr linux-2.5.72/fs/super.c linux-2.5.72-auto/fs/super.c --- linux-2.5.72/fs/super.c 2003-06-17 15:01:51.000000000 +0100 
+++ linux-2.5.72-auto/fs/super.c 2003-06-17 15:06:42.000000000 +0100 @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -683,6 +684,7 @@ mnt->mnt_root = dget(sb->s_root); mnt->mnt_mountpoint = sb->s_root; mnt->mnt_parent = mnt; + mnt->mnt_namespace = current->namespace; up_write(&sb->s_umount); put_filesystem(type); return mnt; @@ -697,6 +699,8 @@ return (struct vfsmount *)sb; } +EXPORT_SYMBOL_GPL(do_kern_mount); + struct vfsmount *kern_mount(struct file_system_type *type) { return do_kern_mount(type->name, 0, type->name, NULL); diff -uNr linux-2.5.72/include/linux/dcache.h linux-2.5.72-auto/include/linux/dcache.h --- linux-2.5.72/include/linux/dcache.h 2003-06-17 15:01:36.000000000 +0100 +++ linux-2.5.72-auto/include/linux/dcache.h 2003-06-17 15:06:42.000000000 +0100 @@ -112,6 +112,7 @@ int (*d_delete)(struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); + struct vfsmount *(*d_automount)(struct dentry *); }; /* the dentry parameter passed to d_hash and d_compare is the parent @@ -307,6 +308,11 @@ return dentry->d_mounted; } +static inline int d_automount_point(struct dentry *dentry) +{ + return dentry->d_op && dentry->d_op->d_automount; +} + extern struct vfsmount *lookup_mnt(struct vfsmount *, struct dentry *); #endif /* __KERNEL__ */ diff -uNr linux-2.5.72/include/linux/mount.h linux-2.5.72-auto/include/linux/mount.h --- linux-2.5.72/include/linux/mount.h 2003-06-17 15:01:35.000000000 +0100 +++ linux-2.5.72-auto/include/linux/mount.h 2003-06-17 15:06:42.000000000 +0100 @@ -31,6 +31,9 @@ int mnt_flags; char *mnt_devname; /* Name of device e.g. 
/dev/dsk/hda1 */ struct list_head mnt_list; + struct namespace *mnt_namespace; /* containing namespace */ + time_t mnt_expires_at; /* time at which automount expires */ + unsigned mnt_expiry_timeout; /* expiry timeout (in seconds) or 0 */ }; static inline struct vfsmount *mntget(struct vfsmount *mnt) @@ -40,11 +43,15 @@ return mnt; } +extern void kern_automount_begin_expiry(struct vfsmount *mnt); extern void __mntput(struct vfsmount *mnt); static inline void mntput(struct vfsmount *mnt) { if (mnt) { + if (atomic_read(&mnt->mnt_count) == 2 && + mnt->mnt_expiry_timeout) + kern_automount_begin_expiry(mnt); if (atomic_dec_and_test(&mnt->mnt_count)) __mntput(mnt); } diff -uNr linux-2.5.72/include/linux/namei.h linux-2.5.72-auto/include/linux/namei.h --- linux-2.5.72/include/linux/namei.h 2003-06-17 15:01:36.000000000 +0100 +++ linux-2.5.72-auto/include/linux/namei.h 2003-06-17 15:06:42.000000000 +0100 @@ -31,6 +31,7 @@ #define LOOKUP_CONTINUE 4 #define LOOKUP_PARENT 16 #define LOOKUP_NOALT 32 +#define LOOKUP_NOAUTOMOUNT 64 extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); @@ -38,6 +39,10 @@ __user_walk(name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ __user_walk(name, 0, nd) +#define user_path_walk_stat(name,nd) \ + __user_walk(name, LOOKUP_FOLLOW|LOOKUP_NOAUTOMOUNT, nd) +#define user_path_walk_link_stat(name,nd) \ + __user_walk(name, LOOKUP_NOAUTOMOUNT, nd) extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); extern int FASTCALL(path_walk(const char *, struct nameidata *)); extern int FASTCALL(link_path_walk(const char *, struct nameidata *)); @@ -52,4 +57,6 @@ extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); +extern int kern_automount(struct vfsmount *mnt, struct dentry *dentry); + #endif /* _LINUX_NAMEI_H */ diff -uNr linux-2.5.72/include/linux/namespace.h linux-2.5.72-auto/include/linux/namespace.h --- 
linux-2.5.72/include/linux/namespace.h 2003-06-17 15:01:36.000000000 +0100 +++ linux-2.5.72-auto/include/linux/namespace.h 2003-06-17 15:06:42.000000000 +0100 @@ -10,6 +10,7 @@ struct vfsmount * root; struct list_head list; struct rw_semaphore sem; + struct work_struct mnt_expiry_work; }; extern void umount_tree(struct vfsmount *); - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/