From: "J. Bruce Fields" Subject: Re: [PATCH 1/2] NLM failover unlock commands Date: Mon, 14 Jan 2008 18:07:42 -0500 Message-ID: <20080114230742.GA16975@fieldses.org> References: <4781BB0D.90706@redhat.com> <20080108170220.GA21401@infradead.org> <20080108174958.GA25025@infradead.org> <4783E3C9.3040803@redhat.com> <20080109180214.GA31071@infradead.org> <20080110075959.GA9623@infradead.org> <4788665B.4020405@redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: Christoph Hellwig , NeilBrown , NFS list , cluster-devel@redhat.com To: Wendy Cheng Return-path: Received: from mail.fieldses.org ([66.93.2.214]:55864 "EHLO fieldses.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750956AbYANXHv (ORCPT ); Mon, 14 Jan 2008 18:07:51 -0500 In-Reply-To: <4788665B.4020405@redhat.com> Sender: linux-nfs-owner@vger.kernel.org List-ID: On Sat, Jan 12, 2008 at 02:03:55AM -0500, Wendy Cheng wrote: > This is a combined patch that has: > > * changes made by Christoph Hellwig > * code segment that handles f_locks so we would not walk inode->i_flock > list twice. > > If agreed, please re-add your "ack-by" and "signed-off" lines > respectively. Thanks ... > > -- Wendy Comments below-- > Two new NFSD procfs files are added: > /proc/fs/nfsd/unlock_ip > /proc/fs/nfsd/unlock_filesystem > > They are intended to allow admin or user mode script to release NLM locks > based on either a path name or a server in-bound ip address (ipv4 for now) > as; > > shell> echo 10.1.1.2 > /proc/fs/nfsd/unlock_ip > shell> echo /mnt/sfs1 > /proc/fs/nfsd/unlock_filesystem > > Signed-off-by: S. Wendy Cheng > Signed-off-by: Lon Hohberger > > fs/lockd/svcsubs.c | 68 ++++++++++++++++++++++++++++++++++++++------ > fs/nfsd/nfsctl.c | 62 ++++++++++++++++++++++++++++++++++++++++ > include/linux/lockd/lockd.h | 7 ++++ > 3 files changed, 129 insertions(+), 8 deletions(-) > > --- linux-o/fs/nfsd/nfsctl.c 2008-01-04 10:01:08.000000000 -0500 > +++ linux/fs/nfsd/nfsctl.c 2008-01-11 19:08:02.000000000 -0500 > @@ -22,6 +22,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -35,6 +36,7 @@ > #include > #include > #include > +#include > > #include > > @@ -52,6 +54,8 @@ enum { > NFSD_Getfs, > NFSD_List, > NFSD_Fh, > + NFSD_FO_UnlockIP, > + NFSD_FO_UnlockFS, > NFSD_Threads, > NFSD_Pool_Threads, > NFSD_Versions, > @@ -88,6 +92,9 @@ static ssize_t write_leasetime(struct fi > static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); > #endif > > +static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size); > +static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size); > + > static ssize_t (*write_op[])(struct file *, char *, size_t) = { > [NFSD_Svc] = write_svc, > [NFSD_Add] = write_add, > @@ -97,6 +104,8 @@ static ssize_t (*write_op[])(struct file > [NFSD_Getfd] = write_getfd, > [NFSD_Getfs] = write_getfs, > [NFSD_Fh] = write_filehandle, > + [NFSD_FO_UnlockIP] = failover_unlock_ip, > + [NFSD_FO_UnlockFS] = failover_unlock_fs, > [NFSD_Threads] = write_threads, > [NFSD_Pool_Threads] = write_pool_threads, > [NFSD_Versions] = write_versions, > @@ -288,6 +297,55 @@ static ssize_t write_getfd(struct file * > return err; > } > > +static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size) > +{ > + __be32 server_ip; > + char *fo_path; > + char *mesg; > + > + /* sanity check */ > + if (size <= 0) > + return -EINVAL; Not only is size never negative, it's actually an unsigned type here, so this is a no-op. > + > + if (buf[size-1] == '\n') > + buf[size-1] = 0; The other write methods in this file actually just do if (buf[size-1] != '\n') return -EINVAL; I don't know why. But absent some reason, I'd rather these two new files behaved the same as existing ones. > + > + fo_path = mesg = buf; > + if (qword_get(&mesg, fo_path, size) < 0) > + return -EINVAL; "mesg" is unneeded here, right? You can just do: fo_path = buf; if (qword_get(&buf, buf, size) < 0) > + > + server_ip = in_aton(fo_path); It'd be nice if we could sanity-check this. (Is there code already in the kernel someplace to do this?) And, this isn't your problem for now, but eventually I guess this will have to accept an ivp6 address as well? > + return nlmsvc_failover_ip(server_ip); > +} > + > +static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) > +{ > + struct nameidata nd; > + char *fo_path; > + char *mesg; > + int error; > + > + /* sanity check */ > + if (size <= 0) > + return -EINVAL; > + > + if (buf[size-1] == '\n') > + buf[size-1] = 0; > + > + fo_path = mesg = buf; > + if (qword_get(&mesg, fo_path, size) < 0) > + return -EINVAL; Same comments as above. > + > + error = path_lookup(fo_path, 0, &nd); > + if (error) > + return error; > + > + error = nlmsvc_failover_path(&nd); > + > + path_release(&nd); > + return error; > +} > + > static ssize_t write_filehandle(struct file *file, char *buf, size_t size) > { > /* request is: > @@ -646,6 +704,10 @@ static int nfsd_fill_super(struct super_ > [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, > [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, > [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, > + [NFSD_FO_UnlockIP] = {"unlock_ip", > + &transaction_ops, S_IWUSR|S_IRUSR}, > + [NFSD_FO_UnlockFS] = {"unlock_filesystem", > + &transaction_ops, S_IWUSR|S_IRUSR}, > [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, > [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, > [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, > --- linux-o/fs/lockd/svcsubs.c 2008-01-04 10:01:08.000000000 -0500 > +++ linux/fs/lockd/svcsubs.c 2008-01-11 19:08:28.000000000 -0500 > @@ -18,6 +18,8 @@ > #include > #include > #include > +#include > +#include > > #define NLMDBG_FACILITY NLMDBG_SVCSUBS > > @@ -87,7 +89,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, > unsigned int hash; > __be32 nfserr; > > - nlm_debug_print_fh("nlm_file_lookup", f); > + nlm_debug_print_fh("nlm_lookup_file", f); > > hash = file_hash(f); > > @@ -123,6 +125,9 @@ nlm_lookup_file(struct svc_rqst *rqstp, > > hlist_add_head(&file->f_list, &nlm_files[hash]); > > + /* fill in f_iaddr for nlm lock failover */ > + file->f_iaddr = rqstp->rq_daddr; > + > found: > dprintk("lockd: found file %p (count %d)\n", file, file->f_count); > *result = file; > @@ -194,6 +199,12 @@ again: > return 0; > } > > +static int > +nlmsvc_always_match(struct nlm_host *dummy1, struct nlm_host *dummy2) > +{ > + return 1; > +} > + > /* > * Inspect a single file > */ > @@ -230,27 +241,37 @@ nlm_file_inuse(struct nlm_file *file) > * Loop over all files in the file table. > */ > static int > -nlm_traverse_files(struct nlm_host *host, nlm_host_match_fn_t match) > +nlm_traverse_files(void *data, nlm_host_match_fn_t match, > + int (*failover)(void *data, struct nlm_file *file)) > { > struct hlist_node *pos, *next; > struct nlm_file *file; > - int i, ret = 0; > + int i, ret = 0, inspect_file; > > mutex_lock(&nlm_file_mutex); > for (i = 0; i < FILE_NRHASH; i++) { > hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) { > file->f_count++; > mutex_unlock(&nlm_file_mutex); > + inspect_file = 1; > > /* Traverse locks, blocks and shares of this file > * and update file->f_locks count */ > - if (nlm_inspect_file(host, file, match)) > + > + if (unlikely(failover)) { > + if (!failover(data, file)) { > + inspect_file = 0; > + file->f_locks = nlm_file_inuse(file); > + } > + } > + > + if (inspect_file && nlm_inspect_file(data, file, match)) > ret = 1; This seems quite complicated! I don't have an alternative suggestion, though. --b. > > mutex_lock(&nlm_file_mutex); > file->f_count--; > /* No more references to this file. Let go of it. */ > - if (list_empty(&file->f_blocks) && !file->f_locks > + if (!file->f_locks && list_empty(&file->f_blocks) > && !file->f_shares && !file->f_count) { > hlist_del(&file->f_list); > nlmsvc_ops->fclose(file->f_file); > @@ -337,7 +358,7 @@ void > nlmsvc_mark_resources(void) > { > dprintk("lockd: nlmsvc_mark_resources\n"); > - nlm_traverse_files(NULL, nlmsvc_mark_host); > + nlm_traverse_files(NULL, nlmsvc_mark_host, NULL); > } > > /* > @@ -348,7 +369,7 @@ nlmsvc_free_host_resources(struct nlm_ho > { > dprintk("lockd: nlmsvc_free_host_resources\n"); > > - if (nlm_traverse_files(host, nlmsvc_same_host)) { > + if (nlm_traverse_files(host, nlmsvc_same_host, NULL)) { > printk(KERN_WARNING > "lockd: couldn't remove all locks held by %s\n", > host->h_name); > @@ -368,5 +389,36 @@ nlmsvc_invalidate_all(void) > * turn, which is about as inefficient as it gets. > * Now we just do it once in nlm_traverse_files. > */ > - nlm_traverse_files(NULL, nlmsvc_is_client); > + nlm_traverse_files(NULL, nlmsvc_is_client, NULL); > +} > + > +static int > +nlmsvc_failover_file_ok_path(void *datap, struct nlm_file *file) > +{ > + struct nameidata *nd = datap; > + return nd->mnt == file->f_file->f_path.mnt; > +} > + > +int > +nlmsvc_failover_path(struct nameidata *nd) > +{ > + return nlm_traverse_files(nd, nlmsvc_always_match, > + nlmsvc_failover_file_ok_path); > +} > +EXPORT_SYMBOL_GPL(nlmsvc_failover_path); > + > +static int > +nlmsvc_failover_file_ok_ip(void *datap, struct nlm_file *file) > +{ > + __be32 *server_addr = datap; > + > + return file->f_iaddr.addr.s_addr == *server_addr; > +} > + > +int > +nlmsvc_failover_ip(__be32 server_addr) > +{ > + return nlm_traverse_files(&server_addr, nlmsvc_always_match, > + nlmsvc_failover_file_ok_ip); > } > +EXPORT_SYMBOL_GPL(nlmsvc_failover_ip); > --- linux-o/include/linux/lockd/lockd.h 2008-01-04 10:01:08.000000000 -0500 > +++ linux/include/linux/lockd/lockd.h 2008-01-11 18:24:45.000000000 -0500 > @@ -113,6 +113,7 @@ struct nlm_file { > unsigned int f_locks; /* guesstimate # of locks */ > unsigned int f_count; /* reference count */ > struct mutex f_mutex; /* avoid concurrent access */ > + union svc_addr_u f_iaddr; /* server ip for failover */ > }; > > /* > @@ -214,6 +215,12 @@ void nlmsvc_mark_resources(void); > void nlmsvc_free_host_resources(struct nlm_host *); > void nlmsvc_invalidate_all(void); > > +/* > + * Cluster failover support > + */ > +int nlmsvc_failover_path(struct nameidata *nd); > +int nlmsvc_failover_ip(__be32 server_addr); > + > static __inline__ struct inode * > nlmsvc_file_inode(struct nlm_file *file) > {