This is the second version of this patchset. The main changes are:
- a few minor bugfixes
- some changes to the upcall struct format
- renaming some of the objects to be prefixed with "cld" instead of
"clstated"
One of the things that Bruce has long had on his wishlist is to replace
the client name tracking code that the kernel uses:
http://wiki.linux-nfs.org/wiki/index.php/Nfsd4_server_recovery
The existing code manipulates the filesystem directly to track this
info. Not only does that make the VFS maintainers look askance at
knfsd, but it is also unsuitable in a clustered configuration.
Typically we think of the grace period as a property of the server, but
with a clustered filesystem, we need to consider it as a property of the
cluster as a whole (or of the clustered filesystem). On a cold startup
of the cluster, once any node grants a non-reclaim lock, no further
reclaims can be allowed on any node. Grace periods must therefore be
coordinated amongst all cluster nodes.
In order to achieve that goal, we first need to allow client name
reclaim to be cluster-aware as well. This patchset is a move toward that
goal and covers the initial kernel part of such a change. A patchset to
add a daemon to handle the upcalls will follow.
Note that this patchset is still a little rough, so consider this an
RFC for the overall design. We'll also need to consider a plan to
deprecate the old client tracking code.
The goal with this patchset is to replace the existing functionality,
without disturbing the existing code too much. There's some room for
more cleanup and reorganization once the old tracker is gone.
Jeff Layton (5):
nfsd: add nfsd4_client_tracking_ops struct and a way to set it
sunrpc: create nfsd dir in rpc_pipefs
nfsd: add a generic flags field to nfs4_client
nfsd: add a header describing upcall to nfsdcld
nfsd: add the infrastructure to handle the cld upcall
fs/nfsd/nfs4recover.c | 439 ++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/nfs4state.c | 46 ++---
fs/nfsd/state.h | 15 +-
include/linux/nfsd/cld.h | 57 ++++++
net/sunrpc/rpc_pipe.c | 5 +
5 files changed, 518 insertions(+), 44 deletions(-)
create mode 100644 include/linux/nfsd/cld.h
On 2011-12-15 20:43, Jeff Layton wrote:
> The daemon takes a versioned binary struct. Hopefully this should allow
> us to revise the struct later if it becomes necessary.
>
> Signed-off-by: Jeff Layton <[email protected]>
> ---
> include/linux/nfsd/cld.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 57 insertions(+), 0 deletions(-)
> create mode 100644 include/linux/nfsd/cld.h
>
> diff --git a/include/linux/nfsd/cld.h b/include/linux/nfsd/cld.h
> new file mode 100644
> index 0000000..d64a7e7
> --- /dev/null
> +++ b/include/linux/nfsd/cld.h
> @@ -0,0 +1,57 @@
> +/*
> + * fs/nfsd/cld.h - upcall description for nfsdcld communication
> + *
> + * Copyright (c) 2011 Red Hat, Inc.
> + * Author(s): Jeff Layton <[email protected]>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
> + */
> +
> +#ifndef _NFSD_CLD_H
> +#define _NFSD_CLD_H
> +
> +/* latest upcall version available */
> +#define CLD_MAX_UPCALL_VERSION 1
> +
> +/* defined by RFC3530 */
> +#define NFS4_OPAQUE_LIMIT 1024
> +
> +enum cld_command {
> + Cld_Create, /* create a record for this cm_id */
> + Cld_Expire, /* remove record for this cm_id */
> + Cld_Allow, /* is this cm_id allowed? */
> + Cld_GraceDone, /* grace period is complete */
> + Cld_NrToReclaim, /* how many client records need reclaim? */
> +};
> +
> +/* representation of long-form NFSv4 client ID */
> +struct cld_name {
> + uint16_t cn_len; /* length of cm_id */
> + unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */
> +} __attribute__((packed));
> +
> +/* message struct for communication with userspace */
> +struct cld_msg {
> + uint8_t cm_vers; /* upcall version */
> + uint8_t cm_cmd; /* upcall command */
> + uint32_t cm_xid; /* transaction id */
> + int16_t cm_status; /* return code */
How about swapping the xid and status members so they'd align better?
Benny
> + union {
> + int64_t cm_gracetime; /* grace period start time */
> + struct cld_name cm_name;
> + } __attribute__((packed)) cm_u;
> +} __attribute__((packed));
> +
> +#endif /* !_NFSD_CLD_H */
On Mon, 19 Dec 2011 14:08:19 +0200
Benny Halevy <[email protected]> wrote:
> On 2011-12-15 20:43, Jeff Layton wrote:
> > The daemon takes a versioned binary struct. Hopefully this should allow
> > us to revise the struct later if it becomes necessary.
> >
> > Signed-off-by: Jeff Layton <[email protected]>
> > ---
> > include/linux/nfsd/cld.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++
> > 1 files changed, 57 insertions(+), 0 deletions(-)
> > create mode 100644 include/linux/nfsd/cld.h
> >
> > diff --git a/include/linux/nfsd/cld.h b/include/linux/nfsd/cld.h
> > new file mode 100644
> > index 0000000..d64a7e7
> > --- /dev/null
> > +++ b/include/linux/nfsd/cld.h
> > @@ -0,0 +1,57 @@
> > +/*
> > + * fs/nfsd/cld.h - upcall description for nfsdcld communication
> > + *
> > + * Copyright (c) 2011 Red Hat, Inc.
> > + * Author(s): Jeff Layton <[email protected]>
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, write to the Free Software
> > + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
> > + */
> > +
> > +#ifndef _NFSD_CLD_H
> > +#define _NFSD_CLD_H
> > +
> > +/* latest upcall version available */
> > +#define CLD_MAX_UPCALL_VERSION 1
> > +
> > +/* defined by RFC3530 */
> > +#define NFS4_OPAQUE_LIMIT 1024
> > +
> > +enum cld_command {
> > + Cld_Create, /* create a record for this cm_id */
> > + Cld_Expire, /* remove record for this cm_id */
> > + Cld_Allow, /* is this cm_id allowed? */
> > + Cld_GraceDone, /* grace period is complete */
> > + Cld_NrToReclaim, /* how many client records need reclaim? */
> > +};
> > +
> > +/* representation of long-form NFSv4 client ID */
> > +struct cld_name {
> > + uint16_t cn_len; /* length of cm_id */
> > + unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */
> > +} __attribute__((packed));
> > +
> > +/* message struct for communication with userspace */
> > +struct cld_msg {
> > + uint8_t cm_vers; /* upcall version */
> > + uint8_t cm_cmd; /* upcall command */
> > + uint32_t cm_xid; /* transaction id */
> > + int16_t cm_status; /* return code */
>
> How about swapping the xid and status members so they'd align better?
>
> Benny
>
Good catch. I'll fix that in the next iteration.
Thanks,
--
Jeff Layton <[email protected]>
Add a new top-level dir in rpc_pipefs to hold the pipe for the clientid
upcall.
Signed-off-by: Jeff Layton <[email protected]>
---
net/sunrpc/rpc_pipe.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index bfddd68..7cb8ab7 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -987,6 +987,7 @@ enum {
RPCAUTH_statd,
RPCAUTH_nfsd4_cb,
RPCAUTH_cache,
+ RPCAUTH_nfsd,
RPCAUTH_RootEOF
};
@@ -1019,6 +1020,10 @@ static const struct rpc_filelist files[] = {
.name = "cache",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
+ [RPCAUTH_nfsd] = {
+ .name = "nfsd",
+ .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ },
};
static int
--
1.7.1
The daemon takes a versioned binary struct. Hopefully this should allow
us to revise the struct later if it becomes necessary.
Signed-off-by: Jeff Layton <[email protected]>
---
include/linux/nfsd/cld.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 57 insertions(+), 0 deletions(-)
create mode 100644 include/linux/nfsd/cld.h
diff --git a/include/linux/nfsd/cld.h b/include/linux/nfsd/cld.h
new file mode 100644
index 0000000..d64a7e7
--- /dev/null
+++ b/include/linux/nfsd/cld.h
@@ -0,0 +1,57 @@
+/*
+ * fs/nfsd/cld.h - upcall description for nfsdcld communication
+ *
+ * Copyright (c) 2011 Red Hat, Inc.
+ * Author(s): Jeff Layton <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _NFSD_CLD_H
+#define _NFSD_CLD_H
+
+/* latest upcall version available */
+#define CLD_MAX_UPCALL_VERSION 1
+
+/* defined by RFC3530 */
+#define NFS4_OPAQUE_LIMIT 1024
+
+enum cld_command {
+ Cld_Create, /* create a record for this cm_id */
+ Cld_Expire, /* remove record for this cm_id */
+ Cld_Allow, /* is this cm_id allowed? */
+ Cld_GraceDone, /* grace period is complete */
+ Cld_NrToReclaim, /* how many client records need reclaim? */
+};
+
+/* representation of long-form NFSv4 client ID */
+struct cld_name {
+ uint16_t cn_len; /* length of cm_id */
+ unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */
+} __attribute__((packed));
+
+/* message struct for communication with userspace */
+struct cld_msg {
+ uint8_t cm_vers; /* upcall version */
+ uint8_t cm_cmd; /* upcall command */
+ uint32_t cm_xid; /* transaction id */
+ int16_t cm_status; /* return code */
+ union {
+ int64_t cm_gracetime; /* grace period start time */
+ struct cld_name cm_name;
+ } __attribute__((packed)) cm_u;
+} __attribute__((packed));
+
+#endif /* !_NFSD_CLD_H */
--
1.7.1
...and add a mechanism for switching between the "legacy" tracker and
the new one. The decision is made by checking whether the
v4recoverydir exists and is a directory. If it is, then the legacy
client tracker is used.
If it is not, then the kernel will create a "cld" pipe in rpc_pipefs.
That pipe is used to talk to a daemon for handling the upcall.
Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4recover.c | 318 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 317 insertions(+), 1 deletions(-)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 62fd534..ae3d936 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004 The Regents of the University of Michigan.
+* Copyright (c) 2011 Jeff Layton <[email protected]>
* All rights reserved.
*
* Andy Adamson <[email protected]>
@@ -36,6 +37,10 @@
#include <linux/namei.h>
#include <linux/crypto.h>
#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/cld.h>
#include "nfsd.h"
#include "state.h"
@@ -467,12 +472,323 @@ static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
.grace_done = nfsd4_recdir_purge_old,
};
+/* Globals */
+#define NFSD_PIPE_DIR "/nfsd"
+
+static struct dentry *cld_pipe;
+
+/* list of cld_msg's that are currently in use */
+static DEFINE_SPINLOCK(cld_lock);
+static LIST_HEAD(cld_list);
+static unsigned int cld_xid;
+
+struct cld_upcall {
+ struct list_head cu_list;
+ struct task_struct * cu_task;
+ struct cld_msg cu_msg;
+};
+
+static int
+nfsd4_cld_upcall(struct cld_msg *cmsg)
+{
+ int ret;
+ struct rpc_pipe_msg msg;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.data = cmsg;
+ msg.len = sizeof(*cmsg);
+
+ ret = rpc_queue_upcall(cld_pipe->d_inode, &msg);
+ if (ret < 0)
+ goto out;
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule();
+ __set_current_state(TASK_RUNNING);
+
+out:
+ return ret;
+}
+
+ssize_t cld_pipe_downcall(struct file *filp, const char __user *src,
+ size_t mlen)
+{
+ struct cld_upcall *tmp, *cup;
+ struct cld_msg *cmsg = (struct cld_msg *)src;
+ uint32_t xid;
+
+ if (mlen != sizeof(*cmsg)) {
+ dprintk("%s: got %lu bytes, expected %lu\n", __func__, mlen,
+ sizeof(*cmsg));
+ return -EINVAL;
+ }
+
+ /* copy just the xid so we can try to find that */
+ if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
+ dprintk("%s: error when copying xid from userspace", __func__);
+ return -EFAULT;
+ }
+
+ /* walk the list and find corresponding xid */
+ cup = NULL;
+ spin_lock(&cld_lock);
+ list_for_each_entry(tmp, &cld_list, cu_list) {
+ if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
+ cup = tmp;
+ list_del_init(&cup->cu_list);
+ break;
+ }
+ }
+ spin_unlock(&cld_lock);
+
+ /* couldn't find upcall? */
+ if (!cup) {
+ dprintk("%s: couldn't find upcall -- xid=%u\n", __func__,
+ cup->cu_msg.cm_xid);
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
+ return -EFAULT;
+
+ wake_up_process(cup->cu_task);
+ return mlen;
+}
+
+void
+cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+ struct cld_msg *cmsg = msg->data;
+ struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
+ cu_msg);
+
+ if (msg->errno >= 0)
+ return;
+ wake_up_process(cup->cu_task);
+}
+
+static const struct rpc_pipe_ops cld_upcall_ops = {
+ .upcall = rpc_pipe_generic_upcall,
+ .downcall = cld_pipe_downcall,
+ .destroy_msg = cld_pipe_destroy_msg,
+};
+
+int
+nfsd4_init_cld_pipe(void)
+{
+ int ret;
+ struct path path;
+ struct vfsmount *mnt;
+
+ if (cld_pipe)
+ return 0;
+
+ mnt = rpc_get_mount();
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+
+ ret = vfs_path_lookup(mnt->mnt_root, mnt, NFSD_PIPE_DIR, 0, &path);
+ if (ret)
+ goto err;
+
+ cld_pipe = rpc_mkpipe(path.dentry, "cld", NULL,
+ &cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+ path_put(&path);
+ if (!IS_ERR(cld_pipe))
+ return 0;
+
+ ret = PTR_ERR(cld_pipe);
+err:
+ rpc_put_mount();
+ return ret;
+}
+
+void
+nfsd4_remove_cld_pipe(void)
+{
+ int ret;
+
+ ret = rpc_unlink(cld_pipe);
+ if (ret)
+ printk(KERN_ERR "NFSD: error removing cld pipe: %d\n", ret);
+ cld_pipe = NULL;
+ rpc_put_mount();
+}
+
+static struct cld_upcall *
+alloc_cld_upcall(void)
+{
+ struct cld_upcall *new, *tmp;
+
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return new;
+
+ /* FIXME: hard cap on number in flight? */
+restart_search:
+ spin_lock(&cld_lock);
+ list_for_each_entry(tmp, &cld_list, cu_list) {
+ if (tmp->cu_msg.cm_xid == cld_xid) {
+ cld_xid++;
+ spin_unlock(&cld_lock);
+ goto restart_search;
+ }
+ }
+ new->cu_task = current;
+ new->cu_msg.cm_vers = CLD_MAX_UPCALL_VERSION;
+ put_unaligned(cld_xid++, &new->cu_msg.cm_xid);
+ list_add(&new->cu_list, &cld_list);
+ spin_unlock(&cld_lock);
+
+ dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
+
+ return new;
+}
+
+static void
+free_cld_upcall(struct cld_upcall *victim)
+{
+ spin_lock(&cld_lock);
+ list_del(&victim->cu_list);
+ spin_unlock(&cld_lock);
+ kfree(victim);
+}
+
+/* Ask daemon to create a new record */
+static int
+nfsd4_cld_create(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+
+ /* Don't upcall if it's already stored */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return -EEXIST;
+
+ cup = alloc_cld_upcall();
+ if (!cup)
+ return -ENOMEM;
+
+ cup->cu_msg.cm_cmd = Cld_Create;
+ cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+
+ ret = nfsd4_cld_upcall(&cup->cu_msg);
+ if (!ret) {
+ ret = cup->cu_msg.cm_status;
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+ free_cld_upcall(cup);
+ return ret;
+}
+
+/* Ask daemon to remove a record */
+static int
+nfsd4_cld_remove(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+
+ /* Don't upcall if it's already stored */
+ if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return -ENOENT;
+
+ cup = alloc_cld_upcall();
+ if (!cup)
+ return -ENOMEM;
+
+ cup->cu_msg.cm_cmd = Cld_Expire;
+ cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+
+ ret = nfsd4_cld_upcall(&cup->cu_msg);
+ if (!ret) {
+ ret = cup->cu_msg.cm_status;
+ clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+ free_cld_upcall(cup);
+ return ret;
+}
+
+/* Check for presence of a record, and update its timestamp */
+static int
+nfsd4_cld_check(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+
+ /* Don't upcall if one was already stored during this grace pd */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ cup = alloc_cld_upcall();
+ if (!cup)
+ return -ENOMEM;
+
+ cup->cu_msg.cm_cmd = Cld_Allow;
+ cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+
+ ret = nfsd4_cld_upcall(&cup->cu_msg);
+ if (!ret) {
+ ret = cup->cu_msg.cm_status;
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+ free_cld_upcall(cup);
+ return ret;
+}
+
+static int
+nfsd4_cld_grace_done(time_t boot_time)
+{
+ int ret;
+ struct cld_upcall *cup;
+
+ cup = alloc_cld_upcall();
+ if (!cup)
+ return -ENOMEM;
+
+ cup->cu_msg.cm_cmd = Cld_GraceDone;
+ cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time;
+ ret = nfsd4_cld_upcall(&cup->cu_msg);
+ if (!ret)
+ ret = cup->cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+ return ret;
+}
+
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+ .init = nfsd4_init_cld_pipe,
+ .exit = nfsd4_remove_cld_pipe,
+ .create = nfsd4_cld_create,
+ .remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check,
+ .grace_done = nfsd4_cld_grace_done,
+};
+
int
nfsd4_client_tracking_init(void)
{
int status;
+ struct path path;
- client_tracking_ops = &nfsd4_legacy_tracking_ops;
+ if (!client_tracking_ops) {
+ client_tracking_ops = &nfsd4_cld_tracking_ops;
+ status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
+ if (!status) {
+ if (S_ISDIR(path.dentry->d_inode->i_mode))
+ client_tracking_ops =
+ &nfsd4_legacy_tracking_ops;
+ path_put(&path);
+ }
+ }
status = client_tracking_ops->init();
if (status) {
printk(KERN_WARNING "NFSD: Unable to initialize client "
--
1.7.1
Abstract out the mechanism that we use to track clients into a set of
client name tracking functions.
This gives us a mechanism to plug in a new set of client tracking
functions without disturbing the callers. It also gives us a way to
decide which tracking scheme to use at runtime.
For now, this just looks like pointless abstraction, but later we'll
add an alternate scheme for tracking clients on stable storage.
Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4recover.c | 123 +++++++++++++++++++++++++++++++++++++++++++++----
fs/nfsd/nfs4state.c | 46 +++++++------------
fs/nfsd/state.h | 13 +++--
3 files changed, 138 insertions(+), 44 deletions(-)
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index ed083b9..62fd534 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -43,9 +43,20 @@
#define NFSDDBG_FACILITY NFSDDBG_PROC
+/* Declarations */
+struct nfsd4_client_tracking_ops {
+ int (*init)(void);
+ void (*exit)(void);
+ int (*create)(struct nfs4_client *);
+ int (*remove)(struct nfs4_client *);
+ int (*check)(struct nfs4_client *);
+ int (*grace_done)(time_t);
+};
+
/* Globals */
static struct file *rec_file;
static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+static const struct nfsd4_client_tracking_ops *client_tracking_ops;
static int
nfs4_save_creds(const struct cred **original_creds)
@@ -259,14 +270,14 @@ out_unlock:
return status;
}
-void
+static int
nfsd4_remove_clid_dir(struct nfs4_client *clp)
{
const struct cred *original_cred;
int status;
if (!rec_file || !clp->cl_firststate)
- return;
+ return 0;
status = mnt_want_write(rec_file->f_path.mnt);
if (status)
@@ -286,7 +297,7 @@ out:
if (status)
printk("NFSD: Failed to remove expired client state directory"
" %.*s\n", HEXDIR_LEN, clp->cl_recdir);
- return;
+ return 0;
}
static int
@@ -305,12 +316,12 @@ purge_old(struct dentry *parent, struct dentry *child)
return 0;
}
-void
-nfsd4_recdir_purge_old(void) {
+static int
+nfsd4_recdir_purge_old(time_t boot_time __attribute__ ((unused))) {
int status;
if (!rec_file)
- return;
+ return 0;
status = mnt_want_write(rec_file->f_path.mnt);
if (status)
goto out;
@@ -322,6 +333,7 @@ out:
if (status)
printk("nfsd4: failed to purge old clients from recovery"
" directory %s\n", rec_file->f_path.dentry->d_name.name);
+ return status;
}
static int
@@ -337,7 +349,7 @@ load_recdir(struct dentry *parent, struct dentry *child)
return 0;
}
-int
+static int
nfsd4_recdir_load(void) {
int status;
@@ -355,8 +367,8 @@ nfsd4_recdir_load(void) {
* Hold reference to the recovery directory.
*/
-void
-nfsd4_init_recdir()
+static int
+nfsd4_init_recdir(void)
{
const struct cred *original_cred;
int status;
@@ -371,17 +383,34 @@ nfsd4_init_recdir()
printk("NFSD: Unable to change credentials to find recovery"
" directory: error %d\n",
status);
- return;
+ return status;
}
rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
if (IS_ERR(rec_file)) {
printk("NFSD: unable to find recovery directory %s\n",
user_recovery_dirname);
+ status = PTR_ERR(rec_file);
rec_file = NULL;
}
nfs4_reset_creds(original_cred);
+ return status;
+}
+
+static int
+nfsd4_load_reboot_recovery_data(void)
+{
+ int status;
+
+ nfs4_lock_state();
+ status = nfsd4_init_recdir();
+ if (!status)
+ status = nfsd4_recdir_load();
+ nfs4_unlock_state();
+ if (status)
+ printk("NFSD: Failure reading reboot recovery data\n");
+ return status;
}
void
@@ -419,3 +448,77 @@ nfs4_recoverydir(void)
{
return user_recovery_dirname;
}
+
+static int
+nfsd4_check_legacy_client(struct nfs4_client *clp)
+{
+ if (nfsd4_find_reclaim_client(clp) != NULL)
+ return 0;
+ else
+ return -ENOENT;
+}
+
+static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
+ .init = nfsd4_load_reboot_recovery_data,
+ .exit = nfsd4_shutdown_recdir,
+ .create = nfsd4_create_clid_dir,
+ .remove = nfsd4_remove_clid_dir,
+ .check = nfsd4_check_legacy_client,
+ .grace_done = nfsd4_recdir_purge_old,
+};
+
+int
+nfsd4_client_tracking_init(void)
+{
+ int status;
+
+ client_tracking_ops = &nfsd4_legacy_tracking_ops;
+ status = client_tracking_ops->init();
+ if (status) {
+ printk(KERN_WARNING "NFSD: Unable to initialize client "
+ "recovery tracking! (%d)\n", status);
+ client_tracking_ops = NULL;
+ }
+ return status;
+}
+
+void
+nfsd4_client_tracking_exit(void)
+{
+ if (!client_tracking_ops)
+ return;
+ client_tracking_ops->exit();
+ client_tracking_ops = NULL;
+}
+
+int
+nfsd4_client_record_create(struct nfs4_client *clp)
+{
+ if (!client_tracking_ops)
+ return -EOPNOTSUPP;
+ return client_tracking_ops->create(clp);
+}
+
+int
+nfsd4_client_record_remove(struct nfs4_client *clp)
+{
+ if (!client_tracking_ops)
+ return -EOPNOTSUPP;
+ return client_tracking_ops->remove(clp);
+}
+
+int
+nfsd4_client_record_check(struct nfs4_client *clp)
+{
+ if (!client_tracking_ops)
+ return -EOPNOTSUPP;
+ return client_tracking_ops->check(clp);
+}
+
+int
+nfsd4_grace_done(time_t boot_time)
+{
+ if (!client_tracking_ops)
+ return -EOPNOTSUPP;
+ return client_tracking_ops->grace_done(boot_time);
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 47e94e3..e32ef02 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2038,7 +2038,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
goto out;
status = nfs_ok;
- nfsd4_create_clid_dir(cstate->session->se_client);
+ nfsd4_client_record_create(cstate->session->se_client);
out:
nfs4_unlock_state();
return status;
@@ -2233,7 +2233,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
conf = find_confirmed_client_by_str(unconf->cl_recdir,
hash);
if (conf) {
- nfsd4_remove_clid_dir(conf);
+ nfsd4_client_record_remove(conf);
expire_client(conf);
}
move_to_confirmed(unconf);
@@ -3057,7 +3057,7 @@ static void
nfsd4_end_grace(void)
{
dprintk("NFSD: end of grace period\n");
- nfsd4_recdir_purge_old();
+ nfsd4_grace_done(boot_time);
locks_end_grace(&nfsd4_manager);
/*
* Now that every NFSv4 client has had the chance to recover and
@@ -3106,7 +3106,7 @@ nfs4_laundromat(void)
clp = list_entry(pos, struct nfs4_client, cl_lru);
dprintk("NFSD: purging unused client (clientid %08x)\n",
clp->cl_clientid.cl_id);
- nfsd4_remove_clid_dir(clp);
+ nfsd4_client_record_remove(clp);
expire_client(clp);
}
spin_lock(&recall_lock);
@@ -3531,7 +3531,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
- nfsd4_create_clid_dir(oo->oo_owner.so_client);
+ nfsd4_client_record_create(oo->oo_owner.so_client);
status = nfs_ok;
out:
if (!cstate->replay_owner)
@@ -4358,19 +4358,13 @@ nfs4_release_reclaim(void)
/*
* called from OPEN, CLAIM_PREVIOUS with a new clientid. */
-static struct nfs4_client_reclaim *
-nfs4_find_reclaim_client(clientid_t *clid)
+struct nfs4_client_reclaim *
+nfsd4_find_reclaim_client(struct nfs4_client *clp)
{
unsigned int strhashval;
- struct nfs4_client *clp;
struct nfs4_client_reclaim *crp = NULL;
- /* find clientid in conf_id_hashtbl */
- clp = find_confirmed_client(clid);
- if (clp == NULL)
- return NULL;
-
dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
clp->cl_name.len, clp->cl_name.data,
clp->cl_recdir);
@@ -4391,7 +4385,14 @@ nfs4_find_reclaim_client(clientid_t *clid)
__be32
nfs4_check_open_reclaim(clientid_t *clid)
{
- return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
+ struct nfs4_client *clp;
+
+ /* find clientid in conf_id_hashtbl */
+ clp = find_confirmed_client(clid);
+ if (clp == NULL)
+ return nfserr_reclaim_bad;
+
+ return nfsd4_client_record_check(clp) ? nfserr_reclaim_bad : nfs_ok;
}
/* initialization to perform at module load time: */
@@ -4430,19 +4431,6 @@ nfs4_state_init(void)
return 0;
}
-static void
-nfsd4_load_reboot_recovery_data(void)
-{
- int status;
-
- nfs4_lock_state();
- nfsd4_init_recdir();
- status = nfsd4_recdir_load();
- nfs4_unlock_state();
- if (status)
- printk("NFSD: Failure reading reboot recovery data\n");
-}
-
/*
* Since the lifetime of a delegation isn't limited to that of an open, a
* client may quite reasonably hang on to a delegation as long as it has
@@ -4495,7 +4483,7 @@ out_free_laundry:
int
nfs4_state_start(void)
{
- nfsd4_load_reboot_recovery_data();
+ nfsd4_client_tracking_init();
return __nfs4_state_start();
}
@@ -4530,7 +4518,7 @@ __nfs4_state_shutdown(void)
unhash_delegation(dp);
}
- nfsd4_shutdown_recdir();
+ nfsd4_client_tracking_exit();
}
void
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index a3cf384..b07f5ea 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -462,6 +462,7 @@ extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
extern void nfs4_lock_state(void);
extern void nfs4_unlock_state(void);
extern int nfs4_in_grace(void);
+extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct nfs4_client *crp);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
extern void nfs4_free_openowner(struct nfs4_openowner *);
extern void nfs4_free_lockowner(struct nfs4_lockowner *);
@@ -476,16 +477,18 @@ extern void nfsd4_destroy_callback_queue(void);
extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfs4_put_delegation(struct nfs4_delegation *dp);
extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
-extern void nfsd4_init_recdir(void);
-extern int nfsd4_recdir_load(void);
extern void nfsd4_shutdown_recdir(void);
extern int nfs4_client_to_reclaim(const char *name);
extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
-extern void nfsd4_recdir_purge_old(void);
-extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
-extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
extern void release_session_client(struct nfsd4_session *);
extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *);
extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);
+/* nfs4recover operations */
+extern int nfsd4_client_tracking_init(void);
+extern void nfsd4_client_tracking_exit(void);
+extern int nfsd4_client_record_create(struct nfs4_client *clp);
+extern int nfsd4_client_record_remove(struct nfs4_client *clp);
+extern int nfsd4_client_record_check(struct nfs4_client *clp);
+extern int nfsd4_grace_done(time_t boot_time);
#endif /* NFSD4_STATE_H */
--
1.7.1
We'll need a way to flag the nfs4_client as already being recorded on
stable storage so that we don't continually upcall.
Once we're able to deprecate the old state tracking code, we can get rid
of cl_recdir to recoup some of the space we're using here.
Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/state.h | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index b07f5ea..712bd32 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -240,6 +240,8 @@ struct nfs4_client {
nfs4_verifier cl_verifier; /* generated by client */
time_t cl_time; /* time of last lease renewal */
struct sockaddr_storage cl_addr; /* client ipaddress */
+#define NFSD4_CLIENT_STABLE (0) /* client on stable storage */
+ unsigned long cl_flags;
u32 cl_flavor; /* setclientid pseudoflavor */
char *cl_principal; /* setclientid principal name */
struct svc_cred cl_cred; /* setclientid principal */
--
1.7.1