2006-08-05 13:06:56

by Olaf Kirch

[permalink] [raw]
Subject: [PATCH 22/22] [lockd] Add kernel statd

From: Olaf Kirch <[email protected]>
Subject: [lockd] Add kernel statd

This patch adds the kernel statd, and allows the switchable statd support
to use this instead of the upcalls to user land statd.

Signed-off-by: Olaf Kirch <[email protected]>

fs/buffer.c | 1
fs/lockd/Makefile | 2
fs/lockd/statd.c | 405 +++++++++++++++++++++++++++++++++++++++++
fs/lockd/svc.c | 59 +++++
include/linux/lockd/lockd.h | 1
include/linux/lockd/sm_inter.h | 5
6 files changed, 469 insertions(+), 4 deletions(-)

Index: build/fs/lockd/svc.c
===================================================================
--- build.orig/fs/lockd/svc.c
+++ build/fs/lockd/svc.c
@@ -40,6 +40,7 @@
#define ALLOWED_SIGS (sigmask(SIGKILL))

static struct svc_program nlmsvc_program;
+extern struct svc_program nsmsvc_program;

struct nlmsvc_binding * nlmsvc_ops;
EXPORT_SYMBOL(nlmsvc_ops);
@@ -62,6 +63,7 @@ static unsigned long nlm_timeout = LOCK
static int nlm_udpport, nlm_tcpport;
int nlm_max_hosts = 256;
int nsm_use_hostnames = 0;
+static int nsm_use_kstatd = 0;

/*
* Constants needed for the sysctl interface.
@@ -119,8 +121,17 @@ lockd(struct svc_rqst *rqstp)

daemonize("lockd");

- /* Initialize the statd upcalls to rpc.statd */
- nsm_statd_upcalls_init();
+ /* See if we should use the kernel statd. If not,
+ * or if setting up the kernel statd fails, try
+ * falling back to user land upcalls.
+ */
+ if (nsm_use_kstatd && nsm_kernel_statd_init() < 0)
+ nsm_use_kstatd = 0;
+
+ if (nsm_use_kstatd == 0) {
+ /* Initialize the statd upcalls to rpc.statd */
+ nsm_statd_upcalls_init();
+ }

/* Process request with signals blocked, but allow SIGKILL. */
allow_signal(SIGKILL);
@@ -218,6 +229,7 @@ int
lockd_up(void)
{
static int warned;
+ struct svc_program * prog;
struct svc_serv * serv;
int error = 0;

@@ -241,8 +253,12 @@ lockd_up(void)
printk(KERN_WARNING
"lockd_up: no pid, %d users??\n", nlmsvc_users);

+ /* Register NLM program and possibly NSM (if using kstatd) */
error = -ENOMEM;
- serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE);
+ prog = &nlmsvc_program;
+ if (nsm_use_kstatd)
+ prog = &nsmsvc_program;
+ serv = svc_create(prog, LOCKD_BUFSIZE);
if (!serv) {
printk(KERN_WARNING "lockd_up: create service failed\n");
goto out;
@@ -397,6 +413,15 @@ static ctl_table nlm_sysctls[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nsm_use_kstatd",
+ .data = &nsm_use_kstatd,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+

{ .ctl_name = 0 }
};
@@ -466,6 +491,7 @@ module_param_call(nlm_udpport, param_set
&nlm_udpport, 0644);
module_param_call(nlm_tcpport, param_set_port, param_get_int,
&nlm_tcpport, 0644);
+module_param(nsm_use_kstatd, int, 0444);

/*
* Initialising and terminating the module.
@@ -536,3 +562,30 @@ static struct svc_program nlmsvc_program
.pg_stats = &nlmsvc_stats, /* stats table */
.pg_authenticate = &lockd_authenticate /* export authentication */
};
+
+/*
+ * Define NSM program and procedures
+ */
+/* NSM version 1: seven procedures taken from nsmsvc_procedures. */
+static struct svc_version nsmsvc_version1 = {
+ .vs_vers = 1,
+ .vs_nproc = 7,
+ .vs_proc = nsmsvc_procedures,
+ .vs_xdrsize = SMSVC_XDRSIZE,
+};
+/* Version table; only slot 1 is populated, slot 0 stays NULL. */
+static struct svc_version * nsmsvc_version[] = {
+ [1] = &nsmsvc_version1,
+};
+
+static struct svc_stat nsmsvc_stats;
+
+/* Number of slots in nsmsvc_version[] (including the unused slot 0). */
+#define SM_NRVERS (sizeof(nsmsvc_version)/sizeof(nsmsvc_version[0]))
+/*
+ * NSM program descriptor. It is chained in front of nlmsvc_program
+ * through pg_next.
+ */
+struct svc_program nsmsvc_program = {
+ .pg_next = &nlmsvc_program,
+ .pg_prog = SM_PROGRAM, /* program number */
+ .pg_nvers = SM_NRVERS, /* number of entries in nsmsvc_version */
+ .pg_vers = nsmsvc_version, /* version table */
+ .pg_name = "statd", /* service name */
+ .pg_class = "nfsd", /* share authentication with nfsd */
+ .pg_stats = &nsmsvc_stats, /* stats table */
+ .pg_authenticate = &nsmsvc_authenticate /* no authentication :-( */
+};
Index: build/include/linux/lockd/sm_inter.h
===================================================================
--- build.orig/include/linux/lockd/sm_inter.h
+++ build/include/linux/lockd/sm_inter.h
@@ -19,6 +19,7 @@
#define SM_NOTIFY 6

#define SM_MAXSTRLEN 1024
+#define SMSVC_XDRSIZE (sizeof(struct nsm_args) + SM_MAXSTRLEN)

/*
* Arguments for all calls to statd
@@ -30,6 +31,8 @@ struct nsm_args {
u32 proc;

char * mon_name;
+ int mon_name_len;
+ u32 state;
};

/*
@@ -41,6 +44,8 @@ struct nsm_res {
};

extern void nsm_statd_upcalls_init(void);
+extern int nsm_kernel_statd_init(void);
+extern int nsmsvc_authenticate(struct svc_rqst *);
extern int (*nsm_monitor)(struct nlm_host *);
extern int (*nsm_unmonitor)(struct nlm_host *);
extern int nsm_local_state;
Index: build/fs/buffer.c
===================================================================
--- build.orig/fs/buffer.c
+++ build/fs/buffer.c
@@ -183,6 +183,7 @@ int fsync_super(struct super_block *sb)
__fsync_super(sb);
return sync_blockdev(sb->s_bdev);
}
+EXPORT_SYMBOL_GPL(fsync_super);

/*
* Write out and wait upon all dirty data associated with this
Index: build/fs/lockd/Makefile
===================================================================
--- build.orig/fs/lockd/Makefile
+++ build/fs/lockd/Makefile
@@ -5,6 +5,6 @@
obj-$(CONFIG_LOCKD) += lockd.o

lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \
- svcproc.o svcsubs.o mon.o xdr.o
+ svcproc.o svcsubs.o mon.o xdr.o statd.o
lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o
lockd-objs := $(lockd-objs-y)
Index: build/fs/lockd/statd.c
===================================================================
--- /dev/null
+++ build/fs/lockd/statd.c
@@ -0,0 +1,405 @@
+/*
+ * linux/fs/lockd/statd.c
+ *
+ * Kernel-based status monitor. This is an alternative to
+ * the code in mon.c.
+ *
+ * When asked to monitor a host, we add it to /var/lib/nfs/sm
+ * ourselves, and that's it. In order to catch SM_NOTIFY calls
+ * we implement a minimal statd.
+ *
+ * Minimal user space requirements for this implementation:
+ * /var/lib/nfs/state
+ * must exist, and must contain the NSM state as a 32bit
+ * binary counter.
+ * /var/lib/nfs/sm
+ * must exist
+ *
+ * Copyright (C) 2004, Olaf Kirch <[email protected]>
+ */
+
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/lockd/lockd.h>
+#include <linux/lockd/share.h>
+#include <linux/lockd/sm_inter.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <asm/uaccess.h>
+#include <linux/buffer_head.h>
+
+
+/* XXX make this a module parameter? */
+#define NSM_BASE_PATH "/var/lib/nfs"
+#define NSM_SM_PATH NSM_BASE_PATH "/sm"
+#define NSM_STATE_PATH NSM_BASE_PATH "/state"
+
+#define NLMDBG_FACILITY NLMDBG_CLIENT
+
+static int __nsm_monitor(struct nlm_host *host);
+static int __nsm_unmonitor(struct nlm_host *host);
+
+/*
+ * Initialize the local NSM state variable and install the in-kernel
+ * monitor/unmonitor handlers.
+ *
+ * The state is read from NSM_STATE_PATH. A file of exactly 4 bytes is
+ * taken as a raw 32bit binary counter; any other length is parsed as
+ * a decimal string.
+ *
+ * Returns 0 on success, or a negative errno if the state file cannot
+ * be opened or read. On failure nsm_monitor/nsm_unmonitor are left
+ * untouched.
+ */
+int
+nsm_kernel_statd_init(void)
+{
+	struct file *filp;
+	char buffer[32];
+	mm_segment_t fs;
+	int res;
+
+	dprintk("lockd: nsm_init()\n");
+	filp = filp_open(NSM_STATE_PATH, O_RDONLY, 0444);
+	if (IS_ERR(filp)) {
+		res = PTR_ERR(filp);
+		printk(KERN_NOTICE "lockd: failed to open %s: err=%d\n",
+				NSM_STATE_PATH, res);
+		return res;
+	}
+
+	/* Read at most sizeof(buffer) - 1 bytes so there is always room
+	 * for the terminating NUL that simple_strtol() depends on. */
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	res = vfs_read(filp, buffer, sizeof(buffer) - 1, &filp->f_pos);
+	set_fs(fs);
+	filp_close(filp, NULL);
+
+	if (res < 0)
+		return res;
+
+	/* vfs_read() does not terminate the data; do it ourselves so the
+	 * decimal parse below cannot run off the end of the buffer. */
+	buffer[res] = '\0';
+
+	if (res == 4)
+		nsm_local_state = *(u32 *) buffer;
+	else
+		nsm_local_state = simple_strtol(buffer, NULL, 10);
+
+	nsm_monitor = __nsm_monitor;
+	nsm_unmonitor = __nsm_unmonitor;
+	return 0;
+}
+
+/*
+ * Build the NSM file name for @nsm.
+ *
+ * The name is NSM_SM_PATH followed by either the host name (when
+ * nsm_use_hostnames is set) or the dotted-quad IPv4 address.
+ *
+ * Returns a page-sized buffer that must be released with
+ * nsm_put_name(), or NULL if no page could be allocated. Callers
+ * test the result against NULL, so an ERR_PTR() must not be
+ * returned here.
+ */
+static char *
+nsm_filename(struct nsm_handle *nsm)
+{
+	char *name;
+
+	name = (char *) __get_free_page(GFP_KERNEL);
+	if (name == NULL)
+		return NULL;
+
+	if (nsm_use_hostnames) {
+		snprintf(name, PAGE_SIZE, "%s/%s",
+				NSM_SM_PATH, nsm->sm_name);
+	} else {
+		/* FIXME IPV6 */
+		snprintf(name, PAGE_SIZE, "%s/%u.%u.%u.%u",
+				NSM_SM_PATH,
+				NIPQUAD(nsm->sm_addr.sin_addr));
+	}
+	return name;
+}
+
+/*
+ * Release a file name buffer; the callers in this file pass the
+ * page returned by nsm_filename().
+ */
+static void
+nsm_put_name(char *name)
+{
+ free_page((unsigned long) name);
+}
+
+/*
+ * Create the NSM monitor file for @nsm and sync the filesystem it
+ * lives on.
+ *
+ * Returns 0 on success, -ENOMEM if the file name could not be built,
+ * or the error reported by filp_open().
+ */
+static int
+nsm_create(struct nsm_handle *nsm)
+{
+	struct file *file;
+	char *path;
+	int err;
+
+	dprintk("lockd: creating statd monitor file for %s\n", nsm->sm_name);
+
+	path = nsm_filename(nsm);
+	if (!path)
+		return -ENOMEM;
+
+	file = filp_open(path, O_CREAT|O_SYNC|O_RDWR, 0644);
+	if (!IS_ERR(file)) {
+		/* The file only needs to exist; sync and close it again. */
+		fsync_super(file->f_dentry->d_inode->i_sb);
+		filp_close(file, NULL);
+		err = 0;
+	} else {
+		err = PTR_ERR(file);
+		printk(KERN_NOTICE
+			"lockd/statd: failed to create %s: err=%d\n",
+			path, err);
+	}
+
+	nsm_put_name(path);
+	return err;
+}
+
+/*
+ * Remove the NSM monitor file belonging to @nsm.
+ *
+ * Returns 0 on success or a negative errno; failures are also
+ * reported through printk.
+ */
+static int
+nsm_unlink(struct nsm_handle *nsm)
+{
+ struct nameidata nd;
+ struct inode *inode = NULL;
+ struct dentry *dentry;
+ char *filename;
+ int res = 0;
+
+ if (!(filename = nsm_filename(nsm)))
+ return -ENOMEM;
+
+ /* Doing unlink from kernel space is really icky. */
+ if ((res = path_lookup(filename, LOOKUP_PARENT, &nd)) != 0)
+ goto exit;
+
+ /* Only accept a normal last component with nothing trailing it */
+ if (nd.last_type == LAST_NORM && !nd.last.name[nd.last.len]) {
+ /* Lookup and unlink both run under the parent's i_mutex */
+ mutex_lock(&nd.dentry->d_inode->i_mutex);
+
+ dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len);
+ if (!IS_ERR(dentry)) {
+ /* Take an extra inode reference across vfs_unlink();
+ * it is dropped by the iput() at the end. */
+ if ((inode = dentry->d_inode) != NULL)
+ atomic_inc(&inode->i_count);
+ res = vfs_unlink(nd.dentry->d_inode, dentry);
+ dput(dentry);
+ } else {
+ res = PTR_ERR(dentry);
+ }
+ mutex_unlock(&nd.dentry->d_inode->i_mutex);
+ } else {
+ res = -EISDIR;
+ }
+ path_release(&nd);
+
+exit:
+ if (res < 0) {
+ printk(KERN_NOTICE
+ "lockd/statd: failed to unlink %s: err=%d\n",
+ filename, res);
+ }
+
+ /* Drop the reference taken before vfs_unlink(), if any */
+ if (inode)
+ iput(inode);
+ nsm_put_name(filename);
+ return res;
+}
+
+/*
+ * Call nsm_create/nsm_unlink with CAP_DAC_OVERRIDE
+ */
+/* Exchange current->var with the local variable of the same name. */
+#define swap_ugid(type, var) { \
+ type tmp = current->var; current->var = var; var = tmp; \
+}
+
+/*
+ * Run @func on @nsm with CAP_DAC_OVERRIDE raised and fsuid/fsgid set
+ * to 0, then restore the previous effective capabilities and ids.
+ *
+ * Returns whatever @func returned.
+ */
+static int
+with_privilege(int (*func)(struct nsm_handle *), struct nsm_handle *nsm)
+{
+ kernel_cap_t cap = current->cap_effective;
+ int res = 0, mask;
+ uid_t fsuid = 0;
+ gid_t fsgid = 0;
+
+ /* If we're unprivileged, a call to capable() will set the
+ * SUPERPRIV flag */
+ /* mask has every bit set except PF_SUPERPRIV, which is set only
+ * if it is set in current->flags right now; ANDing with it below
+ * can therefore clear at most that one flag. */
+ mask = current->flags | ~PF_SUPERPRIV;
+
+ /* Raise capability so that we're able to create/unlink the file.
+ * Set fsuid/fsgid to 0 so the file will be owned by root. */
+ cap_raise(current->cap_effective, CAP_DAC_OVERRIDE);
+ swap_ugid(uid_t, fsuid);
+ swap_ugid(gid_t, fsgid);
+
+ res = func(nsm);
+
+ /* drop privileges */
+ /* The second swap puts the saved fsuid/fsgid back into current */
+ current->cap_effective = cap;
+ swap_ugid(uid_t, fsuid);
+ swap_ugid(gid_t, fsgid);
+
+ /* Clear PF_SUPERPRIV unless it was set to begin with */
+ current->flags &= mask;
+
+ return res;
+}
+
+/*
+ * Start monitoring a remote host by creating its monitor file,
+ * unless the handle is already marked as monitored.
+ *
+ * Note we hold the semaphore for the host table while we're here.
+ *
+ * Returns 0 if the host was already monitored, otherwise the result
+ * of the privileged nsm_create() call (negative errno on failure).
+ */
+static int
+__nsm_monitor(struct nlm_host *host)
+{
+	struct nsm_handle *handle;
+	int err;
+
+	dprintk("lockd: nsm_monitor(%s)\n", host->h_name);
+
+	/* A host without an NSM handle must never get here */
+	handle = host->h_nsmhandle;
+	if (handle == NULL)
+		BUG();
+
+	if (handle->sm_monitored)
+		return 0;
+
+	err = with_privilege(nsm_create, handle);
+	if (err < 0) {
+		dprintk(KERN_NOTICE "nsm_monitor(%s) failed: errno=%d\n",
+				handle->sm_name, -err);
+		return err;
+	}
+
+	handle->sm_monitored = 1;
+	return err;
+}
+
+/*
+ * Cease to monitor a remote host.
+ *
+ * Detaches the NSM handle from @host and releases it. The monitor
+ * file is unlinked only when the handle's sm_count reads 1 and the
+ * handle is monitored and not sticky.
+ *
+ * Returns 0, or the negative errno from the privileged unlink.
+ */
+static int
+__nsm_unmonitor(struct nlm_host *host)
+{
+	struct nsm_handle *handle = host->h_nsmhandle;
+	int err = 0;
+
+	host->h_nsmhandle = NULL;
+
+	/* If the host was invalidated due to lockd restart/shutdown,
+	 * don't unmonitor it.
+	 * (Strictly speaking, we would have to keep the SM file
+	 * until the next reboot. The only way to achieve that
+	 * would be to link the monitor file to sm.bak now.)
+	 */
+	if (handle != NULL
+	 && atomic_read(&handle->sm_count) == 1
+	 && handle->sm_monitored
+	 && !handle->sm_sticky) {
+		dprintk("lockd: nsm_unmonitor(%s)\n", host->h_name);
+
+		err = with_privilege(nsm_unlink, handle);
+	}
+
+	nsm_release(handle);
+	return err;
+}
+
+/*
+ * NSM server implementation starts here
+ */
+/*
+ * Authentication hook for the NSM program: accepts every request.
+ * Clears rqstp->rq_client and always returns SVC_OK.
+ */
+int
+nsmsvc_authenticate(struct svc_rqst *rqstp)
+{
+ /* No authentication for statd. Many statd implementations
+ * even send their reboot notifications from an unprivileged
+ * port.
+ */
+ rqstp->rq_client = NULL;
+ return SVC_OK;
+}
+
+
+/*
+ * NULL: Test for presence of service
+ *
+ * Ignores its arguments and always returns rpc_success.
+ */
+static int
+nsmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+ dprintk("statd: NULL called\n");
+ return rpc_success;
+}
+
+/*
+ * NOTIFY: receive notification that remote host rebooted
+ *
+ * Hands the sender's address together with the decoded mon_name,
+ * its length and the new state to nlm_host_rebooted(). Always
+ * returns rpc_success; @resp is not written.
+ */
+static int
+nsmsvc_proc_notify(struct svc_rqst *rqstp, struct nsm_args *argp,
+ struct nsm_res *resp)
+{
+ /* Local copy of the request's source address */
+ struct sockaddr_in saddr = rqstp->rq_addr;
+
+ dprintk("statd: NOTIFY called\n");
+
+ nlm_host_rebooted(&saddr, argp->mon_name, argp->mon_name_len, argp->state);
+ return rpc_success;
+}
+
+/*
+ * All other operations: return failure
+ *
+ * The RPC call itself completes with rpc_success; the reply carries
+ * status 0 and state -1.
+ */
+static int
+nsmsvc_proc_fail(struct svc_rqst *rqstp, struct nsm_args *argp,
+ struct nsm_res *resp)
+{
+ dprintk("statd: proc %u called\n", rqstp->rq_proc);
+ resp->status = 0;
+ resp->state = -1;
+ return rpc_success;
+}
+
+/*
+ * NSM XDR routines
+ */
+/* Decode an empty argument list: only the size check is performed. */
+static int
+nsmsvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+ return xdr_argsize_check(rqstp, p);
+}
+
+/* Encode an empty result: only the size check is performed. */
+static int
+nsmsvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+ return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * Decode the arguments of a state change notification: the
+ * sender's mon_name (at most SM_MAXSTRLEN bytes, referenced in
+ * place) followed by its new state number.
+ *
+ * Returns 0 if the string cannot be decoded, otherwise the result
+ * of xdr_argsize_check().
+ */
+static int
+nsmsvc_decode_stat_chge(struct svc_rqst *rqstp, u32 *p, struct nsm_args *argp)
+{
+	/* Record the client's mon_name and its length */
+	p = xdr_decode_string_inplace(p, &argp->mon_name,
+				      &argp->mon_name_len, SM_MAXSTRLEN);
+	if (!p)
+		return 0;
+
+	/* The state counter follows the name */
+	argp->state = ntohl(*p);
+	p++;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Encode a reply consisting of the status word only.
+ * NOTE(review): status is stored without htonl(); the only producer
+ * visible in this file, nsmsvc_proc_fail(), sets it to 0.
+ */
+static int
+nsmsvc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nsm_res *resp)
+{
+ *p++ = resp->status;
+ return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * Encode a reply consisting of the status word followed by the state.
+ * NOTE(review): both words are stored without htonl(); the only
+ * producer visible in this file, nsmsvc_proc_fail(), sets them to
+ * 0 and -1.
+ */
+static int
+nsmsvc_encode_stat_res(struct svc_rqst *rqstp, u32 *p, struct nsm_res *resp)
+{
+ *p++ = resp->status;
+ *p++ = resp->state;
+ return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * Placeholder type so that PROC() can form sizeof(struct nsm_void)
+ * for procedures declared with "void" arguments or results.
+ */
+struct nsm_void { int dummy; };
+
+/*
+ * Build one svc_procedure entry.
+ * name selects the handler nsmsvc_proc_<name>
+ * xargt selects the decoder nsmsvc_decode_<xargt>
+ * xrest selects the encoder nsmsvc_encode_<xrest>
+ * argt, rest select struct nsm_<argt> / struct nsm_<rest> for the
+ * argument and result sizes
+ * respsize is stored in pc_xdrressize
+ */
+#define PROC(name, xargt, xrest, argt, rest, respsize) \
+ { .pc_func = (svc_procfunc) nsmsvc_proc_##name, \
+ .pc_decode = (kxdrproc_t) nsmsvc_decode_##xargt, \
+ .pc_encode = (kxdrproc_t) nsmsvc_encode_##xrest, \
+ .pc_release = NULL, \
+ .pc_argsize = sizeof(struct nsm_##argt), \
+ .pc_ressize = sizeof(struct nsm_##rest), \
+ .pc_xdrressize = respsize, \
+ }
+
+/*
+ * NSM procedure table with seven entries. Entry 0 is the NULL
+ * procedure, entries 1-5 all go to nsmsvc_proc_fail, and entry 6
+ * handles notifications. Presumably indexed by RPC procedure number
+ * (entry 6 lines up with SM_NOTIFY == 6) -- confirm against
+ * sm_inter.h.
+ */
+struct svc_procedure nsmsvc_procedures[] = {
+ PROC(null, void, void, void, void, 1),
+ PROC(fail, void, stat_res, void, res, 2),
+ PROC(fail, void, stat_res, void, res, 2),
+ PROC(fail, void, res, void, res, 1),
+ PROC(fail, void, res, void, res, 1),
+ PROC(fail, void, res, void, res, 1),
+ PROC(notify, stat_chge, void, args, void, 1)
+};
Index: build/include/linux/lockd/lockd.h
===================================================================
--- build.orig/include/linux/lockd/lockd.h
+++ build/include/linux/lockd/lockd.h
@@ -141,6 +141,7 @@ extern struct svc_procedure nlmsvc_proce
#ifdef CONFIG_LOCKD_V4
extern struct svc_procedure nlmsvc_procedures4[];
#endif
+extern struct svc_procedure nsmsvc_procedures[];
extern int nlmsvc_grace_period;
extern unsigned long nlmsvc_timeout;
extern int nlm_max_hosts;

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys -- and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs