2004-08-26 17:21:35

by Paul Clements

[permalink] [raw]
Subject: [PATCH / RFC] nfs-utils: High Availability NFS

diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/support/include/ha-callout.h nfs-utils-1.0.6/support/include/ha-callout.h
--- nfs-utils-1.0.6-PRISTINE/support/include/ha-callout.h 1969-12-31 19:00:00.000000000 -0500
+++ nfs-utils-1.0.6/support/include/ha-callout.h 2004-08-26 10:37:53.000000000 -0400
@@ -0,0 +1,38 @@
+/*
+ * support/include/ha-callout.h
+ *
+ * High Availability NFS Callout support routines
+ *
+ * Copyright (c) 2004, Paul Clements, SteelEye Technology
+ *
+ * In order to implement HA NFS, we need several callouts at key
+ * points in statd and mountd. These callouts all come to ha_callout(),
+ * which, in turn, calls out to an ha-callout script (not part of nfs-utils;
+ * defined by -H argument to rpc.statd and rpc.mountd).
+ */
+#ifndef HA_CALLOUT_H
+#define HA_CALLOUT_H
+
+extern char *ha_callout_prog;
+
+static inline void
+ha_callout(char *event, char *arg1, char *arg2, int arg3)
+{
+ char buf[PATH_MAX]; /* should be plenty */
+ int ret;
+
+ if (!ha_callout_prog) /* HA callout is not enabled */
+ return;
+
+ sprintf(buf, "%s \"%s\" \"%s\" \"%s\" %.8x", ha_callout_prog,
+ event, arg1, arg2, arg3);
+ ret = system(buf);
+
+#ifdef dprintf
+ dprintf(N_DEBUG, "system call %s returned %d\n", buf, WEXITSTATUS(ret));
+#else
+ xlog(D_GENERAL, "system call %s returned %d\n", buf, WEXITSTATUS(ret));
+#endif
+}
+
+#endif
diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/utils/mountd/mountd.c nfs-utils-1.0.6/utils/mountd/mountd.c
--- nfs-utils-1.0.6-PRISTINE/utils/mountd/mountd.c 2004-08-17 11:01:14.000000000 -0400
+++ nfs-utils-1.0.6/utils/mountd/mountd.c 2004-08-26 10:40:25.000000000 -0400
@@ -36,6 +36,11 @@ static struct nfs_fh_len *get_rootfh(str

int new_cache = 0;

+/* PRC: a high-availability callout program can be specified with -H
+ * When this is done, the program will receive callouts whenever clients
+ * send mount or unmount requests -- the callout is not needed for 2.6 kernel */
+char *ha_callout_prog = NULL;
+
static struct option longopts[] =
{
{ "foreground", 0, 0, 'F' },
@@ -48,6 +53,7 @@ static struct option longopts[] =
{ "version", 0, 0, 'v' },
{ "port", 1, 0, 'p' },
{ "no-tcp", 0, 0, 'n' },
+ { "ha-callout", 1, 0, 'H' },
{ NULL, 0, 0, 0 }
};

@@ -444,7 +450,7 @@ main(int argc, char **argv)

/* Parse the command line options and arguments. */
opterr = 0;
- while ((c = getopt_long(argc, argv, "o:n:Fd:f:p:P:hN:V:v", longopts, NULL)) != EOF)
+ while ((c = getopt_long(argc, argv, "o:n:Fd:f:p:P:hH:N:V:v", longopts, NULL)) != EOF)
switch (c) {
case 'o':
descriptors = atoi(optarg);
@@ -463,6 +469,9 @@ main(int argc, char **argv)
case 'f':
export_file = optarg;
break;
+ case 'H': /* PRC: specify a high-availability callout program */
+ ha_callout_prog = optarg;
+ break;
case 'h':
usage(argv [0], 0);
break;
@@ -596,6 +605,7 @@ usage(const char *prog, int n)
"Usage: %s [-F|--foreground] [-h|--help] [-v|--version] [-d kind|--debug kind]\n"
" [-o num|--descriptors num] [-f exports-file|--exports-file=file]\n"
" [-p|--port port] [-V version|--nfs-version version]\n"
-" [-N version|--no-nfs-version version] [-n|--no-tcp]\n", prog);
+" [-N version|--no-nfs-version version] [-n|--no-tcp]\n"
+" [-H ha-callout-prog]\n", prog);
exit(n);
}
diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/utils/mountd/rmtab.c nfs-utils-1.0.6/utils/mountd/rmtab.c
--- nfs-utils-1.0.6-PRISTINE/utils/mountd/rmtab.c 2003-07-31 01:19:26.000000000 -0400
+++ nfs-utils-1.0.6/utils/mountd/rmtab.c 2004-08-25 15:21:53.000000000 -0400
@@ -19,6 +19,7 @@
#include "exportfs.h"
#include "xio.h"
#include "mountd.h"
+#include "ha-callout.h"

#include <limits.h> /* PATH_MAX */

@@ -61,6 +62,8 @@ mountlist_add(char *host, const char *pa
host) == 0
&& strcmp(rep->r_path, path) == 0) {
rep->r_count++;
+ /* PRC: do the HA callout: */
+ ha_callout("mount", rep->r_client, rep->r_path, rep->r_count);
putrmtabent(rep, &pos);
endrmtabent();
xfunlock(lockid);
@@ -75,6 +78,8 @@ mountlist_add(char *host, const char *pa
xe.r_path [sizeof (xe.r_path) - 1] = '\0';
xe.r_count = 1;
if (setrmtabent("a")) {
+ /* PRC: do the HA callout: */
+ ha_callout("mount", xe.r_client, xe.r_path, xe.r_count);
putrmtabent(&xe, NULL);
endrmtabent();
}
@@ -103,8 +108,11 @@ mountlist_del(char *hname, const char *p
while ((rep = getrmtabent(1, NULL)) != NULL) {
match = !strcmp (rep->r_client, hname)
&& !strcmp(rep->r_path, path);
- if (match)
+ if (match) {
rep->r_count--;
+ /* PRC: do the HA callout: */
+ ha_callout("umount", rep->r_client, rep->r_path, rep->r_count);
+ }
if (!match || rep->r_count)
fputrmtabent(fp, rep, NULL);
}
Binary files nfs-utils-1.0.6-PRISTINE/utils/nfsd/nfsd and nfs-utils-1.0.6/utils/nfsd/nfsd differ
Binary files nfs-utils-1.0.6-PRISTINE/utils/nfsstat/nfsstat and nfs-utils-1.0.6/utils/nfsstat/nfsstat differ
diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/utils/statd/monitor.c nfs-utils-1.0.6/utils/statd/monitor.c
--- nfs-utils-1.0.6-PRISTINE/utils/statd/monitor.c 2004-08-17 11:01:14.000000000 -0400
+++ nfs-utils-1.0.6/utils/statd/monitor.c 2004-08-26 09:40:10.000000000 -0400
@@ -19,6 +19,7 @@
#include "misc.h"
#include "statd.h"
#include "notlist.h"
+#include "ha-callout.h"

notify_list * rtnl = NULL; /* Run-time notify list. */

@@ -177,6 +178,8 @@ sm_mon_1_svc(struct mon *argp, struct sv
goto failure;
}
free(path);
+ /* PRC: do the HA callout: */
+ ha_callout("add-client", mon_name, my_name, 0);
nlist_insert(&rtnl, clnt);
close(fd);

@@ -232,6 +235,10 @@ sm_unmon_1_svc(struct mon_id *argp, stru
/* Match! */
dprintf(N_DEBUG, "UNMONITORING %s for %s",
mon_name, my_name);
+
+ /* PRC: do the HA callout: */
+ ha_callout("del-client", mon_name, my_name, 0);
+
nlist_free(&rtnl, clnt);
/* Do not unlink the monitor file. There are
* cases when a lock is cleared locally on the
@@ -287,6 +294,8 @@ sm_unmon_all_1_svc(struct my_id *argp, s
sizeof (mon_name) - 1);
mon_name[sizeof (mon_name) - 1] = '\0';
temp = NL_NEXT(clnt);
+ /* PRC: do the HA callout: */
+ ha_callout("del-client", mon_name, argp->my_name, 0);
nlist_free(&rtnl, clnt);
xunlink(SM_DIR, mon_name, 1);
++count;
diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/utils/statd/rmtcall.c nfs-utils-1.0.6/utils/statd/rmtcall.c
--- nfs-utils-1.0.6-PRISTINE/utils/statd/rmtcall.c 2003-09-12 01:41:38.000000000 -0400
+++ nfs-utils-1.0.6/utils/statd/rmtcall.c 2004-08-25 14:54:00.000000000 -0400
@@ -38,6 +38,7 @@
#include "statd.h"
#include "notlist.h"
#include "log.h"
+#include "ha-callout.h"

#define MAXMSGSIZE (2048 / sizeof(unsigned int))

@@ -414,6 +415,8 @@ process_notify_list(void)
note(N_ERROR,
"Can't notify %s, giving up.",
NL_MON_NAME(entry));
+ /* PRC: do the HA callout */
+ ha_callout("del-client", NL_MY_NAME(entry), NL_MON_NAME(entry), 0);
xunlink(SM_BAK_DIR, NL_MON_NAME(entry), 0);
nlist_free(&notify, entry);
}
diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/utils/statd/statd.c nfs-utils-1.0.6/utils/statd/statd.c
--- nfs-utils-1.0.6-PRISTINE/utils/statd/statd.c 2003-09-12 02:24:29.000000000 -0400
+++ nfs-utils-1.0.6/utils/statd/statd.c 2004-08-25 13:29:08.000000000 -0400
@@ -48,6 +48,11 @@ int run_mode = 0; /* foreground logging
char *name_p = NULL;
char *version_p = NULL;

+/* PRC: a high-availability callout program can be specified with -H
+ * When this is done, the program will receive callouts whenever clients
+ * are added or deleted to the notify list */
+char *ha_callout_prog = NULL;
+
static struct option longopts[] =
{
{ "foreground", 0, 0, 'F' },
@@ -59,6 +64,7 @@ static struct option longopts[] =
{ "name", 1, 0, 'n' },
{ "state-directory-path", 1, 0, 'P' },
{ "notify-mode", 0, 0, 'N' },
+ { "ha-callout", 1, 0, 'H' },
{ NULL, 0, 0, 0 }
};

@@ -102,6 +108,13 @@ killer (int sig)
exit (0);
}

+static void
+sigusr (int sig)
+{
+ dprintf (N_DEBUG, "Caught signal %d, re-reading notify list.", sig);
+ notify_hosts();
+}
+
/*
* Startup information.
*/
@@ -148,6 +161,7 @@ usage()
fprintf(stderr," -n, --name Specify a local hostname.\n");
fprintf(stderr," -P State directory path.\n");
fprintf(stderr," -N Run in notify only mode.\n");
+ fprintf(stderr," -H Specify a high-availability callout program.\n");
}

static const char *pidfile = "/var/run/rpc.statd.pid";
@@ -236,7 +250,7 @@ int main (int argc, char **argv)
MY_NAME = NULL;

/* Process command line switches */
- while ((arg = getopt_long(argc, argv, "h?vVFNdn:p:o:P:", longopts, NULL)) != EOF) {
+ while ((arg = getopt_long(argc, argv, "h?vVFNH:dn:p:o:P:", longopts, NULL)) != EOF) {
switch (arg) {
case 'V': /* Version */
case 'v':
@@ -302,6 +316,13 @@ int main (int argc, char **argv)
sprintf(SM_STAT_PATH, "%s/state", DIR_BASE );
}
break;
+ case 'H': /* PRC: specify the ha-callout program */
+ if ((ha_callout_prog = xstrdup(optarg)) == NULL) {
+ fprintf(stderr, "%s: xstrdup(%s) failed!\n",
+ argv[0], optarg);
+ exit(1);
+ }
+ break;
case '?': /* heeeeeelllllllpppp? heh */
case 'h':
usage();
@@ -397,6 +418,8 @@ int main (int argc, char **argv)
signal (SIGHUP, killer);
signal (SIGINT, killer);
signal (SIGTERM, killer);
+ /* PRC: trap SIGUSR1 to re-read notify list from disk */
+ signal(SIGUSR1, sigusr);
/* WARNING: the following works on Linux and SysV, but not BSD! */
signal(SIGCHLD, SIG_IGN);


Attachments:
nfs_utils_ha_callout.diff (9.58 kB)

2004-08-26 18:43:10

by Paul Clements

[permalink] [raw]
Subject: Re: [PATCH / RFC] nfs-utils: High Availability NFS

diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/support/include/ha-callout.h nfs-utils-1.0.6/support/include/ha-callout.h
--- nfs-utils-1.0.6-PRISTINE/support/include/ha-callout.h 1969-12-31 19:00:00.000000000 -0500
+++ nfs-utils-1.0.6/support/include/ha-callout.h 2004-08-26 11:32:18.000000000 -0400
@@ -0,0 +1,38 @@
+/*
+ * support/include/ha-callout.h
+ *
+ * High Availability NFS Callout support routines
+ *
+ * Copyright (c) 2004, Paul Clements, SteelEye Technology
+ *
+ * In order to implement HA NFS, we need several callouts at key
+ * points in statd and mountd. These callouts all come to ha_callout(),
+ * which, in turn, calls out to an ha-callout script (not part of nfs-utils;
+ * defined by -H argument to rpc.statd and rpc.mountd).
+ */
+#ifndef HA_CALLOUT_H
+#define HA_CALLOUT_H
+
+extern char *ha_callout_prog;
+
+static inline void
+ha_callout(char *event, char *arg1, char *arg2, int arg3)
+{
+ char buf[PATH_MAX]; /* should be plenty */
+ int ret;
+
+ if (!ha_callout_prog) /* HA callout is not enabled */
+ return;
+
+ sprintf(buf, "%s \"%s\" \"%s\" \"%s\" %.8x", ha_callout_prog,
+ event, arg1, arg2, arg3);
+ ret = system(buf);
+
+#ifdef dprintf
+ dprintf(N_DEBUG, "system call %s returned %d\n", buf, WEXITSTATUS(ret));
+#else
+ xlog(D_GENERAL, "system call %s returned %d\n", buf, WEXITSTATUS(ret));
+#endif
+}
+
+#endif
diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/utils/mountd/mountd.c nfs-utils-1.0.6/utils/mountd/mountd.c
--- nfs-utils-1.0.6-PRISTINE/utils/mountd/mountd.c 2003-09-12 18:14:16.000000000 -0400
+++ nfs-utils-1.0.6/utils/mountd/mountd.c 2004-08-26 11:32:18.000000000 -0400
@@ -36,6 +36,11 @@ static struct nfs_fh_len *get_rootfh(str

int new_cache = 0;

+/* PRC: a high-availability callout program can be specified with -H
+ * When this is done, the program will receive callouts whenever clients
+ * send mount or unmount requests -- the callout is not needed for 2.6 kernel */
+char *ha_callout_prog = NULL;
+
static struct option longopts[] =
{
{ "foreground", 0, 0, 'F' },
@@ -48,6 +53,7 @@ static struct option longopts[] =
{ "version", 0, 0, 'v' },
{ "port", 1, 0, 'p' },
{ "no-tcp", 0, 0, 'n' },
+ { "ha-callout", 1, 0, 'H' },
{ NULL, 0, 0, 0 }
};

@@ -444,7 +450,7 @@ main(int argc, char **argv)

/* Parse the command line options and arguments. */
opterr = 0;
- while ((c = getopt_long(argc, argv, "o:n:Fd:f:p:P:hN:V:v", longopts, NULL)) != EOF)
+ while ((c = getopt_long(argc, argv, "o:n:Fd:f:p:P:hH:N:V:v", longopts, NULL)) != EOF)
switch (c) {
case 'o':
descriptors = atoi(optarg);
@@ -463,6 +469,9 @@ main(int argc, char **argv)
case 'f':
export_file = optarg;
break;
+ case 'H': /* PRC: specify a high-availability callout program */
+ ha_callout_prog = optarg;
+ break;
case 'h':
usage(argv [0], 0);
break;
@@ -596,6 +605,7 @@ usage(const char *prog, int n)
"Usage: %s [-F|--foreground] [-h|--help] [-v|--version] [-d kind|--debug kind]\n"
" [-o num|--descriptors num] [-f exports-file|--exports-file=file]\n"
" [-p|--port port] [-V version|--nfs-version version]\n"
-" [-N version|--no-nfs-version version] [-n|--no-tcp]\n", prog);
+" [-N version|--no-nfs-version version] [-n|--no-tcp]\n"
+" [-H ha-callout-prog]\n", prog);
exit(n);
}
diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/utils/mountd/rmtab.c nfs-utils-1.0.6/utils/mountd/rmtab.c
--- nfs-utils-1.0.6-PRISTINE/utils/mountd/rmtab.c 2003-07-31 01:19:26.000000000 -0400
+++ nfs-utils-1.0.6/utils/mountd/rmtab.c 2004-08-26 11:32:18.000000000 -0400
@@ -19,6 +19,7 @@
#include "exportfs.h"
#include "xio.h"
#include "mountd.h"
+#include "ha-callout.h"

#include <limits.h> /* PATH_MAX */

@@ -61,6 +62,8 @@ mountlist_add(char *host, const char *pa
host) == 0
&& strcmp(rep->r_path, path) == 0) {
rep->r_count++;
+ /* PRC: do the HA callout: */
+ ha_callout("mount", rep->r_client, rep->r_path, rep->r_count);
putrmtabent(rep, &pos);
endrmtabent();
xfunlock(lockid);
@@ -75,6 +78,8 @@ mountlist_add(char *host, const char *pa
xe.r_path [sizeof (xe.r_path) - 1] = '\0';
xe.r_count = 1;
if (setrmtabent("a")) {
+ /* PRC: do the HA callout: */
+ ha_callout("mount", xe.r_client, xe.r_path, xe.r_count);
putrmtabent(&xe, NULL);
endrmtabent();
}
@@ -103,8 +108,11 @@ mountlist_del(char *hname, const char *p
while ((rep = getrmtabent(1, NULL)) != NULL) {
match = !strcmp (rep->r_client, hname)
&& !strcmp(rep->r_path, path);
- if (match)
+ if (match) {
rep->r_count--;
+ /* PRC: do the HA callout: */
+ ha_callout("umount", rep->r_client, rep->r_path, rep->r_count);
+ }
if (!match || rep->r_count)
fputrmtabent(fp, rep, NULL);
}
diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/utils/statd/monitor.c nfs-utils-1.0.6/utils/statd/monitor.c
--- nfs-utils-1.0.6-PRISTINE/utils/statd/monitor.c 2003-09-12 01:41:35.000000000 -0400
+++ nfs-utils-1.0.6/utils/statd/monitor.c 2004-08-26 11:32:18.000000000 -0400
@@ -19,6 +19,7 @@
#include "misc.h"
#include "statd.h"
#include "notlist.h"
+#include "ha-callout.h"

notify_list * rtnl = NULL; /* Run-time notify list. */

@@ -177,6 +178,8 @@ sm_mon_1_svc(struct mon *argp, struct sv
goto failure;
}
free(path);
+ /* PRC: do the HA callout: */
+ ha_callout("add-client", mon_name, my_name, 0);
nlist_insert(&rtnl, clnt);
close(fd);

@@ -232,6 +235,10 @@ sm_unmon_1_svc(struct mon_id *argp, stru
/* Match! */
dprintf(N_DEBUG, "UNMONITORING %s for %s",
mon_name, my_name);
+
+ /* PRC: do the HA callout: */
+ ha_callout("del-client", mon_name, my_name, 0);
+
nlist_free(&rtnl, clnt);
xunlink(SM_DIR, mon_name, 1);

@@ -276,6 +283,8 @@ sm_unmon_all_1_svc(struct my_id *argp, s
sizeof (mon_name) - 1);
mon_name[sizeof (mon_name) - 1] = '\0';
temp = NL_NEXT(clnt);
+ /* PRC: do the HA callout: */
+ ha_callout("del-client", mon_name, argp->my_name, 0);
nlist_free(&rtnl, clnt);
xunlink(SM_DIR, mon_name, 1);
++count;
diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/utils/statd/rmtcall.c nfs-utils-1.0.6/utils/statd/rmtcall.c
--- nfs-utils-1.0.6-PRISTINE/utils/statd/rmtcall.c 2003-09-12 01:41:38.000000000 -0400
+++ nfs-utils-1.0.6/utils/statd/rmtcall.c 2004-08-26 13:11:41.000000000 -0400
@@ -38,6 +38,7 @@
#include "statd.h"
#include "notlist.h"
#include "log.h"
+#include "ha-callout.h"

#define MAXMSGSIZE (2048 / sizeof(unsigned int))

@@ -414,6 +415,8 @@ process_notify_list(void)
note(N_ERROR,
"Can't notify %s, giving up.",
NL_MON_NAME(entry));
+ /* PRC: do the HA callout */
+ ha_callout("del-client", NL_MON_NAME(entry), NL_MY_NAME(entry), 0);
xunlink(SM_BAK_DIR, NL_MON_NAME(entry), 0);
nlist_free(&notify, entry);
}
diff -purN --exclude-from /export/public/clemep/tmp/dontdiff nfs-utils-1.0.6-PRISTINE/utils/statd/statd.c nfs-utils-1.0.6/utils/statd/statd.c
--- nfs-utils-1.0.6-PRISTINE/utils/statd/statd.c 2003-09-12 02:24:29.000000000 -0400
+++ nfs-utils-1.0.6/utils/statd/statd.c 2004-08-26 11:32:18.000000000 -0400
@@ -48,6 +48,11 @@ int run_mode = 0; /* foreground logging
char *name_p = NULL;
char *version_p = NULL;

+/* PRC: a high-availability callout program can be specified with -H
+ * When this is done, the program will receive callouts whenever clients
+ * are added or deleted to the notify list */
+char *ha_callout_prog = NULL;
+
static struct option longopts[] =
{
{ "foreground", 0, 0, 'F' },
@@ -59,6 +64,7 @@ static struct option longopts[] =
{ "name", 1, 0, 'n' },
{ "state-directory-path", 1, 0, 'P' },
{ "notify-mode", 0, 0, 'N' },
+ { "ha-callout", 1, 0, 'H' },
{ NULL, 0, 0, 0 }
};

@@ -102,6 +108,13 @@ killer (int sig)
exit (0);
}

+static void
+sigusr (int sig)
+{
+ dprintf (N_DEBUG, "Caught signal %d, re-reading notify list.", sig);
+ notify_hosts();
+}
+
/*
* Startup information.
*/
@@ -148,6 +161,7 @@ usage()
fprintf(stderr," -n, --name Specify a local hostname.\n");
fprintf(stderr," -P State directory path.\n");
fprintf(stderr," -N Run in notify only mode.\n");
+ fprintf(stderr," -H Specify a high-availability callout program.\n");
}

static const char *pidfile = "/var/run/rpc.statd.pid";
@@ -236,7 +250,7 @@ int main (int argc, char **argv)
MY_NAME = NULL;

/* Process command line switches */
- while ((arg = getopt_long(argc, argv, "h?vVFNdn:p:o:P:", longopts, NULL)) != EOF) {
+ while ((arg = getopt_long(argc, argv, "h?vVFNH:dn:p:o:P:", longopts, NULL)) != EOF) {
switch (arg) {
case 'V': /* Version */
case 'v':
@@ -302,6 +316,13 @@ int main (int argc, char **argv)
sprintf(SM_STAT_PATH, "%s/state", DIR_BASE );
}
break;
+ case 'H': /* PRC: specify the ha-callout program */
+ if ((ha_callout_prog = xstrdup(optarg)) == NULL) {
+ fprintf(stderr, "%s: xstrdup(%s) failed!\n",
+ argv[0], optarg);
+ exit(1);
+ }
+ break;
case '?': /* heeeeeelllllllpppp? heh */
case 'h':
usage();
@@ -397,6 +418,8 @@ int main (int argc, char **argv)
signal (SIGHUP, killer);
signal (SIGINT, killer);
signal (SIGTERM, killer);
+ /* PRC: trap SIGUSR1 to re-read notify list from disk */
+ signal(SIGUSR1, sigusr);
/* WARNING: the following works on Linux and SysV, but not BSD! */
signal(SIGCHLD, SIG_IGN);


Attachments:
nfs_utils_ha_callout-2.diff (9.31 kB)

2004-08-27 07:36:40

by Olaf Kirch

[permalink] [raw]
Subject: Re: [PATCH / RFC] nfs-utils: High Availability NFS

Hi,

I'm working on putting statd into the kernel, so I'm necessarily not
too enthusiastic about adding external callouts to statd :)

On Thu, Aug 26, 2004 at 01:21:20PM -0400, Paul Clements wrote:
> statd events that trigger a callout:
> -----------------------------------
> add client to notify list (SM_MON) - triggers "add-client" callout
>
> delete client from notify list (SM_UNMON and SM_UNMONALL) - triggers
> "del-client" callout

I think the same could be achieved by attaching a dnotify handler to
the directory, and it would be compatible with a kernel level statd.

Cheers
Olaf
--
Olaf Kirch | The Hardware Gods hate me.
[email protected] |
---------------+


-------------------------------------------------------
SF.Net email is sponsored by Shop4tech.com-Lowest price on Blank Media
100pk Sonic DVD-R 4x for only $29 -100pk Sonic DVD+R for only $33
Save 50% off Retail on Ink & Toner - Free Shipping and Free Gift.
http://www.shop4tech.com/z/Inkjet_Cartridges/9_108_r285
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-08-27 13:13:23

by Paul Clements

[permalink] [raw]
Subject: Re: [PATCH / RFC] nfs-utils: High Availability NFS

Hi,


Olaf Kirch wrote:

> I'm working on putting statd into the kernel, so I'm necessarily not
> too enthusiastic about adding external callouts to statd :)

With so many things moving out of the kernel to userland, it's
surprising to see statd moving into the kernel. And it certainly does
make modifications, such as this, much more difficult.

> On Thu, Aug 26, 2004 at 01:21:20PM -0400, Paul Clements wrote:
>
>>statd events that trigger a callout:
>>-----------------------------------
>>add client to notify list (SM_MON) - triggers "add-client" callout
>>
>>delete client from notify list (SM_UNMON and SM_UNMONALL) - triggers
>>"del-client" callout
>
>
> I think the same could be achieved by attaching a dnotify handler to
> the directory, and it would be compatible with a kernel level statd.

But the callout happens before statd replies to the client, so there's
no chance that we lose any clients if there's a failure. If we use
dnotify then we only find out there's been a change after the change has
occurred, which means the client may have already gotten a reply saying
he's been added to the notify list. If we get a failure at this point,
we've lost a client, and upon failover the client's locks are no longer
valid.

--
Paul


-------------------------------------------------------
SF.Net email is sponsored by Shop4tech.com-Lowest price on Blank Media
100pk Sonic DVD-R 4x for only $29 -100pk Sonic DVD+R for only $33
Save 50% off Retail on Ink & Toner - Free Shipping and Free Gift.
http://www.shop4tech.com/z/Inkjet_Cartridges/9_108_r285
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-08-30 08:25:17

by Olaf Kirch

[permalink] [raw]
Subject: Re: [PATCH / RFC] nfs-utils: High Availability NFS

On Fri, Aug 27, 2004 at 09:13:06AM -0400, Paul Clements wrote:
> With so many things moving out of the kernel to userland, it's
> surprising to see statd moving into the kernel. And it certainly does
> make modifications, such as this, much more difficult.

Well, the issue with statd is that it exists _only_ to make lockd happy.
But what it really does is make lockd awfully complicated because we
have to do upcalls all the time.

So my rationale for moving statd into the kernel is to actually eliminate
a lot of code and have a minimal set of functionality in there that does
exactly what lockd needs, no more. In particular, no more SM_MON and
SM_UNMON support that allows untrusted hosts to do sneaky stuff - instead,
lockd writes the /var/lib/nfs/sm files directly. The only thing my kernel
statd supports is SM_NOTIFY, and that even is delivered directly to the
locking code.

> But the callout happens before statd replies to the client, so there's
> no chance that we lose any clients if there's a failure. If we use
> dnotify then we only find out there's been a change after the change has
> occurred, which means the client may have already gotten a reply saying
> he's been added to the notify list. If we get a failure at this point,
> we've lost a client, and upon failover the client's locks are no longer
> valid.

Doesn't a HA NFS deployment require a shared disk anyway? Why not just
move /var/lib/nfs to this file system so it gets shared the same way you
share the rest of your data?

Olaf
--
Olaf Kirch | The Hardware Gods hate me.
[email protected] |
---------------+


-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-08-30 10:20:23

by Greg Banks

[permalink] [raw]
Subject: Re: [PATCH / RFC] nfs-utils: High Availability NFS

On Mon, Aug 30, 2004 at 10:25:11AM +0200, Olaf Kirch wrote:
> On Fri, Aug 27, 2004 at 09:13:06AM -0400, Paul Clements wrote:
> > With so many things moving out of the kernel to userland, it's
> > surprising to see statd moving into the kernel. And it certainly does
> > make modifications, such as this, much more difficult.
>
> Well, the issue with statd is that it exists _only_ to make lockd happy.
> But what it really does is make lockd awfully complicated because we
> have to do upcalls all the time.

How is that any different from the other rpc calls lockd has to do?

> So my rationale for moving statd into the kernel is to actually eliminate
> a lot of code and have a minimal set of functionality in there that does
> exactly what lockd needs, no more. In particular, no more SM_MON and
> SM_UNMON support that allows untrusted hosts to do sneaky stuff - instead,

Didn't you fix that for CERT CA-99.05? If configured correctly, statd
rejects calls not from localhost. Probably it should also reject calls
not from a privileged port.

> Doesn't a HA NFS deployment require a shared disk anyway?

Shared data disks, not a shared root disk.

> Why not just
> move /var/lib/nfs to this file system so it gets shared the same way you
> share the rest of your data?

This would have to be done manually on a per-site basis because you
can't predict where the mountpoint of any particular shared disk will
be when NFS starts. So packaging an HA solution based on that
arrangement would be a mite tricky.

Greg.
--
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
I don't speak for SGI.


-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-08-30 15:06:49

by Paul Clements

[permalink] [raw]
Subject: Re: [PATCH / RFC] nfs-utils: High Availability NFS

Greg Banks wrote:
> On Mon, Aug 30, 2004 at 10:25:11AM +0200, Olaf Kirch wrote:

>>Doesn't a HA NFS deployment require a shared disk anyway?
>
>
> Shared data disks, not a shared root disk.
>
>
>>Why not just
>>move /var/lib/nfs to this file system so it gets shared the same way you
>>share the rest of your data?
>
>
> This would have to be done manually on a per-site basis because you
> can't predict where the mountpoint of any particular shared disk will
> be when NFS starts. So packaging an HA solution based on that
> arrangement would be a mite tricky.

You also cannot support active/active configurations when /var/lib/nfs
is linked (or mounted) to a shared location. That is our primary reason
for needing the callouts. Specifically, in order to support
active/active configurations, either you've got to broadcast the
information to all cluster nodes or you've got to "shadow" the
/var/lib/nfs contents on shared storage.

--
Paul


-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-09-03 07:28:56

by Kedar Sovani

[permalink] [raw]
Subject: Re: [PATCH / RFC] nfs-utils: High Availability NFS

NFS Server keeps state about the recent responses given to the clients,
in a reply cache.

Shouldn't that state be replicated to the failed-over NFS server as well
?

Kedar.

On Mon, 2004-08-30 at 13:55, Olaf Kirch wrote:
> On Fri, Aug 27, 2004 at 09:13:06AM -0400, Paul Clements wrote:
> > With so many things moving out of the kernel to userland, it's
> > surprising to see statd moving into the kernel. And it certainly does
> > make modifications, such as this, much more difficult.
>
> Well, the issue with statd is that it exists _only_ to make lockd happy.
> But what it really does is make lockd awfully complicated because we
> have to do upcalls all the time.
>
> So my rationale for moving statd into the kernel is to actually eliminate
> a lot of code and have a minimal set of functionality in there that does
> exactly what lockd needs, no more. In particular, no more SM_MON and
> SM_UNMON support that allows untrusted hosts to do sneaky stuff - instead,
> lockd writes the /var/lib/nfs/sm files directly. The only thing my kernel
> statd supports is SM_NOTIFY, and that even is delivered directly to the
> locking code.
>
> > But the callout happens before statd replies to the client, so there's
> > no chance that we lose any clients if there's a failure. If we use
> > dnotify then we only find out there's been a change after the change has
> > occurred, which means the client may have already gotten a reply saying
> > he's been added to the notify list. If we get a failure at this point,
> > we've lost a client, and upon failover the client's locks are no longer
> > valid.
>
> Doesn't a HA NFS deployment require a shared disk anyway? Why not just
> move /var/lib/nfs to this file system so it gets shared the same way you
> share the rest of your data?
>
> Olaf




-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-09-06 01:47:25

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH / RFC] nfs-utils: High Availability NFS

On September 3, [email protected] wrote:
> NFS Server keeps state about the recent responses given to the clients,
> in a reply cache.
>
> Shouldn't that state be replicated to the failed-over NFS server as well
> ?

The reply cache is an optimisation and cannot be relied upon. Copying
it across in a fail-over would be both very expensive (it is highly
volatile) and of little benefit.

NeilBrown


-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-09-06 02:17:46

by NeilBrown

[permalink] [raw]
Subject: Re: Re: [PATCH / RFC] nfs-utils: High Availability NFS

On Tuesday August 31, [email protected] wrote:
> Neil Brown wrote:
> > On Tuesday August 31, [email protected] wrote:
>
> > I added a bit to the documentation to describe the arguments passed to
> > the callout.
>
> Those look good to me.
>
>
> > Note that for statd, I dropped the third arg being passed. It seemed
> > easier than explaining in the doco why there is a fourth arg.
> > Also note that I don't remember what "my_name" is all about. Could
> > you provide a small piece of text to go there.
>
> The my_name argument is the server name, but honestly, I don't know that
> we need anything other than the client name (we really just need to know
> what files to create in the sm/sm.bak directories) so if you want to
> just drop that argument it's fine with me.
>
>
> > Finally, would you consider having "unmount" instead of "umount" as a
> > possible first arg for the mountd callout?
>
> unmount instead of umount is fine if you want to go ahead and make that
> change...
>
> Thanks,
> Paul

Ok, I have made these changes, committed them, and tagged them with
nfs-utils-1-0-6-post3.

I'm not sure if I mentioned, but I changed the SIGHUP handling so that
instead of doing the work in the signal handler, I set a flag and do
the work in the main loop (svc_run). This avoids some races.

NeilBrown


-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-09-06 12:35:06

by Kedar Sovani

[permalink] [raw]
Subject: Re: [PATCH / RFC] nfs-utils: High Availability NFS

On Mon, 2004-09-06 at 07:17, Neil Brown wrote:
> On September 3, [email protected] wrote:
> > NFS Server keeps state about the recent responses given to the clients,
> > in a reply cache.
> >
> > Shouldn't that state be replicated to the failed-over NFS server as well
> > ?
>
> The reply cache is an optimisation and cannot be relied upon. Copying
> it across in a fail-over would be both very expensive (it is highly
> volatile) and of little benefit.
>
> NeilBrown

Yes, migrating it across server would be very expensive.

But isn't reply cache for correctness (idempotency) as against
optimisation ?
E.g.
1. NFS Client sends "create" (file) to the server.
2. Server creates the file and sends a "success" response.
3. The response is lost.
4. The Client retries the "create"
5. and the NFS Server responds from the _reply cache_ stating that the
result is true. (as the reply was cached in step 2)

Now if the Server crashes after the 3rd step, and the failed over server
starts processing requests, it will respond to the step 4 above with a
FILE EXISTS instead of success, which I think, is incorrect.

Am I missing something ?
Or perhaps such scenarios rarely occur and, if they do, are not
very significant, I suppose.

I guess this is what you meant by "of little benefit" above.

thanks,
Kedar.




-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-09-06 15:42:39

by Paul Clements

[permalink] [raw]
Subject: Re: Re: [PATCH / RFC] nfs-utils: High Availability NFS

Neil Brown wrote:

> Ok, I have made these changes, committed them, and tagged them with
> nfs-utils-1-0-6-post3.

Thanks Neil. I'll take a look tomorrow.

> I'm not sure if I mentioned, but I changed the SIGHUP handling so that
> instead of doing the work in the signal handler, I set a flag and do
> the work in the main loop (svc_run). This avoids some races.

That sounds fine.

--
Paul


-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=5047&alloc_id=10808&op=click
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2004-10-25 23:47:58

by Paul Clements

[permalink] [raw]
Subject: [PATCH] nfs-utils: High Availability NFS bugfixes

diff -pur nfs-utils-1.0.6/support/include/ha-callout.h nfs-utils-1.0.6-ha_bugfix/support/include/ha-callout.h
--- nfs-utils-1.0.6/support/include/ha-callout.h 2004-10-25 12:09:37.000000000 -0400
+++ nfs-utils-1.0.6-ha_bugfix/support/include/ha-callout.h 2004-10-25 12:17:53.000000000 -0400
@@ -14,6 +14,7 @@
#define HA_CALLOUT_H

#include <sys/wait.h>
+#include <signal.h>

extern char *ha_callout_prog;

@@ -23,12 +24,18 @@ ha_callout(char *event, char *arg1, char
char buf[16]; /* should be plenty */
pid_t pid;
int ret = -1;
+ struct sigaction oldact, newact;

if (!ha_callout_prog) /* HA callout is not enabled */
return;

sprintf(buf, "%d", arg3);

+ newact.sa_handler = SIG_DFL;
+ newact.sa_flags = 0;
+ sigemptyset(&newact.sa_mask);
+ sigaction(SIGCHLD, &newact, &oldact);
+
pid = fork();
switch (pid) {
case 0: execl(ha_callout_prog, ha_callout_prog,
@@ -39,7 +46,8 @@ ha_callout(char *event, char *arg1, char
exit(2);
case -1: perror("fork");
break;
- default: ret = waitpid(pid, NULL, 0);
+ default: pid = waitpid(pid, &ret, 0);
+ sigaction(SIGCHLD, &oldact, &newact);
}

#ifdef dprintf
diff -pur nfs-utils-1.0.6/utils/statd/statd.c nfs-utils-1.0.6-ha_bugfix/utils/statd/statd.c
--- nfs-utils-1.0.6/utils/statd/statd.c 2004-10-25 12:09:37.000000000 -0400
+++ nfs-utils-1.0.6-ha_bugfix/utils/statd/statd.c 2004-10-25 12:17:53.000000000 -0400
@@ -111,7 +111,8 @@ killer (int sig)
static void
sigusr (int sig)
{
- dprintf (N_DEBUG, "Caught signal %d, re-reading notify list.", sig);
+ dprintf (N_DEBUG, "Caught signal %d, re-notifying (state %d).", sig,
+ MY_STATE);
re_notify = 1;
}

Only in nfs-utils-1.0.6-ha_bugfix/utils/statd: statd.c.orig
diff -pur nfs-utils-1.0.6/utils/statd/svc_run.c nfs-utils-1.0.6-ha_bugfix/utils/statd/svc_run.c
--- nfs-utils-1.0.6/utils/statd/svc_run.c 2004-10-25 12:09:37.000000000 -0400
+++ nfs-utils-1.0.6-ha_bugfix/utils/statd/svc_run.c 2004-10-25 12:17:53.000000000 -0400
@@ -88,6 +88,9 @@ my_svc_run(void)
if (svc_stop)
return;
if (re_notify) {
+ change_state();
+ dprintf(N_DEBUG, "Notifying...(new state %d)",
+ MY_STATE);
notify_hosts();
re_notify = 0;
}


Attachments:
nfs-utils-1.0.6-ha_bugfix.diff (2.15 kB)